diff --git a/.env.template b/.env.template
new file mode 100644
index 00000000..adcc8e7e
--- /dev/null
+++ b/.env.template
@@ -0,0 +1,248 @@
+# ============================================================================
+# Jan Server Environment Configuration Template
+# ============================================================================
+# Copy this file to .env and customize for your environment
+# 
+# Quick Start:
+#   make env-create              # Creates .env from this template
+#   make env-switch ENV=hybrid   # Switch to hybrid development mode
+#
+# Available Environments:
+#   - development  (all services in Docker)      - config/development.env
+#   - hybrid       (native services + Docker)    - config/hybrid.env
+#   - testing      (integration testing)         - config/testing.env
+#   - production   (production deployment)       - config/production.env.example
+#
+# Documentation: See config/README.md for detailed environment guide
+# ============================================================================
+
+# ============================================================================
+# Docker Compose Configuration
+# ============================================================================
+# Profile selection for docker-compose services
+# Profiles are comma-separated and control which services start:
+#   infra - Infrastructure (PostgreSQL, Keycloak, Kong) - always needed
+#   api   - API services (llm-api, media-api, response-api) - always needed
+#   mcp   - MCP Tools and Vector Store - always included
+#   full  - Includes vLLM GPU inference (for local GPU provider)
+# 
+# Examples:
+#   COMPOSE_PROFILES=infra,api,mcp,full  - Local vLLM (full setup with GPU)
+#   COMPOSE_PROFILES=infra,api,mcp       - Remote API provider (no vLLM)
+COMPOSE_PROFILES=infra,api,mcp,full
+
+# ============================================================================
+# REQUIRED: Secrets & API Keys
+# ============================================================================
+# You MUST set these values before running the services
+
+# HuggingFace API token (required for model downloads). The local vLLM provider needs this token.
+# If you plan to use a remote provider instead, comment this out and edit
+# services/llm-api/config/providers.yml to point at the provider you want.
+# Get from: https://huggingface.co/settings/tokens
+HF_TOKEN=your_huggingface_token_here
+
+# Serper API key (required for MCP google_search tool)
+# Get from: https://serper.dev
+SERPER_API_KEY=your_serper_api_key_here
+
+# Security secrets (CHANGE THESE in production!)
+POSTGRES_PASSWORD=jan_password
+KEYCLOAK_ADMIN_PASSWORD=admin
+BACKEND_CLIENT_SECRET=backend-secret
+MODEL_PROVIDER_SECRET=jan-model-provider-secret-2024
+VLLM_INTERNAL_KEY=changeme
+
+# ============================================================================
+# PostgreSQL Database
+# ============================================================================
+POSTGRES_USER=jan_user
+POSTGRES_DB=jan_llm_api
+POSTGRES_PORT=5432
+KEYCLOAK_DB_PORT=5433
+
+# Database connection (varies by environment)
+# Development:  postgres://jan_user:password@api-db:5432/jan_llm_api
+# Hybrid:       postgres://jan_user:password@localhost:5432/jan_llm_api
+DB_DSN=postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@api-db:5432/${POSTGRES_DB}?sslmode=disable
+DATABASE_URL=postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@api-db:5432/${POSTGRES_DB}?sslmode=disable
+
+# ============================================================================
+# LLM API Service
+# ============================================================================
+HTTP_PORT=8080
+LOG_LEVEL=info
+LOG_FORMAT=json
+AUTO_MIGRATE=true
+
+# ============================================================================
+# Authentication (Keycloak)
+# ============================================================================
+# Note: JWKS_URL uses internal Docker hostname, ISSUER uses localhost for external clients
+KEYCLOAK_ADMIN=admin
+KEYCLOAK_HTTP_PORT=8085
+KEYCLOAK_REALM=jan
+
+# OAuth/OIDC Configuration (varies by environment)
+# Development (inside Docker): http://keycloak:8085
+# External/Hybrid: set KEYCLOAK_PUBLIC_HOST to your host (e.g., localhost or LAN IP) and KEYCLOAK_PUBLIC_PORT to the exposed port. JWKS_URL and ISSUER are derived from the values below.
+KEYCLOAK_BASE_URL=http://keycloak:8085
+KEYCLOAK_PUBLIC_HOST=localhost
+KEYCLOAK_PUBLIC_PORT=${KEYCLOAK_HTTP_PORT:-8085}
+KEYCLOAK_PUBLIC_URL=http://${KEYCLOAK_PUBLIC_HOST}:${KEYCLOAK_PUBLIC_PORT}
+KEYCLOAK_ADMIN_URL=http://${KEYCLOAK_PUBLIC_HOST}:${KEYCLOAK_PUBLIC_PORT}
+JWKS_URL=${KEYCLOAK_BASE_URL}/realms/${KEYCLOAK_REALM}/protocol/openid-connect/certs
+ISSUER=${KEYCLOAK_PUBLIC_URL}/realms/${KEYCLOAK_REALM}
+AUDIENCE=account
+REFRESH_JWKS_INTERVAL=5m
+
+# OAuth2 redirect URI (must match Keycloak client configuration)
+# This is where Keycloak redirects after authentication
+OAUTH_REDIRECT_URI=http://localhost:8000/auth/callback
+
+# Guest provisioning
+BACKEND_CLIENT_ID=backend
+TARGET_CLIENT_ID=jan-client
+GUEST_ROLE=guest
+
+# ============================================================================
+# API Gateway (Kong)
+# ============================================================================
+KONG_HTTP_PORT=8000
+
+# ============================================================================
+# Inference (vLLM)
+# ============================================================================
+VLLM_PORT=8101
+VLLM_MODEL=Qwen/Qwen2.5-0.5B-Instruct
+VLLM_SERVED_NAME=qwen2.5-0.5b-instruct
+VLLM_GPU_UTIL=0.66
+VLLM_TOOL_SUPPORT=false
+# Model provider configuration
+JAN_PROVIDER_CONFIGS=true
+JAN_PROVIDER_CONFIGS_FILE=config/providers.yml
+JAN_PROVIDER_CONFIG_SET=default
+PROMPT_ORCHESTRATION_ENABLED=true
+PROMPT_ORCHESTRATION_TEMPLATES=true
+# Legacy fallback (single provider) - leave disabled unless needed
+# JAN_DEFAULT_NODE_SETUP=true
+# JAN_DEFAULT_NODE_URL=http://vllm-jan-gpu:8001/v1
+# JAN_DEFAULT_NODE_API_KEY=${VLLM_INTERNAL_KEY}
+
+# ============================================================================
+# MCP Tools Service
+# ============================================================================
+MCP_TOOLS_HTTP_PORT=8091
+
+# Search engine: SEARCH_ENGINE is serper or searxng; SERPER_DOMAIN_FILTER and SERPER_LOCATION_HINT are optional; set SERPER_OFFLINE_MODE=true for offline testing
+SEARCH_ENGINE=serper
+SERPER_DOMAIN_FILTER=
+SERPER_LOCATION_HINT=
+SERPER_OFFLINE_MODE=false
+
+# MCP Provider URLs (vary by environment)
+# Development:  http://searxng:8080
+# Hybrid:       http://localhost:8086
+SEARXNG_PORT=8086
+SEARXNG_URL=http://searxng:8080
+VECTOR_STORE_PORT=3015
+VECTOR_STORE_URL=http://vector-store:3015
+SANDBOXFUSION_PORT=3010
+SANDBOXFUSION_URL=http://sandboxfusion:3010
+SANDBOX_FUSION_REQUIRE_APPROVAL=true
+MCP_ENABLE_PYTHON_EXEC=true
+MCP_ENABLE_MEMORY_RETRIEVE=true
+
+# Browser automation & code execution
+CODE_SANDBOX_ENABLED=true
+CODE_SANDBOX_URL=http://code-sandbox-mcp:3000/mcp
+PLAYWRIGHT_ENABLED=true
+PLAYWRIGHT_URL=http://playwright-mcp:3000
+
+# MCP debugging
+MCP_PROVIDER_DEBUG=false
+
+# ============================================================================
+# Observability (OpenTelemetry, Prometheus, Jaeger, Grafana)
+# ============================================================================
+OTEL_ENABLED=false                   # Enable telemetry collection
+OTEL_SERVICE_NAME=llm-api
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
+OTEL_HTTP_PORT=4318
+OTEL_GRPC_PORT=4317
+
+# Monitoring stack ports
+PROMETHEUS_PORT=9090
+JAEGER_UI_PORT=16686
+GRAFANA_PORT=3001
+GRAFANA_ADMIN_USER=admin
+GRAFANA_ADMIN_PASSWORD=admin
+
+# ============================================================================
+# Media API Configuration
+# ============================================================================
+MEDIA_API_ENABLED=true
+MEDIA_API_PORT=8285
+MEDIA_SERVICE_KEY=changeme-media-key
+MEDIA_API_KEY=changeme-media-key
+
+# Storage backend selection: "s3" or "local"
+MEDIA_STORAGE_BACKEND=local
+
+# Local Storage Configuration (used when MEDIA_STORAGE_BACKEND=local)
+MEDIA_LOCAL_STORAGE_PATH=./media-data
+MEDIA_LOCAL_STORAGE_BASE_URL=http://localhost:8285/v1/files
+
+# S3 Storage Configuration (used when MEDIA_STORAGE_BACKEND=s3)
+# MEDIA_S3_ENDPOINT=https://s3.menlo.ai
+# MEDIA_S3_PUBLIC_ENDPOINT=
+# MEDIA_S3_REGION=us-west-2
+# MEDIA_S3_BUCKET=platform-dev
+# MEDIA_S3_ACCESS_KEY_ID=your_s3_access_key_id
+# MEDIA_S3_SECRET_ACCESS_KEY=your_s3_secret_access_key
+# MEDIA_S3_USE_PATH_STYLE=true
+# MEDIA_S3_PRESIGN_TTL=5m
+
+# Media service URLs
+MEDIA_API_URL=http://media-api:8285
+MEDIA_RESOLVE_URL=http://media-api:8285/v1/media/resolve
+MEDIA_RESOLVE_TIMEOUT=5s
+
+# Media processing settings
+MEDIA_MAX_BYTES=20971520
+MEDIA_PROXY_DOWNLOAD=true
+MEDIA_RETENTION_DAYS=30
+MEDIA_REMOTE_FETCH_TIMEOUT=15s
+
+# ============================================================================
+# Template API (copy when running the scaffold locally)
+# ============================================================================
+# TEMPLATE_DATABASE_URL=postgres://postgres:postgres@localhost:5432/template_api?sslmode=disable
+# SERVICE_NAME=template-api
+# ENVIRONMENT=development
+# HTTP_PORT=8185
+# LOG_LEVEL=info
+# ENABLE_TRACING=false
+# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
+# SHUTDOWN_TIMEOUT=10s
+# DB_MAX_IDLE_CONNS=5
+# DB_MAX_OPEN_CONNS=15
+# DB_CONN_MAX_LIFETIME=30m
+# AUTH_ENABLED=false
+# AUTH_ISSUER=http://localhost:8085/realms/jan
+# AUTH_AUDIENCE=account
+# AUTH_JWKS_URL=http://localhost:8085/realms/jan/protocol/openid-connect/certs
+
+# ============================================================================
+# Environment-Specific Overrides
+# ============================================================================
+# The above values work for Docker development (all services in containers)
+# 
+# For other environments, use:
+#   make env-switch ENV=hybrid      # Run services natively
+#   make env-switch ENV=testing     # Integration testing
+#
+# Or manually copy from config/<environment>.env to .env
+#
+# See config/README.md for detailed environment configuration guide
+# ============================================================================
diff --git a/.github/ISSUE_TEMPLATE/1-bug-report.md b/.github/ISSUE_TEMPLATE/1-bug-report.md
deleted file mode 100644
index 495adb30..00000000
--- a/.github/ISSUE_TEMPLATE/1-bug-report.md
+++ /dev/null
@@ -1,24 +0,0 @@
----
-name: 🐛 Bug Report
-about: If something isn't working as expected 🤔
-title: 'bug: '
-type: Bug
----
-
-**Version:** e.g. 0.5.x-xxx
-
-## Describe the Bug
-<!-- A clear & concise description of the bug -->
-
-
-## Steps to Reproduce
-1.
-
-## Screenshots / Logs
-<!-- You can find logs in: Setting -> General -> Data Folder -> App Logs -->
-
-
-## Operating System
-- [ ] MacOS
-- [ ] Windows
-- [ ] Linux
diff --git a/.github/ISSUE_TEMPLATE/2-feature-request.md b/.github/ISSUE_TEMPLATE/2-feature-request.md
deleted file mode 100644
index 4da9e845..00000000
--- a/.github/ISSUE_TEMPLATE/2-feature-request.md
+++ /dev/null
@@ -1,12 +0,0 @@
----
-name: 🚀 Feature Request
-about: Suggest an idea for this project 😻!
-title: 'idea: '
-type: Idea
----
-
-## Problem Statement
-<!-- Describe the problem you're facing -->
-
-## Feature Idea
-<!-- Describe what you want instead. Examples are welcome! -->
diff --git a/.github/ISSUE_TEMPLATE/3-epic.md b/.github/ISSUE_TEMPLATE/3-epic.md
deleted file mode 100644
index 3e05d8dd..00000000
--- a/.github/ISSUE_TEMPLATE/3-epic.md
+++ /dev/null
@@ -1,27 +0,0 @@
----
-name: 🌟 Epic
-about: User stories and specs
-title: 'epic: '
-type: Epic
----
-
-## User Stories
-
-- As a [user type], I can [do something] so that [outcome]
-
-## Not in scope
-
-- 
-
-## User Flows & Designs
-
-- Key user flows
-- Figma link
-- Edge cases
-- Error states
-
-## Engineering Decisions
-
-- **Technical Approach:** Brief outline of the solution.
-- **Key Trade-offs:** What’s been considered/rejected and why.
-- **Dependencies:** APIs, services, libraries, teams.
diff --git a/.github/ISSUE_TEMPLATE/4-goal.md b/.github/ISSUE_TEMPLATE/4-goal.md
deleted file mode 100644
index 6de52fb1..00000000
--- a/.github/ISSUE_TEMPLATE/4-goal.md
+++ /dev/null
@@ -1,24 +0,0 @@
----
-name: 🎯 Goal
-about: Roadmap goals for our users
-title: 'goal: '
-type: Goal
----
-
-## 🎯 Goal  
-<!-- Short description of our goal -->
-
-## 📖 Context  
-<!-- Give a description of our current context -->
-
-## ✅ Scope  
-<!-- High lever description of what we are going to deliver -->
-
-## ❌ Out of Scope  
-<!-- What we are not targeting / delivering / discussing in this goal -->
-
-## 🛠 Deliverables  
-<!-- What we are the tangible deliverables for this goal -->
-
-## ❓Open questions
-<!-- What are we not sure about and need to discuss more -->
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
deleted file mode 100644
index e530b1ed..00000000
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-blank_issues_enabled: true
-contact_links:
-  - name: Jan Server Discussions
-    url: https://github.com/orgs/menloresearch/discussions/categories/q-a
-    about: Get help, discuss features & roadmap, and share your projects
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
deleted file mode 100644
index b9768df4..00000000
--- a/.github/pull_request_template.md
+++ /dev/null
@@ -1,42 +0,0 @@
-## Pull Request Title: [Brief, Descriptive Title]
-
-**Description:**
-
-_Please provide a detailed description of the changes in this pull request. What problem does it solve? What new features does it add?_
-
----
-
-### Change-Specific Checklist
-
-_Please check off the sections below that are relevant to this PR and complete the corresponding details._
-
-**Domain Changes**
-
-- [ ] This PR introduces changes to the domain models or business logic.
-  - **List of Changed Domains:**
-    - `[Domain 1]`
-    - `[Domain 2]`
-
----
-
-**Database Migration**
-
-- [ ] This PR includes a database migration.
-  - **Table Changes:**
-    - _e.g., `users` table: Added `last_login_at` column._
-    - _e.g., `products` table: Removed `old_price` column._
-  - **Migration Strategy:**
-    - _e.g., Adding a non-nullable column: Is a default value provided?_
-    - _e.g., Dropping a column: Is the data backed up?_
-
----
-
-**Endpoint Changes**
-
-- [ ] This PR adds, modifies, or removes API endpoints.
-  - **List of Changed Endpoints:**
-    - `GET /api/v1/new-resource` (New)
-    - `PUT /api/v1/old-resource/{id}` (Modified)
-  - [ ] **Swagger/API Docs** have been updated for all changed endpoints.
-  - [ ] **Breaking Change:** Does this PR introduce a breaking change to an existing API? (Yes/No)
-    - If yes, please explain the impact: _[Explanation]_
\ No newline at end of file
diff --git a/.github/workflows/config-drift.yml b/.github/workflows/config-drift.yml
new file mode 100644
index 00000000..d8c2592a
--- /dev/null
+++ b/.github/workflows/config-drift.yml
@@ -0,0 +1,61 @@
+name: Config Drift Check
+
+on:
+  pull_request:
+    paths:
+      - 'pkg/config/**'
+      - 'config/schema/**'
+      - 'config/defaults.yaml'
+      - 'cmd/jan-cli/cmd_config.go'
+  push:
+    branches:
+      - main
+    paths:
+      - 'pkg/config/**'
+      - 'config/schema/**'
+      - 'config/defaults.yaml'
+      - 'cmd/jan-cli/cmd_config.go'
+
+jobs:
+  check-drift:
+    name: Check Configuration Drift
+    runs-on: ubuntu-latest
+    
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.21'
+          cache: true
+          
+      - name: Install dependencies
+        run: go mod download
+        
+      - name: Generate configuration artifacts
+        run: make config-generate
+        
+      - name: Check for drift
+        run: |
+          if [[ -z "$(git status --porcelain -- config/schema config/defaults.yaml)" ]]; then  # porcelain output also lists untracked generated files
+            echo "✓ No drift detected - generated files match source"
+          else
+            echo "✗ Configuration drift detected!"
+            echo ""
+            echo "Generated files differ from committed versions."
+            echo "Someone manually edited generated files or forgot to run 'make config-generate'."
+            echo ""
+            echo "To fix:"
+            echo "1. Run: make config-generate"
+            echo "2. Commit the changes"
+            echo "3. Never manually edit files in config/schema/ or config/defaults.yaml"
+            echo ""
+            echo "Differences:"
+            git status --short -- config/schema config/defaults.yaml && git diff -- config/schema config/defaults.yaml
+            exit 1
+          fi
+          
+      - name: Run config tests
+        run: make config-test
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 12ed1a78..92c3c6df 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -1,31 +1,148 @@
-name: CI - jan-api-gateway
+name: CI - Microservices
 on:
   push:
     branches:
-      - dev
+      - main
     paths:
-      - "apps/jan-api-gateway/**"
+      - "services/llm-api/**"
+      - "services/mcp-tools/**"
+      - "services/media-api/**"
+      - "services/response-api/**"
+      - "services/memory-tools/**"
       - .github/workflows/dev.yml
       - .github/workflows/template-docker.yml
   pull_request:
     branches:
-      - dev
       - main
     paths:
-      - "apps/jan-api-gateway/**"
-      - .github/workflows/dev-jan-api-gateway
+      - "services/llm-api/**"
+      - "services/mcp-tools/**"
+      - "services/media-api/**"
+      - "services/response-api/**"
+      - "services/memory-tools/**"
+      - .github/workflows/dev.yml
       - .github/workflows/template-docker.yml
 
 jobs:
-  build-docker-x64:
+  # Reports, per service, whether files under that service's directory changed.
+  changes:
+    runs-on: ubuntu-latest
+    outputs:
+      llm-api: ${{ steps.filter.outputs.llm-api }}
+      mcp-tools: ${{ steps.filter.outputs.mcp-tools }}
+      media-api: ${{ steps.filter.outputs.media-api }}
+      response-api: ${{ steps.filter.outputs.response-api }}
+      memory-tools: ${{ steps.filter.outputs.memory-tools }}
+      vector-store: ${{ steps.filter.outputs.vector-store }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          # 'every' is required for the '!' exclusion under mcp-tools to take effect:
+          # with the default 'some', a negated pattern matches any file outside that path.
+          predicate-quantifier: 'every'
+          filters: |
+            llm-api:
+              - 'services/llm-api/**'
+            mcp-tools:
+              - 'services/mcp-tools/**'
+              - '!services/mcp-tools/tools/vector-store-service/**'
+            media-api:
+              - 'services/media-api/**'
+            response-api:
+              - 'services/response-api/**'
+            memory-tools:
+              - 'services/memory-tools/**'
+            vector-store:
+              - 'services/mcp-tools/tools/vector-store-service/**'
+
+  build-llm-api:
+    needs: changes
+    if: needs.changes.outputs.llm-api == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/llm-api/Dockerfile
+      context: services/llm-api
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/llm-api:dev-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=dev-${{ github.sha }}
+
+  build-mcp-tools:
+    needs: changes
+    if: needs.changes.outputs.mcp-tools == 'true'
     uses: ./.github/workflows/template-docker.yml
     secrets: inherit
     with:
       runs-on: ubuntu-24-04-docker
-      docker-file: apps/jan-api-gateway/Dockerfile
-      context: apps/jan-api-gateway
+      docker-file: services/mcp-tools/Dockerfile
+      context: services/mcp-tools
       registry-url: registry.menlo.ai
-      tags: registry.menlo.ai/jan-server/jan-api-gateway:dev-${{ github.sha }}
+      tags: registry.menlo.ai/jan-server/mcp-tools:dev-${{ github.sha }}
       is_push: ${{ github.event_name == 'push' }}
       build-args: |
         VERSION_TAG=dev-${{ github.sha }}
+
+  build-media-api:
+    needs: changes
+    if: needs.changes.outputs.media-api == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/media-api/Dockerfile
+      context: services/media-api
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/media-api:dev-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=dev-${{ github.sha }}
+
+  build-response-api:
+    needs: changes
+    if: needs.changes.outputs.response-api == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/response-api/Dockerfile
+      context: services/response-api
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/response-api:dev-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=dev-${{ github.sha }}
+
+  build-memory-tools:
+    needs: changes
+    if: needs.changes.outputs.memory-tools == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/memory-tools/Dockerfile
+      context: services/memory-tools
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/memory-tools:dev-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=dev-${{ github.sha }}
+
+  build-vector-store:
+    needs: changes
+    if: needs.changes.outputs.vector-store == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/mcp-tools/tools/vector-store-service/Dockerfile
+      context: services/mcp-tools/tools/vector-store-service
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/vector-store-service:dev-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=dev-${{ github.sha }}
\ No newline at end of file
diff --git a/.github/workflows/load-test.yml b/.github/workflows/load-test.yml
deleted file mode 100644
index 0d8e1dea..00000000
--- a/.github/workflows/load-test.yml
+++ /dev/null
@@ -1,221 +0,0 @@
-name: Load Test
-
-on:
-  # Manual trigger
-  workflow_dispatch:
-    inputs:
-      test_case:
-        description: 'Test case to run (leave empty to run all tests)'
-        required: false
-        default: ''
-        type: choice
-        options:
-          - ''
-          - test-completion-standard
-          - test-completion-conversation
-          - test-responses
-      base_url:
-        description: 'Base URL for testing'
-        required: true
-        default: 'https://api-stag.jan.ai'
-        type: string
-      model:
-        description: 'Model to test'
-        required: true
-        default: 'jan-v1-4b'
-        type: string
-      duration_minutes:
-        description: 'Test duration in minutes'
-        required: true
-        default: '5'
-        type: string
-      nonstream_rps:
-        description: 'Non-streaming requests per second'
-        required: true
-        default: '2'
-        type: string
-      stream_rps:
-        description: 'Streaming requests per second'
-        required: true
-        default: '1'
-        type: string
-  
-
-env:
-  # Test configuration - use inputs for workflow_dispatch, defaults for push
-  BASE: ${{ github.event.inputs.base_url || 'https://api-stag.jan.ai' }}
-  MODEL: ${{ github.event.inputs.model || 'jan-v1-4b' }}
-  DURATION_MIN: ${{ github.event.inputs.duration_minutes || '2' }}
-  NONSTREAM_RPS: ${{ github.event.inputs.nonstream_rps || '2' }}
-  STREAM_RPS: ${{ github.event.inputs.stream_rps || '1' }}
-  
-  # Cloudflare load test token (required for API access)
-  LOADTEST_TOKEN: ${{ secrets.LOADTEST_TOKEN }}
-  
-  # Guest authentication - no API keys needed
-  # Tests automatically use guest login
-  
-  # Prometheus remote write configuration (k6 standard env vars)
-  K6_PROMETHEUS_RW_SERVER_URL: ${{ secrets.K6_PROMETHEUS_RW_SERVER_URL }}
-  K6_PROMETHEUS_RW_USERNAME: ${{ secrets.K6_PROMETHEUS_RW_USERNAME }}
-  K6_PROMETHEUS_RW_PASSWORD: ${{ secrets.K6_PROMETHEUS_RW_PASSWORD }}
-  K6_PROMETHEUS_RW_TREND_STATS: ${{ vars.K6_PROMETHEUS_RW_TREND_STATS || 'p(95),p(99),min,max' }}
-  K6_PROMETHEUS_RW_PUSH_INTERVAL: ${{ vars.K6_PROMETHEUS_RW_PUSH_INTERVAL || '5s' }}
-
-jobs:
-  load-test:
-    runs-on: ubuntu-latest
-    
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-        
-      - name: Setup k6
-        uses: grafana/setup-k6-action@v1
-        
-      - name: Clear k6 defaults
-        run: |
-          # Clear any potential k6 config files that might have localhost defaults
-          rm -f ~/.k6rc || true
-          rm -f .k6rc || true
-          # Unset any k6 environment variables that might interfere
-          unset K6_OUT || true
-          unset K6_PROMETHEUS_URL || true
-        
-      - name: Install jq for metrics parsing
-        run: sudo apt-get update && sudo apt-get install -y jq
-        
-      - name: Validate inputs
-        run: |
-          echo "🚀 Load Test Execution"
-          echo "Trigger: ${{ github.event_name }}"
-          echo ""
-          echo "Test Configuration:"
-          if [[ -n "${{ github.event.inputs.test_case }}" ]]; then
-            echo "  Test Case: ${{ github.event.inputs.test_case }} (specific test)"
-          else
-            echo "  Test Case: ALL TESTS (manual trigger)"
-          fi
-          echo "  Base URL: ${{ github.event.inputs.base_url }}"
-          echo "  Model: ${{ github.event.inputs.model }}"
-          echo "  Duration: ${{ github.event.inputs.duration_minutes }} minutes"
-          echo "  Non-stream RPS: ${{ github.event.inputs.nonstream_rps }}"
-          echo "  Stream RPS: ${{ github.event.inputs.stream_rps }}"
-          
-          # Cloudflare load test token validation
-          if [[ -n "$LOADTEST_TOKEN" ]]; then
-            echo "✅ Cloudflare load test token configured: [CONFIGURED]"
-          else
-            echo "❌ ERROR: LOADTEST_TOKEN is required for Cloudflare API access"
-            echo "Please configure LOADTEST_TOKEN secret in GitHub repository settings"
-            exit 1
-          fi
-          
-          # Guest authentication - no secrets needed
-          echo "✅ Using guest authentication (no API keys required)"
-          
-          echo ""
-          echo "Prometheus Configuration:"
-          if [[ -n "$K6_PROMETHEUS_RW_SERVER_URL" ]]; then
-            echo "✅ k6 Prometheus remote write endpoint configured: [CONFIGURED]"
-            if [[ -n "$K6_PROMETHEUS_RW_USERNAME" ]]; then
-              echo "✅ k6 Prometheus username configured: [CONFIGURED]"
-            else
-              echo "⚠️  k6 Prometheus username not configured"
-            fi
-            if [[ -n "$K6_PROMETHEUS_RW_PASSWORD" ]]; then
-              echo "✅ k6 Prometheus password configured: [HIDDEN]"
-            else
-              echo "⚠️  k6 Prometheus password not configured"
-            fi
-            echo "📊 Trend stats: $K6_PROMETHEUS_RW_TREND_STATS"
-            echo "⏱️  Push interval: $K6_PROMETHEUS_RW_PUSH_INTERVAL"
-          else
-            echo "⚠️  Warning: K6_PROMETHEUS_RW_SERVER_URL is not configured"
-          fi
-        
-      - name: Run load test
-        id: loadtest
-        run: |
-          cd tests
-          if [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.test_case }}" ]]; then
-            echo "Running specific test case: ${{ github.event.inputs.test_case }}"
-            ./run-loadtest.sh ${{ github.event.inputs.test_case }}
-          else
-            echo "Running all test cases"
-            ./run-loadtest.sh
-          fi
-        
-      - name: Parse test results
-        id: parse_results
-        if: always()
-        run: |
-          cd tests/results
-          
-          # Find the latest results file
-          LATEST_FILE=$(ls -t *_*.json 2>/dev/null | head -1 || echo "")
-          
-          if [[ -n "$LATEST_FILE" && -f "$LATEST_FILE" ]]; then
-            echo "results_file=$LATEST_FILE" >> $GITHUB_OUTPUT
-            
-            # Extract key metrics using jq
-            if command -v jq &> /dev/null; then
-              echo "=== Load Test Results ===" >> results_summary.txt
-              if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
-                echo "Test Case: ${{ github.event.inputs.test_case || 'All Tests' }}" >> results_summary.txt
-                echo "Duration: ${{ github.event.inputs.duration_minutes || '2' }} minutes" >> results_summary.txt
-              else
-                echo "Test Case: All Tests (auto-triggered)" >> results_summary.txt
-                echo "Duration: $DURATION_MIN minutes" >> results_summary.txt
-              fi
-              echo "Trigger: ${{ github.event_name }}" >> results_summary.txt
-              echo "Date: $(date)" >> results_summary.txt
-              echo "" >> results_summary.txt
-              
-              # Parse metrics
-              jq -r '.metrics | to_entries[] | select(.key | contains("completion_") or contains("conversation_") or contains("response_") or contains("guest_") or contains("refresh_")) | "\(.key): avg=\(.value.avg // "N/A"), min=\(.value.min // "N/A"), max=\(.value.max // "N/A"), p95=\(.value.p95 // "N/A")"' "$LATEST_FILE" >> results_summary.txt 2>/dev/null || echo "Failed to parse detailed metrics" >> results_summary.txt
-              
-              # Check for errors
-              ERROR_COUNT=$(jq -r '.metrics.completion_errors.count // .metrics.conversation_errors.count // .metrics.response_errors.count // 0' "$LATEST_FILE" 2>/dev/null || echo "0")
-              echo "" >> results_summary.txt
-              echo "Error Count: $ERROR_COUNT" >> results_summary.txt
-              
-              # Set output for next steps
-              echo "error_count=$ERROR_COUNT" >> $GITHUB_OUTPUT
-              
-              # Display summary
-              echo "=== Test Results Summary ==="
-              cat results_summary.txt
-            else
-              echo "jq not available, skipping detailed metrics parsing"
-            fi
-          else
-            echo "No results file found"
-            echo "error_count=999" >> $GITHUB_OUTPUT
-          fi
-        
-      - name: Upload test results
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: loadtest-results-${{ github.event_name == 'workflow_dispatch' && github.event.inputs.test_case || 'all-tests' }}-${{ github.run_number }}
-          path: |
-            tests/results/
-
-      - name: Fail job if errors detected
-        if: steps.parse_results.outputs.error_count != '0'
-        run: |
-          echo "❌ Load test detected ${{ steps.parse_results.outputs.error_count }} errors"
-          exit 1
-          
-      - name: Success notification
-        if: success()
-        run: |
-          echo "✅ Load test completed successfully!"
-          echo "Trigger: ${{ github.event_name }}"
-          if [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.test_case }}" ]]; then
-            echo "Test case: ${{ github.event.inputs.test_case }}"
-          else
-            echo "Test case: All tests"
-          fi
-          echo "Error count: ${{ steps.parse_results.outputs.error_count }}"
diff --git a/.github/workflows/prod.yml b/.github/workflows/prod.yml
index a12e4549..e90818b5 100644
--- a/.github/workflows/prod.yml
+++ b/.github/workflows/prod.yml
@@ -1,29 +1,138 @@
-name: CI - jan-server
-
+name: CI - Microservices
 on:
   push:
-    tags:
-      - 'v[0-9]+.[0-9]+.[0-9]+'
+    branches:
+      - release
+    paths:
+      - "services/llm-api/**"
+      - "services/mcp-tools/**"
+      - "services/media-api/**"
+      - "services/response-api/**"
+      - "services/memory-tools/**"
+      - .github/workflows/prod.yml
+      - .github/workflows/template-docker.yml
 
 jobs:
-  build-docker-x64:
-    strategy:
-      fail-fast: true
-      matrix:
-        include:
-          - docker-file: apps/jan-api-gateway/Dockerfile
-            context: apps/jan-api-gateway
-            registry-url: registry.menlo.ai
-            tags: registry.menlo.ai/jan-server/jan-api-gateway:${{ github.ref_name }}
-            is_push: true
+  # Detects which service directories changed; each build job is gated on one of these outputs.
+  changes:
+    runs-on: ubuntu-latest
+    outputs:
+      llm-api: ${{ steps.filter.outputs.llm-api }}
+      mcp-tools: ${{ steps.filter.outputs.mcp-tools }}
+      media-api: ${{ steps.filter.outputs.media-api }}
+      response-api: ${{ steps.filter.outputs.response-api }}
+      memory-tools: ${{ steps.filter.outputs.memory-tools }}
+      vector-store: ${{ steps.filter.outputs.vector-store }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          base: release
+          # Negated ('!') patterns only act as exclusions when a file must match every pattern;
+          # with the default 'some', the mcp-tools negation would match any file outside that path.
+          predicate-quantifier: 'every'
+          filters: |
+            llm-api:
+              - 'services/llm-api/**'
+            mcp-tools:
+              - 'services/mcp-tools/**'
+              - '!services/mcp-tools/tools/vector-store-service/**'
+            media-api:
+              - 'services/media-api/**'
+            response-api:
+              - 'services/response-api/**'
+            memory-tools:
+              - 'services/memory-tools/**'
+            vector-store:
+              - 'services/mcp-tools/tools/vector-store-service/**'
+
+  build-llm-api:
+    needs: changes
+    if: needs.changes.outputs.llm-api == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/llm-api/Dockerfile
+      context: services/llm-api
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/llm-api:prod-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=prod-${{ github.sha }}
+
+  build-mcp-tools:
+    needs: changes
+    if: needs.changes.outputs.mcp-tools == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/mcp-tools/Dockerfile
+      context: services/mcp-tools
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/mcp-tools:prod-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=prod-${{ github.sha }}
+
+  build-media-api:
+    needs: changes
+    if: needs.changes.outputs.media-api == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/media-api/Dockerfile
+      context: services/media-api
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/media-api:prod-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=prod-${{ github.sha }}
+
+  build-response-api:
+    needs: changes
+    if: needs.changes.outputs.response-api == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/response-api/Dockerfile
+      context: services/response-api
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/response-api:prod-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=prod-${{ github.sha }}
+
+  build-memory-tools:
+    needs: changes
+    if: needs.changes.outputs.memory-tools == 'true'
+    uses: ./.github/workflows/template-docker.yml
+    secrets: inherit
+    with:
+      runs-on: ubuntu-24-04-docker
+      docker-file: services/memory-tools/Dockerfile
+      context: services/memory-tools
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/memory-tools:prod-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
+      build-args: |
+        VERSION_TAG=prod-${{ github.sha }}
+
+  build-vector-store:
+    needs: changes
+    if: needs.changes.outputs.vector-store == 'true'
     uses: ./.github/workflows/template-docker.yml
     secrets: inherit
     with:
       runs-on: ubuntu-24-04-docker
-      docker-file: ${{ matrix.docker-file }}
-      context: ${{ matrix.context }}
-      registry-url: ${{ matrix.registry-url }}
-      tags: ${{ matrix.tags }}
-      is_push: ${{ matrix.is_push }}
+      docker-file: services/mcp-tools/tools/vector-store-service/Dockerfile
+      context: services/mcp-tools/tools/vector-store-service
+      registry-url: registry.menlo.ai
+      tags: registry.menlo.ai/jan-server/vector-store-service:prod-${{ github.sha }}
+      is_push: ${{ github.event_name == 'push' }}
       build-args: |
-        VERSION_TAG=${{ github.ref_name }}
\ No newline at end of file
+        VERSION_TAG=prod-${{ github.sha }}
\ No newline at end of file
diff --git a/.github/workflows/stag.yml b/.github/workflows/stag.yml
deleted file mode 100644
index 54eca073..00000000
--- a/.github/workflows/stag.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: CI - jan-api-gateway
-on:
-  push:
-    branches:
-      - stag
-    paths:
-      - "apps/jan-api-gateway/**"
-      - .github/workflows/stag.yml
-      - .github/workflows/template-docker.yml
-
-jobs:
-  build-docker-x64:
-    uses: ./.github/workflows/template-docker.yml
-    secrets: inherit
-    with:
-      runs-on: ubuntu-24-04-docker
-      docker-file: apps/jan-api-gateway/Dockerfile
-      context: apps/jan-api-gateway
-      registry-url: registry.menlo.ai
-      tags: registry.menlo.ai/jan-server/jan-api-gateway:stag-${{ github.sha }}
-      is_push: ${{ github.event_name == 'push' }}
-      build-args: |
-        VERSION_TAG=stag-${{ github.sha }}
diff --git a/.github/workflows/template-docker.yml b/.github/workflows/template-docker.yml
index 3773f3f6..fda78dfe 100644
--- a/.github/workflows/template-docker.yml
+++ b/.github/workflows/template-docker.yml
@@ -64,8 +64,8 @@ jobs:
         with:
           submodules: "recursive"
 
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
+      # - name: Set up QEMU
+      #   uses: docker/setup-qemu-action@v3
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
diff --git a/.gitignore b/.gitignore
index d2ebdcfa..b128e654 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+# Copilots
+.github/copilot-instructions.md
 # Logs
 *.log
 # Temporary files
@@ -7,8 +9,20 @@
 *.bak
 # Environment files (containing secrets, API keys, credentials)
 .env
-*.env
 .env.*
+*.env
+!*.env.example
+!config/*.env.example
+
+# Production and secrets (never commit these!)
+config/production.env
+config/secrets.env
+config/*.local.env
+
+# Kubernetes secret overrides (never commit these!)
+k8s/**/values-*.local.yaml
+k8s/**/secrets.yaml
+k8s/**/*-secret.yaml.local
 
 # Local configuration that shouldn't be shared
 *.local
@@ -17,10 +31,7 @@
 # Visual Studio Code editor settings and workspace files
 
 # Visual Studio Code
-.vscode/*
-!.vscode/settings.json
-!.vscode/tasks.json
-!.vscode/launch.json
+.vscode/*
 
 !.vscode/extensions.json
 !.vscode/*.code-snippets
@@ -85,7 +96,7 @@ out/
 .idea/golangProjectSettings.xml
 .idea/goLibraries.xml
 go.work
-go.sum
+
 
 .idea/goprojector.xml
 .idea/go.imports.xml
@@ -108,7 +119,6 @@ downloads/
 
 eggs/
 .eggs/
-lib/
 lib64/
 parts/
 
@@ -237,8 +247,6 @@ vendor/
 *.out
 go.work
 
-go.sum
-
 ### Typescript ###
 # typescript specific files
 
@@ -246,10 +254,28 @@ go.sum
 node_modules/
 dist/
 
-# helm
-charts/jan-server/charts/*.tgz
+.DS_Store
+cmd/server/__debug_bin*
+
+# Downloaded models (large files - root directory only)
+/models/
+
+# API Keys and credentials (catch-all)
+**/credentials.json
+**/service-account.json
+**/*.key
+**/*.pem
+**/*.p12
+**/*.pfx
+**/secrets/
+**/.secrets/
+
+# Docker and Kubernetes secrets
+docker-compose.override.yml
+docker-compose.*.local.yml
 
-# Migration
-apps/jan-api-gateway/application/cmd/codegen/dbmigration/tmp/*
+# jan-cli generated config artifacts
+cmd/jan-cli/config/
 
-.DS_Store
\ No newline at end of file
+# jan-cli binary
+cmd/jan-cli/jan-cli
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..38e6fe42
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,291 @@
+# Changelog
+
+All notable changes to Jan Server will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+## [0.2.0] - 2025-11-10
+
+### Major Architectural Changes
+
+This release represents a **complete architectural overhaul** from a Kubernetes-native monolithic platform to a microservices-first architecture with Docker Compose and enhanced developer experience.
+
+### Added
+
+#### New Microservices Architecture
+- **Response API Service** (Port 8082) - Multi-step tool orchestration with configurable execution depth and timeout
+- **Media API Service** (Port 8285) - S3-integrated media ingestion and resolution with `jan_*` ID system
+- **MCP Tools Service** (Port 8091) - Model Context Protocol integration for external tools
+- **Service Template System** - Reusable Go microservice skeleton with standardized structure
+  - `scripts/new-service-from-template.ps1` - Automated service generation script
+  - Complete template with config, logging, tracing, HTTP server, Makefile, and Dockerfile
+
+#### Developer Experience
+- **100+ Makefile commands** organized into 10 sections:
+  - Environment management (setup, clean, health checks)
+  - Infrastructure management (Docker Compose profiles)
+  - Service management (build, run, logs)
+  - Database operations (migrations, reset)
+  - Testing suite (auth, conversations, media, responses, MCP, E2E)
+  - Hybrid development mode (native execution with hot reload)
+  - Monitoring stack (Prometheus, Grafana, Jaeger)
+  - Build automation and utilities
+- **Quick Start** - One-command setup: `make setup && make up-full`
+- **Health Check Utilities** - `make health-check` for service monitoring
+
+#### Comprehensive Testing Infrastructure
+- **6 jan-cli api-test collections** in `tests/automation/`:
+  - `auth-postman-scripts.json` - Authentication tests
+  - `conversations-postman-scripts.json` - Conversation API tests
+  - `responses-postman-scripts.json` - Response API tests
+  - `media-postman-scripts.json` - Media API tests
+  - `mcp-postman-scripts.json` - MCP tools tests
+  - `test-all.postman.json` - Complete E2E test suite
+- **Test commands**:
+  - `make test-all` - Run all test suites
+  - `make test-auth` - Authentication tests
+  - `make test-conversations` - Conversation tests
+  - `make test-response` - Response API tests
+  - `make test-media` - Media API tests
+  - `make test-mcp` - MCP tools tests
+  - `make test-e2e` - Gateway E2E tests
+
+#### Enhanced Monitoring & Observability
+- **Complete observability stack** with Docker Compose profiles:
+  - **Grafana** dashboards (http://localhost:3331, admin/admin)
+  - **Prometheus** metrics collection (http://localhost:9090)
+  - **Jaeger** distributed tracing (http://localhost:16686)
+  - **OpenTelemetry Collector** for telemetry aggregation
+- **Service-specific log viewing** - `make logs-llm-api`, `make logs-mcp`, etc.
+- **Profile-based monitoring** - `make monitor-up` to start monitoring stack
+
+#### Configuration Management
+- **Multiple environment configurations**:
+  - `config/defaults.env` - Base configuration for all environments
+  - `config/development.env` - Docker internal DNS configuration
+  - `config/testing.env` - localhost URLs for jan-cli api-test
+  - `config/secrets.env.example` - Secrets template
+- **Profile-based deployment**:
+  - `make up-full` - Full stack with all services
+  - `make up-gpu` - With GPU inference support
+  - `make up-cpu` - CPU-only inference
+  - `make monitor-up` - With monitoring stack
+
+#### Authentication Enhancements
+- **Guest authentication** - Quick access via `/llm/auth/guest-login` endpoint
+- **Keycloak OIDC integration** - Full OAuth/OIDC support
+- **Simplified token management** - Streamlined authentication flow
+
+#### Documentation Overhaul
+- **Comprehensive documentation structure**:
+  - `docs/getting-started/README.md` - Setup guides and first steps
+  - `docs/guides/` - In-depth guides:
+    - `development.md` - Complete development workflow (updated with all services)
+    - `testing.md` - Testing procedures and test suites
+    - `deployment.md` - Production deployment guide
+    - `monitoring.md` - Observability configuration
+    - `mcp-testing.md` - MCP tools testing guide
+    - `services-template.md` - Service template usage
+  - `docs/api/` - API reference:
+    - `llm-api/` - LLM API documentation
+    - `mcp-tools/` - MCP tools documentation
+  - `docs/architecture/` - System design documents
+  - `docs/conventions/` - Code standards and patterns
+  - `docs/guides/development.md#makefile-commands-reference` - Command reference (100+ commands)
+  - `config/README.md` - Configuration guide
+- **Kubernetes documentation**:
+  - `k8s/README.md` - K8s deployment overview (updated for all services)
+  - `k8s/SETUP.md` - Step-by-step setup guide (updated for response-api and media-api)
+  - Complete Helm chart for all microservices
+
+#### Kubernetes/Helm Enhancements
+- **Response API Kubernetes templates**:
+  - `k8s/jan-server/templates/response-api-deployment.yaml`
+  - `k8s/jan-server/templates/response-api-secret.yaml`
+  - `k8s/jan-server/templates/response-api-ingress.yaml`
+- **Updated Helm chart** (version 1.1.0):
+  - Added response-api configuration in all values files
+  - Fixed media-api configuration (added `apiKey` field)
+  - Updated Kong gateway routing for all services
+  - Enhanced values for development and production environments
+- **Kong API Gateway routing**:
+  - `/api/llm/*` -> llm-api:8080
+  - `/api/media/*` -> media-api:8285
+  - `/api/responses/*` -> response-api:8082
+  - `/api/mcp/*` -> mcp-tools:8091
+
+#### MCP Tools Integration
+- **Google Search** - `google_search` tool integration
+- **Web Scraping** - Web content extraction tools
+- **MCP Protocol Support** - Full Model Context Protocol implementation
+- **Serper API Integration** - Web search capabilities
+- **MCP endpoint** - `/v1/mcp` for tool interactions
+
+### Changed
+
+#### Architecture Transformation
+- **Deployment strategy**: Kubernetes-only -> **Docker Compose-first** with Kubernetes support
+- **API Gateway**: Custom Jan API Gateway -> **Kong 3.5**
+- **Authentication**: Google OAuth2 only -> **Keycloak (OIDC)** with guest auth
+- **Service structure**: Monolithic (2 services) -> **Microservices (4+ services)**
+- **Database**: PostgreSQL with read/write replicas -> **PostgreSQL 18** (simplified single instance)
+- **Inference**: Jan Inference Model (Python) -> **vLLM**
+- **MCP Framework**: Not specified -> **mark3labs/mcp-go**
+
+#### Service Organization
+- **Restructured** from `apps/` to `services/` directory:
+  - `services/llm-api/` - Core LLM orchestration (Go)
+  - `services/mcp-tools/` - MCP tools integration (Go)
+  - `services/media-api/` - Media management (Go)
+  - `services/response-api/` - Response orchestration (Go)
+  - `services/template-api/` - Service template (Go)
+- **Separated concerns** into specialized microservices
+- **All services in Go** (removed Python inference service)
+
+#### Technology Stack Updates
+- **Go version**: 1.24.6 -> **Go 1.21+**
+- **PostgreSQL**: With replicas -> **PostgreSQL 18** (single instance)
+- **API Gateway**: Custom -> **Kong 3.5**
+- **Web Framework**: Gin (remains)
+- **Monitoring**: Grafana Pyroscope -> **OpenTelemetry + Prometheus + Jaeger + Grafana**
+
+#### API Endpoints
+- **Gateway URL**: `http://localhost:8080` -> **`http://localhost:8000`** (Kong)
+- **Swagger UI**: `/api/swagger/index.html` -> **`/v1/swagger/`**
+- **Health endpoint**: `/healthcheck` -> **`/healthz`** (on each service)
+- **New endpoints**:
+  - `/v1/chat/completions` - OpenAI-compatible chat endpoint
+  - `/v1/mcp` - MCP tools endpoint
+  - `/llm/auth/guest-login` - Guest authentication
+  - `/api/media/*` - Media API routes
+  - `/api/responses/*` - Response API routes
+
+#### README.md Optimization
+- **Reduced from 345 lines to 235 lines** (-32%)
+- **Focus on quick start** - `make setup && make up-full`
+- **Better organization** with clear sections
+- **Enhanced examples** for API usage
+- **Improved documentation links**
+
+#### Developer Workflow
+- **Build commands**: Docker build -> **`make build-llm-api`**, etc.
+- **Run commands**: `./scripts/run.sh` -> **`make up-full`**
+- **Test commands**: None -> **`make test-all`** and specific test suites
+- **Log viewing**: `kubectl logs` -> **`make logs-llm-api`**
+
+### Removed
+
+#### Deprecated Features
+- **Multi-tenant organization management** - Removed organization/project-level access control
+- **PostgreSQL read/write replicas** - Simplified to single instance
+- **Google OAuth2 direct integration** - Now handled through Keycloak
+- **Python inference service** - Replaced with vLLM
+- **Database migration tools (Atlas)** - Changed migration approach
+- **Complex API key scoping** - Simplified authentication model
+- **pprof endpoints** (port 6060) - Replaced with comprehensive monitoring stack
+
+#### Cleaned Up
+- Legacy `apps/` directory structure
+- Old Jan API Gateway monolithic service
+- Custom authentication implementation
+- Kubernetes-only deployment scripts
+
+### Fixed
+
+#### Bug Fixes
+- **Media API configuration** - Added missing `apiKey` field alongside `serviceKey`
+- **Response API port** - Corrected port from 8280 to 8082 throughout documentation
+- **Kong gateway routing** - Updated to properly route all four API services
+- **Kubernetes templates** - Fixed media-api deployment to include MEDIA_API_KEY environment variable
+
+### Security
+
+#### Security Enhancements
+- **Keycloak OIDC** - Industry-standard authentication
+- **Service-level authentication** - Each service can be independently secured
+- **API key management** - Secure key handling for media and MCP services
+- **Environment variable security** - Proper secrets management with `.env` files
+
+### Migration Guide
+
+#### Breaking Changes
+This is a **major version change** with breaking changes. Organizations using v2.0.0 need to:
+
+1. **Update deployment infrastructure** - Migrate from Kubernetes-only to Docker Compose or new Helm charts
+2. **Update authentication integration** - Migrate from Google OAuth2 to Keycloak
+3. **Update API client code** - New gateway URL and routing paths
+4. **Update service architecture** - Adapt to microservices structure
+5. **Update database schema** - Apply new migrations for multiple services
+6. **Update monitoring integration** - Configure new observability stack
+
+#### Statistics
+| Metric | v2.0.0 | v0.2.0 | Change |
+|--------|--------|--------|--------|
+| **Services** | 2 | 4+ | +100% |
+| **Deployment Methods** | 1 (K8s) | 2 (Docker + K8s) | +100% |
+| **Make Commands** | ~20 | 100+ | +400% |
+| **Test Suites** | Basic | 6 collections | New |
+| **Documentation Pages** | ~5 | 20+ | +300% |
+| **Monitoring Tools** | 2 | 4 | +100% |
+| **Auth Methods** | 1 | 2 | +100% |
+
+### Performance
+
+#### Improvements
+- **Faster iteration** - Hybrid mode allows native execution with hot reload
+- **Better resource utilization** - Microservices can be scaled independently
+- **Improved developer experience** - One-command setup reduces onboarding time
+- **Enhanced observability** - Better troubleshooting with distributed tracing
+
+### Dependencies
+
+#### Updated Dependencies
+- **Kong**: 3.5
+- **Keycloak**: Latest with OIDC
+- **PostgreSQL**: 18
+- **Go**: 1.21+
+- **mark3labs/mcp-go**: Latest
+- **OpenTelemetry**: Latest
+- **Prometheus**: Latest
+- **Jaeger**: Latest
+- **Grafana**: Latest
+
+## [2.0.0] - 2025-01-07
+
+### Added
+- Consolidated Makefile structure (single file with 10 sections)
+- Hybrid development mode for faster iteration
+- MCP (Model Context Protocol) provider integration
+- Full observability stack (Prometheus, Jaeger, Grafana)
+- OpenTelemetry integration
+- Guest authentication with Keycloak token exchange
+- Comprehensive testing suite with jan-cli api-test
+- Documentation for all major features
+
+### Changed
+- Restructured project from monolithic to microservices architecture
+- Updated to PostgreSQL 16
+- Migrated to Kong 3.5 API Gateway
+- Improved Docker Compose organization with profiles
+
+### Removed
+- Modular Makefile files (consolidated into single Makefile)
+- Legacy authentication system
+
+## [1.0.0] - Initial Release
+
+### Added
+- Initial LLM API service with OpenAI-compatible endpoints
+- Basic authentication
+- Conversation and message management
+- Docker Compose deployment
+- PostgreSQL database backend
+
+---
+
+[Unreleased]: https://github.com/janhq/jan-server/compare/v2.0.0...HEAD
+[2.0.0]: https://github.com/janhq/jan-server/compare/v1.0.0...v2.0.0
+[1.0.0]: https://github.com/janhq/jan-server/releases/tag/v1.0.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000..ecec219d
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,91 @@
+# Contributing to Jan Server
+
+Thanks for taking the time to improve Jan Server! This guide explains how to propose changes, run the required checks, and keep the documentation aligned with the codebase.
+
+## Ways to Contribute
+- **Report issues**: use GitHub Issues with reproduction steps, logs, and the commit hash you tested.
+- **Feature proposals**: outline the use case, affected services, and expected APIs before opening a pull request.
+- **Code changes**: bug fixes, new functionality, refactors, and automation scripts.
+- **Docs and examples**: clarify setup steps, add API samples, or improve troubleshooting guides.
+
+## Development Workflow
+1. **Sync local environment**
+   ```bash
+   git checkout main
+   git pull origin main
+   ```
+2. **Create a feature branch**
+   ```bash
+   git checkout -b feature/<short-description>
+   ```
+3. **Bootstrap tooling**
+   ```bash
+   make env-create           # copies .env.template -> .env (idempotent)
+   make setup                # dependency check + docker network
+   ```
+4. **Pick a target service**
+   - Run everything in Docker: `make up-full`
+   - Hybrid mode for local debugging: `make hybrid-dev-api` / `make hybrid-dev-mcp`
+
+## Coding Standards
+- **Language**: Go 1.21+ across services. Use `go fmt ./...` or `make fmt` before committing.
+- **Static analysis**: run `make lint` to execute vet, golangci-lint, and other configured linters.
+- **Swagger/OpenAPI**: update specs with `make swagger` after changing HTTP handlers.
+- **Configuration**: add new env vars to `.env.template`, `config/defaults.env`, and mention them in `config/README.md`.
+- **Documentation**: update relevant guides plus `docs/index.md` when adding or moving features.
+
+## Required Test Matrix
+Run the smallest set that covers your change:
+
+| Change Type | Minimum Commands |
+|-------------|------------------|
+| Library or helper updates | `make test` |
+| API surface changes | `make test` + targeted Postman suite (for example `make test-conversations`) |
+| Cross-service or infra updates | `make test-all` |
+| Docker/Kubernetes manifests | `make up-full` (smoke) + `make health-check` |
+| Documentation-only | `make lint-docs` *(if available)* or spell/markdown checker of your choice |
+
+For MCP tooling, also run:
+```bash
+make test-mcp-integration
+```
+
+Before pushing, ensure the tree is clean:
+```bash
+go fmt ./...
+make lint
+make test
+git status -sb         # no unexpected files
+```
+
+## Commit and PR Guidelines
+- Keep commits focused; split large work into logical chunks.
+- Write descriptive messages (for example `feat(response-api): add SSE streaming`).
+- Reference the related issue in the pull request body (`Fixes #123`).
+- Include screenshots or log excerpts when they clarify behaviour.
+- For documentation-heavy PRs, mention which guides or runbooks were updated.
+
+## Documentation Expectations
+- `README.md` must stay aligned with the default Docker Compose workflow.
+- `docs/getting-started/README.md` is the canonical setup guide; keep it in sync with the Makefile targets.
+- `docs/index.md` acts as the sitemap; add or move entries there whenever you add documentation elsewhere.
+- If you introduce a new service or API, create or update:
+  - `docs/services.md`
+  - `docs/api/<service>/README.md`
+  - Per-service `services/<name>/README.md`
+
+## Testing Secrets
+Do **not** commit real keys or tokens. Place new variables in `.env.template` and document how to obtain them. For CI-only secrets, describe the expectation inside `config/secrets.env.example`.
+
+## Opening the Pull Request
+1. Push your branch: `git push origin feature/<short-description>`
+2. Create a PR against `main`
+3. Fill out the PR template, including:
+   - Motivation / context
+   - Testing evidence (commands + output summary)
+   - Docs updated checklist
+4. Respond to review feedback promptly; squash or rebase only when requested.
+
+## Code of Conduct
+Be respectful, stay constructive, and follow project maintainers' guidance. By participating you agree to uphold the community standards.
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..dbdb7d20
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,823 @@
+# ============================================================================================================
+# JAN SERVER MAKEFILE
+# ============================================================================================================
+#
+# A comprehensive build system for Jan Server - a microservices-based LLM API platform
+# with MCP (Model Context Protocol) tool integration.
+#
+# ============================================================================================================
+# QUICK START
+# ============================================================================================================
+#
+#   make quickstart     - Interactive setup and run (prompts for API keys, starts all services)
+#   make setup          - Initial project setup (dependencies, networks, .env)
+#   make cli-install    - Install jan-cli tool globally
+#   make build-all      - Build all Docker images
+#   make up-full        - Start all services (infrastructure + API + MCP)
+#   make dev-full       - Start all services with host.docker.internal support (for testing)
+#   make health-check   - Check if all services are healthy
+#   make test-all       - Run all integration tests
+#   make stop           - Stop all services (keeps containers & volumes)
+#   make down           - Stop and remove containers (keeps volumes)
+#   make down-clean     - Stop, remove containers and volumes (full cleanup)
+#
+# ============================================================================================================
+# MAKEFILE STRUCTURE
+# ============================================================================================================
+#
+# This Makefile is organized into the following sections:
+#
+#   1. SETUP & ENVIRONMENT       - Initial setup and dependency checks
+#   3. BUILD TARGETS             - Building services, config generation, jan-cli, Swagger documentation, code quality
+#   4. SERVICE MANAGEMENT        - Starting/stopping services (infra, API, MCP, vLLM, full stack)
+#   5. DATABASE MANAGEMENT       - DB operations, migrations, backups, restore
+#   6. MONITORING                - Observability stack (Prometheus, Grafana, Jaeger)
+#      TESTING                   - Integration tests with API Test (placed after MONITORING; no SECTION banner)
+#   8. DEVELOPER UTILITIES       - Development helpers (dev-full mode)
+#   9. HEALTH CHECKS             - Service health validation
+#
+# Documentation:
+#   docs/guides/development.md - Complete development guide
+#   README.md                  - Project overview and quick reference
+#
+# ============================================================================================================
+# VARIABLES
+# ============================================================================================================
+
+# Base Docker Compose invocation used by the service, database and health targets.
+COMPOSE = docker compose
+# Compose invocation that layers docker-compose.dev-full.yml over docker-compose.yml (dev-full* targets).
+COMPOSE_DEV_FULL = docker compose -f docker-compose.yml -f docker-compose.dev-full.yml
+# Compose invocation for docker/observability.yml (used by monitor-logs).
+MONITOR_COMPOSE = docker compose -f docker/observability.yml
+# API test runner: jan-cli through PowerShell on Windows, through bash elsewhere.
+ifeq ($(OS),Windows_NT)
+API_TEST = powershell -ExecutionPolicy Bypass -File jan-cli.ps1 api-test run
+else
+API_TEST = bash jan-cli.sh api-test run
+endif
+# Test collection files handed to $(API_TEST) by the test-* targets.
+API_TEST_AUTH_COLLECTION = tests/automation/auth-postman-scripts.json
+API_TEST_CONVERSATION_COLLECTION = tests/automation/conversations-postman-scripts.json
+API_TEST_RESPONSES_COLLECTION = tests/automation/responses-postman-scripts.json
+API_TEST_MEDIA_COLLECTION = tests/automation/media-postman-scripts.json
+API_TEST_MCP_COLLECTION = tests/automation/mcp-postman-scripts.json
+API_TEST_MEMORY_COLLECTION = tests/automation/memory-postman-scripts.json
+API_TEST_E2E_COLLECTION = tests/automation/test-all.postman.json
+
+# Placeholder media keys; `?=` only assigns when the variable is not already set
+# (environment or command line). MEDIA_SERVICE_KEY is passed to test-media and test-e2e.
+MEDIA_SERVICE_KEY ?= changeme-media-key
+MEDIA_API_KEY ?= changeme-media-key
+
+# Embedding test settings: EMBEDDING_SERVICE_URL is used when non-empty; otherwise the URL
+# falls back to http://localhost:8091 and the memory-mock profile and bge-m3 service are added.
+EMBED_TEST_URL = $(if $(strip $(EMBEDDING_SERVICE_URL)),$(strip $(EMBEDDING_SERVICE_URL)),http://localhost:8091)
+EMBED_TEST_PROFILES = --profile infra --profile memory
+EMBED_TEST_SERVICES = api-db memory-tools
+ifeq ($(strip $(EMBEDDING_SERVICE_URL)),)
+EMBED_TEST_PROFILES += --profile memory-mock
+EMBED_TEST_SERVICES += bge-m3
+endif
+
+# ============================================================================================================
+# SECTION 1: SETUP & ENVIRONMENT
+# ============================================================================================================
+
+.PHONY: setup check-deps install-deps setup-and-run quickstart
+
+setup-and-run quickstart: ## Interactive setup and run through jan-cli (passes --with-memory-tools)
+	@echo "Starting interactive setup and run (includes Memory Tools prompt)..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 setup-and-run --with-memory-tools
+else
+	@bash jan-cli.sh setup-and-run --with-memory-tools
+endif
+
+setup: ## Run `jan-cli dev setup` (PowerShell wrapper on Windows, bash wrapper elsewhere)
+	@echo "Running setup via jan-cli..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 dev setup
+else
+	@bash jan-cli.sh dev setup
+endif
+
+check-deps: ## Print a message for each of Docker, Docker Compose V2 and Go that is missing; never fails
+	@echo "Checking dependencies..."
+	@docker --version >/dev/null 2>&1 || echo "Docker not found"
+	@docker compose version >/dev/null 2>&1 || echo "Docker Compose V2 not found"
+	@go version >/dev/null 2>&1 || echo "Go not found (optional)"
+	@echo "Dependency check complete"
+
+install-deps: ## Install the swag CLI (github.com/swaggo/swag/cmd/swag@latest) with go install
+	@echo "Installing development dependencies..."
+	@go install github.com/swaggo/swag/cmd/swag@latest
+	@echo " Development dependencies installed"
+
+
+# ============================================================================================================
+# SECTION 3: BUILD TARGETS
+# ============================================================================================================
+
+.PHONY: build build-api build-mcp build-memory build-all clean-build build-llm-api build-media-api build-response-api build-memory-tools
+
+build: build-api build-mcp build-memory ## Build every service binary via the per-service targets
+
+build-api: build-llm-api build-media-api build-response-api ## Build the llm-api, media-api and response-api binaries
+
+build-memory: build-memory-tools ## Alias for build-memory-tools
+
+build-llm-api: ## go build services/llm-api/cmd/server into bin/llm-api (.exe suffix on Windows)
+	@echo "Building LLM API..."
+	@cd services/llm-api && go build -o bin/llm-api$(if $(filter Windows_NT,$(OS)),.exe,) ./cmd/server
+	@echo " LLM API built: services/llm-api/bin/llm-api"
+
+build-media-api: ## go build services/media-api/cmd/server into bin/media-api (.exe suffix on Windows)
+	@echo "Building Media API..."
+	@cd services/media-api && go build -o bin/media-api$(if $(filter Windows_NT,$(OS)),.exe,) ./cmd/server
+	@echo " Media API built: services/media-api/bin/media-api"
+
+build-response-api: ## go build services/response-api/cmd/server into bin/response-api (.exe suffix on Windows)
+	@echo "Building Response API..."
+	@cd services/response-api && go build -o bin/response-api$(if $(filter Windows_NT,$(OS)),.exe,) ./cmd/server
+	@echo " Response API built: services/response-api/bin/response-api"
+
+build-mcp: ## go build the services/mcp-tools package into bin/mcp-tools (.exe suffix on Windows)
+	@echo "Building MCP Tools..."
+	@cd services/mcp-tools && go build -o bin/mcp-tools$(if $(filter Windows_NT,$(OS)),.exe,) .
+	@echo " MCP Tools built: services/mcp-tools/bin/mcp-tools"
+
+build-memory-tools: ## go build services/memory-tools/cmd/server into bin/memory-tools (.exe suffix on Windows)
+	@echo "Building Memory Tools..."
+	@cd services/memory-tools && go build -o bin/memory-tools$(if $(filter Windows_NT,$(OS)),.exe,) ./cmd/server
+	@echo " Memory Tools built: services/memory-tools/bin/memory-tools"
+
+build-all: ## Build the Docker images selected by the `full` compose profile
+	@echo "Building all Docker images..."
+	$(COMPOSE) --profile full build
+	@echo " All services built"
+.PHONY: config-generate config-test config-drift-check config-help
+
+config-generate: ## Run `go run . config generate` inside cmd/jan-cli to regenerate config files
+	@echo "Generating configuration files from Go structs..."
+	@cd cmd/jan-cli && go run . config generate
+	@echo " Configuration files generated:"
+	@echo "  - config/defaults.yaml (auto-generated)"
+	@echo "  - config/schema/*.schema.json (auto-generated)"
+
+config-drift-check: ## Regenerate config files and fail when `git diff` reports changes under config/
+	@echo "Checking for configuration drift..."
+	@cd cmd/jan-cli && go run . config generate
+# The same command line serves Windows and POSIX shells, so no OS conditional is needed.
+	@git diff --exit-code config/ && echo " No configuration drift detected" || (echo " Configuration drift detected! Run 'make config-generate' to update." && exit 1)
+
+config-help: ## Print a summary of the configuration management targets
+	@echo "Configuration Management Targets:"
+	@echo "  config-generate      Generate config files from Go structs (YAML, JSON schema)"
+	@echo "  config-drift-check  Verify generated files are in sync with code"
+	@echo ""
+	@echo "Files auto-generated by config-generate:"
+	@echo "  - config/defaults.yaml                Default configuration values"
+	@echo "  - config/schema/*.schema.json         JSON Schemas for validation"
+	@echo ""
+	@echo "Usage:"
+	@echo "  1. Update pkg/config/types.go with your configuration changes"
+	@echo "  2. Run 'make config-generate' to regenerate all files"
+	@echo "  3. Use 'make config-drift-check' in CI to prevent drift  "
+
+# --- CLI Tool ---
+
+.PHONY: cli-install cli-build cli-clean
+
+cli-build: ## go build cmd/jan-cli into cmd/jan-cli/jan-cli (.exe suffix on Windows)
+	@echo "Building jan-cli..."
+	@cd cmd/jan-cli && go build -o jan-cli$(if $(filter Windows_NT,$(OS)),.exe,) .
+	@echo " jan-cli built successfully"
+
+cli-install: cli-build ## Build jan-cli, then run its own `install` subcommand
+	@echo "Installing jan-cli to local bin directory..."
+ifeq ($(OS),Windows_NT)
+	@cmd/jan-cli/jan-cli.exe install
+else
+	@cmd/jan-cli/jan-cli install
+endif
+
+cli-clean: ## Delete the built jan-cli binaries (both the plain and .exe names)
+	@echo "Cleaning jan-cli binary..."
+	@rm -f cmd/jan-cli/jan-cli cmd/jan-cli/jan-cli.exe
+	@echo " jan-cli binary removed"
+
+# --- Swagger Documentation ---
+
+.PHONY: swagger swagger-llm-api swagger-media-api swagger-mcp-tools swagger-response-api swagger-combine swagger-install
+
+swagger: ## Run `jan-cli swagger generate --combine` for all services
+	@echo "Generating Swagger documentation for all services..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 swagger generate --combine
+else
+	@bash jan-cli.sh swagger generate --combine
+endif
+
+swagger-llm-api: ## Run `jan-cli swagger generate -s llm-api`
+	@echo "Generating Swagger for llm-api service..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 swagger generate -s llm-api
+else
+	@bash jan-cli.sh swagger generate -s llm-api
+endif
+	@echo " llm-api swagger generated at services/llm-api/docs/swagger"
+
+swagger-media-api: ## Run `jan-cli swagger generate -s media-api`
+	@echo "Generating Swagger for media-api service..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 swagger generate -s media-api
+else
+	@bash jan-cli.sh swagger generate -s media-api
+endif
+	@echo " media-api swagger generated at services/media-api/docs/swagger"
+
+swagger-mcp-tools: ## Run `jan-cli swagger generate -s mcp-tools`
+	@echo "Generating Swagger for mcp-tools service..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 swagger generate -s mcp-tools
+else
+	@bash jan-cli.sh swagger generate -s mcp-tools
+endif
+	@echo " mcp-tools swagger generated at services/mcp-tools/docs/swagger"
+
+swagger-response-api: ## Run `jan-cli swagger generate -s response-api`
+	@echo "Generating Swagger for response-api service..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 swagger generate -s response-api
+else
+	@bash jan-cli.sh swagger generate -s response-api
+endif
+	@echo " response-api swagger generated at services/response-api/docs/swagger"
+
+swagger-combine: ## Run `jan-cli swagger combine` to merge the generated swagger specs
+	@echo "Merging LLM API and MCP Tools swagger specs..."
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 swagger combine
+else
+	@bash jan-cli.sh swagger combine
+endif
+	@echo " Combined swagger created for unified API documentation"
+
+swagger-install: ## Install the swag CLI with go install (same command as install-deps)
+	@echo "Installing swagger tools..."
+	@go install github.com/swaggo/swag/cmd/swag@latest
+	@echo " swag installed successfully"
+
+# --- Code Quality ---
+
+.PHONY: fmt lint vet
+
+fmt: ## Rewrite Go sources in place with gofmt -w over every package directory from `go list`
+	@echo "Formatting Go code..."
+	@gofmt -w $$(go list -f '{{.Dir}}' ./...)
+	@echo " Code formatted"
+
+lint: ## Run `go vet ./...` (currently the same check as the vet target)
+	@echo "Running linter..."
+	@go vet ./...
+	@echo " Linting complete"
+
+vet: ## Run `go vet ./...`
+	@echo "Running go vet..."
+	@go vet ./...
+	@echo " Vet complete"
+
+# ============================================================================================================
+# SECTION 4: SERVICE MANAGEMENT
+# ============================================================================================================
+
+# --- Infrastructure Services ---
+
+.PHONY: up-infra down-infra restart-infra logs-infra
+
+up-infra: ## Start the `infra` compose profile in the background and list its endpoints
+	@echo "Starting infrastructure services..."
+	$(COMPOSE) --profile infra up -d
+	@echo " Infrastructure services started"
+	@echo ""
+	@echo "Services:"
+	@echo "  - PostgreSQL: localhost:5432"
+	@echo "  - Keycloak:   http://localhost:8085"
+	@echo "  - Kong:       http://localhost:8000"
+
+down-infra: ## `docker compose down` for the `infra` profile
+	$(COMPOSE) --profile infra down
+
+restart-infra: ## Restart the `infra` profile services
+	$(COMPOSE) --profile infra restart
+
+logs-infra: ## Follow logs of the `infra` profile services
+	$(COMPOSE) --profile infra logs -f
+
+# --- LLM API Service ---
+
+.PHONY: up-api down-api restart-api logs-api logs-media-api
+
+up-api: ## Start the `api` compose profile in the background and list its endpoints
+	@echo "Starting LLM API..."
+	$(COMPOSE) --profile api up -d
+	@echo " API services started:"
+	@echo "   - LLM API:   http://localhost:8080"
+	@echo "   - Media API: http://localhost:8285"
+
+down-api: ## `docker compose down` for the `api` profile
+	$(COMPOSE) --profile api down
+
+restart-api: ## Restart the `api` profile services
+	$(COMPOSE) --profile api restart
+
+logs-api: ## Follow logs of the llm-api service
+	$(COMPOSE) --profile api logs -f llm-api
+
+logs-media-api: ## Follow logs of the media-api service
+	$(COMPOSE) --profile api logs -f media-api
+
+# --- MCP Services ---
+
+.PHONY: up-mcp down-mcp restart-mcp logs-mcp
+
+up-mcp: ## Start the `mcp` compose profile in the background, list endpoints and a sample tools/list call
+	@echo "Starting MCP services..."
+	$(COMPOSE) --profile mcp up -d
+	@echo " MCP services started"
+	@echo ""
+	@echo "Services:"
+	@echo "  - MCP Tools:      http://localhost:8091"
+	@echo "  - SearXNG:        http://localhost:8086"
+	@echo "  - Vector Store:   http://localhost:3015"
+	@echo "  - SandboxFusion:  http://localhost:3010"
+	@echo ""
+	@echo "Test MCP tools:"
+	@echo "  curl -X POST http://localhost:8091/v1/mcp -H 'Content-Type: application/json' -d '{\"jsonrpc\":\"2.0\",\"method\":\"tools/list\",\"id\":1}'"
+
+down-mcp: ## `docker compose down` for the `mcp` profile
+	$(COMPOSE) --profile mcp down
+
+restart-mcp: ## Restart the `mcp` profile services
+	$(COMPOSE) --profile mcp restart
+
+logs-mcp: ## Follow logs of the `mcp` profile services
+	$(COMPOSE) --profile mcp logs -f
+
+# --- vLLM Inference Services ---
+
+.PHONY: up-vllm-gpu up-vllm-cpu down-vllm logs-vllm
+
+up-vllm-gpu: ## Start the `gpu` compose profile in the background and print a sample models request
+	@echo "Starting vLLM GPU inference..."
+	$(COMPOSE) --profile gpu up -d
+	@echo " vLLM GPU started at http://localhost:8101"
+	@echo ""
+	@echo "Test inference:"
+	@echo "  curl http://localhost:8101/v1/models"
+
+up-vllm-cpu: ## Start the `cpu` compose profile in the background and print a sample models request
+	@echo "Starting vLLM CPU inference..."
+	$(COMPOSE) --profile cpu up -d
+	@echo " vLLM CPU started at http://localhost:8101"
+	@echo ""
+	@echo "Test inference:"
+	@echo "  curl http://localhost:8101/v1/models"
+
+down-vllm: ## `docker compose down` with both the `gpu` and `cpu` profiles selected
+	@echo "Stopping vLLM services..."
+	$(COMPOSE) --profile gpu --profile cpu down
+
+logs-vllm: ## Follow logs with both the `gpu` and `cpu` profiles selected
+	$(COMPOSE) --profile gpu --profile cpu logs -f
+
+# --- Full Stack ---
+
+.PHONY: up-full down-full restart-full logs stop down down-clean dev-full dev-full-down dev-full-stop
+
+up-full: ## Start full stack (all services in Docker); no --profile flag is passed, see the COMPOSE_PROFILES message below
+	@echo "Starting services (based on COMPOSE_PROFILES in .env)..."
+	$(COMPOSE) up -d
+	@echo " Services started"
+	@echo ""
+	@echo "Infrastructure:"
+	@echo "  - PostgreSQL: localhost:5432"
+	@echo "  - Keycloak:   http://localhost:8085 (admin/admin)"
+	@echo "  - Kong:       http://localhost:8000"
+	@echo ""
+	@echo "Services:"
+	@echo "  - LLM API:        http://localhost:8080"
+	@echo "  - MCP Tools:      http://localhost:8091"
+	@echo "  - Vector Store:   http://localhost:3015"
+	@echo "  - vLLM (if enabled): http://localhost:8101"
+	@echo ""
+	@echo "Note: vLLM only starts if using local GPU provider (COMPOSE_PROFILES=full)"
+	@echo "To start monitoring stack: make monitor-up"
+
+down-full: ## `docker compose down` without messages (same compose command as `down`)
+	$(COMPOSE) down
+
+restart-full: ## `docker compose restart`
+	$(COMPOSE) restart
+
+stop: ## `docker compose stop`: stop services and keep their containers
+	@echo "Stopping all services (containers will be preserved)..."
+	$(COMPOSE) stop
+	@echo " All services stopped (containers preserved)"
+	@echo ""
+	@echo "To restart: make up-full"
+	@echo "To remove containers: make down"
+
+down: ## `docker compose down`: stop and remove containers, keep volumes
+	@echo "Stopping and removing all containers (volumes will be preserved)..."
+	$(COMPOSE) down
+	@echo " All containers stopped and removed (volumes preserved)"
+	@echo ""
+	@echo "To restart: make up-full"
+	@echo "To clean volumes: make down-clean"
+
+down-clean: ## `docker compose down -v`: remove containers and volumes
+	@echo "Stopping and removing all containers and volumes..."
+	$(COMPOSE) down -v
+	@echo " All containers and volumes removed (full cleanup)"
+	@echo ""
+	@echo "To restart: make up-full"
+
+logs: ## Follow logs of all compose services
+	$(COMPOSE) logs -f
+
+# --- Individual Service Control ---
+
+.PHONY: restart-kong restart-keycloak restart-postgres
+
+restart-kong: ## Restart the kong service, then pause 3 seconds before reporting
+	@echo "Restarting Kong..."
+	$(COMPOSE) restart kong
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "Start-Sleep -Seconds 3"
+else
+	@sleep 3
+endif
+	@echo " Kong restarted"
+
+restart-keycloak: ## Restart the keycloak service
+	$(COMPOSE) restart keycloak
+
+restart-postgres: ## Restart the api-db service
+	$(COMPOSE) restart api-db
+
+# ============================================================================================================
+# SECTION 5: DATABASE MANAGEMENT
+# ============================================================================================================
+
+.PHONY: db-reset db-migrate db-console db-backup db-restore db-dump
+
+db-reset: ## DESTRUCTIVE: stop and remove api-db, then delete the jan-server_api-db-data volume
+	@echo "  WARNING: This will delete all database data!"
+	@echo "Stopping and removing API database..."
+	$(COMPOSE) stop api-db
+	$(COMPOSE) rm -f api-db
+# Volume removal is best effort. cmd.exe discards stderr with `2>nul` and accepts the bare
+# apostrophe; a POSIX shell needs /dev/null and a quoted message, because an unquoted
+# apostrophe is an unterminated quote there.
+ifeq ($(OS),Windows_NT)
+	@docker volume rm jan-server_api-db-data 2>nul || echo Volume removed or didn't exist
+else
+	@docker volume rm jan-server_api-db-data 2>/dev/null || echo "Volume removed or didn't exist"
+endif
+	@echo " Database reset complete. Run 'make up-api' to restart."
+
+db-migrate: ## Run `/app/llm-api migrate` inside the llm-api container
+	@echo "Running database migrations..."
+	$(COMPOSE) exec llm-api /app/llm-api migrate
+	@echo " Migrations complete"
+
+db-console: ## Open psql in the api-db container as jan_user on database jan_llm_api
+	@echo "Opening database console..."
+	$(COMPOSE) exec api-db psql -U jan_user -d jan_llm_api
+
+db-backup: ## pg_dump jan_llm_api into backups/db_backup_<YYYYmmdd_HHMMSS>.sql
+	@echo "Backing up database..."
+	@mkdir -p backups
+	@$(COMPOSE) exec -T api-db pg_dump -U jan_user jan_llm_api > backups/db_backup_$$(date +%Y%m%d_%H%M%S).sql
+	@echo " Database backed up to backups/"
+
+db-restore: ## Load the SQL dump given as FILE=<path> into jan_llm_api through psql
+	@if [ -z "$(FILE)" ]; then \
+		echo " FILE variable required. Usage: make db-restore FILE=backups/db_backup.sql"; \
+		exit 1; \
+	fi
+	@echo "Restoring database from $(FILE)..."
+# Redirect the dump straight into psql: a missing or unreadable FILE then fails the recipe
+# (a `cat FILE | psql` pipeline reports only psql's status), and paths with spaces work.
+	@$(COMPOSE) exec -T api-db psql -U jan_user -d jan_llm_api < "$(FILE)"
+	@echo " Database restored"
+
+db-dump: ## Print the schema (pg_dump --schema-only) of jan_llm_api to stdout
+	@echo "Dumping database schema..."
+# -T (no pseudo-TTY) matches db-backup/db-restore, so the dump can be piped or redirected.
+	@$(COMPOSE) exec -T api-db pg_dump -U jan_user -d jan_llm_api --schema-only
+
+# ============================================================================================================
+# SECTION 6: MONITORING
+# ============================================================================================================
+
+.PHONY: monitor-up monitor-down monitor-logs monitor-clean
+
+monitor-up: ## Run `jan-cli monitor up`
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 monitor up
+else
+	@bash jan-cli.sh monitor up
+endif
+
+monitor-down: ## Run `jan-cli monitor down`
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 monitor down
+else
+	@bash jan-cli.sh monitor down
+endif
+
+monitor-logs: ## Follow logs of the services defined in docker/observability.yml
+	$(MONITOR_COMPOSE) logs -f
+
+monitor-clean: ## Run `jan-cli monitor reset`
+ifeq ($(OS),Windows_NT)
+	@powershell -ExecutionPolicy Bypass -File jan-cli.ps1 monitor reset
+else
+	@bash jan-cli.sh monitor reset
+endif
+
+# --- Advanced Monitoring Targets (from monitoring improvement plan) ---
+
+
+
+# --- Integration Tests (API Test) ---
+
+.PHONY: test-all test-auth test-conversations test-response test-media test-mcp-integration test-memory test-e2e api-test-debug
+
+test-all: test-auth test-conversations test-response test-media test-mcp-integration test-memory test-e2e ## Run every integration suite as prerequisites
+	@echo ""
+	@echo " All integration tests passed!"
+
+test-auth: ## Run the auth collection against Kong (localhost:8000) and Keycloak (localhost:8085)
+	@echo "Running authentication tests..."
+	@$(API_TEST) $(API_TEST_AUTH_COLLECTION) \
+		--env-var "kong_url=http://localhost:8000" \
+		--env-var "keycloak_base_url=http://localhost:8085" \
+		--env-var "keycloak_admin=admin" \
+		--env-var "keycloak_admin_password=admin" \
+		--env-var "realm=jan" \
+		--env-var "client_id_public=jan-client" \
+		--verbose \
+		--reporters cli
+	@echo " Authentication tests passed"
+
+test-conversations: ## Run the conversations collection with the same Kong/Keycloak settings as test-auth
+	@echo "Running conversation API tests..."
+	@$(API_TEST) $(API_TEST_CONVERSATION_COLLECTION) \
+		--env-var "kong_url=http://localhost:8000" \
+		--env-var "keycloak_base_url=http://localhost:8085" \
+		--env-var "keycloak_admin=admin" \
+		--env-var "keycloak_admin_password=admin" \
+		--env-var "realm=jan" \
+		--env-var "client_id_public=jan-client" \
+		--verbose \
+		--reporters cli
+	@echo " Conversation API tests passed"
+
+test-response: ## Run the responses collection against localhost:8000/responses and /mcp
+	@echo "Running response API tests..."
+	@$(API_TEST) $(API_TEST_RESPONSES_COLLECTION) \
+		--env-var "response_api_url=http://localhost:8000/responses" \
+		--env-var "mcp_tools_url=http://localhost:8000/mcp" \
+		--verbose \
+		--reporters cli
+	@echo " Response API tests passed"
+
+test-media: ## Run the media collection against localhost:8000/media using MEDIA_SERVICE_KEY
+	@echo "Running media API tests..."
+	@$(API_TEST) $(API_TEST_MEDIA_COLLECTION) \
+		--env-var "media_api_url=http://localhost:8000/media" \
+		--env-var "media_service_key=$(MEDIA_SERVICE_KEY)" \
+		--verbose \
+		--reporters cli
+	@echo " Media API tests passed"
+
+test-mcp-integration: ## Run the MCP collection against localhost:8000 and localhost:8000/mcp
+	@echo "Running MCP integration tests..."
+	@$(API_TEST) $(API_TEST_MCP_COLLECTION) \
+		--env-var "kong_url=http://localhost:8000" \
+		--env-var "mcp_tools_url=http://localhost:8000/mcp" \
+		--verbose \
+		--reporters cli
+	@echo " MCP integration tests passed"
+
+test-memory: ## Run the memory collection against localhost:8090 (embedding_url localhost:8091) with fixed test IDs
+	@echo "Running memory-tools integration tests..."
+	@$(API_TEST) $(API_TEST_MEMORY_COLLECTION) \
+		--env-var "base_url=http://localhost:8090" \
+		--env-var "embedding_url=http://localhost:8091" \
+		--env-var "user_id=user_test_001" \
+		--env-var "project_id=proj_test_001" \
+		--env-var "conversation_id=conv_test_001" \
+		--verbose \
+		--reporters cli
+	@echo " Memory-tools integration tests passed"
+
+test-e2e: ## Run the end-to-end collection through the gateway at localhost:8000
+	@echo "Running gateway end-to-end tests..."
+	@$(API_TEST) $(API_TEST_E2E_COLLECTION) \
+		--env-var "gateway_url=http://localhost:8000" \
+		--env-var "media_api_url=http://localhost:8000/media" \
+		--env-var "response_api_url=http://localhost:8000/responses" \
+		--env-var "mcp_tools_url=http://localhost:8000/mcp" \
+		--env-var "media_service_key=$(MEDIA_SERVICE_KEY)" \
+		--verbose \
+		--reporters cli
+	@echo " Gateway end-to-end tests passed"
+
+api-test-debug: ## Run the auth collection with --verbose and without the --reporters flag
+	@echo "Running authentication tests with debug output..."
+# $(API_TEST) already selects the Windows or POSIX runner, so one command line serves both.
+	@$(API_TEST) $(API_TEST_AUTH_COLLECTION) \
+		--env-var "kong_url=http://localhost:8000" \
+		--env-var "keycloak_base_url=http://localhost:8085" \
+		--env-var "keycloak_admin=admin" \
+		--env-var "keycloak_admin_password=admin" \
+		--env-var "realm=jan" \
+		--env-var "client_id_public=jan-client" \
+		--verbose
+
+
+# ============================================================================================================
+# SECTION 8: DEVELOPER UTILITIES
+# ============================================================================================================
+
+# --- Development Full Stack (with host.docker.internal support) ---
+
+.PHONY: dev-full dev-full-stop dev-full-down
+
+dev-full: ## Start development full stack with host.docker.internal support
+	@echo "Starting development full stack with host.docker.internal support..."
+	@echo ""
+	@echo "This mode allows you to:"
+	@echo "  1. Stop any Docker service: docker compose stop <service>"
+	@echo "  2. Run it manually on host for debugging"
+	@echo "  3. Kong will automatically route to host.docker.internal"
+	@echo ""
+	$(COMPOSE_DEV_FULL) --profile full up -d
+	@echo ""
+	@echo " Development full stack started!"
+	@echo ""
+	@echo "Infrastructure:"
+	@echo "  - PostgreSQL: localhost:5432"
+	@echo "  - Keycloak:   http://localhost:8085 (admin/admin)"
+	@echo "  - Kong:       http://localhost:8000 (with upstreams to host)"
+	@echo ""
+	@echo "Services (running in Docker):"
+	@echo "  - LLM API:        http://localhost:8080"
+	@echo "  - Media API:      http://localhost:8285"
+	@echo "  - Response API:   http://localhost:8082"
+	@echo "  - MCP Tools:      http://localhost:8091"
+	@echo "  - SearXNG:        http://localhost:8086"
+	@echo "  - Vector Store:   http://localhost:3015"
+	@echo "  - SandboxFusion:  http://localhost:3010"
+	@echo ""
+	@echo "To run a service manually on host:"
+	@echo "  1. Stop Docker service:"
+	@echo "     docker compose stop llm-api"
+	@echo ""
+	@echo "  2. Run on host:"
+# The jan-cli invocation is the same on every OS, so it is printed unconditionally.
+	@echo "     jan-cli dev run llm-api"
+	@echo ""
+	@echo "  3. Kong will automatically route requests to your host service"
+	@echo ""
+	@echo "Check service routing: curl http://localhost:8000/healthz"
+	@echo ""
+	@echo "Documentation: docs/guides/dev-full-mode.md"
+
+dev-full-stop: ## `docker compose stop` for the dev-full compose files with the `full` profile
+	@echo "Stopping dev-full services..."
+	$(COMPOSE_DEV_FULL) --profile full stop
+	@echo " Dev-full services stopped"
+
+dev-full-down: ## `docker compose down` for the dev-full compose files with the `full` profile
+	@echo "Stopping and removing dev-full containers..."
+	$(COMPOSE_DEV_FULL) --profile full down
+	@echo " Dev-full containers removed"
+
+# ============================================================================================================
+# SECTION 9: HEALTH CHECKS
+# ============================================================================================================
+
+.PHONY: health-check health-api health-mcp health-infra
+
+health-check: ## Probe every service endpoint and print one healthy/unhealthy line per service
+ifeq ($(OS),Windows_NT)
+	@echo ============================================
+	@echo Checking All Services Health Status
+	@echo ============================================
+	@echo [Infrastructure Services]
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8085 -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  Keycloak:   healthy' } catch { Write-Host '  Keycloak:   unhealthy' }"
+	@powershell -Command "try { $$response = Invoke-WebRequest -Uri http://localhost:8000 -UseBasicParsing -TimeoutSec 2 -ErrorAction SilentlyContinue; if ($$response.StatusCode -ge 200 -and $$response.StatusCode -lt 500) { Write-Host '  Kong:       healthy' } else { Write-Host '  Kong:       unhealthy' } } catch { try { if ($$PSItem.Exception.Response.StatusCode.Value__ -eq 404) { Write-Host '  Kong:       healthy' } else { Write-Host '  Kong:       unhealthy' } } catch { Write-Host '  Kong:       unhealthy' } }"
+	@powershell -Command "try { $$null = docker compose exec -T api-db pg_isready -U jan_user 2>&1 | Out-Null; if ($$LASTEXITCODE -eq 0) { Write-Host '  PostgreSQL: healthy' } else { Write-Host '  PostgreSQL: unhealthy' } } catch { Write-Host '  PostgreSQL: unhealthy' }"
+	@echo.
+	@echo [API Services]
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8080/healthz -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  LLM API:    healthy' } catch { Write-Host '  LLM API:    unhealthy' }"
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8285/healthz -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  Media API:  healthy' } catch { try { if ($$PSItem.Exception.Response.StatusCode.Value__ -eq 401) { Write-Host '  Media API:  healthy' } else { Write-Host '  Media API:  unhealthy' } } catch { Write-Host '  Media API:  unhealthy' } }"
+	@echo.
+	@echo [MCP Services]
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8091/healthz -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  MCP Tools:      healthy' } catch { Write-Host '  MCP Tools:      unhealthy' }"
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:3015/healthz -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  Vector Store:   healthy' } catch { Write-Host '  Vector Store:   unhealthy' }"
+	@echo.
+	@echo [Optional Services - may show unhealthy if disabled]
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8086 -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  SearXNG:        healthy' } catch { Write-Host '  SearXNG:        not running' }"
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:3010 -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  SandboxFusion:  healthy' } catch { Write-Host '  SandboxFusion:  not running' }"
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8101/v1/models -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host '  vLLM:           healthy' } catch { Write-Host '  vLLM:           not running' }"
+	@echo ============================================
+else
+	@echo "============================================"
+	@echo "Checking All Services Health Status"
+	@echo "============================================"
+	@echo ""
+	@echo "[Infrastructure Services]"
+	@curl -sf http://localhost:8085 >/dev/null && echo "  Keycloak:   healthy" || echo "  Keycloak:   unhealthy"
+# Kong is healthy on a successful response or on its "no Route matched" reply (the Windows
+# branch likewise accepts 404); the if/else guarantees exactly one status line is printed.
+	@if curl -sf --max-time 2 http://localhost:8000 >/dev/null 2>&1 || curl -s --max-time 2 http://localhost:8000 2>&1 | grep -q "no Route matched"; then echo "  Kong:       healthy"; else echo "  Kong:       unhealthy"; fi
+	@$(COMPOSE) exec -T api-db pg_isready -U jan_user >/dev/null 2>&1 && echo "  PostgreSQL: healthy" || echo "  PostgreSQL: unhealthy"
+	@echo ""
+	@echo "[API Services]"
+	@curl -sf http://localhost:8080/healthz >/dev/null && echo "  LLM API:    healthy" || echo "  LLM API:    unhealthy"
+# -f makes HTTP error statuses fail the first probe, so only a success or an explicit 401
+# counts as healthy (the Windows branch applies the same rule).
+	@if curl -sf http://localhost:8285/healthz >/dev/null || [ "$$(curl -s -o /dev/null -w '%{http_code}' http://localhost:8285/healthz)" = "401" ]; then echo "  Media API:  healthy"; else echo "  Media API:  unhealthy"; fi
+	@echo ""
+	@echo "[MCP Services]"
+	@curl -sf http://localhost:8091/healthz >/dev/null && echo "  MCP Tools:      healthy" || echo "  MCP Tools:      unhealthy"
+	@curl -sf http://localhost:3015/healthz >/dev/null && echo "  Vector Store:   healthy" || echo "  Vector Store:   unhealthy"
+	@echo ""
+	@echo "[Optional Services - may show 'not running' if disabled]"
+	@curl -sf http://localhost:8086 >/dev/null && echo "  SearXNG:        healthy" || echo "  SearXNG:        not running"
+	@curl -sf http://localhost:3010 >/dev/null && echo "  SandboxFusion:  healthy" || echo "  SandboxFusion:  not running"
+	@curl -sf http://localhost:8101/v1/models >/dev/null && echo "  vLLM:           healthy" || echo "  vLLM:           not running"
+	@echo ""
+	@echo "============================================"
+endif
+
+health-infra: ## Probe Keycloak, Kong and PostgreSQL and print one status line each
+ifeq ($(OS),Windows_NT)
+	@echo Checking infrastructure services...
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8085 -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host 'OK Keycloak: healthy' } catch { Write-Host 'ERROR Keycloak: unhealthy' }"
+	@powershell -Command "try { $$response = Invoke-WebRequest -Uri http://localhost:8000 -UseBasicParsing -TimeoutSec 2 -ErrorAction SilentlyContinue; if ($$response.StatusCode -ge 200 -and $$response.StatusCode -lt 500) { Write-Host 'OK Kong: healthy' } else { Write-Host 'ERROR Kong: unhealthy' } } catch { try { if ($$PSItem.Exception.Response.StatusCode.Value__ -eq 404) { Write-Host 'OK Kong: healthy' } else { Write-Host 'ERROR Kong: unhealthy' } } catch { Write-Host 'ERROR Kong: unhealthy' } }"
+	@powershell -Command "try { $$null = docker compose exec -T api-db pg_isready -U jan_user 2>&1 | Out-Null; if ($$LASTEXITCODE -eq 0) { Write-Host 'OK PostgreSQL: healthy' } else { Write-Host 'ERROR PostgreSQL: unhealthy' } } catch { Write-Host 'ERROR PostgreSQL: unhealthy' }"
+else
+	@curl -sf http://localhost:8085 >/dev/null && echo " Keycloak: healthy" || echo " Keycloak: unhealthy"
+# Kong is healthy on a successful response or on its "no Route matched" reply (the Windows
+# branch likewise accepts 404); the if/else guarantees exactly one status line is printed.
+	@if curl -sf --max-time 2 http://localhost:8000 >/dev/null 2>&1 || curl -s --max-time 2 http://localhost:8000 2>&1 | grep -q "no Route matched"; then echo " Kong: healthy"; else echo " Kong: unhealthy"; fi
+	@$(COMPOSE) exec -T api-db pg_isready -U jan_user >/dev/null 2>&1 && echo " PostgreSQL: healthy" || echo " PostgreSQL: unhealthy"
+endif
+
+health-api: ## Print the /healthz JSON of the LLM API and Media API, or a not-responding message
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "try { Invoke-WebRequest -Uri http://localhost:8080/healthz -UseBasicParsing | Select-Object -ExpandProperty Content | ConvertFrom-Json | ConvertTo-Json } catch { Write-Host 'ERROR LLM API not responding' }"
+	@powershell -Command "try { Invoke-WebRequest -Uri http://localhost:8285/healthz -UseBasicParsing | Select-Object -ExpandProperty Content | ConvertFrom-Json | ConvertTo-Json } catch { Write-Host 'ERROR Media API not responding' }"
+else
+# Capture the body first: in `curl | jq || echo` only jq's exit status is tested, so a
+# failed curl (empty input, jq exits 0) would never print the fallback message.
+	@body=$$(curl -sf http://localhost:8080/healthz) && printf '%s\n' "$$body" | jq . || echo "? LLM API not responding"
+	@body=$$(curl -sf http://localhost:8285/healthz) && printf '%s\n' "$$body" | jq . || echo "? Media API not responding"
+endif
+
+health-mcp: ## Probe MCP Tools, SearXNG, Vector Store and SandboxFusion and print one status line each
+ifeq ($(OS),Windows_NT)
+	@echo Checking MCP services...
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8091/healthz -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host 'OK MCP Tools: healthy' } catch { Write-Host 'ERROR MCP Tools: unhealthy' }"
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:8086 -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host 'OK SearXNG: healthy' } catch { Write-Host 'ERROR SearXNG: unhealthy' }"
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:3015/healthz -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host 'OK Vector Store: healthy' } catch { Write-Host 'ERROR Vector Store: unhealthy' }"
+	@powershell -Command "try { $$null = Invoke-WebRequest -Uri http://localhost:3010 -UseBasicParsing -TimeoutSec 2 -ErrorAction Stop; Write-Host 'OK SandboxFusion: healthy' } catch { Write-Host 'ERROR SandboxFusion: unhealthy' }"
+else
+	@curl -sf http://localhost:8091/healthz >/dev/null && echo " MCP Tools: healthy" || echo " MCP Tools: unhealthy"
+	@curl -sf http://localhost:8086 >/dev/null && echo " SearXNG: healthy" || echo " SearXNG: unhealthy"
+	@curl -sf http://localhost:3015/healthz >/dev/null && echo " Vector Store: healthy" || echo " Vector Store: unhealthy"
+	@curl -sf http://localhost:3010 >/dev/null && echo " SandboxFusion: healthy" || echo " SandboxFusion: unhealthy"
+endif
+
+# ============================================================================================================
+# END OF MAKEFILE
+# ============================================================================================================
+
+
diff --git a/README.md b/README.md
index c1ec20a5..91fc80fd 100644
--- a/README.md
+++ b/README.md
@@ -1,345 +1,532 @@
 # Jan Server
 
-A comprehensive self-hosted AI server platform that provides OpenAI-compatible APIs, multi-tenant organization management, and AI model inference capabilities. Jan Server enables organizations to deploy their own private AI infrastructure with full control over data, models, and access.
+> A microservices LLM API platform with MCP tool integration
 
-## 🚀 Overview
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
+[![Go](https://img.shields.io/badge/Go-1.21+-00ADD8?logo=go)](https://go.dev/)
+[![Docker](https://img.shields.io/badge/Docker-required-2496ED?logo=docker)](https://www.docker.com/)
 
-Jan Server is a Kubernetes-native platform consisting of multiple microservices that work together to provide a complete AI infrastructure solution. It offers:
+## Prerequisites
 
-- **OpenAI-Compatible API**: Full compatibility with OpenAI's chat completion API
-- **Multi-Tenant Architecture**: Organization and project-based access control
-- **AI Model Inference**: Scalable model serving with health monitoring
-- **Database Management**: PostgreSQL with read/write replicas
-- **Authentication & Authorization**: JWT + Google OAuth2 integration
-- **API Key Management**: Secure API key generation and management
-- **Model Context Protocol (MCP)**: Support for external tools and resources
-- **Web Search Integration**: Serper API integration for web search capabilities
-- **Monitoring & Profiling**: Built-in performance monitoring and health checks
+Before running Jan Server locally, make sure you have:
 
-## 🏗️ System Architecture
+- **Docker Desktop** (Windows/macOS) or **Docker Engine + docker compose V2** (Linux)
+- **Make** (installed by default on Linux/macOS, [install on Windows](https://gnuwin32.sourceforge.net/packages/make.htm))
+- **Git** for cloning the repository
+- **8 GB RAM minimum** (12 GB recommended for all services)
+- Optional: **NVIDIA GPU + recent drivers** if you plan to run local vLLM inference
 
-![System Architecture Diagram](docs/Architect.png)
+## Quick Start
 
+```bash
+# Clone and enter the repo
+git clone https://github.com/janhq/jan-server.git
+cd jan-server
 
-## 📦 Services
+# Interactive setup (runs jan-cli wizard and docker compose)
+make quickstart
+```
 
-### Jan API Gateway
-The core API service that provides OpenAI-compatible endpoints and manages all client interactions.
+The `quickstart` target wraps `jan-cli` and guides you through:
+- Selecting the LLM provider (local vLLM vs remote OpenAI-compatible endpoint)
+- Choosing the MCP search provider (Serper, SearXNG, or disabled)
+- Enabling or disabling the Media API
 
-**Key Features:**
-- OpenAI-compatible chat completion API with streaming support
-- Multi-tenant organization and project management
-- JWT-based authentication with Google OAuth2 integration
-- API key management at organization and project levels
-- Model Context Protocol (MCP) support for external tools
-- Web search integration via Serper API
-- Comprehensive monitoring and profiling capabilities
-- Database transaction management with automatic rollback
+Need to rerun the wizard? Execute `make quickstart` again and accept the prompt to update your `.env`.
 
-**Technology Stack:**
-- Go 1.24.6 with Gin web framework
-- PostgreSQL with GORM and read/write replicas
-- JWT authentication and Google OAuth2
-- Swagger/OpenAPI documentation
-- Built-in pprof profiling with Grafana Pyroscope integration
+Prefer a scripted setup? Run:
 
-### PostgreSQL Database
-The persistent data storage layer with enterprise-grade features.
+```bash
+make setup   # Generates/updates .env via jan-cli
+make up-full # Starts every service defined in docker-compose.yml
+```
 
-**Key Features:**
-- Read/write replica support for high availability
-- Automatic schema migrations with Atlas
-- Connection pooling and optimization
-- Transaction management with rollback support
+**More detail**: [Quickstart Documentation](docs/quickstart.md)
+
+**Services running after `make up-full`:**
+- **API Gateway**: http://localhost:8000 (Kong)
+- **LLM API**: http://localhost:8080 (OpenAI-compatible)
+- **Response API**: http://localhost:8082 (Multi-step orchestration)
+- **Media API**: http://localhost:8285 (Media management)
+- **MCP Tools**: http://localhost:8091 (Tool integration)
+- **API Documentation**: http://localhost:8000/v1/swagger/
+- **Keycloak Console**: http://localhost:8085 (admin/admin)
+
+> Keycloak now runs directly from the official `quay.io/keycloak/keycloak:24.0.5` image with our realm/import scripts bind-mounted at runtime - no bundled Keycloak source tree is required.
+
+**Full setup guide**: [Getting Started](docs/getting-started/README.md)
+
+## What is Jan Server?
+
+Jan Server is an enterprise-grade LLM API platform that provides:
+- **OpenAI-compatible API** for chat completions and conversations
+- **Multi-step tool orchestration** with Response API for complex workflows
+- **Media management** with S3 integration and `jan_*` ID resolution
+- **MCP (Model Context Protocol)** tools for web search, scraping, and code execution
+- **OAuth/OIDC authentication** via Keycloak with guest access
+- **Full observability** with OpenTelemetry, Prometheus, Jaeger, and Grafana
+- **Flexible deployment** with Docker Compose profiles and Kubernetes support
+
+## Features
+
+- **OpenAI-compatible chat completions API** with streaming support
+- **Response API** for multi-step tool orchestration (max depth: 8, timeout: 45s)
+- **Media API** with S3 storage, jan_* ID system, and presigned URLs
+- **MCP tools** (google_search, web scraping, code execution via SandboxFusion)
+- **Conversation and message management** with PostgreSQL persistence
+- **Guest and user authentication** via Keycloak OIDC enforced by Kong gateway (JWT + custom API key plugin)
+- **API gateway routing** via Kong v3.5
+- **Distributed tracing** with Jaeger and OpenTelemetry
+- **Metrics and dashboards** with Prometheus and Grafana
+- **Development mode** with host.docker.internal support for flexible debugging
+- **Comprehensive testing suite** with 6 jan-cli api-test collections
+- **Service template system** for rapid microservice creation
+
+## Documentation
+
+Primary entry points:
+- [docs/README.md](docs/README.md) - Documentation hub overview
+- [docs/index.md](docs/index.md) - Navigation map grouped by audience
+- [docs/architecture/services.md](docs/architecture/services.md) - Service responsibilities and ports
+- [docs/api/README.md](docs/api/README.md) - API reference hub
+- [docs/getting-started/README.md](docs/getting-started/README.md) - Five-minute setup
+- [docs/quickstart.md](docs/quickstart.md) - Interactive setup walkthrough and commands
+
+Governance and quality:
+- [DOCUMENTATION_QUALITY_REPORT.md](DOCUMENTATION_QUALITY_REPORT.md) - Recent audit summary and metrics
+- [CHANGELOG.md](CHANGELOG.md) - Release history and notable changes
+- [docs-improve.todo](docs-improve.todo) - Active documentation improvement tracker
+- [CONTRIBUTING.md](CONTRIBUTING.md) - Development workflow expectations
+- [docs/architecture/security.md](docs/architecture/security.md) - Security posture and hardening guidance
+
+## Project Structure
+
+```text
+jan-server/
+|-- services/              # Go microservices
+|   |-- llm-api/
+|   |-- response-api/
+|   |-- media-api/
+|   |-- mcp-tools/
+|   |-- template-api/
+|-- docs/                  # Documentation hub
+|-- docker/                # Compose profiles (infra, api, mcp, inference)
+|-- monitoring/            # Grafana, Prometheus, OTEL configs
+|-- k8s/                   # Helm chart + setup guide
+|-- config/                # Environment templates and helpers
+|-- kong/                  # Gateway declarative config
+|-- keycloak/              # Realm + theme customisation
+|-- scripts/               # Utility scripts (new service template, etc.)
+|-- Makefile               # Build, test, deploy targets
+```
 
-## 🚀 Quick Start
+Key directories:
+- `services/` - source for each microservice plus local docs.
+- `docs/` - user, operator, and developer documentation (see [docs/README.md](docs/README.md)).
+- `docker/` - compose files included via `docker-compose.yml`.
+- `monitoring/` - observability stack definitions (Grafana dashboards live here).
+- `k8s/` - Helm chart (`k8s/jan-server`) and cluster setup notes.
+- `config/` - `.env` templates and environment overlays.
+- `kong/` / `keycloak/` - edge and auth configuration.
+- `scripts/` - automation (service scaffolding, utility scripts).
 
-### Prerequisites
+### Microservices Overview
 
-Before setting up Jan Server, ensure you have the following components installed:
+| Service | Purpose | Port(s) | Source | Docs |
+|---------|---------|---------|--------|------|
+| LLM API | OpenAI-compatible chat, conversations, models | 8080 (direct), 8000 via Kong | `services/llm-api` | `docs/api/llm-api/README.md` |
+| Response API | Multi-step orchestration using MCP tools | 8082 | `services/response-api` | `docs/api/response-api/README.md` |
+| Media API | jan_* IDs, S3 ingest, media resolution | 8285 | `services/media-api` | `docs/api/media-api/README.md` |
+| MCP Tools | Model Context Protocol tools (search, scrape, file search, python) | 8091 | `services/mcp-tools` | `docs/api/mcp-tools/README.md` |
 
-#### Required Components
+See [docs/architecture/services.md](docs/architecture/services.md) for dependency graphs and integration notes.
 
-> **⚠️ Important**: Windows and macOS users can only run mock servers for development. Real LLM model inference with vLLM is only supported on Linux systems with NVIDIA GPUs.
+## Service Template
 
-1. **Docker Desktop**
-   - **Windows**: Download from [Docker Desktop for Windows](https://docs.docker.com/desktop/install/windows-install/)
-   - **macOS**: Download from [Docker Desktop for Mac](https://docs.docker.com/desktop/install/mac-install/)
-   - **Linux**: Follow [Docker Engine installation guide](https://docs.docker.com/engine/install/)
+Create new microservices quickly with the template system:
 
-2. **Minikube**
-   - **Windows**: `choco install minikube` or download from [minikube releases](https://github.com/kubernetes/minikube/releases)
-   - **macOS**: `brew install minikube` or download from [minikube releases](https://github.com/kubernetes/minikube/releases)
-   - **Linux**: `curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 && sudo install minikube-linux-amd64 /usr/local/bin/minikube`
+```bash
+# Generate a new service from the template (PowerShell script - requires PowerShell/pwsh)
+./scripts/new-service-from-template.ps1 -Name my-new-service
+
+# Template includes:
+# - Go service skeleton with Gin HTTP server
+# - Configuration management (Viper)
+# - Structured logging (Zerolog)
+# - OpenTelemetry tracing support
+# - PostgreSQL with GORM
+# - Dependency injection with Wire
+# - Docker and Makefile setup
+# - Health check endpoint
+```
 
-3. **Helm**
-   - **Windows**: `choco install kubernetes-helm` or download from [Helm releases](https://github.com/helm/helm/releases)
-   - **macOS**: `brew install helm` or download from [Helm releases](https://github.com/helm/helm/releases)
-   - **Linux**: `curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash`
+**Documentation:**
+- Template guide: `docs/guides/services-template.md`
+- Template source: `services/template-api/`
 
-4. **kubectl**
-   - **Windows**: `choco install kubernetes-cli` or download from [kubectl releases](https://github.com/kubernetes/kubectl/releases)
-   - **macOS**: `brew install kubectl` or download from [kubectl releases](https://github.com/kubernetes/kubectl/releases)
-   - **Linux**: `curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" && sudo install kubectl /usr/local/bin/kubectl`
+## Development
 
-#### Optional: NVIDIA GPU Support (for Real LLM Models) 
-If you plan to run real LLM models (not mock servers) and have an NVIDIA GPU:
+### Quick Commands
 
-1. **Install NVIDIA Container Toolkit**: Follow the [official NVIDIA Container Toolkit installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+```bash
+# Start services
+make up-full              # Full stack (all 4 APIs + infrastructure)
+make up-gpu               # With GPU inference (vLLM)
+make up-cpu               # CPU-only inference
+make up                   # Infrastructure only (DB, Keycloak, Redis)
+
+# Build services
+make build-llm-api        # Build LLM API
+make build-response-api   # Build Response API
+make build-media-api      # Build Media API
+make build-mcp            # Build MCP Tools
+
+# Development
+make test-all             # Run all test suites
+make swag                 # Generate API docs
+
+# Testing
+make test-auth            # Authentication tests
+make test-conversations   # Conversation tests
+make test-response        # Response API tests
+make test-media           # Media API tests
+make test-mcp             # MCP tools tests
+make test-e2e             # Gateway E2E tests
+
+# Monitoring
+make monitor-up           # Start monitoring stack
+make monitor-logs         # View monitoring logs
+
+# Logs & Status
+make logs-llm-api         # View LLM API logs
+make logs-response-api    # View Response API logs
+make logs-media-api       # View Media API logs
+make logs-mcp             # View MCP Tools logs
+make health-check         # Check all services health
+
+# Database
+make db-migrate           # Run migrations
+make db-reset             # Reset database
+make db-seed              # Seed test data
+
+# Cleanup
+make down                 # Stop services
+make clean                # Clean artifacts
+make clean-all            # Clean everything (including volumes)
+```
 
-2. **Configure Minikube for GPU support**: Follow the [official minikube GPU tutorial](https://minikube.sigs.k8s.io/docs/tutorials/nvidia/) for complete setup instructions.
+### Development Mode
 
-### Local Development Setup
+Run services on your host for debugging:
 
-#### Option 1: Mock Server Setup (Recommended for Development)
+```bash
+# Start all services in Docker with host.docker.internal support
+make dev-full
 
-1. **Start Minikube and configure Docker**:
-   ```bash
-   minikube start
-   eval $(minikube docker-env)
-   ```
+# Stop any service and run it on your host
+docker compose stop llm-api
+.\scripts\dev-full-run.ps1 llm-api  # Windows
+./scripts/dev-full-run.sh llm-api   # Linux/Mac
+```
 
-2. **Build and deploy all services**:
-   ```bash
-   ./scripts/run.sh
-   ```
+See [Development Guide](docs/guides/development.md) and [Dev-Full Mode](docs/guides/dev-full-mode.md) for details.
 
-3. **Access the services**:
-   - **API Gateway**: http://localhost:8080
-   - **Swagger UI**: http://localhost:8080/api/swagger/index.html
-   - **Health Check**: http://localhost:8080/healthcheck
-   - **Version Info**: http://localhost:8080/v1/version
+## CLI Tool
 
-#### Option 2: Real LLM Setup (Requires NVIDIA GPU)
+Jan Server includes a unified CLI tool for configuration management, service operations, and development tasks.
 
-1. **Start Minikube with GPU support**:
-   ```bash
-   minikube start --gpus all
-   eval $(minikube docker-env)
-   ```
+### Quick Install
 
-2. **Configure GPU memory utilization** (if you have limited GPU memory):
-   
-   GPU memory utilization is configured in the vLLM Dockerfile. See the [vLLM CLI documentation](https://docs.vllm.ai/en/latest/cli/serve.html) for all available arguments.
-   
-   To modify GPU memory utilization, edit the vLLM launch command in:
-   - `apps/jan-inference-model/Dockerfile` (for Docker builds)
-   - Helm chart values (for Kubernetes deployment)
+```bash
+# Install globally (recommended)
+make cli-install
 
-3. **Build and deploy all services**:
-   ```bash
-   # For GPU setup, modify run.sh to use GPU-enabled minikube
-   # Edit scripts/run.sh and change "minikube start" to "minikube start --gpus all"
-   ./scripts/run.sh
-   ```
+# Add to PATH as instructed, then run from anywhere
+jan-cli --version
+jan-cli config validate
+jan-cli service list
+```
 
-### Production Deployment
+### Quick Usage (Without Installation)
 
-For production deployments, modify the Helm values in `charts/jan-server/values.yaml` and deploy using:
+Use the wrapper scripts from the project root:
 
 ```bash
-helm install jan-server ./charts/jan-server
+# Linux/macOS/WSL
+./jan-cli.sh config validate
+./jan-cli.sh service list
+./jan-cli.sh dev setup
+
+# Windows PowerShell
+.\jan-cli.ps1 config validate
+.\jan-cli.ps1 service list
+.\jan-cli.ps1 dev setup
 ```
 
-## ⚙️ Configuration
+The wrapper scripts automatically build the CLI if needed.
 
-### Environment Variables
+### Available Commands
 
-The system is configured through environment variables defined in the Helm values file. Key configuration areas include:
-
-#### Jan API Gateway Configuration
-- **Database Connection**: PostgreSQL connection strings for read/write replicas
-- **Authentication**: JWT secrets and Google OAuth2 credentials
-- **API Keys**: Encryption secrets for API key management
-- **External Services**: Serper API key for web search functionality
-- **Model Integration**: Jan Inference Model service URL
+**Configuration Management:**
+```bash
+jan-cli config validate              # Validate configuration
+jan-cli config export --format env   # Export as environment variables
+jan-cli config show llm-api          # Show service configuration
+jan-cli config k8s-values --env prod # Generate Kubernetes values
+```
 
-#### Security Configuration
-- **JWT_SECRET**: HMAC-SHA-256 secret for JWT token signing
-- **APIKEY_SECRET**: HMAC-SHA-256 secret for API key encryption
-- **Database Credentials**: PostgreSQL username, password, and database name
+**Service Operations:**
+```bash
+jan-cli service list                 # List all services
+jan-cli service logs llm-api         # Show service logs
+jan-cli service status               # Check service health
+```
 
-#### External Service Integration
-- **SERPER_API_KEY**: API key for web search functionality
-- **Google OAuth2**: Client ID, secret, and redirect URL for authentication
-- **Model Service**: URL for Jan Inference Model service communication
+**Development Tools:**
+```bash
+jan-cli dev setup                    # Setup development environment
+jan-cli dev scaffold my-service      # Create new service from template
+```
 
-### Helm Configuration
+**Documentation:** 
+- Complete guide: [docs/guides/jan-cli.md](docs/guides/jan-cli.md)
+- Command reference: [cmd/jan-cli/README.md](cmd/jan-cli/README.md)
 
-The system uses Helm charts for deployment configuration:
+## API Examples
 
-- **Values Files**: Configuration files for different environments
+### 1. Authentication
 
-## 🔧 Development
+Kong (`http://localhost:8000`) fronts all `/llm/*` services and enforces Keycloak-issued JWTs or the custom API key plugin (`X-API-Key: sk_*`). Acquire temporary guest tokens at `POST /llm/auth/guest-login`, then include `Authorization: Bearer <token>` (or `X-API-Key`) on subsequent requests.
 
-### Project Structure
-```
-jan-server/
-├── apps/                          # Application services
-│   ├── jan-api-gateway/           # Main API gateway service
-│   │   ├── application/           # Go application code
-│   │   ├── docker/               # Docker configuration
-│   │   └── README.md            # Service-specific documentation
-│   └── jan-inference-model/       # AI model inference service
-│       ├── application/           # Python application code
-│       └── Dockerfile           # Container configuration
-├── charts/                        # Helm charts
-│   └── jan-server/           # Main deployment chart
-├── scripts/                      # Deployment and utility scripts
-└── README.md                     # This file
+```bash
+# Get guest token (no registration required)
+curl -X POST http://localhost:8000/llm/auth/guest-login
+
+# Sample response:
+# {
+#   "access_token": "eyJhbGc...",
+#   "token_type": "Bearer",
+#   "expires_in": 3600,
+#   "refresh_token": "...",
+#   "user_id": "guest-..."
+# }
 ```
 
-### Building Services
+### 2. Chat Completion
 
 ```bash
-# Build API Gateway
-docker build -t jan-api-gateway:latest ./apps/jan-api-gateway
-
-# Build Inference Model
-docker build -t jan-inference-model:latest ./apps/jan-inference-model
+# Simple chat completion
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "jan-v1-4b",
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "stream": true
+  }'
+
+# With media (using jan_* ID)
+curl -X POST http://localhost:8000/v1/chat/completions \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "jan-v1-4b",
+    "messages": [{
+      "role": "user",
+      "content": [
+        {"type": "text", "text": "What's in this image?"},
+        {"type": "image_url", "image_url": {"url": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0"}}
+      ]
+    }]
+  }'
 ```
 
-### Database Migrations
-
-The system uses Atlas for database migrations:
+### 3. Media Upload & Resolution
 
 ```bash
-# Generate migration files
-go run ./apps/jan-api-gateway/application/cmd/codegen/dbmigration
-
-# Apply migrations
-atlas migrate apply --url "your-database-url"
+# Upload media (remote URL)
+curl -X POST http://localhost:8285/v1/media \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "source": {
+      "type": "remote_url",
+      "url": "https://example.com/image.jpg"
+    },
+    "user_id": "user123"
+  }'
+
+# Response:
+# {
+#   "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+#   "mime": "image/jpeg",
+#   "bytes": 45678,
+#   "presigned_url": "https://s3.menlo.ai/platform-dev/..."
+# }
+
+# Resolve jan_* ID to presigned URL
+curl -X POST http://localhost:8285/v1/media/resolve \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{"ids": ["jan_01hqr8v9k2x3f4g5h6j7k8m9n0"]}'
 ```
 
-## 📊 Monitoring & Observability
-
-### Health Monitoring
-- **Health Check Endpoints**: Available on all services
-- **Model Health Monitoring**: Automated health checks for inference models
-- **Database Health**: Connection monitoring and replica status
+### 4. MCP Tools
 
-### Performance Profiling
-- **pprof Endpoints**: Available on port 6060 for performance analysis
-- **Grafana Pyroscope**: Continuous profiling integration
-- **Request Tracing**: Unique request IDs for end-to-end tracing
+```bash
+# Google search
+curl -X POST http://localhost:8000/v1/mcp \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "id": 1,
+    "method": "tools/call",
+    "params": {
+      "name": "google_search",
+      "arguments": {"q": "latest AI news", "num": 5}
+    }
+  }'
+
+# List available tools
+curl -X GET http://localhost:8091/v1/mcp/tools
+```
 
-### Logging
-- **Structured Logging**: JSON-formatted logs across all services
-- **Request/Response Logging**: Complete request lifecycle tracking
-- **Error Tracking**: Unique error codes for debugging
+### 5. Response API (Multi-step Orchestration)
 
-## 🔒 Security
+```bash
+# Create response with tool execution
+curl -X POST http://localhost:8082/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o-mini",
+    "input": "Search for the latest AI news and summarize the top 3 results"
+  }'
+
+# Response includes:
+# - Tool execution trace
+# - Final generated response
+# - Execution metadata (depth, duration, etc.)
+```
 
-### Authentication & Authorization
-- **JWT Tokens**: Secure token-based authentication
-- **Google OAuth2**: Social authentication integration
-- **API Key Management**: Scoped API keys for different access levels
-- **Multi-tenant Security**: Organization and project-level access control
+More examples: [API Documentation](docs/api/)
 
-### Data Protection
-- **Encrypted API Keys**: HMAC-SHA-256 encryption for sensitive data
-- **Secure Database Connections**: SSL-enabled database connections
-- **Environment Variable Security**: Secure handling of sensitive configuration
+## Deployment
 
-## 🚀 Deployment
+### Docker Compose Profiles
 
-### Local Development
 ```bash
-# Start local cluster
-minikube start
-eval $(minikube docker-env)
+make up-full              # All services
+make up-gpu               # With GPU inference
+make up-cpu               # CPU-only inference
+make monitor-up           # Add monitoring stack
+```
 
-# Deploy services
-./scripts/run.sh
+### Environment Configuration
 
-# Access services
-kubectl port-forward svc/jan-server-jan-api-gateway 8080:8080
+```bash
+# Quick setup with defaults
+make setup
+
+# Or manually configure
+cp config/secrets.env.example config/secrets.env
+# Edit config/secrets.env with your API keys:
+# - HF_TOKEN (HuggingFace token for model downloads)
+# - SERPER_API_KEY (for Google Search tool)
+# - POSTGRES_PASSWORD (database password)
+# - KEYCLOAK_ADMIN_PASSWORD (Keycloak admin password)
+
+# Available environment configs:
+# - config/defaults.env       - Base configuration
+# - config/development.env    - Docker development
+# - config/testing.env        - Testing configuration
+# - config/production.env.example - Production template
 ```
 
-### Production Deployment
-```bash
-# Update Helm dependencies
-helm dependency update ./charts/jan-server
+**Required secrets:**
+- `HF_TOKEN` - HuggingFace token (get from https://huggingface.co/settings/tokens)
+- `SERPER_API_KEY` - Serper API key (get from https://serper.dev)
 
-# Deploy to production
-helm install jan-server ./charts/jan-server
+See [Deployment Guide](docs/guides/deployment.md) for production setup.
 
-# Upgrade deployment
-helm upgrade jan-server ./charts/jan-server
+## Testing
 
-# Uninstall
-helm uninstall jan-server
+```bash
+# Run all tests (6 jan-cli api-test collections)
+make test-all
+
+# Specific test suites
+make test-auth            # Authentication flows (guest + user)
+make test-conversations   # Conversation management
+make test-response        # Response API orchestration
+make test-media           # Media API operations
+make test-mcp             # MCP tools integration
+make test-e2e             # Gateway end-to-end tests
+
+# Test reports
+# - CLI output: Detailed results with assertions
 ```
 
-## 🐛 Troubleshooting
+**Test Collections:**
+- `tests/automation/auth-postman-scripts.json` - Auth flows
+- `tests/automation/conversations-postman-scripts.json` - Conversations
+- `tests/automation/responses-postman-scripts.json` - Response API
+- `tests/automation/media-postman-scripts.json` - Media API
+- `tests/automation/mcp-postman-scripts.json` - MCP tools
+- `tests/automation/test-all.postman.json` - Complete E2E suite
 
-### Common Issues and Solutions
+Testing guide: [docs/guides/testing.md](docs/guides/testing.md)
 
-### 1. LLM Pod Not Starting (Pending Status)
+## Monitoring
 
-**Symptoms**: The `jan-server-jan-inference-model` pod stays in `Pending` status.
+Access monitoring dashboards:
 
-**Diagnosis Steps**:
-```bash
-# Check pod status
-kubectl get pods
+- **Grafana**: http://localhost:3331 (admin/admin)
+- **Prometheus**: http://localhost:9090
+- **Jaeger**: http://localhost:16686
 
-# Get detailed pod information (replace with your actual pod name)
-kubectl describe pod jan-server-jan-inference-model-<POD_ID>
-```
+See [Monitoring Guide](docs/guides/monitoring.md) for configuration.
 
-**Common Error Messages and Solutions**:
+## Technology Stack
 
-##### Error: "Insufficient nvidia.com/gpu"
-```
-0/1 nodes are available: 1 Insufficient nvidia.com/gpu. no new claims to deallocate, preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling.
-```
-**Solution for Real LLM Setup**:
-1. Ensure you have NVIDIA GPU and drivers installed
-2. Install NVIDIA Container Toolkit (see Prerequisites section) 
-3. Start minikube with GPU support:
-   ```bash
-   minikube start --gpus all
-   ```
-
-##### Error: vLLM Pod Keeps Restarting
-```
-# Check pod logs to see the actual error
-kubectl logs jan-server-jan-inference-model-<POD_ID>
-```
+| Layer | Technology | Version |
+|-------|------------|---------|
+| **API Gateway** | Kong | 3.5 |
+| **Services** | Go (Gin framework) | 1.21+ |
+| **Database** | PostgreSQL | 18 |
+| **Cache** | Redis | Latest |
+| **Auth** | Keycloak (OIDC) | 24.0.5 |
+| **Inference** | vLLM | Latest |
+| **Search** | SearXNG | Latest |
+| **Code Execution** | SandboxFusion | Latest |
+| **Observability** | OpenTelemetry | Latest |
+| **Metrics** | Prometheus | Latest |
+| **Tracing** | Jaeger | Latest |
+| **Dashboards** | Grafana | Latest |
+| **MCP Protocol** | mark3labs/mcp-go | Latest |
+| **Container** | Docker Compose | 2.0+ |
+| **Orchestration** | Kubernetes + Helm | 1.28+ |
 
-**Common vLLM startup issues**:
-1. **CUDA Out of Memory**: Modify vLLM arguments in Dockerfile to reduce memory usage
-2. **Model Loading Errors**: Check if model path is correct and accessible
-3. **GPU Not Detected**: Ensure NVIDIA Container Toolkit is properly installed
+**Microservices:**
+- LLM API: Go 1.21+ with Gin, GORM, Wire DI
+- Response API: Go 1.21+ with Gin, GORM, Wire DI
+- Media API: Go 1.21+ with Gin, GORM, S3 SDK
+- MCP Tools: Go 1.21+ with JSON-RPC 2.0
 
-#### 2. Helm Issues
+## Contributing
 
-**Symptoms**: Helm commands fail or charts won't install.
+See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution guidelines.
 
-**Solutions**:
-```bash
-# Update Helm dependencies
-helm dependency update ./charts/jan-server
+## License
 
-# Check Helm status
-helm list
+See the [LICENSE](LICENSE) file for licensing terms.
 
-# Uninstall and reinstall
-helm uninstall jan-server
-helm install jan-server ./charts/jan-server
-```
+## Support
 
-## 📚 API Documentation
+- Documentation: [docs/README.md](docs/README.md)
+- Issue Tracker: https://github.com/janhq/jan-server/issues
+- Discussions: https://github.com/janhq/jan-server/discussions
 
-- **Swagger UI**: Available at `/api/swagger/index.html` when running
-- **OpenAPI Specification**: Auto-generated from code annotations
-- **Interactive Testing**: Built-in API testing interface
+---
 
-## 🤝 Contributing
+**Quick Start**: `make setup && make up-full` | **Documentation**: [docs/](docs/) | **API Docs**: http://localhost:8000/v1/swagger/
 
-1. Fork the repository
-2. Create a feature branch
-3. Make your changes
-4. Add tests for new functionality
-5. Submit a pull request
\ No newline at end of file
diff --git a/ReleaseProcedure.md b/ReleaseProcedure.md
deleted file mode 100644
index c4662dc6..00000000
--- a/ReleaseProcedure.md
+++ /dev/null
@@ -1,33 +0,0 @@
-# Release Procedure
-
-## 0. Announce Release
-- Create a thread in **#jan-server-internal** channel  
-  - `Jan Server Release vX.X.X (Release Manager:)`  
-
-## 2. Pre-flight Checks
-- **System Environment** 
-  - Confirm environment variables are correctly configured  
-  - jan-api-gateway:
-    - git diff release..main -- apps/jan-api-gateway/application/config/environment_variables/env.go
-  - Ask DevOps to verify **Prod Vault** is updated 
-
-- **Database**  
-  - Review **migration plan**  
-  - Confirm migrations are backward compatible (or plan downtime)  
-
-- **API Compatibility**  
-  - Validate no **breaking changes** for clients (or prepare migration guides)  
-
-## 3. Deployment
-- Merge `main` → `release` branch  
-- Tag release with target version: vX.X.X
-- Use the GitHub UI for tagging, it generates some useful notes.
-
-**Monitoring**
-- /v1/versions
-  ```bash
-  curl -X 'GET' \
-  'https://api.jan.ai/v1/version' \
-  -H 'accept: application/json'
-  ```
-- Run Prod Test cases
\ No newline at end of file
diff --git a/apps/jan-api-gateway/.gitignore b/apps/jan-api-gateway/.gitignore
deleted file mode 100644
index c076e6c4..00000000
--- a/apps/jan-api-gateway/.gitignore
+++ /dev/null
@@ -1,254 +0,0 @@
-# Logs
-*.log
-# Temporary files
-*.tmp
-*~
-
-# Ignore Go debug binaries generated by Delve or Go tools
-__debug_bin*
-*.test
-
-
-*.bak
-# Environment files (containing secrets, API keys, credentials)
-.env
-*.env
-.env.*
-
-# Local configuration that shouldn't be shared
-*.local
-*.vscode
-
-### Vscode ###
-# Visual Studio Code editor settings and workspace files
-
-# Visual Studio Code
-application/.vscode/*
-application/.vscode
-
-.history/
-*.vsix
-.ionide
-
-.vs/
-*.code-workspace
-.vscode-test
-.vscodeignore
-.vscode/chrome
-
-.vscode-server/
-.vscode/sftp.json
-.vscode/tags
-.devcontainer/
-
-### Pycharm ###
-# pycharm editor specific files
-
-# PyCharm
-.idea/
-*.iws
-*.iml
-*.ipr
-
-.idea_modules/
-out/
-.idea/workspace.xml
-.idea/tasks.xml
-.idea/dictionaries
-
-.idea/misc.xml
-.idea/modules.xml
-.idea/vcs.xml
-.idea/jsLibraryMappings.xml
-.idea/datasources.xml
-
-.idea/dataSources.ids
-.idea/sqlDataSources.xml
-.idea/dynamic.xml
-.idea/uiDesigner.xml
-.idea/inspectionProfiles/
-
-### Goland ###
-# goland editor specific files
-
-# GoLand
-.idea/
-*.iws
-*.iml
-*.ipr
-
-.idea_modules/
-out/
-.idea/workspace.xml
-.idea/tasks.xml
-.idea/dictionaries
-
-.idea/goland.xml
-.idea/golangProjectSettings.xml
-.idea/goLibraries.xml
-go.work
-go.sum
-
-.idea/goprojector.xml
-.idea/go.imports.xml
-.idea/go.vendoring.xml
-
-### Python ###
-# Python compiled files, virtual environments, and cache
-
-__pycache__/
-*.py[cod]
-*$py.class
-*.so
-.Python
-
-pypackages/
-build/
-develop-eggs/
-dist/
-downloads/
-
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-
-.installed.cfg
-*.egg
-MANIFEST
-.venv
-env/
-
-venv/
-ENV/
-.pytest_cache/
-.coverage
-htmlcov/
-
-# PyInstaller
-*.manifest
-*.spec
-# Installer logs
-pip-log.txt
-
-pip-delete-this-directory.txt
-# Unit test / coverage reports
-.tox/
-.nox/
-.coverage.*
-
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-
-cover/
-# Translations
-*.mo
-*.pot
-# Django
-
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-# Flask
-instance/
-
-.webassets-cache
-# Scrapy
-.scrapy
-# Sphinx
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-# pyenv
-#.python-version
-
-# pipenv
-#Pipfile.lock
-# poetry
-#poetry.lock
-# pdm
-
-.pdm.toml
-__pypackages__/
-# Celery
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath
-*.sage.py
-# Spyder
-.spyderproject
-.spyproject
-
-# Rope
-.ropeproject
-# mkdocs
-/site
-# mypy
-
-.mypy_cache/
-.dmypy.json
-dmypy.json
-# Pyre
-.pyre/
-
-# pytype
-.pytype/
-# Cython
-cython_debug/
-# Poetry
-
-poetry.toml
-# ruff
-.ruff_cache/
-# LSP
-pyrightconfig.json
-
-*.sage.py
-.pdm.toml
-profile_default/
-.nox/
-.spyproject
-
-site/
-
-### Go ###
-# go specific files
-
-*.exe
-*.test
-vendor/
-*.out
-go.work
-
-go.sum
-
-### Typescript ###
-# typescript specific files
-
-*.tsbuildinfo
-node_modules/
-dist/
-
-# helm
-charts/jan-server/charts/*.tgz
-
-apps/jan-api-gateway/application/cmd/server/__debug_bin*
\ No newline at end of file
diff --git a/apps/jan-api-gateway/.ide/launch.json b/apps/jan-api-gateway/.ide/launch.json
deleted file mode 100644
index 2b561034..00000000
--- a/apps/jan-api-gateway/.ide/launch.json
+++ /dev/null
@@ -1,102 +0,0 @@
-{
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "name": "Launch Jan API Gateway (Debug)",
-            "type": "go",
-            "request": "launch",
-            "mode": "auto",
-            "program": "${workspaceFolder}/application/cmd/server",
-            "cwd": "${workspaceFolder}/application/cmd/server",
-            "env": {
-                "DB_POSTGRESQL_WRITE_DSN": "postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable",
-                "DB_POSTGRESQL_READ1_DSN": "postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable",
-                "ENABLE_ADMIN_API": "True",
-                "JWT_SECRET": "your-super-secret-jwt-key-change-in-production",
-                "APIKEY_SECRET": "your-api-key-secret-change-in-production",
-                "JAN_INFERENCE_MODEL_URL": "http://localhost:8000",
-                "SERPER_API_KEY": "your-serper-api-key",
-                "OAUTH2_GOOGLE_CLIENT_ID": "your-google-client-id",
-                "OAUTH2_GOOGLE_CLIENT_SECRET": "your-google-client-secret",
-                "OAUTH2_GOOGLE_REDIRECT_URL": "http://localhost:8080/auth/google/callback"
-            },
-            "args": [],
-            "showLog": true,
-            "console": "integratedTerminal",
-            "dlvLoadConfig": {
-                "followPointers": true,
-                "maxVariableRecurse": 3,
-                "maxStringLen": 256,
-                "maxArrayValues": 64,
-                "maxStructFields": -1
-            },
-            "apiVersion": 2,
-            "preLaunchTask": "Start Database"
-        },
-        {
-            "name": "Attach to Jan API Gateway",
-            "type": "go",
-            "request": "attach",
-            "mode": "remote",
-            "remotePath": "${workspaceFolder}/application",
-            "port": 2345,
-            "host": "127.0.0.1",
-            "showLog": true,
-            "dlvLoadConfig": {
-                "followPointers": true,
-                "maxVariableRecurse": 3,
-                "maxStringLen": 256,
-                "maxArrayValues": 64,
-                "maxStructFields": -1
-            },
-            "apiVersion": 2
-        },
-        {
-            "name": "Launch Tests",
-            "type": "go",
-            "request": "launch",
-            "mode": "test",
-            "program": "${workspaceFolder}/application",
-            "env": {
-                "DB_POSTGRESQL_WRITE_DSN": "postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable",
-                "DB_POSTGRESQL_READ1_DSN": "postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable",
-                "ENABLE_ADMIN_API": "True",
-                "JWT_SECRET": "your-super-secret-jwt-key-change-in-production",
-                "APIKEY_SECRET": "your-api-key-secret-change-in-production",
-                "JAN_INFERENCE_MODEL_URL": "http://localhost:8000",
-                "SERPER_API_KEY": "your-serper-api-key",
-                "OAUTH2_GOOGLE_CLIENT_ID": "your-google-client-id",
-                "OAUTH2_GOOGLE_CLIENT_SECRET": "your-google-client-secret",
-                "OAUTH2_GOOGLE_REDIRECT_URL": "http://localhost:8080/auth/google/callback"
-            },
-            "args": [
-                "-test.v",
-                "-test.run",
-                ".*"
-            ],
-            "showLog": true,
-            "console": "integratedTerminal",
-            "preLaunchTask": "Start Database"
-        },
-        {
-          "name": "CodeGen",
-          "type": "go",
-          "request": "launch",
-          "mode": "debug",
-          "program": "${workspaceFolder}/cmd/codegen/gorm",
-          "env": {
-            "local_dev": "1",
-            "DB_POSTGRESQL_WRITE_DSN": "postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable",
-            "DB_POSTGRESQL_READ1_DSN": "postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable",
-            "ENABLE_ADMIN_API": "True",
-            "JWT_SECRET": "your-super-secret-jwt-key-change-in-production",
-            "APIKEY_SECRET": "your-api-key-secret-change-in-production",
-            "JAN_INFERENCE_MODEL_URL": "http://localhost:8000",
-            "SERPER_API_KEY": "your-serper-api-key",
-            "OAUTH2_GOOGLE_CLIENT_ID": "your-google-client-id",
-            "OAUTH2_GOOGLE_CLIENT_SECRET": "your-google-client-secret",
-            "OAUTH2_GOOGLE_REDIRECT_URL": "http://localhost:8080/auth/google/callback"
-          }
-        }
-    ]
-}
\ No newline at end of file
diff --git a/apps/jan-api-gateway/.ide/tasks.json b/apps/jan-api-gateway/.ide/tasks.json
deleted file mode 100644
index 5c468858..00000000
--- a/apps/jan-api-gateway/.ide/tasks.json
+++ /dev/null
@@ -1,91 +0,0 @@
-{
-    "version": "2.0.0",
-    "tasks": [
-        {
-            "label": "Start Database",
-            "type": "shell",
-            "command": "docker-compose",
-            "args": ["-f", "docker/docker-compose.yml", "up", "-d", "postgres"],
-            "group": "build",
-            "presentation": {
-                "echo": true,
-                "reveal": "always",
-                "focus": false,
-                "panel": "shared",
-                "showReuseMessage": true,
-                "clear": false
-            },
-            "problemMatcher": []
-        },
-        {
-            "label": "Stop Database",
-            "type": "shell",
-            "command": "docker-compose",
-            "args": ["-f", "docker/docker-compose.yml", "down"],
-            "group": "build",
-            "presentation": {
-                "echo": true,
-                "reveal": "always",
-                "focus": false,
-                "panel": "shared",
-                "showReuseMessage": true,
-                "clear": false
-            },
-            "problemMatcher": []
-        },
-        {
-            "label": "Wait for Database",
-            "type": "shell",
-            "command": "docker-compose",
-            "args": ["-f", "docker/docker-compose.yml", "exec", "-T", "postgres", "pg_isready", "-U", "jan_user", "-d", "jan_api_gateway"],
-            "group": "build",
-            "presentation": {
-                "echo": true,
-                "reveal": "always",
-                "focus": false,
-                "panel": "shared",
-                "showReuseMessage": true,
-                "clear": false
-            },
-            "problemMatcher": []
-        },
-        {
-            "label": "Build Application",
-            "type": "shell",
-            "command": "go",
-            "args": ["build", "-o", "server", "server.go", "wire_gen.go"],
-            "options": {
-                "cwd": "${workspaceFolder}/application/cmd/server"
-            },
-            "group": "build",
-            "presentation": {
-                "echo": true,
-                "reveal": "always",
-                "focus": false,
-                "panel": "shared",
-                "showReuseMessage": true,
-                "clear": false
-            },
-            "problemMatcher": ["$go"]
-        },
-        {
-            "label": "Run Tests",
-            "type": "shell",
-            "command": "go",
-            "args": ["test", "./..."],
-            "options": {
-                "cwd": "${workspaceFolder}/application"
-            },
-            "group": "test",
-            "presentation": {
-                "echo": true,
-                "reveal": "always",
-                "focus": false,
-                "panel": "shared",
-                "showReuseMessage": true,
-                "clear": false
-            },
-            "problemMatcher": ["$go"]
-        }
-    ]
-}
diff --git a/apps/jan-api-gateway/Dockerfile b/apps/jan-api-gateway/Dockerfile
deleted file mode 100644
index 847a23c5..00000000
--- a/apps/jan-api-gateway/Dockerfile
+++ /dev/null
@@ -1,24 +0,0 @@
-FROM golang:1.24.6-alpine AS builder
-WORKDIR /app
-
-RUN apk add --no-cache build-base
-
-COPY application/go.mod ./
-RUN go mod download
-
-ARG VERSION_TAG=dev
-COPY application/. .
-RUN go mod tidy
-
-RUN CGO_ENABLED=1 GOOS=linux \
-    go build -o /jan-api-gateway \
-      -gcflags="all=-N -l" \
-      -ldflags="-linkmode=external -X menlo.ai/jan-api-gateway/config.Version=${VERSION_TAG}" \
-      ./cmd/server
-
-FROM alpine:latest
-WORKDIR /root/
-COPY --from=builder /jan-api-gateway .
-
-EXPOSE 8080
-CMD ["./jan-api-gateway"]
diff --git a/apps/jan-api-gateway/LOCAL_DEV_SETUP.md b/apps/jan-api-gateway/LOCAL_DEV_SETUP.md
deleted file mode 100644
index bf29c88f..00000000
--- a/apps/jan-api-gateway/LOCAL_DEV_SETUP.md
+++ /dev/null
@@ -1,232 +0,0 @@
-# Local Development Setup - VS Code/Cursor IDE
-
-This guide will help you set up and run the Jan API Gateway locally using VS Code/Cursor's integrated debugging and launch configurations.
-
-## Prerequisites
-
-- **VS Code** or **Cursor IDE** installed
-- **Go extension** for VS Code/Cursor installed
-- **Docker and Docker Compose** installed
-- **Go 1.19+** installed
-- **Git** installed
-
-## Project Structure
-
-```
-jan-api-gateway/
-├── .vscode/                         # VS Code/Cursor configuration
-│   ├── launch.json                 # Debug and launch configurations
-│   └── tasks.json                  # Automated tasks (database management)
-├── docker/                         # Docker configuration
-│   ├── docker-compose.yml         # PostgreSQL and Valkey cache service configuration
-│   └── init.sql                   # Database initialization script
-├── application/                    # Go application code
-│   ├── cmd/server/                # Main server entry point
-│   ├── app/                       # Application layers
-│   └── Makefile                   # Build automation (optional)
-└── LOCAL_DEV_SETUP.md             # This documentation
-```
-
-## 🚀 Quick Start Guide
-
-### Step 1: Open Project in VS Code/Cursor
-
-1. **Open VS Code/Cursor**
-2. **File → Open Folder** → Select the `jan-api-gateway` directory
-3. **Install Go extension** if prompted
-4. **Trust the workspace** when prompted
-
-### Step 2: Start Development Environment
-
-1. **Press `F5`** or **Run → Start Debugging**
-2. **Select "Launch Jan API Gateway (Debug)"** from the dropdown
-3. **Wait for automatic setup:**
-   - PostgreSQL database starts automatically
-   - Valkey cache service starts automatically (Redis-compatible)
-   - Environment variables are set
-   - Application launches with debugger attached
-
-That's it! Your development environment is ready. 🎉
-
-## 🎯 Available Launch Configurations
-
-### 1. **Launch Jan API Gateway (Debug)** ⭐ *Recommended*
-- **Purpose**: Full development environment with debugging
-- **What it does**:
-  - Automatically starts PostgreSQL database and Valkey cache (Redis-compatible)
-  - Sets all required environment variables
-  - Launches the application with debugger attached
-  - Opens integrated terminal for logs
-- **When to use**: Daily development and debugging
-
-### 2. **Attach to Jan API Gateway**
-- **Purpose**: Attach debugger to already running process
-- **What it does**:
-  - Connects to a running debug session on port 2345
-  - Useful for debugging without restarting the application
-- **When to use**: When you want to debug a running instance
-
-### 3. **Launch Tests**
-- **Purpose**: Debug unit tests
-- **What it does**:
-  - Starts database and Valkey cache for testing
-  - Runs tests with debugging enabled
-  - Allows setting breakpoints in test code
-- **When to use**: Debugging test failures or test logic
-
-## 🔧 Development Workflow
-
-### Daily Development
-1. **Open project** in VS Code/Cursor
-2. **Set breakpoints** in your Go code where needed
-3. **Press F5** → Select "Launch Jan API Gateway (Debug)"
-4. **Code, debug, repeat**:
-   - Make code changes
-   - Save files (auto-reload on save)
-   - Use debug controls to step through code
-   - Inspect variables in debug panel
-
-### Debugging Features Available
-- ✅ **Breakpoints**: Click left margin to set/remove
-- ✅ **Variable Inspection**: Hover over variables or use debug panel
-- ✅ **Debug Console**: Execute Go expressions while debugging
-- ✅ **Call Stack**: Full call stack visualization
-- ✅ **Step Controls**: 
-  - `F10` - Step Over
-  - `F11` - Step Into 
-  - `Shift+F11` - Step Out
-  - `F5` - Continue
-- ✅ **Watch Expressions**: Monitor specific variables
-- ✅ **Conditional Breakpoints**: Right-click breakpoint for conditions
-
-### Testing Workflow
-1. **Write your tests** in `*_test.go` files
-2. **Set breakpoints** in test code if needed
-3. **Press F5** → Select "Launch Tests"
-4. **Debug your tests** with full IDE support
-
-## 🛠️ Manual Database Management
-
-While the launch configurations handle the database automatically, you can also manage it manually using VS Code tasks:
-
-### Using Command Palette (Recommended)
-1. **Press `Ctrl+Shift+P` (Windows/Linux) or `Cmd+Shift+P` (macOS)**
-2. **Type "Tasks: Run Task"**
-3. **Select one of:**
-   - **Start Database** - Start PostgreSQL and Valkey cache
-   - **Stop Database** - Stop PostgreSQL and Valkey cache
-   - **Wait for Database** - Check if database is ready
-   - **Wait for Cache** - Check if Valkey cache is ready
-   - **Build Application** - Build the Go application
-   - **Run Tests** - Run all tests
-
-### Using Terminal
-```bash
-# Start database and Valkey cache (primary cache service)
-docker-compose -f docker/docker-compose.yml up -d postgres valkey
-
-# Stop all services
-docker-compose -f docker/docker-compose.yml down
-
-# Reset database and Valkey cache (removes all data)
-docker-compose -f docker/docker-compose.yml down -v
-docker-compose -f docker/docker-compose.yml up -d postgres valkey
-
-# View logs
-docker-compose -f docker/docker-compose.yml logs postgres
-docker-compose -f docker/docker-compose.yml logs valkey
-
-# Connect to database
-docker-compose -f docker/docker-compose.yml exec postgres psql -U jan_user -d jan_api_gateway
-
-# Connect to Valkey cache
-docker-compose -f docker/docker-compose.yml exec valkey valkey-cli
-```
-
-## ⚙️ Environment Variables
-
-The following environment variables are **automatically configured** in the launch configurations:
-
-| Variable | Description | Value |
-|----------|-------------|-------|
-| `DB_POSTGRESQL_WRITE_DSN` | Primary database connection | `postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable` |
-| `DB_POSTGRESQL_READ1_DSN` | Read replica database connection | `postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable` |
-| `ENABLE_ADMIN_API` | Enable admin API functionality | `True` |
-| `JWT_SECRET` | Secret key for JWT token signing | `your-super-secret-jwt-key-change-in-production` |
-| `APIKEY_SECRET` | Secret key for API key encryption | `your-api-key-secret-change-in-production` |
-| `JAN_INFERENCE_MODEL_URL` | Jan inference model service URL | `http://localhost:8000` |
-| `SERPER_API_KEY` | Serper API key for web search | `your-serper-api-key` |
-| `OAUTH2_GOOGLE_CLIENT_ID` | Google OAuth2 client ID | `your-google-client-id` |
-| `OAUTH2_GOOGLE_CLIENT_SECRET` | Google OAuth2 client secret | `your-google-client-secret` |
-| `OAUTH2_GOOGLE_REDIRECT_URL` | Google OAuth2 redirect URL | `http://localhost:8080/auth/google/callback` |
-| `REDIS_URL` | Redis connection URL | `redis://localhost:6379` |
-| `REDIS_PASSWORD` | Redis authentication password | `` (empty for dev) |
-| `REDIS_DB` | Redis database number | `0` |
-
-**📝 Redis Cache Notes:**
-- **Redis** is used for caching inference models and improving performance
-- Cache keys are automatically managed by the application
-- Redis connection is required for optimal performance
-
-**Note**: You can modify these values in `.vscode/launch.json` if needed for your environment.
-
-## 🐛 Troubleshooting
-
-### Database & Redis Connection Issues
-1. **Check Docker**: Ensure Docker Desktop is running
-2. **Check Ports**: Make sure ports 5432 (PostgreSQL) and 6379 (Redis) are available
-3. **View Database Status**: Use Command Palette → "Tasks: Run Task" → "Wait for Database"
-4. **View Redis Status**: Use Command Palette → "Tasks: Run Task" → "Wait for Redis"
-5. **View Logs**: Check the integrated terminal for database and Redis startup logs
-6. **Redis Connection**: Ensure Redis is running and accessible on the configured port
-
-### Go Extension Issues
-1. **Install Go Extension**: VS Code/Cursor should prompt you automatically
-2. **Go Tools**: Use Command Palette → "Go: Install/Update Tools"
-3. **Restart IDE**: Sometimes required after installing tools
-
-### Debug Issues
-1. **Check Go Installation**: `go version` in terminal
-2. **Install Delve**: Will be automatically installed on first debug run
-3. **Check Firewall**: Ensure localhost:2345 is accessible
-
-### Permission Issues
-- **Windows**: Run VS Code/Cursor as Administrator if Docker access issues
-- **Linux/macOS**: Ensure your user is in the `docker` group
-
-## 🏗️ Database Schema
-
-The application automatically creates and migrates the database schema on startup. The schema includes:
-
-- **Users** - User accounts and authentication
-- **Organizations** - Multi-tenant organization structure
-- **Projects** - Project management within organizations
-- **API Keys** - API authentication and authorization
-- **Additional domain tables** - Based on Go structs in the `domain` package
-
-All tables are created automatically using GORM migrations when the application starts.
-
-## 📝 Additional Notes
-
-### Hot Reload
-- The debugger supports hot reload - save your Go files and the application will restart automatically
-- Breakpoints will be preserved across restarts
-
-### Multiple Debug Sessions
-- You can run multiple debug sessions simultaneously
-- Use "Attach to Jan API Gateway" to connect additional debuggers
-
-### Production Environment Variables
-- For production deployment, replace the example values in environment variables
-- Use secure, randomly generated secrets for JWT and API keys
-- Configure proper database connections for your production database
-
-### IDE Extensions Recommended
-- **Go** - Official Go language support
-- **Docker** - Docker container management
-- **PostgreSQL** - Database query and management (optional)
-- **REST Client** - API testing (optional)
-
----
-
-**Happy Coding! 🚀** Your Jan API Gateway development environment is now fully integrated with VS Code/Cursor for the best possible developer experience.
\ No newline at end of file
diff --git a/apps/jan-api-gateway/README.md b/apps/jan-api-gateway/README.md
deleted file mode 100644
index cedd0c51..00000000
--- a/apps/jan-api-gateway/README.md
+++ /dev/null
@@ -1,494 +0,0 @@
-# Jan API Gateway
-
-A comprehensive API gateway for Jan Server that provides OpenAI-compatible endpoints, multi-tenant organization management, conversation handling, and AI model inference capabilities. The system serves as a centralized gateway for AI model interactions with enterprise-grade features including user management, organization hierarchies, project-based access control, and real-time streaming responses.
-
-## 🚀 Features
-
-### Core Features
-- **OpenAI-Compatible API**: Full compatibility with OpenAI's chat completion API with streaming support and reasoning content handling
-- **Multi-Tenant Architecture**: Organization and project-based access control with hierarchical permissions and member management
-- **Conversation Management**: Persistent conversation storage and retrieval with item-level management, including message, function call, and reasoning content types
-- **Authentication & Authorization**: JWT-based auth with Google OAuth2 integration and role-based access control
-- **API Key Management**: Secure API key generation and management at organization and project levels with multiple key types (admin, project, organization, service, ephemeral)
-- **Model Registry**: Dynamic model endpoint management with automatic health checking and service discovery
-- **Cache Service**: High-performance caching for inference models using Redis to reduce load times and improve response performance
-- **Streaming Support**: Real-time streaming responses with Server-Sent Events (SSE) and chunked transfer encoding
-- **MCP Integration**: Model Context Protocol support for external tools and resources with JSON-RPC 2.0
-- **Web Search**: Serper API integration for web search capabilities via MCP with webpage fetching
-- **Database Management**: PostgreSQL with read/write replicas and automatic migrations using Atlas
-- **Transaction Management**: Automatic database transaction handling with rollback support
-- **Health Monitoring**: Automated health checks with cron-based model endpoint monitoring
-- **Performance Profiling**: Built-in pprof endpoints for performance monitoring and Grafana Pyroscope integration
-- **Request Logging**: Comprehensive request/response logging with unique request IDs and structured logging
-- **CORS Support**: Cross-origin resource sharing middleware with configurable allowed hosts
-- **Swagger Documentation**: Auto-generated API documentation with interactive UI
-- **Email Integration**: SMTP support for invitation and notification systems
-- **Response Management**: Comprehensive response tracking with status management and usage statistics
-
-
-
-## 🏗️ Business Domain Architecture
-
-### Core Domain Models
-![System Design Diagram](docs/System_Design.png)
-#### User Management
-- **Users**: Support for both regular users and guest users with email-based authentication
-- **Organizations**: Multi-tenant organizations with owner/member roles and hierarchical access
-- **Projects**: Project-based resource isolation within organizations with member management
-- **Invites**: Email-based invitation system for organization and project membership
-
-#### Authentication & Authorization
-- **API Keys**: Multiple types (admin, project, organization, service, ephemeral) with scoped permissions
-- **JWT Tokens**: Stateless authentication with Google OAuth2 integration
-- **Role-Based Access**: Hierarchical permissions from organization owners to project members
-
-#### Conversation Management
-- **Conversations**: Persistent chat sessions with metadata and privacy controls
-- **Items**: Rich conversation items supporting messages, function calls, and reasoning content
-- **Content Types**: Support for text, images, files, and multimodal content with annotations
-- **Status Tracking**: Real-time status management (pending, in_progress, completed, failed, cancelled)
-
-#### Response Management
-- **Responses**: Comprehensive tracking of AI model interactions with full parameter logging
-- **Streaming**: Real-time streaming with Server-Sent Events and chunked transfer encoding
-- **Usage Statistics**: Token usage tracking and performance metrics
-- **Error Handling**: Detailed error tracking with unique error codes
-
-#### External Integrations
-- **Jan Inference Service**: Primary AI model inference backend with health monitoring
-- **Serper API**: Web search capabilities via MCP with search and webpage fetching
-- **SMTP**: Email notifications for invitations and system alerts
-- **Model Registry**: Dynamic model discovery and health checking
-
-### Data Flow Architecture
-
-1. **Request Processing**: HTTP requests → Authentication → Authorization → Business Logic
-2. **AI Inference**: Request → Jan Inference Service → Streaming Response → Database Storage
-3. **MCP Integration**: JSON-RPC 2.0 → Tool Execution → External APIs → Response Streaming
-4. **Health Monitoring**: Cron Jobs → Service Discovery → Model Registry Updates
-5. **Database Operations**: Read/Write Replicas → Transaction Management → Automatic Migrations
-
-
-## 🛠️ Technology Stack
-
-- **Backend**: Go 1.24.6
-- **Web Framework**: Gin v1.10.1
-- **Database**: PostgreSQL with GORM v1.30.1
-- **Database Features**: 
-  - Read/Write Replicas with GORM dbresolver
-  - Automatic migrations with Atlas
-  - Generated query interfaces with GORM Gen
-- **Authentication**: JWT v5.3.0 + Google OAuth2 v3.15.0
-- **Caching**: Redis v9.14.0 for high-performance model caching
-- **API Documentation**: Swagger/OpenAPI v1.16.6
-- **Streaming**: Server-Sent Events (SSE) with chunked transfer
-- **Dependency Injection**: Google Wire v0.6.0
-- **Logging**: Logrus v1.9.3 with structured logging
-- **HTTP Client**: Resty v3.0.0-beta.3
-- **Profiling**: 
-  - Built-in pprof endpoints
-  - Grafana Pyroscope Go integration v0.1.8
-- **Scheduling**: Crontab v1.2.0 for health checks
-- **MCP Protocol**: MCP-Go v0.37.0 for Model Context Protocol
-- **External Integrations**: 
-  - Jan Inference Service
-  - Serper API (Web Search)
-  - Google OAuth2
-- **Development Tools**:
-  - Atlas for database migrations
-  - GORM Gen for code generation
-  - Swagger for API documentation
-
-### API Endpoints
-
-#### Authentication API (`/v1/auth`)
-- `POST /google/callback` - Google OAuth2 callback handler
-- `GET /google/testcallback` - Test callback for development
-
-#### Chat Completions API (`/v1/chat`, `/v1/mcp`, `/v1/models`)
-- `POST /chat/completions` - OpenAI-compatible chat completions with streaming support
-- `POST /mcp` - MCP streamable endpoint with JSON-RPC 2.0 support
-- `GET /models` - List available models from inference registry
-- Supported MCP methods:
-  - `initialize` - MCP initialization
-  - `notifications/initialized` - Initialization notification
-  - `ping` - Connection ping
-  - `tools/list` - List available tools (Serper search, webpage fetch)
-  - `tools/call` - Execute tool calls
-  - `prompts/list` - List available prompts
-  - `prompts/call` - Execute prompts
-  - `resources/list` - List available resources
-  - `resources/templates/list` - List resource templates
-  - `resources/read` - Read resource content
-  - `resources/subscribe` - Subscribe to resource updates
-
-#### Conversation-aware Chat API (`/v1/conv`)
-- `POST /chat/completions` - Conversation-based chat completions with streaming support
-- `POST /mcp` - MCP streamable endpoint for conversation-aware chat
-- `GET /models` - List available models for conversation-aware chat
-
-#### Conversations API (`/v1/conversations`)
-- `POST /` - Create new conversation
-- `GET /` - List conversations with pagination
-- `GET /{conversation_id}` - Get conversation by ID
-- `PATCH /{conversation_id}` - Update conversation metadata
-- `DELETE /{conversation_id}` - Delete conversation
-- `POST /{conversation_id}/items` - Add items to conversation
-- `GET /{conversation_id}/items` - List conversation items
-- `GET /{conversation_id}/items/{item_id}` - Get specific item
-- `DELETE /{conversation_id}/items/{item_id}` - Delete specific item
-
-#### Administration API (`/v1/organization`)
-- `GET /` - List organizations
-- `POST /` - Create organization
-- `GET /{org_id}` - Get organization details
-- `PATCH /{org_id}` - Update organization
-- `DELETE /{org_id}` - Delete organization
-- `GET /{org_id}/api_keys` - List organization API keys
-- `POST /{org_id}/api_keys` - Create organization API key
-- `DELETE /{org_id}/api_keys/{key_id}` - Delete API key
-- `GET /admin_api_keys` - List admin API keys
-- `POST /admin_api_keys` - Create admin API key
-- `GET /admin_api_keys/{key_id}` - Get admin API key
-- `DELETE /admin_api_keys/{key_id}` - Delete admin API key
-
-##### Projects (`/v1/organization/{org_id}/projects`)
-- `GET /` - List projects
-- `POST /` - Create project
-- `GET /{project_id}` - Get project details
-- `PATCH /{project_id}` - Update project
-- `DELETE /{project_id}` - Delete project
-- `GET /{project_id}/api_keys` - List project API keys
-- `POST /{project_id}/api_keys` - Create project API key
-- `DELETE /{project_id}/api_keys/{key_id}` - Delete project API key
-
-##### Invites (`/v1/organization/{org_id}/invites`)
-- `GET /` - List organization invites
-- `POST /` - Create organization invite
-- `GET /{invite_id}` - Get invite details
-- `DELETE /{invite_id}` - Delete invite
-
-#### Responses API (`/v1/responses`)
-- `POST /` - Create response
-- `GET /{response_id}` - Get response details
-- `DELETE /{response_id}` - Delete response
-- `POST /{response_id}/cancel` - Cancel running response
-- `GET /{response_id}/input_items` - List response input items
-
-#### Server API (`/v1/version`, `/healthcheck`)
-- `GET /healthcheck` - Health check endpoint
-- `GET /v1/version` - API version information
-- `GET /google/testcallback` - Development callback test endpoint
-
-## 🚀 Quick Start
-
-### Prerequisites
-- Go 1.24.6+
-- Docker & Docker Compose
-- PostgreSQL (or use Docker)
-- Atlas (for database migrations): `brew install ariga/tap/atlas`
-
-### Local Development
-
-1. **Clone and setup**:
-   ```bash
-   git clone <repository-url>
-   cd jan-api-gateway/application
-   make setup
-   go mod tidy
-   ```
-
-2. **Start the server**:
-   ```bash
-   go run ./cmd/server
-   ```
-
-3. **Access the API**:
-   - API Base URL: `http://localhost:8080`
-   - Swagger UI: `http://localhost:8080/api/swagger/index.html`
-   - Health Check: `http://localhost:8080/healthcheck`
-   - Version Info: `http://localhost:8080/v1/version`
-   - Profiling Endpoints: `http://localhost:6060/debug/pprof/`
-
-### Environment Variables
-
-| Variable | Description | Default |
-|----------|-------------|---------|
-| `DB_POSTGRESQL_WRITE_DSN` | Primary database connection | `postgres://jan_user:jan_password@localhost:5432/jan_api_gateway?sslmode=disable` |
-| `DB_POSTGRESQL_READ1_DSN` | Read replica database connection | Same as write DSN |
-| `JWT_SECRET` | JWT token signing secret | `your-super-secret-jwt-key-change-in-production` |
-| `APIKEY_SECRET` | API key encryption secret | `your-api-key-secret-change-in-production` |
-| `JAN_INFERENCE_MODEL_URL` | Jan inference service URL | `http://localhost:8000` |
-| `SERPER_API_KEY` | Serper API key for web search | `your-serper-api-key` |
-| `OAUTH2_GOOGLE_CLIENT_ID` | Google OAuth2 client ID | `your-google-client-id` |
-| `OAUTH2_GOOGLE_CLIENT_SECRET` | Google OAuth2 client secret | `your-google-client-secret` |
-| `OAUTH2_GOOGLE_REDIRECT_URL` | Google OAuth2 redirect URL | `http://localhost:8080/auth/google/callback` |
-| `ALLOWED_CORS_HOSTS` | Value of allowed CORS hosts, separated by commas, supporting prefix wildcards with '*'. | `http://localhost:8080,*jan.ai` |
-| `SMTP_HOST` | SMTP server host for email notifications | `smtp.gmail.com` |
-| `SMTP_PORT` | SMTP server port | `587` |
-| `SMTP_USERNAME` | SMTP username | `your-smtp-username` |
-| `SMTP_PASSWORD` | SMTP password | `your-smtp-password` |
-| `SMTP_SENDER_EMAIL` | Default sender email address | `noreply@yourdomain.com` |
-| `INVITE_REDIRECT_URL` | Redirect URL for invitation acceptance | `http://localhost:8080/invite/accept` |
-| `REDIS_URL` | Redis connection URL | `redis://localhost:6379` |
-| `REDIS_PASSWORD` | Redis authentication password | `""` (empty for dev) |
-| `REDIS_DB` | Redis database number | `0` |
-
-## 🚀 Redis Caching
-
-The Jan API Gateway includes Redis caching for inference models, which significantly improves performance by caching identical requests and avoiding repeated model loading.
-
-### Redis Features
-- **Model List Caching**: Cache model discovery for 10 minutes
-- **Transparent Integration**: No code changes needed in existing handlers
-- **Centralized Constants**: Redis cache keys defined as constants
-
-### Quick Setup
-
-1. **Deploy Redis Infrastructure**:
-   ```bash
-   helm dependency update charts/umbrella-chart/
-   helm install jan-server charts/umbrella-chart/
-   ```
-
-2. **Environment Variables**:
-   ```bash
-   REDIS_URL=redis://jan-server-redis-master:6379
-   REDIS_PASSWORD=""  # Empty for dev
-   REDIS_DB=0
-   ```
-
-3. **Verify Setup**:
-   ```bash
-   # Check Redis connectivity in logs
-   kubectl logs deployment/jan-server-jan-api-gateway | grep "Successfully connected to Redis"
-   ```
-
-### Performance Benefits
-- **Reduced latency** for model discovery calls
-- **Reduced CPU usage** by avoiding repeated model loading
-- **Better scalability** with reduced backend load
-- **Improved user experience** with faster response times
-
-## 📚 API Usage Examples
-
-### Chat Completion (OpenAI Compatible)
-
-```bash
-curl -X POST http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer YOUR_API_KEY" \
-  -d '{
-    "model": "jan-v1-4b",
-    "messages": [
-      {"role": "user", "content": "Hello, how are you?"}
-    ],
-    "stream": true,
-    "temperature": 0.7,
-    "max_tokens": 1000
-  }'
-```
-
-### Conversation-based Chat Completion
-
-```bash
-curl -X POST http://localhost:8080/v1/conv/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer YOUR_API_KEY" \
-  -d '{
-    "model": "jan-v1-4b",
-    "input": "Hello, how are you?",
-    "conversation_id": "conv_abc123",
-    "stream": true,
-    "temperature": 0.7,
-    "max_tokens": 1000
-  }'
-```
-
-### Create Organization
-
-```bash
-curl -X POST http://localhost:8080/v1/organization \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer YOUR_JWT_TOKEN" \
-  -d '{
-    "name": "My Organization",
-    "description": "A sample organization"
-  }'
-```
-
-### Create API Key
-
-```bash
-curl -X POST http://localhost:8080/v1/organization/{org_id}/api_keys \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer YOUR_JWT_TOKEN" \
-  -d '{
-    "name": "My API Key",
-    "description": "API key for external integrations"
-  }'
-```
-
-### Web Search via MCP
-
-```bash
-curl -X POST http://localhost:8080/v1/mcp \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer YOUR_API_KEY" \
-  -d '{
-    "jsonrpc": "2.0",
-    "id": 1,
-    "method": "tools/call",
-    "params": {
-      "name": "serper_search",
-      "arguments": {
-        "q": "latest AI developments",
-        "num": 5
-      }
-    }
-  }'
-```
-
-## 🔧 Development
-
-### Database Migrations
-
-The project uses Atlas for database migrations. To generate and apply migrations:
-
-1. **Setup migration database**:
-   ```sql
-   CREATE ROLE migration WITH LOGIN PASSWORD 'migration';
-   ALTER ROLE migration WITH SUPERUSER;
-   CREATE DATABASE migration WITH OWNER = migration;
-   ```
-
-2. **Generate migration files**:
-   ```bash
-   # Generate schema files
-   go run ./cmd/codegen/dbmigration
-   
-   # Generate diff SQL
-   atlas schema diff --dev-url "postgres://migration:migration@localhost:5432/migration?sslmode=disable" \
-     --from file://tmp/release.hcl --to file://tmp/main.hcl > tmp/diff.sql
-   ```
-
-3. **Apply migrations**:
-   ```bash
-   # Auto-migration on startup (development)
-   go run ./cmd/server
-   
-   # Manual migration (production)
-   atlas migrate apply --url "your-production-db-url"
-   ```
-
-### Project Structure
-
-```
-jan-api-gateway/
-├── application/                 # Main Go application
-│   ├── app/
-│   │   ├── cmd/server/         # Server entry point
-│   │   ├── domain/             # Business logic and entities
-│   │   ├── infrastructure/     # Database and external services
-│   │   ├── interfaces/         # HTTP handlers and routes
-│   │   └── utils/              # Utilities and helpers
-│   ├── config/                 # Configuration management
-│   ├── docs/                   # Swagger documentation
-│   └── Makefile               # Build automation
-├── docker/                     # Docker configuration
-└── LOCAL_DEV_SETUP.md         # Detailed development setup
-```
-
-### Key Features Implementation
-
-#### Streaming with Server-Sent Events
-The chat completion endpoints implement real-time streaming using Server-Sent Events (SSE) with chunked transfer encoding, providing low-latency responses for AI model interactions. The system supports both content and reasoning content streaming with proper buffering and event sequencing.
-
-#### Multi-Tenant Architecture
-Organizations and projects provide hierarchical access control with fine-grained permissions and resource isolation. API keys can be scoped to organization or project levels with different types (admin, project, organization, service, ephemeral) for various use cases.
-
-#### OpenAI Compatibility
-Full compatibility with OpenAI's chat completion API, including streaming, function calls, tool usage, and all standard parameters (temperature, max_tokens, etc.). The system also supports reasoning content and multimodal inputs.
-
-#### Model Context Protocol (MCP)
-Comprehensive MCP implementation supporting tools, prompts, and resources with JSON-RPC 2.0 protocol. Includes Serper API integration for web search capabilities and webpage fetching functionality.
-
-#### Database Architecture
-- Read/Write replica support with automatic load balancing using GORM dbresolver
-- Transaction management with automatic rollback on errors
-- Generated query interfaces using GORM Gen for type safety
-- Automatic schema migrations with Atlas integration
-- Support for complex data types including JSON fields and relationships
-
-#### Monitoring & Observability
-- Built-in pprof endpoints for performance profiling on port 6060
-- Grafana Pyroscope integration for continuous profiling
-- Structured logging with unique request IDs and comprehensive request/response tracking
-- Automated health checks for inference model endpoints with cron-based monitoring
-- Model registry with dynamic service discovery and health status tracking
-
-## 📊 Monitoring & Profiling
-
-### Health Monitoring
-- **Health Check Endpoint**: `GET /healthcheck` - Basic server health status
-- **Version Endpoint**: `GET /v1/version` - API version information
-- **Automated Model Health Checks**: Cron-based monitoring of inference model endpoints
-- **Database Health**: Automatic connection monitoring with read/write replica support
-
-### Performance Profiling
-- **pprof Endpoints**: Available on port `6060` for performance analysis
-  - CPU profiling: `http://localhost:6060/debug/pprof/profile`
-  - Memory profiling: `http://localhost:6060/debug/pprof/heap`
-  - Goroutine profiling: `http://localhost:6060/debug/pprof/goroutine`
-  - Block profiling: `http://localhost:6060/debug/pprof/block`
-- **Grafana Pyroscope Integration**: Built-in support for continuous profiling
-- **Request Tracing**: Unique request IDs for end-to-end tracing
-
-### Logging
-- **Structured Logging**: JSON-formatted logs with Logrus
-- **Request/Response Logging**: Complete request lifecycle tracking
-- **Error Tracking**: Unique error codes for debugging
-- **Streaming Request Handling**: Special handling for SSE and streaming responses
-
-### Database Monitoring
-- **Read/Write Replica Support**: Automatic load balancing
-- **Connection Pooling**: Optimized database connections
-- **Migration Tracking**: Automatic schema migration monitoring
-- **Transaction Monitoring**: Automatic rollback on errors
-
-
-## 📖 Documentation
-
-- **API Documentation**: Available at `/api/swagger/index.html` when running locally
-- **OpenAI-Style Documentation**: Professional API reference documentation with OpenAI-style layout
-- **Development Setup**: See [LOCAL_DEV_SETUP.md](LOCAL_DEV_SETUP.md) for detailed VS Code/Cursor setup
-- **Architecture**: See the system design diagram above (`docs/System_Design.png`) for system architecture
-
-### API Structure Overview
-
-The API is organized into the following main groups:
-
-1. **Authentication API** - User authentication and authorization
-2. **Chat Completions API** - Chat completions, models, and MCP functionality
-3. **Conversation-aware Chat API** - Conversation-based chat completions
-4. **Conversations API** - Conversation management and items
-5. **Responses API** - Response tracking and management
-6. **Administration API** - Organization and project management
-7. **Server API** - System information and health checks
-
-### Swagger Documentation
-
-The API documentation is automatically generated from code annotations and includes:
-- Interactive API explorer
-- Request/response examples
-- Authentication requirements
-- Error code documentation
-- Model schemas and validation rules
-
-## 🤝 Contributing
-
-1. Fork the repository
-2. Create a feature branch
-3. Make your changes
-4. Add tests for new functionality
-5. Submit a pull request
diff --git a/apps/jan-api-gateway/application/Makefile b/apps/jan-api-gateway/application/Makefile
deleted file mode 100644
index 79442912..00000000
--- a/apps/jan-api-gateway/application/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-.PHONY: install
-install: # Install the swag and wire command-line tools with `go install ...@latest`
-	@go install github.com/swaggo/swag/cmd/swag@latest
-	@go install github.com/google/wire/cmd/wire@latest
-
-.PHONY: doc
-doc: # Generate Swagger docs into docs/ with swag, using cmd/server/server.go as the entry file
-	@swag init --parseDependency -g cmd/server/server.go -o docs
-
-.PHONY: wire
-wire: # Run the wire generator on ./cmd/server
-	@wire ./cmd/server
-
-.PHONY: setup
-setup: # Run the doc target, then the wire target, each through a nested make invocation
-	@make doc
-	@make wire
\ No newline at end of file
diff --git a/apps/jan-api-gateway/application/app/domain/apikey/apikey.go b/apps/jan-api-gateway/application/app/domain/apikey/apikey.go
deleted file mode 100644
index f48487d0..00000000
--- a/apps/jan-api-gateway/application/app/domain/apikey/apikey.go
+++ /dev/null
@@ -1,72 +0,0 @@
-package apikey
-
-import (
-	"context"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-)
-
-type ApikeyType string // ApikeyType is a string label naming the category of an API key.
-
-const ( // The ApikeyType values declared by this package.
-	ApikeyTypeAdmin        ApikeyType = "admin"
-	ApikeyTypeProject      ApikeyType = "project"
-	ApikeyTypeService      ApikeyType = "service"
-	ApikeyTypeOrganization ApikeyType = "organization"
-	ApikeyTypeEphemeral    ApikeyType = "ephemeral"
-)
-
-type ApiKey struct { // ApiKey is the domain record for one API key; it carries KeyHash and PlaintextHint but no field for the full key.
-	ID             uint // numeric identifier; ApiKeyRepository.FindByID and DeleteByID take this value
-	PublicID       string // assigned by ApiKeyService.CreateApiKey before the record is stored
-	KeyHash        string // value matched by ApiKeyRepository.FindByKeyHash
-	PlaintextHint  string // presumably a short, non-secret fragment of the key for display — TODO confirm
-	Description    string
-	Enabled        bool // cleared by Revoke; IsValid returns false when this is false
-	ApikeyType     string // "admin","project","service","organization","ephemeral"
-	OwnerPublicID  string
-	ProjectID      *uint
-	OrganizationID *uint
-	Permissions    string // JSON-encoded string (schema not visible here)
-	ExpiresAt      *time.Time // nil skips the expiry check in IsValid
-	CreatedAt      time.Time
-	UpdatedAt      time.Time // set to time.Now() by Revoke
-	LastUsedAt     *time.Time // NOTE(review): nothing in this file writes this field — confirm who maintains it
-}
-
-func (k *ApiKey) Revoke() { // Revoke disables the key in memory only; it does not persist the change itself.
-	k.Enabled = false
-	k.UpdatedAt = time.Now() // record when the key was disabled
-}
-
-func (k *ApiKey) IsValid() bool { // IsValid reports whether the key is enabled and not yet expired.
-	if !k.Enabled {
-		return false
-	}
-	if k.ExpiresAt != nil && k.ExpiresAt.Before(time.Now()) { // a nil ExpiresAt means no expiry is enforced
-		return false
-	}
-	return true
-}
-
-type ApiKeyFilter struct { // ApiKeyFilter carries lookup criteria; every field is a pointer (presumably nil means "do not filter" — confirm in the repository).
-	KeyHash        *string
-	PublicID       *string
-	ApikeyType     *string
-	OwnerPublicID  *string
-	ProjectID      *uint
-	UserID         *uint // NOTE(review): ApiKey has no UserID field; confirm how the repository applies this criterion
-	OrganizationID *uint
-}
-
-type ApiKeyRepository interface { // ApiKeyRepository is the persistence contract that ApiKeyService calls into.
-	Create(ctx context.Context, u *ApiKey) error
-	Update(ctx context.Context, u *ApiKey) error
-	DeleteByID(ctx context.Context, id uint) error // id corresponds to ApiKey.ID
-	FindByID(ctx context.Context, id uint) (*ApiKey, error)
-	FindByKeyHash(ctx context.Context, keyHash string) (*ApiKey, error) // takes the hashed key, not the plaintext
-	FindByFilter(ctx context.Context, filter ApiKeyFilter, pagination *query.Pagination) ([]*ApiKey, error) // callers may pass a nil pagination (ApiKeyService.FindByPublicID does)
-	FindOneByFilter(ctx context.Context, filter ApiKeyFilter) (*ApiKey, error)
-	Count(ctx context.Context, filter ApiKeyFilter) (int64, error)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/apikey/apikey_service.go b/apps/jan-api-gateway/application/app/domain/apikey/apikey_service.go
deleted file mode 100644
index 76964e8f..00000000
--- a/apps/jan-api-gateway/application/app/domain/apikey/apikey_service.go
+++ /dev/null
@@ -1,114 +0,0 @@
-package apikey
-
-import (
-	"crypto/hmac"
-	"crypto/sha256"
-	"encoding/hex"
-	"fmt"
-
-	"golang.org/x/net/context"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-type ApiKeyService struct { // ApiKeyService offers key generation, hashing and repository-backed operations for API keys.
-	repo                ApiKeyRepository
-	organizationService *organization.OrganizationService // not referenced by any method in this file
-}
-
-func NewService( // NewService builds an ApiKeyService from its repository and the organization service.
-	repo ApiKeyRepository,
-	organizationService *organization.OrganizationService,
-) *ApiKeyService {
-	return &ApiKeyService{ // positional literal: order must match the struct's field order
-		repo,
-		organizationService,
-	}
-}
-
-const ApikeyPrefix = "sk" // leading segment of the "sk-<type>" prefix built in GenerateKeyAndHash
-
-func (s *ApiKeyService) GenerateKeyAndHash(ctx context.Context, ownerType ApikeyType) (string, string, error) { // returns (generated key, its HashKey digest, error)
-	prefix := fmt.Sprintf("%s-%s", ApikeyPrefix, ownerType) // e.g. "sk-admin"
-	baseKey, err := idgen.GenerateSecureID(prefix, 24) // NOTE(review): the unit of 24 is defined by idgen — confirm
-	if err != nil {
-		return "", "", err
-	}
-
-	hash := s.HashKey(ctx, baseKey)
-	return baseKey, hash, nil
-}
-
-func (s *ApiKeyService) generatePublicID() (string, error) { // generatePublicID asks idgen for a secure ID with prefix "key" and size argument 16.
-	return idgen.GenerateSecureID("key", 16)
-}
-
-func (s *ApiKeyService) HashKey(ctx context.Context, key string) string { // HashKey returns the hex-encoded HMAC-SHA256 of key; ctx is unused.
-	h := hmac.New(sha256.New, []byte(environment_variables.EnvironmentVariables.APIKEY_SECRET)) // HMAC keyed with the APIKEY_SECRET setting
-	h.Write([]byte(key))
-
-	return hex.EncodeToString(h.Sum(nil))
-}
-
-func (s *ApiKeyService) CreateApiKey(ctx context.Context, apiKey *ApiKey) (*ApiKey, error) { // CreateApiKey assigns a new PublicID to apiKey, stores it, and returns the same pointer.
-	publicId, err := s.generatePublicID()
-	if err != nil {
-		return nil, err
-	}
-	apiKey.PublicID = publicId // mutates the caller's struct; any PublicID already set is overwritten
-	if err := s.repo.Create(ctx, apiKey); err != nil {
-		return nil, err
-	}
-	return apiKey, nil
-}
-
-func (s *ApiKeyService) Delete(ctx context.Context, apiKey *ApiKey) error { // Delete removes the key identified by apiKey.ID through the repository.
-	if err := s.repo.DeleteByID(ctx, apiKey.ID); err != nil {
-		return err
-	}
-	return nil
-}
-
-func (s *ApiKeyService) FindById(ctx context.Context, id uint) (*ApiKey, error) { // FindById looks a key up by its numeric ID.
-	return s.repo.FindByID(ctx, id)
-}
-
-func (s *ApiKeyService) FindByPublicID(ctx context.Context, publicID string) (*ApiKey, error) { // FindByPublicID returns the single key whose PublicID equals publicID.
-	entities, err := s.repo.FindByFilter(ctx, ApiKeyFilter{
-		PublicID: &publicID,
-	}, nil) // nil pagination is passed to the repository
-	if err != nil {
-		return nil, err
-	}
-	if len(entities) != 1 { // zero matches and multiple matches both yield the same "record not found" error
-		return nil, fmt.Errorf("record not found")
-	}
-	return entities[0], nil
-}
-
-func (s *ApiKeyService) FindByKeyHash(ctx context.Context, key string) (*ApiKey, error) { // FindByKeyHash forwards key to the repository as the hash, without hashing it (contrast FindByKey).
-	return s.repo.FindByKeyHash(ctx, key)
-}
-
-func (s *ApiKeyService) FindByKey(ctx context.Context, key string) (*ApiKey, error) { // FindByKey hashes key with HashKey and looks up the resulting digest.
-	return s.repo.FindByKeyHash(ctx, s.HashKey(ctx, key))
-}
-
-func (s *ApiKeyService) Find(ctx context.Context, filter ApiKeyFilter, p *query.Pagination) ([]*ApiKey, error) { // Find returns the keys matching filter, forwarding p unchanged to the repository.
-	return s.repo.FindByFilter(ctx, filter, p)
-}
-
-func (s *ApiKeyService) Count(ctx context.Context, filter ApiKeyFilter) (int64, error) { // Count returns the repository's count for filter.
-	return s.repo.Count(ctx, filter)
-}
-
-func (s *ApiKeyService) Save(ctx context.Context, entity *ApiKey) error { // Save delegates to the repository's Update.
-	return s.repo.Update(ctx, entity)
-}
-
-func (s *ApiKeyService) FindOneByFilter(ctx context.Context, filter ApiKeyFilter) (*ApiKey, error) { // FindOneByFilter delegates to the repository method of the same name.
-	return s.repo.FindOneByFilter(ctx, filter)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/auth/auth_service.go b/apps/jan-api-gateway/application/app/domain/auth/auth_service.go
deleted file mode 100644
index 7c4ece98..00000000
--- a/apps/jan-api-gateway/application/app/domain/auth/auth_service.go
+++ /dev/null
@@ -1,622 +0,0 @@
-package auth
-
-import (
-	"context"
-	"fmt"
-	"net/http"
-	"strings"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"github.com/golang-jwt/jwt/v5"
-
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/invite"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/project"
-
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-type AuthService struct {
-	userService         *user.UserService
-	apiKeyService       *apikey.ApiKeyService
-	organizationService *organization.OrganizationService
-	projectService      *project.ProjectService
-	inviteService       *invite.InviteService
-}
-
-func NewAuthService(
-	userService *user.UserService,
-	apiKeyService *apikey.ApiKeyService,
-	organizationService *organization.OrganizationService,
-	projectService *project.ProjectService,
-	inviteService *invite.InviteService,
-) *AuthService {
-	return &AuthService{
-		userService,
-		apiKeyService,
-		organizationService,
-		projectService,
-		inviteService,
-	}
-}
-
-const AccessTokenExpirationDuration = 15 * time.Minute
-const RefreshTokenExpirationDuration = 7 * 24 * time.Hour
-
-type UserContextKey string
-
-const (
-	UserContextKeyEntity UserContextKey = "UserContextKeyEntity"
-	UserContextKeyID     UserContextKey = "UserContextKeyID"
-)
-
-func (s *AuthService) InitOrganization(ctx context.Context) error {
-	orgEntity, err := s.organizationService.FindOrCreateDefaultOrganization(ctx)
-	if err != nil {
-		return err
-	}
-	// set DEFAULT_ORGANIZATION
-	organization.UpdateDefaultOrganization(orgEntity)
-
-	emails := environment_variables.EnvironmentVariables.ORGANIZATION_ADMIN_EMAILS
-	if len(emails) == 0 {
-		return fmt.Errorf("no ORGANIZATION_ADMIN_EMAILS configured")
-	}
-
-	for _, rawEmail := range emails {
-		email := strings.TrimSpace(rawEmail)
-		if email == "" {
-			continue
-		}
-
-		admin, err := s.userService.FindByEmail(ctx, email)
-		if err != nil {
-			return err
-		}
-		if admin == nil {
-			admin, err = s.RegisterUser(ctx, &user.User{
-				Name:    "Admin",
-				Email:   email,
-				IsGuest: false,
-				Enabled: true,
-			})
-			if err != nil {
-				return err
-			}
-		}
-
-		err = s.organizationService.AddMember(ctx, &organization.OrganizationMember{
-			UserID:         admin.ID,
-			OrganizationID: orgEntity.ID,
-			Role:           organization.OrganizationMemberRoleOwner,
-		})
-		if err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-func (s *AuthService) RegisterUser(ctx context.Context, user *user.User) (*user.User, error) {
-	_, err := s.userService.RegisterUser(ctx, user)
-	if err != nil {
-		return nil, err
-	}
-	projEntity, err := s.projectService.CreateProjectWithPublicID(ctx, &project.Project{
-		Name:           "Default Project",
-		Status:         string(project.ProjectStatusActive),
-		OrganizationID: organization.DEFAULT_ORGANIZATION.ID,
-	})
-	if err != nil {
-		return nil, err
-	}
-	err = s.projectService.AddMember(ctx, &project.ProjectMember{
-		ProjectID: projEntity.ID,
-		UserID:    user.ID,
-		Role:      string(project.ProjectMemberRoleOwner),
-	})
-	if err != nil {
-		return nil, err
-	}
-	return user, nil
-}
-
-func (s *AuthService) FindOrRegisterUser(ctx context.Context, user *user.User) (*user.User, error) {
-	userEntity, err := s.userService.FindByEmail(ctx, user.Email)
-	if err != nil {
-		return nil, err
-	}
-	if userEntity != nil {
-		return userEntity, nil
-	}
-	return s.RegisterUser(ctx, user)
-}
-
-func (s *AuthService) HasOrganizationUser(ctx context.Context, email string, orgID uint) (bool, error) {
-	user, err := s.userService.FindByEmail(ctx, email)
-	if err != nil {
-		return false, err
-	}
-	if user == nil {
-		return false, nil
-	}
-	member, err := s.organizationService.FindOneMemberByFilter(ctx, organization.OrganizationMemberFilter{
-		UserID:         &user.ID,
-		OrganizationID: &orgID,
-	})
-	if err != nil {
-		return false, err
-	}
-	if member != nil {
-		return true, nil
-	}
-	return false, nil
-}
-
-func (s *AuthService) JWTAuthMiddleware() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		userId, ok := s.getUserPublicIDFromJWT(reqCtx)
-		if !ok {
-			return
-		}
-		SetUserIDToContext(reqCtx, userId)
-		reqCtx.Next()
-	}
-}
-
-// Retrieve the user's public ID from the header.
-func (s *AuthService) AppUserAuthMiddleware() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		userId, ok := s.getUserPublicIDFromJWT(reqCtx)
-		if ok {
-			SetUserIDToContext(reqCtx, userId)
-			reqCtx.Next()
-			return
-		}
-		userId, ok = s.getUserIDFromApikey(reqCtx)
-		if ok {
-			SetUserIDToContext(reqCtx, userId)
-			reqCtx.Next()
-			return
-		}
-
-		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code: "019947f0-eca1-7474-8ed2-09d6e5389b54",
-		})
-	}
-}
-
-func (s *AuthService) AdminUserAuthMiddleware() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		userId, ok := s.getUserPublicIDFromJWT(reqCtx)
-		if ok {
-			SetUserIDToContext(reqCtx, userId)
-			reqCtx.Next()
-			return
-		}
-		userId, ok = s.getUserIDFromAdminkey(reqCtx)
-		if ok {
-			SetUserIDToContext(reqCtx, userId)
-			reqCtx.Next()
-			return
-		}
-
-		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code: "4026757e-d5a4-4cf7-8914-2c96f011084f",
-		})
-	}
-}
-
-// Verify user from public ID
-func (s *AuthService) RegisteredUserMiddleware() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		ctx := reqCtx.Request.Context()
-		userPublicId, ok := GetUserIDFromContext(reqCtx)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "3296ce86-783b-4c05-9fdb-930d3713024e",
-			})
-			return
-		}
-		if userPublicId == "" {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "80e1017d-038a-48c1-9de7-c3cdffdddb95",
-			})
-			return
-		}
-		user, err := s.userService.FindByPublicID(ctx, userPublicId)
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "6272df83-f538-421b-93ba-c2b6f6d39f39",
-			})
-			return
-		}
-		if user == nil {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "b1ef40e7-9db9-477d-bb59-f3783585195d",
-			})
-			return
-		}
-		SetUserToContext(reqCtx, user)
-		reqCtx.Next()
-	}
-}
-
-var OrganizationMemberRuleOwnerOnly = map[string]bool{
-	string(organization.OrganizationMemberRoleOwner): true,
-}
-
-var OrganizationMemberRuleAll = map[string]bool{
-	string(organization.OrganizationMemberRoleOwner):  true,
-	string(organization.OrganizationMemberRoleReader): true,
-}
-
-func (s *AuthService) getDefaultOrganizationMember(reqCtx *gin.Context) (*organization.OrganizationMember, bool) {
-	ctx := reqCtx.Request.Context()
-	member, ok := GetAdminOrganizationMemberFromContext(reqCtx)
-	if ok {
-		return member, ok
-	}
-	user, ok := GetUserFromContext(reqCtx)
-	if !ok || user == nil {
-		return nil, false
-	}
-	membership, err := s.organizationService.FindOneMemberByFilter(ctx, organization.OrganizationMemberFilter{
-		UserID:         &user.ID,
-		OrganizationID: &organization.DEFAULT_ORGANIZATION.ID,
-	})
-	if err != nil || membership == nil {
-		return nil, false
-	}
-	return membership, true
-}
-
-func (s *AuthService) DefaultOrganizationMemberOptionalMiddleware() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		SetAdminOrganizationToContext(reqCtx, organization.DEFAULT_ORGANIZATION)
-		membership, ok := s.getDefaultOrganizationMember(reqCtx)
-		if ok {
-			SetAdminOrganizationMemberToContext(reqCtx, membership)
-		}
-		reqCtx.Next()
-	}
-}
-
-func (s *AuthService) OrganizationMemberRoleMiddleware(rolesAllowed map[string]bool) gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		SetAdminOrganizationToContext(reqCtx, organization.DEFAULT_ORGANIZATION)
-		membership, ok := s.getDefaultOrganizationMember(reqCtx)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "983a8764-6888-450d-a5d5-7442c3904637",
-			})
-			return
-		}
-		ok, exists := rolesAllowed[string(membership.Role)]
-		if !exists || !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "f0167776-febc-4fc0-a7c1-13a3ba1673ce",
-			})
-			return
-		}
-		reqCtx.Next()
-	}
-}
-
-func (s *AuthService) getUserPublicIDFromJWT(reqCtx *gin.Context) (string, bool) {
-	tokenString, ok := requests.GetTokenFromBearer(reqCtx)
-	if !ok {
-		return "", false
-	}
-	token, err := jwt.ParseWithClaims(tokenString, &UserClaim{}, func(token *jwt.Token) (interface{}, error) {
-		return environment_variables.EnvironmentVariables.JWT_SECRET, nil
-	})
-	if err != nil || !token.Valid {
-		return "", false
-	}
-	claims, ok := token.Claims.(*UserClaim)
-	if !ok {
-		return "", false
-	}
-	return claims.ID, true
-}
-
-func (s *AuthService) getUserIDFromApikey(reqCtx *gin.Context) (string, bool) {
-	tokenString, ok := requests.GetTokenFromBearer(reqCtx)
-	if !ok {
-		return "", false
-	}
-	if !strings.HasPrefix(tokenString, apikey.ApikeyPrefix) {
-		return "", false
-	}
-	token, ok := requests.GetTokenFromBearer(reqCtx)
-	if !ok {
-		return "", false
-	}
-	ctx := reqCtx.Request.Context()
-	hashed := s.apiKeyService.HashKey(reqCtx, token)
-	apikeyEntity, err := s.apiKeyService.FindByKeyHash(ctx, hashed)
-	if err != nil {
-		return "", false
-	}
-	if apikeyEntity == nil || apikeyEntity.ApikeyType == string(apikey.ApikeyTypeAdmin) {
-		return "", false
-	}
-	return apikeyEntity.OwnerPublicID, true
-}
-
-func (s *AuthService) getUserIDFromAdminkey(reqCtx *gin.Context) (string, bool) {
-	tokenString, ok := requests.GetTokenFromBearer(reqCtx)
-	if !ok {
-		return "", false
-	}
-	if !strings.HasPrefix(tokenString, apikey.ApikeyPrefix) {
-		return "", false
-	}
-	ctx := reqCtx.Request.Context()
-	hashed := s.apiKeyService.HashKey(reqCtx, tokenString)
-	apikeyEntity, err := s.apiKeyService.FindByKeyHash(ctx, hashed)
-	if err != nil {
-		return "", false
-	}
-	if apikeyEntity == nil || apikeyEntity.ApikeyType != string(apikey.ApikeyTypeAdmin) {
-		return "", false
-	}
-
-	return apikeyEntity.OwnerPublicID, true
-}
-
-func GetUserFromContext(reqCtx *gin.Context) (*user.User, bool) {
-	v, ok := reqCtx.Get(string(UserContextKeyEntity))
-	if !ok {
-		return nil, false
-	}
-	return v.(*user.User), true
-}
-
-func SetUserToContext(reqCtx *gin.Context, user *user.User) {
-	reqCtx.Set(string(UserContextKeyEntity), user)
-}
-
-func GetUserIDFromContext(reqCtx *gin.Context) (string, bool) {
-	userId, ok := reqCtx.Get(string(UserContextKeyID))
-	if !ok {
-		return "", false
-	}
-	v, ok := userId.(string)
-	if !ok {
-		return "", false
-	}
-	return v, true
-}
-
-func SetUserIDToContext(reqCtx *gin.Context, v string) {
-	reqCtx.Set(string(UserContextKeyID), v)
-}
-
-type ApikeyContextKey string
-
-const (
-	ApikeyContextKeyEntity   ApikeyContextKey = "ApikeyContextKeyEntity"
-	ApikeyContextKeyPublicID ApikeyContextKey = "apikey_public_id"
-)
-
-func (s *AuthService) GetAdminApiKeyFromQuery() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		ctx := reqCtx.Request.Context()
-		user, ok := GetUserFromContext(reqCtx)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "72ca928d-bd8b-44f8-af70-1a9e33b58295",
-			})
-			return
-		}
-
-		publicID := reqCtx.Param(string(ApikeyContextKeyPublicID))
-		if publicID == "" {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:  "9c6ed28c-1dab-4fab-945a-f0efa2dec1eb",
-				Error: "missing apikey public ID",
-			})
-			return
-		}
-		adminKeyEntity, err := s.apiKeyService.FindOneByFilter(ctx, apikey.ApiKeyFilter{
-			PublicID: &publicID,
-		})
-
-		if adminKeyEntity == nil || err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code: "f4f47443-0c80-4c7a-bedc-ac30ec49f494",
-			})
-			return
-		}
-
-		memberEntity, err := s.organizationService.FindOneMemberByFilter(ctx, organization.OrganizationMemberFilter{
-			UserID:         &user.ID,
-			OrganizationID: adminKeyEntity.OrganizationID,
-		})
-
-		if memberEntity == nil || err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "56a9fa87-ddd7-40b7-b2d6-94ae41a600f8",
-			})
-			return
-		}
-		SetAdminKeyToContext(reqCtx, adminKeyEntity)
-	}
-}
-
-func GetAdminKeyFromContext(reqCtx *gin.Context) (*apikey.ApiKey, bool) {
-	apiKey, ok := reqCtx.Get(string(ApikeyContextKeyEntity))
-	if !ok {
-		return nil, false
-	}
-	v, ok := apiKey.(*apikey.ApiKey)
-	if !ok {
-		return nil, false
-	}
-	return v, true
-}
-
-func SetAdminKeyToContext(reqCtx *gin.Context, apiKey *apikey.ApiKey) {
-	reqCtx.Set(string(ApikeyContextKeyEntity), apiKey)
-}
-
-type OrganizationContextKey string
-
-const (
-	OrganizationContextKeyEntity       ApikeyContextKey = "OrganizationContextKeyEntity"
-	OrganizationContextKeyPublicID     ApikeyContextKey = "org_public_id"
-	OrganizationContextKeyMemberEntity ApikeyContextKey = "OrganizationContextKeyMemberEntity"
-)
-
-func GetAdminOrganizationFromContext(reqCtx *gin.Context) (*organization.Organization, bool) {
-	org, ok := reqCtx.Get(string(OrganizationContextKeyEntity))
-	if !ok {
-		return nil, false
-	}
-	v, ok := org.(*organization.Organization)
-	if !ok {
-		return nil, false
-	}
-	return v, true
-}
-
-func SetAdminOrganizationToContext(reqCtx *gin.Context, org *organization.Organization) {
-	reqCtx.Set(string(OrganizationContextKeyEntity), org)
-}
-
-func GetAdminOrganizationMemberFromContext(reqCtx *gin.Context) (*organization.OrganizationMember, bool) {
-	org, ok := reqCtx.Get(string(OrganizationContextKeyMemberEntity))
-	if !ok {
-		return nil, false
-	}
-	v, ok := org.(*organization.OrganizationMember)
-	if !ok {
-		return nil, false
-	}
-	return v, true
-}
-
-func SetAdminOrganizationMemberToContext(reqCtx *gin.Context, org *organization.OrganizationMember) {
-	reqCtx.Set(string(OrganizationContextKeyMemberEntity), org)
-}
-
-type ProjectContextKey string
-
-const (
-	ProjectContextKeyPublicID ProjectContextKey = "proj_public_id"
-	ProjectContextKeyEntity   ProjectContextKey = "ProjectContextKeyEntity"
-)
-
-func GetProjectFromContext(reqCtx *gin.Context) (*project.Project, bool) {
-	proj, ok := reqCtx.Get(string(ProjectContextKeyEntity))
-	if !ok {
-		return nil, false
-	}
-	return proj.(*project.Project), true
-}
-
-func SetProjectToContext(reqCtx *gin.Context, project *project.Project) {
-	reqCtx.Set(string(ProjectContextKeyEntity), project)
-}
-
-func (s *AuthService) AdminProjectMiddleware() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		ctx := reqCtx.Request.Context()
-		user, ok := GetUserFromContext(reqCtx)
-		if !ok {
-			return
-		}
-		orgEntity, ok := GetAdminOrganizationFromContext(reqCtx)
-		if !ok {
-			return
-		}
-		publicID := reqCtx.Param(string(ProjectContextKeyPublicID))
-		if publicID == "" {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:  "5cbdb58e-6228-4d9a-9893-7f744608a9e8",
-				Error: "missing project public ID",
-			})
-			return
-		}
-		projectFilter := project.ProjectFilter{
-			PublicID:       &publicID,
-			OrganizationID: &orgEntity.ID,
-		}
-		_, ok = GetAdminOrganizationMemberFromContext(reqCtx)
-		if !ok {
-			projectFilter.MemberID = &user.ID
-		}
-		proj, err := s.projectService.FindOne(ctx, projectFilter)
-		if err != nil || proj == nil {
-			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code:  "121ef112-cb39-4235-9500-b116adb69984",
-				Error: "proj not found",
-			})
-			return
-		}
-		SetProjectToContext(reqCtx, proj)
-		reqCtx.Next()
-	}
-}
-
-type InviteContextKey string
-
-const (
-	InviteContextKeyPublicID InviteContextKey = "invite_public_id"
-	InviteContextKeyEntity   InviteContextKey = "InviteContextKeyEntity"
-)
-
-func GetAdminInviteFromContext(reqCtx *gin.Context) (*invite.Invite, bool) {
-	i, ok := reqCtx.Get(string(InviteContextKeyEntity))
-	if !ok {
-		return nil, false
-	}
-	v, ok := i.(*invite.Invite)
-	if !ok {
-		return nil, false
-	}
-	return v, true
-}
-
-func SetAdminInviteToContext(reqCtx *gin.Context, i *invite.Invite) {
-	reqCtx.Set(string(InviteContextKeyEntity), i)
-}
-
-func (s *AuthService) AdminInviteMiddleware() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		ctx := reqCtx.Request.Context()
-		orgEntity, ok := GetAdminOrganizationFromContext(reqCtx)
-		if !ok {
-			return
-		}
-		publicID := reqCtx.Param(string(InviteContextKeyPublicID))
-		if publicID == "" {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:  "5cbdb58e-6228-4d9a-9893-7f744608a9e8",
-				Error: "missing invite public ID",
-			})
-			return
-		}
-
-		inviteEntity, err := s.inviteService.FindOne(ctx, invite.InvitesFilter{
-			PublicID:       &publicID,
-			OrganizationID: &orgEntity.ID,
-		})
-		if err != nil || inviteEntity == nil {
-			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code:  "2daa8be0-df7d-4faa-ba4d-00c4dae8ceae",
-				Error: "invite not found",
-			})
-			return
-		}
-		SetAdminInviteToContext(reqCtx, inviteEntity)
-		reqCtx.Next()
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/domain/auth/claim.go b/apps/jan-api-gateway/application/app/domain/auth/claim.go
deleted file mode 100644
index ed2320e9..00000000
--- a/apps/jan-api-gateway/application/app/domain/auth/claim.go
+++ /dev/null
@@ -1,65 +0,0 @@
-package auth
-
-import (
-	"fmt"
-
-	"github.com/gin-gonic/gin"
-	"github.com/golang-jwt/jwt/v5"
-
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-const RefreshTokenKey = "jan_refresh_token"
-const OAuthStateKey = "jan_oauth_state"
-const ContextUserClaim = "context_user_claim"
-
-type UserClaim struct {
-	Email string
-	Name  string
-	ID    string
-	jwt.RegisteredClaims
-}
-
-func CreateJwtSignedString(u UserClaim) (string, error) {
-	token := jwt.NewWithClaims(jwt.SigningMethodHS512, u)
-	return token.SignedString(environment_variables.EnvironmentVariables.JWT_SECRET)
-}
-
-func GetUserClaimFromRequestContext(reqCtx *gin.Context) (*UserClaim, error) {
-	userClaim, ok := reqCtx.Get(ContextUserClaim)
-	if !ok {
-		return nil, fmt.Errorf("userclaim not found in context")
-	}
-	u, ok := userClaim.(*UserClaim)
-	if !ok {
-		return nil, fmt.Errorf("invalid user claim in context: expected *auth.UserClaim, got %T", userClaim)
-	}
-	return u, nil
-}
-
-func GetUserClaimFromRefreshToken(reqCtx *gin.Context) (*UserClaim, bool) {
-	refreshTokenString, err := reqCtx.Cookie(RefreshTokenKey)
-	if err != nil {
-		return nil, false
-	}
-
-	token, err := jwt.ParseWithClaims(refreshTokenString, &UserClaim{}, func(token *jwt.Token) (interface{}, error) {
-		return environment_variables.EnvironmentVariables.JWT_SECRET, nil
-	})
-	if err != nil {
-		return nil, false
-	}
-
-	if !token.Valid {
-		return nil, false
-	}
-
-	claims, ok := token.Claims.(*UserClaim)
-	if !ok {
-		return nil, false
-	}
-	if claims.ID == "" {
-		return nil, false
-	}
-	return claims, true
-}
diff --git a/apps/jan-api-gateway/application/app/domain/common/error.go b/apps/jan-api-gateway/application/app/domain/common/error.go
deleted file mode 100644
index dc35ea58..00000000
--- a/apps/jan-api-gateway/application/app/domain/common/error.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package common
-
-import "fmt"
-
-// Error represents a standardized error with code and underlying error
-type Error struct {
-	Err  error  `json:"-"`
-	Code string `json:"code"`
-}
-
-// NewError creates a new Error instance from an existing error
-func NewError(err error, code string) *Error {
-	return &Error{
-		Err:  err,
-		Code: code,
-	}
-}
-
-// NewErrorWithMessage creates a new Error instance with a custom message
-func NewErrorWithMessage(message string, code string) *Error {
-	return &Error{
-		Err:  fmt.Errorf("%s", message),
-		Code: code,
-	}
-}
-
-// Error implements the error interface
-func (e *Error) Error() string {
-	if e.Err != nil {
-		return e.Err.Error()
-	}
-	return ""
-}
-
-// String returns the string representation of the error
-func (e *Error) String() string {
-	return e.Error()
-}
-
-// GetMessage returns the error message from the underlying error
-func (e *Error) GetMessage() string {
-	if e.Err != nil {
-		return e.Err.Error()
-	}
-	return ""
-}
-
-// GetCode returns the error code
-func (e *Error) GetCode() string {
-	return e.Code
-}
-
-// GetCode returns the error code
-func (e *Error) GetError() error {
-	return e.Err
-}
diff --git a/apps/jan-api-gateway/application/app/domain/conversation/conversation.go b/apps/jan-api-gateway/application/app/domain/conversation/conversation.go
deleted file mode 100644
index 7e121e80..00000000
--- a/apps/jan-api-gateway/application/app/domain/conversation/conversation.go
+++ /dev/null
@@ -1,243 +0,0 @@
-package conversation
-
-import (
-	"context"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-)
-
-type ConversationStatus string
-
-const (
-	ConversationStatusActive   ConversationStatus = "active"
-	ConversationStatusArchived ConversationStatus = "archived"
-	ConversationStatusDeleted  ConversationStatus = "deleted"
-)
-
-// @Enum(message, function_call, function_call_output)
-type ItemType string
-
-const (
-	ItemTypeMessage      ItemType = "message"
-	ItemTypeFunction     ItemType = "function_call"
-	ItemTypeFunctionCall ItemType = "function_call_output"
-)
-
-func ValidateItemType(input string) bool {
-	switch ItemType(input) {
-	case ItemTypeMessage, ItemTypeFunction, ItemTypeFunctionCall:
-		return true
-	default:
-		return false
-	}
-}
-
-// @Enum(system, user, assistant, tool)
-type ItemRole string
-
-const (
-	ItemRoleSystem    ItemRole = "system"
-	ItemRoleUser      ItemRole = "user"
-	ItemRoleAssistant ItemRole = "assistant"
-	ItemRoleTool      ItemRole = "tool"
-)
-
-func ValidateItemRole(input string) bool {
-	switch ItemRole(input) {
-	case ItemRoleSystem, ItemRoleUser, ItemRoleAssistant, ItemRoleTool:
-		return true
-	default:
-		return false
-	}
-}
-
-// @Enum(pending, in_progress, completed, failed, cancelled)
-type ItemStatus string
-
-const (
-	ItemStatusPending    ItemStatus = "pending"
-	ItemStatusInProgress ItemStatus = "in_progress"
-	ItemStatusCompleted  ItemStatus = "completed"
-	ItemStatusFailed     ItemStatus = "failed"
-	ItemStatusCancelled  ItemStatus = "cancelled"
-)
-
-func ValidateItemStatus(input string) bool {
-	switch ItemStatus(input) {
-	case ItemStatusPending, ItemStatusInProgress, ItemStatusCompleted, ItemStatusFailed, ItemStatusCancelled:
-		return true
-	default:
-		return false
-	}
-}
-
-// ToItemStatusPtr returns a pointer to the given ItemStatus
-func ToItemStatusPtr(s ItemStatus) *ItemStatus {
-	return &s
-}
-
-// ItemStatusToStringPtr converts *ItemStatus to *string
-func ItemStatusToStringPtr(s *ItemStatus) *string {
-	if s == nil {
-		return nil
-	}
-	str := string(*s)
-	return &str
-}
-
-type Item struct {
-	ID                uint               `json:"-"`
-	ConversationID    uint               `json:"-"`
-	PublicID          string             `json:"id"`
-	Type              ItemType           `json:"type"`
-	Role              *ItemRole          `json:"role,omitempty"`
-	Content           []Content          `json:"content,omitempty"`
-	Status            *ItemStatus        `json:"status,omitempty"`
-	IncompleteAt      *time.Time         `json:"incomplete_at,omitempty"`
-	IncompleteDetails *IncompleteDetails `json:"incomplete_details,omitempty"`
-	CompletedAt       *time.Time         `json:"completed_at,omitempty"`
-	ResponseID        *uint              `json:"-"`
-	CreatedAt         time.Time          `json:"created_at"`
-}
-
-type Content struct {
-	Type             string        `json:"type"`
-	FinishReason     *string       `json:"finish_reason,omitempty"`     // Finish reason
-	Text             *Text         `json:"text,omitempty"`              // Generic text content
-	InputText        *string       `json:"input_text,omitempty"`        // User input text (simple)
-	OutputText       *OutputText   `json:"output_text,omitempty"`       // AI output text (with annotations)
-	ReasoningContent *string       `json:"reasoning_content,omitempty"` // AI reasoning content
-	Image            *ImageContent `json:"image,omitempty"`             // Image content
-	File             *FileContent  `json:"file,omitempty"`              // File content
-}
-
-// Generic text content (backward compatibility)
-type Text struct {
-	Value       string       `json:"value"`
-	Annotations []Annotation `json:"annotations,omitempty"`
-}
-
-type OutputText struct {
-	Text        string       `json:"text"`
-	Annotations []Annotation `json:"annotations"`        // Required for OpenAI compatibility
-	LogProbs    []LogProb    `json:"logprobs,omitempty"` // Token probabilities
-}
-
-// Image content for multimodal support
-type ImageContent struct {
-	URL    string `json:"url,omitempty"`
-	FileID string `json:"file_id,omitempty"`
-	Detail string `json:"detail,omitempty"` // "low", "high", "auto"
-}
-
-// File content for attachments
-type FileContent struct {
-	FileID   string `json:"file_id"`
-	Name     string `json:"name,omitempty"`
-	MimeType string `json:"mime_type,omitempty"`
-	Size     int64  `json:"size,omitempty"`
-}
-
-type Annotation struct {
-	Type       string `json:"type"`              // "file_citation", "url_citation", etc.
-	Text       string `json:"text,omitempty"`    // Display text
-	FileID     string `json:"file_id,omitempty"` // For file citations
-	URL        string `json:"url,omitempty"`     // For URL citations
-	StartIndex int    `json:"start_index"`
-	EndIndex   int    `json:"end_index"`
-	Index      int    `json:"index,omitempty"` // Citation index
-}
-
-// Log probability for AI responses
-type LogProb struct {
-	Token       string       `json:"token"`
-	LogProb     float64      `json:"logprob"`
-	Bytes       []int        `json:"bytes,omitempty"`
-	TopLogProbs []TopLogProb `json:"top_logprobs,omitempty"`
-}
-
-type TopLogProb struct {
-	Token   string  `json:"token"`
-	LogProb float64 `json:"logprob"`
-	Bytes   []int   `json:"bytes,omitempty"`
-}
-
-type IncompleteDetails struct {
-	Reason string `json:"reason"`
-}
-
-type Conversation struct {
-	ID        uint               `json:"-"`
-	PublicID  string             `json:"id"` // OpenAI-compatible string ID like "conv_abc123"
-	Title     *string            `json:"title,omitempty"`
-	UserID    uint               `json:"-"`
-	Status    ConversationStatus `json:"status"`
-	Items     []Item             `json:"items,omitempty"`
-	Metadata  map[string]string  `json:"metadata,omitempty"`
-	IsPrivate bool               `json:"is_private"`
-	CreatedAt time.Time          `json:"created_at"` // Unix timestamp for OpenAI compatibility
-	UpdatedAt time.Time          `json:"updated_at"` // Unix timestamp for OpenAI compatibility
-}
-
-type ConversationFilter struct {
-	PublicID *string
-	UserID   *uint
-}
-
-type ItemFilter struct {
-	PublicID       *string
-	ConversationID *uint
-	Role           *ItemRole
-	ResponseID     *uint
-}
-
-type ConversationRepository interface {
-	Create(ctx context.Context, conversation *Conversation) error
-	FindByFilter(ctx context.Context, filter ConversationFilter, pagination *query.Pagination) ([]*Conversation, error)
-	Count(ctx context.Context, filter ConversationFilter) (int64, error)
-	FindByID(ctx context.Context, id uint) (*Conversation, error)
-	FindByPublicID(ctx context.Context, publicID string) (*Conversation, error)
-	Update(ctx context.Context, conversation *Conversation) error
-	Delete(ctx context.Context, id uint) error
-	AddItem(ctx context.Context, conversationID uint, item *Item) error
-	SearchItems(ctx context.Context, conversationID uint, query string) ([]*Item, error)
-	BulkAddItems(ctx context.Context, conversationID uint, items []*Item) error
-}
-
-type ItemRepository interface {
-	Create(ctx context.Context, item *Item) error
-	FindByID(ctx context.Context, id uint) (*Item, error)
-	FindByPublicID(ctx context.Context, publicID string) (*Item, error) // Find by OpenAI-compatible string ID
-	FindByConversationID(ctx context.Context, conversationID uint) ([]*Item, error)
-	Search(ctx context.Context, conversationID uint, query string) ([]*Item, error)
-	Delete(ctx context.Context, id uint) error
-	BulkCreate(ctx context.Context, items []*Item) error
-	CountByConversation(ctx context.Context, conversationID uint) (int64, error)
-	ExistsByIDAndConversation(ctx context.Context, itemID uint, conversationID uint) (bool, error)
-	FindByFilter(ctx context.Context, filter ItemFilter, pagination *query.Pagination) ([]*Item, error)
-	Count(ctx context.Context, filter ItemFilter) (int64, error)
-}
-
-// NewItem creates a new conversation item with the given parameters
-func NewItem(publicID string, itemType ItemType, role ItemRole, content []Content, conversationID uint, responseID *uint) *Item {
-	return &Item{
-		PublicID:       publicID,
-		Type:           itemType,
-		Role:           &role,
-		Content:        content,
-		ConversationID: conversationID,
-		ResponseID:     responseID,
-		CreatedAt:      time.Now(),
-	}
-}
-
-// NewTextContent creates a new text content item
-func NewTextContent(text string) Content {
-	return Content{
-		Type: "text",
-		Text: &Text{
-			Value: text,
-		},
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/domain/conversation/conversation_service.go b/apps/jan-api-gateway/application/app/domain/conversation/conversation_service.go
deleted file mode 100644
index ca753901..00000000
--- a/apps/jan-api-gateway/application/app/domain/conversation/conversation_service.go
+++ /dev/null
@@ -1,431 +0,0 @@
-package conversation
-
-import (
-	"net/http"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"golang.org/x/net/context"
-
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-)
-
-type ConversationContextKey string
-
-const (
-	ConversationContextKeyPublicID ConversationContextKey = "conv_public_id"
-	ConversationContextEntity      ConversationContextKey = "ConversationContextEntity"
-)
-
-type ConversationItemContextKey string
-
-const (
-	ConversationItemContextKeyPublicID ConversationItemContextKey = "conv_item_public_id"
-	ConversationItemContextEntity      ConversationItemContextKey = "ConversationItemContextEntity"
-)
-
-type ConversationService struct {
-	conversationRepo ConversationRepository
-	itemRepo         ItemRepository
-	validator        *ConversationValidator
-}
-
-func NewService(conversationRepo ConversationRepository, itemRepo ItemRepository) *ConversationService {
-	// Initialize with default validation config
-	validator := NewConversationValidator(DefaultValidationConfig())
-	return &ConversationService{
-		conversationRepo: conversationRepo,
-		itemRepo:         itemRepo,
-		validator:        validator,
-	}
-}
-
-func (s *ConversationService) FindConversationsByFilter(ctx context.Context, filter ConversationFilter, pagination *query.Pagination) ([]*Conversation, *common.Error) {
-	conversations, err := s.conversationRepo.FindByFilter(ctx, filter, pagination)
-	if err != nil {
-		return nil, common.NewError(err, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
-	}
-	return conversations, nil
-}
-
-func (s *ConversationService) CountConversationsByFilter(ctx context.Context, filter ConversationFilter) (int64, *common.Error) {
-	count, err := s.conversationRepo.Count(ctx, filter)
-	if err != nil {
-		return 0, common.NewError(err, "b2c3d4e5-f6g7-8901-bcde-f23456789012")
-	}
-	return count, nil
-}
-
-func (s *ConversationService) CreateConversation(ctx context.Context, userID uint, title *string, isPrivate bool, metadata map[string]string) (*Conversation, *common.Error) {
-	if err := s.validator.ValidateConversationInput(title, metadata); err != nil {
-		return nil, common.NewError(err, "c3d4e5f6-g7h8-9012-cdef-345678901234")
-	}
-
-	publicID, err := s.generateConversationPublicID()
-	if err != nil {
-		return nil, common.NewError(err, "d4e5f6g7-h8i9-0123-defg-456789012345")
-	}
-
-	conversation := &Conversation{
-		PublicID:  publicID,
-		Title:     title,
-		UserID:    userID,
-		Status:    ConversationStatusActive,
-		IsPrivate: isPrivate,
-		Metadata:  metadata,
-	}
-
-	if err := s.conversationRepo.Create(ctx, conversation); err != nil {
-		return nil, common.NewError(err, "e5f6g7h8-i9j0-1234-efgh-567890123456")
-	}
-
-	return conversation, nil
-}
-
-// GetConversation retrieves a conversation by its public ID with access control and items loaded
-func (s *ConversationService) GetConversationByPublicIDAndUserID(ctx context.Context, publicID string, userID uint) (*Conversation, *common.Error) {
-	return s.getConversationWithAccessCheck(ctx, publicID, userID)
-}
-
-// GetConversationByID retrieves a conversation by its internal ID without user access control
-func (s *ConversationService) GetConversationByID(ctx context.Context, conversationID uint) (*Conversation, *common.Error) {
-	// Validate inputs
-	if conversationID == 0 {
-		return nil, common.NewErrorWithMessage("Conversation ID cannot be zero", "f6g7h8i9-j0k1-2345-fghi-678901234567")
-	}
-
-	conversation, err := s.conversationRepo.FindByID(ctx, conversationID)
-	if err != nil {
-		return nil, common.NewError(err, "g7h8i9j0-k1l2-3456-ghij-789012345678")
-	}
-	if conversation == nil {
-		return nil, common.NewErrorWithMessage("Conversation not found", "h8i9j0k1-l2m3-4567-hijk-890123456789")
-	}
-
-	return conversation, nil
-}
-
-// getConversationWithAccessCheck is the internal method that handles conversation retrieval with optional item loading
-func (s *ConversationService) getConversationWithAccessCheck(ctx context.Context, publicID string, userID uint) (*Conversation, *common.Error) {
-	// Validate inputs
-	if publicID == "" {
-		return nil, common.NewErrorWithMessage("Public ID cannot be empty", "i9j0k1l2-m3n4-5678-ijkl-901234567890")
-	}
-
-	convs, err := s.conversationRepo.FindByFilter(ctx, ConversationFilter{
-		UserID:   &userID,
-		PublicID: &publicID,
-	}, nil)
-	if err != nil {
-		return nil, common.NewError(err, "j0k1l2m3-n4o5-6789-jklm-012345678901")
-	}
-	if len(convs) != 1 {
-		return nil, common.NewErrorWithMessage("Conversation not found", "k1l2m3n4-o5p6-7890-klmn-123456789012")
-	}
-	return convs[0], nil
-}
-
-func (s *ConversationService) UpdateConversation(ctx context.Context, entity *Conversation) (*Conversation, *common.Error) {
-	if err := s.conversationRepo.Update(ctx, entity); err != nil {
-		return nil, common.NewError(err, "l2m3n4o5-p6q7-8901-lmno-234567890123")
-	}
-	return entity, nil
-}
-
-func (s *ConversationService) DeleteConversation(ctx context.Context, conv *Conversation) (bool, *common.Error) {
-	if err := s.conversationRepo.Delete(ctx, conv.ID); err != nil {
-		return false, common.NewError(err, "m3n4o5p6-q7r8-9012-mnop-345678901234")
-	}
-	return true, nil
-}
-
-func (s *ConversationService) AddItem(ctx context.Context, conversation *Conversation, userID uint, itemType ItemType, role *ItemRole, content []Content) (*Item, *common.Error) {
-	// Check access permissions
-	if conversation.IsPrivate && conversation.UserID != userID {
-		return nil, common.NewErrorWithMessage("Private conversation access denied", "n4o5p6q7-r8s9-0123-nopq-456789012345")
-	}
-
-	if err := s.validator.ValidateItemContent(content); err != nil {
-		return nil, common.NewError(err, "o5p6q7r8-s9t0-1234-opqr-567890123456")
-	}
-
-	itemPublicID, err := s.generateItemPublicID()
-	if err != nil {
-		return nil, common.NewError(err, "p6q7r8s9-t0u1-2345-pqrs-678901234567")
-	}
-
-	item := &Item{
-		PublicID: itemPublicID,
-		Type:     itemType,
-		Role:     role,
-		Content:  content,
-		Status:   ToItemStatusPtr(ItemStatusCompleted),
-	}
-
-	if err := s.conversationRepo.AddItem(ctx, conversation.ID, item); err != nil {
-		return nil, common.NewError(err, "q7r8s9t0-u1v2-3456-qrst-789012345678")
-	}
-
-	// Update conversation timestamp
-	if err := s.updateConversationTimestamp(ctx, conversation, "r8s9t0u1-v2w3-4567-rstu-890123456789"); err != nil {
-		return nil, err
-	}
-
-	return item, nil
-}
-
-// AddItemWithID adds an item to a conversation with a custom public ID
-func (s *ConversationService) AddItemWithID(ctx context.Context, conversation *Conversation, userID uint, itemType ItemType, role *ItemRole, content []Content, customPublicID string) (*Item, *common.Error) {
-	// Check access permissions
-	if conversation.IsPrivate && conversation.UserID != userID {
-		return nil, common.NewErrorWithMessage("Private conversation access denied", "n4o5p6q7-r8s9-0123-nopq-456789012345")
-	}
-
-	if err := s.validator.ValidateItemContent(content); err != nil {
-		return nil, common.NewError(err, "o5p6q7r8-s9t0-1234-opqr-567890123456")
-	}
-
-	item := &Item{
-		PublicID: customPublicID,
-		Type:     itemType,
-		Role:     role,
-		Content:  content,
-		Status:   ToItemStatusPtr(ItemStatusCompleted),
-	}
-
-	if err := s.conversationRepo.AddItem(ctx, conversation.ID, item); err != nil {
-		return nil, common.NewError(err, "q7r8s9t0-u1v2-3456-qrst-789012345678")
-	}
-
-	// Update conversation timestamp
-	if err := s.updateConversationTimestamp(ctx, conversation, "r8s9t0u1-v2w3-4567-rstu-890123456789"); err != nil {
-		return nil, err
-	}
-
-	return item, nil
-}
-
-// DeleteItemWithConversation deletes an item by its ID and updates the conversation accordingly.
-func (s *ConversationService) DeleteItemWithConversation(ctx context.Context, conversation *Conversation, item *Item) (*Item, *common.Error) {
-	if err := s.itemRepo.Delete(ctx, item.ID); err != nil {
-		return nil, common.NewError(err, "e1f2g3h4-i5j6-7890-efgh-123456789012")
-	}
-
-	if err := s.updateConversationTimestamp(ctx, conversation, "f2g3h4i5-j6k7-8901-fghi-234567890123"); err != nil {
-		return nil, err
-	}
-
-	return item, nil
-}
-
-// generateConversationPublicID generates a conversation ID with business rules
-// Business rule: conversations use "conv" prefix with 42 character length for OpenAI compatibility
-func (s *ConversationService) generateConversationPublicID() (string, error) {
-	return idgen.GenerateSecureID("conv", 42)
-}
-
-// generateItemPublicID generates an item/message ID with business rules
-// Business rule: items/messages use "msg" prefix with 42 character length for OpenAI compatibility
-func (s *ConversationService) generateItemPublicID() (string, error) {
-	return idgen.GenerateSecureID("msg", 42)
-}
-
-// updateConversationTimestamp updates the conversation's UpdatedAt timestamp and saves to database
-func (s *ConversationService) updateConversationTimestamp(ctx context.Context, conversation *Conversation, errorCode string) *common.Error {
-	conversation.UpdatedAt = time.Now()
-	if err := s.conversationRepo.Update(ctx, conversation); err != nil {
-		return common.NewError(err, errorCode)
-	}
-	return nil
-}
-
-func (s *ConversationService) ValidateItems(ctx context.Context, items []*Item) *common.Error {
-	if len(items) > 100 {
-		return common.NewErrorWithMessage("Too many items", "g3h4i5j6-k7l8-9012-ghij-345678901234")
-	}
-	for _, itemData := range items {
-		if errCode := s.validator.ValidateItemContent(itemData.Content); errCode != nil {
-			return common.NewErrorWithMessage("Item validation failed", "h4i5j6k7-l8m9-0123-hijk-456789012345")
-		}
-	}
-	return nil
-}
-
-func (s *ConversationService) FindItemsByFilter(ctx context.Context, filter ItemFilter, p *query.Pagination) ([]*Item, *common.Error) {
-	items, err := s.itemRepo.FindByFilter(ctx, filter, p)
-	if err != nil {
-		return nil, common.NewError(err, "i5j6k7l8-m9n0-1234-ijkl-567890123456")
-	}
-	return items, nil
-}
-
-func (s *ConversationService) CountItemsByFilter(ctx context.Context, filter ItemFilter) (int64, *common.Error) {
-	count, err := s.itemRepo.Count(ctx, filter)
-	if err != nil {
-		return 0, common.NewError(err, "j6k7l8m9-n0o1-2345-jklm-678901234567")
-	}
-	return count, nil
-}
-
-// AddMultipleItems adds multiple items to a conversation in a single transaction
-func (s *ConversationService) AddMultipleItems(ctx context.Context, conversation *Conversation, userID uint, items []*Item) ([]*Item, *common.Error) {
-	// Check access permissions
-	now := time.Now()
-	createdItems := make([]*Item, len(items))
-
-	// Create all items
-	for i, itemData := range items {
-		itemPublicID, err := s.generateItemPublicID()
-		if err != nil {
-			return nil, common.NewError(err, "k7l8m9n0-o1p2-3456-klmn-789012345678")
-		}
-
-		item := &Item{
-			PublicID:    itemPublicID,
-			Type:        itemData.Type,
-			Role:        itemData.Role,
-			Content:     itemData.Content,
-			Status:      ToItemStatusPtr(ItemStatusCompleted),
-			CompletedAt: &now,
-			ResponseID:  itemData.ResponseID,
-		}
-
-		if err := s.conversationRepo.AddItem(ctx, conversation.ID, item); err != nil {
-			return nil, common.NewErrorWithMessage("Failed to add item", "l8m9n0o1-p2q3-4567-lmno-890123456789")
-		}
-
-		createdItems[i] = item
-	}
-
-	if err := s.updateConversationTimestamp(ctx, conversation, "m9n0o1p2-q3r4-5678-mnop-901234567890"); err != nil {
-		return nil, err
-	}
-
-	return createdItems, nil
-}
-
-func (s *ConversationService) GetConversationMiddleWare() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		ctx := reqCtx.Request.Context()
-		publicID := reqCtx.Param(string(ConversationContextKeyPublicID))
-		if publicID == "" {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:  "f5742805-2c6e-45a8-b6a8-95091b9d46f0",
-				Error: "missing conversation public ID",
-			})
-			return
-		}
-		user, ok := auth.GetUserFromContext(reqCtx)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code:  "01994c96-38fb-7426-9c45-37c8df6c757f",
-				Error: "user not found",
-			})
-			return
-		}
-		entities, err := s.FindConversationsByFilter(ctx, ConversationFilter{
-			PublicID: &publicID,
-			UserID:   &user.ID,
-		}, nil)
-
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code:  err.GetCode(),
-				Error: err.Error(),
-			})
-			return
-		}
-
-		if len(entities) == 0 {
-			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code:  "e91636c2-fced-4a89-bf08-55309005365f",
-				Error: "conversation not found",
-			})
-			return
-		}
-
-		SetConversationFromContext(reqCtx, entities[0])
-		reqCtx.Next()
-	}
-}
-
-func SetConversationFromContext(reqCtx *gin.Context, conv *Conversation) {
-	reqCtx.Set(string(ConversationContextEntity), conv)
-}
-
-func GetConversationFromContext(reqCtx *gin.Context) (*Conversation, bool) {
-	conv, ok := reqCtx.Get(string(ConversationContextEntity))
-	if !ok {
-		return nil, false
-	}
-	v, ok := conv.(*Conversation)
-	if !ok {
-		return nil, false
-	}
-	return v, true
-}
-
-func (s *ConversationService) GetConversationItemMiddleWare() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		ctx := reqCtx.Request.Context()
-		conv, ok := GetConversationFromContext(reqCtx)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code:  "0f5c3304-bf46-45ce-8719-7c03a3485b37",
-				Error: "conversation not found",
-			})
-			return
-		}
-		publicID := reqCtx.Param(string(ConversationItemContextKeyPublicID))
-		if publicID == "" {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:  "f5b144fe-090e-4251-bed0-66e27c37c328",
-				Error: "missing conversation item public ID",
-			})
-			return
-		}
-		entities, err := s.FindItemsByFilter(ctx, ItemFilter{
-			PublicID:       &publicID,
-			ConversationID: &conv.ID,
-		}, nil)
-
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:  err.GetCode(),
-				Error: err.Error(),
-			})
-			return
-		}
-
-		if len(entities) == 0 {
-			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-				Code:  "25647b40-4967-497e-9cbd-a85243ccef58",
-				Error: "conversation item not found",
-			})
-			return
-		}
-
-		SetConversationItemFromContext(reqCtx, entities[0])
-		reqCtx.Next()
-	}
-}
-
-func SetConversationItemFromContext(reqCtx *gin.Context, item *Item) {
-	reqCtx.Set(string(ConversationItemContextEntity), item)
-}
-
-func GetConversationItemFromContext(reqCtx *gin.Context) (*Item, bool) {
-	item, ok := reqCtx.Get(string(ConversationItemContextEntity))
-	if !ok {
-		return nil, false
-	}
-	v, ok := item.(*Item)
-	if !ok {
-		return nil, false
-	}
-	return v, true
-}
diff --git a/apps/jan-api-gateway/application/app/domain/conversation/validation.go b/apps/jan-api-gateway/application/app/domain/conversation/validation.go
deleted file mode 100644
index e4f2d10d..00000000
--- a/apps/jan-api-gateway/application/app/domain/conversation/validation.go
+++ /dev/null
@@ -1,284 +0,0 @@
-package conversation
-
-import (
-	"fmt"
-	"regexp"
-	"strings"
-	"unicode/utf8"
-
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-)
-
-// ValidationConfig holds conversation validation rules
-type ValidationConfig struct {
-	MaxTitleLength          int
-	MaxMetadataKeys         int
-	MaxMetadataKeyLength    int
-	MaxMetadataValueLength  int
-	MaxContentBlocks        int
-	MaxTextContentLength    int
-	MaxItemsPerConversation int
-	MaxItemsPerBatch        int
-}
-
-// DefaultValidationConfig returns production-ready validation rules
-func DefaultValidationConfig() *ValidationConfig {
-	return &ValidationConfig{
-		MaxTitleLength:          200,   // Reduced from 255
-		MaxMetadataKeys:         20,    // Reduced from 50
-		MaxMetadataKeyLength:    50,    // Reduced from 100
-		MaxMetadataValueLength:  500,   // Reduced from 1000
-		MaxContentBlocks:        10,    // Reduced from 20
-		MaxTextContentLength:    50000, // Reduced from 100000
-		MaxItemsPerConversation: 1000,  // New limit
-		MaxItemsPerBatch:        50,    // Reduced from 100
-	}
-}
-
-type ConversationValidator struct {
-	config *ValidationConfig
-	// Compiled regex patterns for performance
-	publicIDPattern    *regexp.Regexp
-	metadataKeyPattern *regexp.Regexp
-}
-
-// NewConversationValidator creates a new validator with security patterns
-func NewConversationValidator(config *ValidationConfig) *ConversationValidator {
-	return &ConversationValidator{
-		config: config,
-		// Validate public ID format (prevent injection)
-		publicIDPattern: regexp.MustCompile(`^[a-zA-Z0-9_-]+$`),
-		// Validate metadata keys (alphanumeric + underscore only)
-		metadataKeyPattern: regexp.MustCompile(`^[a-zA-Z0-9_]+$`),
-	}
-}
-
-// ValidateConversationInput performs comprehensive validation
-func (v *ConversationValidator) ValidateConversationInput(title *string, metadata map[string]string) error {
-	// Validate title
-	if title != nil {
-		if err := v.validateTitle(*title); err != nil {
-			return fmt.Errorf("invalid title: %w", err)
-		}
-	}
-
-	// Validate metadata
-	if metadata != nil {
-		if err := v.validateMetadata(metadata); err != nil {
-			return fmt.Errorf("invalid metadata: %w", err)
-		}
-	}
-
-	return nil
-}
-
-// ValidateItemContent performs comprehensive content validation
-func (v *ConversationValidator) ValidateItemContent(content []Content) error {
-	if len(content) == 0 {
-		return fmt.Errorf("aa497939-edbb-416a-899c-a8acc387247e")
-	}
-
-	if len(content) > v.config.MaxContentBlocks {
-		return fmt.Errorf("6dbdb6a2-72f0-430a-909c-9f8ca5dd3397")
-	}
-
-	for _, c := range content {
-		if err := v.validateContentBlock(c); err != nil {
-			return fmt.Errorf("c67847d7-9011-41c0-9a05-520c9c670a28")
-		}
-	}
-
-	return nil
-}
-
-// ValidatePublicID ensures public ID format is secure
-func (v *ConversationValidator) ValidatePublicID(publicID string) error {
-	if publicID == "" {
-		return fmt.Errorf("public ID cannot be empty")
-	}
-
-	if len(publicID) < 5 || len(publicID) > 50 {
-		return fmt.Errorf("public ID must be between 5 and 50 characters")
-	}
-
-	// Use domain-specific ID validation with business rules
-	if strings.HasPrefix(publicID, "conv_") {
-		// Business rule: conversation IDs must follow "conv_" prefix format
-		if !idgen.ValidateIDFormat(publicID, "conv") {
-			return fmt.Errorf("invalid conversation ID format")
-		}
-	} else if strings.HasPrefix(publicID, "msg_") {
-		// Business rule: message/item IDs must follow "msg_" prefix format
-		if !idgen.ValidateIDFormat(publicID, "msg") {
-			return fmt.Errorf("invalid item ID format")
-		}
-	} else {
-		// Fallback to regex pattern for unknown prefixes
-		if !v.publicIDPattern.MatchString(publicID) {
-			return fmt.Errorf("public ID contains invalid characters")
-		}
-	}
-
-	return nil
-}
-
-// ValidateBatchSize ensures batch operations are within limits
-func (v *ConversationValidator) ValidateBatchSize(itemCount int) error {
-	if itemCount == 0 {
-		return fmt.Errorf("batch cannot be empty")
-	}
-
-	if itemCount > v.config.MaxItemsPerBatch {
-		return fmt.Errorf("cannot process more than %d items in a single batch", v.config.MaxItemsPerBatch)
-	}
-
-	return nil
-}
-
-// Private validation methods
-
-func (v *ConversationValidator) validateTitle(title string) error {
-	// Check length
-	if utf8.RuneCountInString(title) > v.config.MaxTitleLength {
-		return fmt.Errorf("title cannot exceed %d characters", v.config.MaxTitleLength)
-	}
-
-	// Check for suspicious content
-	title = strings.TrimSpace(title)
-	if title == "" {
-		return fmt.Errorf("title cannot be empty or only whitespace")
-	}
-
-	return nil
-}
-
-func (v *ConversationValidator) validateMetadata(metadata map[string]string) error {
-	if len(metadata) > v.config.MaxMetadataKeys {
-		return fmt.Errorf("metadata cannot have more than %d keys", v.config.MaxMetadataKeys)
-	}
-
-	for key, value := range metadata {
-		if err := v.validateMetadataKey(key); err != nil {
-			return fmt.Errorf("invalid metadata key '%s': %w", key, err)
-		}
-
-		if err := v.validateMetadataValue(value); err != nil {
-			return fmt.Errorf("invalid metadata value for key '%s': %w", key, err)
-		}
-	}
-
-	return nil
-}
-
-func (v *ConversationValidator) validateMetadataKey(key string) error {
-	if len(key) == 0 {
-		return fmt.Errorf("metadata key cannot be empty")
-	}
-
-	if len(key) > v.config.MaxMetadataKeyLength {
-		return fmt.Errorf("metadata key cannot exceed %d characters", v.config.MaxMetadataKeyLength)
-	}
-
-	if !v.metadataKeyPattern.MatchString(key) {
-		return fmt.Errorf("metadata key contains invalid characters (only alphanumeric and underscore allowed)")
-	}
-
-	return nil
-}
-
-func (v *ConversationValidator) validateMetadataValue(value string) error {
-	if utf8.RuneCountInString(value) > v.config.MaxMetadataValueLength {
-		return fmt.Errorf("metadata value cannot exceed %d characters", v.config.MaxMetadataValueLength)
-	}
-
-	return nil
-}
-
-func (v *ConversationValidator) validateContentBlock(content Content) error {
-	if content.Type == "" {
-		return fmt.Errorf("content type cannot be empty")
-	}
-
-	// Validate based on content type
-	switch content.Type {
-	case "text":
-		if content.Text != nil {
-			return v.validateTextContent(content.Text.Value)
-		}
-	case "input_text":
-		if content.InputText != nil {
-			return v.validateTextContent(*content.InputText)
-		}
-	case "output_text":
-		if content.OutputText != nil {
-			return v.validateTextContent(content.OutputText.Text)
-		}
-	case "image":
-		if content.Image != nil {
-			return v.validateImageContent(content.Image)
-		}
-	case "file":
-		if content.File != nil {
-			return v.validateFileContent(content.File)
-		}
-	default:
-		return fmt.Errorf("unsupported content type: %s", content.Type)
-	}
-
-	return nil
-}
-
-func (v *ConversationValidator) validateTextContent(text string) error {
-	if utf8.RuneCountInString(text) > v.config.MaxTextContentLength {
-		return fmt.Errorf("text content cannot exceed %d characters", v.config.MaxTextContentLength)
-	}
-
-	return nil
-}
-
-func (v *ConversationValidator) validateImageContent(image *ImageContent) error {
-	if image.URL == "" && image.FileID == "" {
-		return fmt.Errorf("image content must have either URL or file ID")
-	}
-
-	if image.URL != "" {
-		// Basic URL validation
-		if !strings.HasPrefix(image.URL, "http://") && !strings.HasPrefix(image.URL, "https://") && !strings.HasPrefix(image.URL, "data:") {
-			return fmt.Errorf("invalid image URL format")
-		}
-	}
-
-	if image.Detail != "" {
-		validDetails := []string{"low", "high", "auto"}
-		isValid := false
-		for _, valid := range validDetails {
-			if image.Detail == valid {
-				isValid = true
-				break
-			}
-		}
-		if !isValid {
-			return fmt.Errorf("invalid image detail level: %s", image.Detail)
-		}
-	}
-
-	return nil
-}
-
-func (v *ConversationValidator) validateFileContent(file *FileContent) error {
-	if file.FileID == "" {
-		return fmt.Errorf("file content must have a file ID")
-	}
-
-	if file.Size < 0 {
-		return fmt.Errorf("file size cannot be negative")
-	}
-
-	// Validate file size (100MB limit)
-	const maxFileSize = 100 * 1024 * 1024
-	if file.Size > maxFileSize {
-		return fmt.Errorf("file size cannot exceed 100MB")
-	}
-
-	return nil
-}
diff --git a/apps/jan-api-gateway/application/app/domain/cron/cron_service.go b/apps/jan-api-gateway/application/app/domain/cron/cron_service.go
deleted file mode 100644
index e2af0a75..00000000
--- a/apps/jan-api-gateway/application/app/domain/cron/cron_service.go
+++ /dev/null
@@ -1,34 +0,0 @@
-package cron
-
-import (
-	"context"
-
-	"github.com/mileusna/crontab"
-	inference_model_registry "menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-type CronService struct {
-	JanInferenceClient     *janinference.JanInferenceClient
-	InferenceModelRegistry *inference_model_registry.InferenceModelRegistry
-}
-
-func NewService(janInferenceClient *janinference.JanInferenceClient, registry *inference_model_registry.InferenceModelRegistry) *CronService {
-	return &CronService{
-		JanInferenceClient:     janInferenceClient,
-		InferenceModelRegistry: registry,
-	}
-}
-
-func (cs *CronService) Start(ctx context.Context, ctab *crontab.Crontab) {
-	// Run initial check
-	cs.InferenceModelRegistry.CheckInferenceModels(ctx)
-
-	ctab.AddJob("* * * * *", func() {
-		cs.InferenceModelRegistry.CheckInferenceModels(ctx)
-
-		// Reload environment variables
-		environment_variables.EnvironmentVariables.LoadFromEnv()
-	})
-}
diff --git a/apps/jan-api-gateway/application/app/domain/inference/inference_provider.go b/apps/jan-api-gateway/application/app/domain/inference/inference_provider.go
deleted file mode 100644
index 204be1ba..00000000
--- a/apps/jan-api-gateway/application/app/domain/inference/inference_provider.go
+++ /dev/null
@@ -1,37 +0,0 @@
-package inference
-
-import (
-	"context"
-	"io"
-
-	openai "github.com/sashabaranov/go-openai"
-)
-
-// InferenceProvider defines the interface for AI inference services
-type InferenceProvider interface {
-	// CreateCompletion creates a non-streaming chat completion
-	CreateCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, error)
-
-	// CreateCompletionStream creates a streaming chat completion
-	CreateCompletionStream(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (io.ReadCloser, error)
-
-	// GetModels returns available models
-	GetModels(ctx context.Context) (*ModelsResponse, error)
-
-	// ValidateModel checks if a model is supported
-	ValidateModel(model string) error
-}
-
-// ModelsResponse represents the response from GetModels
-type ModelsResponse struct {
-	Object string  `json:"object"`
-	Data   []Model `json:"data"`
-}
-
-// Model represents an AI model
-type Model struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Created int    `json:"created"`
-	OwnedBy string `json:"owned_by"`
-}
diff --git a/apps/jan-api-gateway/application/app/domain/inference_model/inference_model.go b/apps/jan-api-gateway/application/app/domain/inference_model/inference_model.go
deleted file mode 100644
index f78be9e8..00000000
--- a/apps/jan-api-gateway/application/app/domain/inference_model/inference_model.go
+++ /dev/null
@@ -1,8 +0,0 @@
-package inferencemodel
-
-type Model struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Created int    `json:"created"`
-	OwnedBy string `json:"owned_by"`
-}
diff --git a/apps/jan-api-gateway/application/app/domain/inference_model_registry/inference_model_registry.go b/apps/jan-api-gateway/application/app/domain/inference_model_registry/inference_model_registry.go
deleted file mode 100644
index 8ae9973c..00000000
--- a/apps/jan-api-gateway/application/app/domain/inference_model_registry/inference_model_registry.go
+++ /dev/null
@@ -1,331 +0,0 @@
-package inferencemodelregistry
-
-import (
-	"context"
-	"encoding/base64"
-	"encoding/json"
-	"errors"
-	"strings"
-	"time"
-
-	inferencemodel "menlo.ai/jan-api-gateway/app/domain/inference_model"
-	"menlo.ai/jan-api-gateway/app/infrastructure/cache"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-)
-
-type InferenceModelRegistry struct {
-	cache     *cache.RedisCacheService
-	janClient *janinference.JanInferenceClient
-}
-
-const (
-	// Consistent timeout for all Jan client operations
-	janClientTimeout = 20 * time.Second
-	ModelsCacheTTL   = 10 * time.Minute
-)
-
-// sanitizeKeyPart encodes dynamic key parts to be Redis-key safe
-func sanitizeKeyPart(s string) string { return base64.RawURLEncoding.EncodeToString([]byte(s)) }
-
-// NewInferenceModelRegistry creates a new registry instance with cache service
-func NewInferenceModelRegistry(cacheService *cache.RedisCacheService, janClient *janinference.JanInferenceClient) *InferenceModelRegistry {
-	return &InferenceModelRegistry{
-		cache:     cacheService,
-		janClient: janClient,
-	}
-}
-
-func (r *InferenceModelRegistry) ListModels(ctx context.Context) []inferencemodel.Model {
-	var models []inferencemodel.Model
-
-	// Try to get from cache first
-	cachedModelsJSON, err := r.cache.Get(ctx, cache.ModelsCacheKey)
-	if err == nil && cachedModelsJSON != "" {
-		if jsonErr := json.Unmarshal([]byte(cachedModelsJSON), &models); jsonErr == nil {
-			return models
-		}
-	}
-
-	// Cache miss - rebuild from JanInferenceClient
-	models = r.rebuildModelsFromJanClient(ctx)
-	return models
-}
-
-// hasModelsChanged checks if the models for a service have changed compared to cached data
-func (r *InferenceModelRegistry) hasModelsChanged(ctx context.Context, serviceName string, newModels []inferencemodel.Model) bool {
-	// Compare by model IDs only to avoid relying on per-model detail cache
-	cacheKey := cache.RegistryEndpointModelsKey + ":" + sanitizeKeyPart(serviceName)
-	cachedIDsJSON, err := r.cache.Get(ctx, cacheKey)
-	if err != nil {
-		// Cache miss or error - treat as changed so we populate
-		return true
-	}
-
-	var cachedIDs []string
-	if jsonErr := json.Unmarshal([]byte(cachedIDsJSON), &cachedIDs); jsonErr != nil {
-		return true
-	}
-
-	if len(cachedIDs) != len(newModels) {
-		return true
-	}
-
-	newIDs := functional.Map(newModels, func(model inferencemodel.Model) string { return model.ID })
-	idSet := make(map[string]struct{}, len(cachedIDs))
-	for _, id := range cachedIDs {
-		idSet[id] = struct{}{}
-	}
-	for _, id := range newIDs {
-		if _, ok := idSet[id]; !ok {
-			return true
-		}
-	}
-	return false
-}
-
-func (r *InferenceModelRegistry) SetModels(ctx context.Context, serviceName string, models []inferencemodel.Model) error {
-	if strings.TrimSpace(serviceName) == "" {
-		return errors.New("service name cannot be empty")
-	}
-
-	if !r.hasModelsChanged(ctx, serviceName, models) {
-		return nil
-	}
-
-	// Clear all existing cache
-	r.cache.Unlink(ctx, cache.RegistryModelEndpointsKey)
-	r.cache.Unlink(ctx, cache.ModelsCacheKey)
-
-	// Clear pattern-based entries
-	pattern := cache.RegistryEndpointModelsKey + ":*"
-	r.cache.DeletePattern(ctx, pattern)
-
-	// Add back all models
-	serviceCacheKey := cache.RegistryEndpointModelsKey + ":" + sanitizeKeyPart(serviceName)
-	modelIDs := functional.Map(models, func(m inferencemodel.Model) string { return m.ID })
-
-	// Convert to JSON strings for cache storage
-	modelIDsJSON, err := json.Marshal(modelIDs)
-	if err != nil {
-		return err
-	}
-	modelsJSON, err := json.Marshal(models)
-	if err != nil {
-		return err
-	}
-
-	if err := r.cache.Set(ctx, serviceCacheKey, string(modelIDsJSON), ModelsCacheTTL); err != nil {
-		return err
-	}
-	if err := r.cache.Set(ctx, cache.ModelsCacheKey, string(modelsJSON), ModelsCacheTTL); err != nil {
-		return err
-	}
-
-	// Rebuild reverse mapping
-	return r.rebuildModelToEndpointsMapping(ctx)
-}
-
-func (r *InferenceModelRegistry) RemoveServiceModels(ctx context.Context, serviceName string) error {
-	if strings.TrimSpace(serviceName) == "" {
-		return errors.New("service name cannot be empty")
-	}
-
-	serviceCacheKey := cache.RegistryEndpointModelsKey + ":" + sanitizeKeyPart(serviceName)
-
-	// 1) Read BEFORE deleting
-	serviceModelIDsJSON, err := r.cache.Get(ctx, serviceCacheKey)
-	if err != nil {
-		// nothing to do
-		return nil
-	}
-
-	var serviceModelIDs []string
-	if jsonErr := json.Unmarshal([]byte(serviceModelIDsJSON), &serviceModelIDs); jsonErr != nil {
-		return nil
-	}
-	serviceModelSet := make(map[string]struct{}, len(serviceModelIDs))
-	for _, id := range serviceModelIDs {
-		serviceModelSet[id] = struct{}{}
-	}
-
-	// 2) Delete mapping
-	if err := r.cache.Unlink(ctx, serviceCacheKey); err != nil {
-		return err
-	}
-
-	// 3) Remove those models from the global list
-	existingJSON, _ := r.cache.Get(ctx, cache.ModelsCacheKey)
-	var existing []inferencemodel.Model
-	if existingJSON != "" {
-		json.Unmarshal([]byte(existingJSON), &existing)
-	}
-
-	var filtered []inferencemodel.Model
-	for _, m := range existing {
-		if _, ok := serviceModelSet[m.ID]; !ok {
-			filtered = append(filtered, m)
-		}
-	}
-
-	filteredJSON, err := json.Marshal(filtered)
-	if err != nil {
-		return err
-	}
-	if err := r.cache.Set(ctx, cache.ModelsCacheKey, string(filteredJSON), ModelsCacheTTL); err != nil {
-		return err
-	}
-
-	// 4) Rebuild reverse mapping
-	return r.rebuildModelToEndpointsMapping(ctx)
-}
-
-func (r *InferenceModelRegistry) GetEndpointToModels(ctx context.Context, serviceName string) ([]string, bool) {
-	// Try to get from cache first
-	cacheKey := cache.RegistryEndpointModelsKey + ":" + sanitizeKeyPart(serviceName)
-	modelsJSON, err := r.cache.Get(ctx, cacheKey)
-	if err != nil {
-		// Cache miss - this service has no models yet
-		// Return empty result and don't populate cache
-		return nil, false
-	}
-
-	var models []string
-	if jsonErr := json.Unmarshal([]byte(modelsJSON), &models); jsonErr != nil {
-		return nil, false
-	}
-
-	return models, len(models) > 0
-}
-
-func (r *InferenceModelRegistry) GetModelToEndpoints(ctx context.Context) map[string][]string {
-	// Try to get from cache first
-	modelToEndpointsJSON, err := r.cache.Get(ctx, cache.RegistryModelEndpointsKey)
-	if err != nil {
-		// Cache miss - rebuild from JanInferenceClient
-		r.rebuildModelsFromJanClient(ctx)
-
-		// Try to get again after rebuild
-		modelToEndpointsJSON, err = r.cache.Get(ctx, cache.RegistryModelEndpointsKey)
-		if err != nil {
-			return make(map[string][]string)
-		}
-	}
-
-	var modelToEndpoints map[string][]string
-	if jsonErr := json.Unmarshal([]byte(modelToEndpointsJSON), &modelToEndpoints); jsonErr != nil {
-		return make(map[string][]string)
-	}
-
-	return modelToEndpoints
-}
-
-// rebuildModelsFromJanClient fetches models from JanInferenceClient and rebuilds cache
-func (r *InferenceModelRegistry) rebuildModelsFromJanClient(ctx context.Context) []inferencemodel.Model {
-	if r.janClient == nil {
-		return []inferencemodel.Model{}
-	}
-
-	// Apply consistent timeout for Jan client operations
-	timeoutCtx, cancel := context.WithTimeout(ctx, janClientTimeout)
-	defer cancel()
-
-	janModelResp, err := r.janClient.GetModels(timeoutCtx)
-	if err != nil {
-		return []inferencemodel.Model{}
-	}
-
-	models := make([]inferencemodel.Model, 0)
-	for _, model := range janModelResp.Data {
-		models = append(models, inferencemodel.Model{
-			ID:      model.ID,
-			Object:  model.Object,
-			Created: model.Created,
-			OwnedBy: model.OwnedBy,
-		})
-	}
-
-	// Store models in cache
-	if len(models) > 0 {
-		modelsJSON, _ := json.Marshal(models)
-		r.cache.Set(ctx, cache.ModelsCacheKey, string(modelsJSON), ModelsCacheTTL)
-
-		// Store service models mapping
-		serviceCacheKey := cache.RegistryEndpointModelsKey + ":" + sanitizeKeyPart(r.janClient.BaseURL)
-		modelIDs := functional.Map(models, func(model inferencemodel.Model) string {
-			return model.ID
-		})
-		modelIDsJSON, _ := json.Marshal(modelIDs)
-		r.cache.Set(ctx, serviceCacheKey, string(modelIDsJSON), ModelsCacheTTL)
-
-		// Build model-to-endpoints mapping
-		modelToEndpoints := make(map[string][]string)
-		for _, model := range models {
-			modelToEndpoints[model.ID] = append(modelToEndpoints[model.ID], r.janClient.BaseURL)
-		}
-		modelToEndpointsJSON, _ := json.Marshal(modelToEndpoints)
-		r.cache.Set(ctx, cache.RegistryModelEndpointsKey, string(modelToEndpointsJSON), ModelsCacheTTL)
-	}
-
-	return models
-}
-
-// rebuildModelToEndpointsMapping rebuilds the model-to-endpoints mapping from all service mappings
-func (r *InferenceModelRegistry) rebuildModelToEndpointsMapping(ctx context.Context) error {
-	modelToEndpoints := make(map[string][]string)
-
-	// This is a simplified implementation - in production you'd scan all service keys
-	// For now, we'll just rebuild from known models
-	allModelsJSON, err := r.cache.Get(ctx, cache.ModelsCacheKey)
-	if err != nil {
-		return err
-	}
-
-	var allModels []inferencemodel.Model
-	if jsonErr := json.Unmarshal([]byte(allModelsJSON), &allModels); jsonErr != nil {
-		return jsonErr
-	}
-
-	// For each model, find which services have it (this is not optimal but works)
-	for _, model := range allModels {
-		// You could optimize this by scanning service keys pattern
-		if r.janClient != nil {
-			modelToEndpoints[model.ID] = append(modelToEndpoints[model.ID], r.janClient.BaseURL)
-		}
-	}
-
-	modelToEndpointsJSON, err := json.Marshal(modelToEndpoints)
-	if err != nil {
-		return err
-	}
-	return r.cache.Set(ctx, cache.RegistryModelEndpointsKey, string(modelToEndpointsJSON), ModelsCacheTTL)
-}
-
-// CheckInferenceModels checks and updates models from JanInferenceClient (moved from cron service)
-func (r *InferenceModelRegistry) CheckInferenceModels(ctx context.Context) {
-	if r.janClient == nil {
-		return
-	}
-
-	// Apply consistent timeout for Jan client operations
-	timeoutCtx, cancel := context.WithTimeout(ctx, janClientTimeout)
-	defer cancel()
-
-	janModelResp, err := r.janClient.GetModels(timeoutCtx)
-	if err != nil {
-		_ = r.RemoveServiceModels(ctx, r.janClient.BaseURL) // Ignore error in cron context
-	} else {
-		models := make([]inferencemodel.Model, 0)
-		for _, model := range janModelResp.Data {
-			models = append(models, inferencemodel.Model{
-				ID:      model.ID,
-				Object:  model.Object,
-				Created: model.Created,
-				OwnedBy: model.OwnedBy,
-			})
-		}
-
-		// Clean and add new models (no merging or change checking)
-		_ = r.SetModels(ctx, r.janClient.BaseURL, models) // Ignore error in cron context
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/domain/invite/invite.go b/apps/jan-api-gateway/application/app/domain/invite/invite.go
deleted file mode 100644
index da68fa96..00000000
--- a/apps/jan-api-gateway/application/app/domain/invite/invite.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package invite
-
-import (
-	"context"
-	"encoding/json"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-)
-
-type Invite struct {
-	ID             uint
-	PublicID       string
-	Email          string
-	Role           string
-	Status         string
-	InvitedAt      time.Time
-	ExpiresAt      time.Time
-	AcceptedAt     *time.Time
-	OrganizationID uint
-	Secrets        *string
-	Projects       string
-}
-
-func (i *Invite) GetProjects() ([]InviteProject, error) {
-	var projects []InviteProject
-	byteData := []byte(i.Projects)
-	err := json.Unmarshal(byteData, &projects)
-	if err != nil {
-		return nil, err
-	}
-	return projects, nil
-}
-
-func (i *Invite) IsExpired() bool {
-	return time.Now().After(i.ExpiresAt)
-}
-
-type InviteStatus string
-
-const (
-	InviteStatusAccepted InviteStatus = "accepted"
-	InviteStatusExpired  InviteStatus = "expired"
-	InviteStatusPending  InviteStatus = "pending"
-)
-
-type InviteProjectRole string
-
-const (
-	InviteProjectRoleMember InviteProjectRole = "member"
-	InviteProjectRoleOwner  InviteProjectRole = "owner"
-)
-
-type InviteProject struct {
-	ID   string `json:"id"`
-	Role string `json:"role"`
-}
-
-type InvitesFilter struct {
-	PublicID       *string
-	OrganizationID *uint
-	Secrets        *string
-}
-
-type InviteRepository interface {
-	Create(ctx context.Context, p *Invite) error
-	Update(ctx context.Context, p *Invite) error
-	DeleteByID(ctx context.Context, id uint) error
-	FindByFilter(ctx context.Context, filter InvitesFilter, p *query.Pagination) ([]*Invite, error)
-	Count(ctx context.Context, filter InvitesFilter) (int64, error)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/invite/invite_service.go b/apps/jan-api-gateway/application/app/domain/invite/invite_service.go
deleted file mode 100644
index 06f742a6..00000000
--- a/apps/jan-api-gateway/application/app/domain/invite/invite_service.go
+++ /dev/null
@@ -1,132 +0,0 @@
-package invite
-
-import (
-	"bytes"
-	"context"
-	"fmt"
-	"html/template"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/utils/emailservice"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-)
-
-// InviteService provides business logic for managing invitations.
-type InviteService struct {
-	repo InviteRepository
-}
-
-// NewInviteService is the constructor for InviteService.
-func NewInviteService(repo InviteRepository) *InviteService {
-	return &InviteService{
-		repo: repo,
-	}
-}
-
-func (s *InviteService) createPublicID() (string, error) {
-	return idgen.GenerateSecureID("invite", 16)
-}
-
-// CreateInviteWithPublicID creates a new invitation and assigns it a unique
-// public ID before saving it to the repository.
-func (s *InviteService) CreateInviteWithPublicID(ctx context.Context, invite *Invite) (*Invite, error) {
-	publicID, err := s.createPublicID()
-	if err != nil {
-		return nil, err
-	}
-	invite.PublicID = publicID
-	invite.ExpiresAt = time.Now().Add(7 * 24 * time.Hour)
-	if err := s.repo.Create(ctx, invite); err != nil {
-		return nil, err
-	}
-	return invite, nil
-}
-
-// UpdateInvite updates an existing invitation.
-func (s *InviteService) UpdateInvite(ctx context.Context, invite *Invite) (*Invite, error) {
-	if invite.ID == 0 {
-		return nil, fmt.Errorf("cannot update invite with an ID of 0")
-	}
-	if err := s.repo.Update(ctx, invite); err != nil {
-		return nil, fmt.Errorf("failed to update invite: %w", err)
-	}
-	return invite, nil
-}
-
-// DeleteInviteByID deletes an invitation by its ID.
-func (s *InviteService) DeleteInviteByID(ctx context.Context, id uint) error {
-	if err := s.repo.DeleteByID(ctx, id); err != nil {
-		return fmt.Errorf("failed to delete invite by ID: %w", err)
-	}
-	return nil
-}
-
-// FindInvites retrieves a list of invitations based on a filter and pagination.
-func (s *InviteService) FindInvites(ctx context.Context, filter InvitesFilter, pagination *query.Pagination) ([]*Invite, error) {
-	return s.repo.FindByFilter(ctx, filter, pagination)
-}
-
-func (s *InviteService) FindOne(ctx context.Context, filter InvitesFilter) (*Invite, error) {
-	entities, err := s.repo.FindByFilter(ctx, filter, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(entities) == 0 {
-		return nil, nil
-	}
-	if len(entities) != 1 {
-		return nil, fmt.Errorf("more than one record")
-	}
-	return entities[0], err
-}
-
-// CountInvites counts the number of invitations matching a given filter.
-func (s *InviteService) CountInvites(ctx context.Context, filter InvitesFilter) (int64, error) {
-	return s.repo.Count(ctx, filter)
-}
-
-type EmailMetadata struct {
-	InviterEmail string
-	OrgName      string
-	OrgPublicID  string
-	InviteLink   string
-}
-
-func (s *InviteService) SendInviteEmail(ctx context.Context, e EmailMetadata, to string) error {
-	templateString := `<html><body><div style="font-family: Arial, sans-serif; max-width: 600px; margin: auto; border: 1px solid #ddd; border-radius: 8px; overflow: hidden;">
-    <div style="background-color: #f7f7f7; padding: 20px; text-align: center; border-bottom: 1px solid #ddd;">
-        <h2 style="margin: 0; color: #333;">You were invited to the organization {{.OrgName}} on JanAI</h2>
-    </div>
-    <div style="padding: 20px; background-color: #ffffff;">
-        <p style="font-size: 16px; color: #555; line-height: 1.6;">
-            <strong>{{.InviterEmail}}</strong> invited you to be a member of the organization {{.OrgName}} ({{.OrgPublicID}}) on the JanAI API.
-        </p>
-        <div style="text-align: center; margin: 30px 0;">
-            <a href="{{.InviteLink}}" style="background-color: #007bff; color: #ffffff; padding: 12px 25px; text-decoration: none; border-radius: 5px; font-size: 16px; font-weight: bold;">
-                Accept Invite
-            </a>
-        </div>
-        <p style="font-size: 14px; color: #888; text-align: center; margin-top: 20px;">
-            This invite will expire in 7 days.
-        </p>
-    </div>
-    <div style="background-color: #f7f7f7; padding: 15px; text-align: center; font-size: 12px; color: #999; border-top: 1px solid #ddd;">
-        If you don't recognize this, you may safely ignore it. If you have any additional questions or concerns, please visit our help center.
-    </div>
-</div></body></html>`
-	tmpl, err := template.New("email").Parse(templateString)
-	if err != nil {
-		return err
-	}
-	var buffer bytes.Buffer
-	if err := tmpl.Execute(&buffer, e); err != nil {
-		return err
-	}
-	emailBody := buffer.String()
-	return emailservice.SendEmail(
-		to,
-		fmt.Sprintf("You were invited to the organization %s on JanAI", e.OrgName),
-		emailBody,
-	)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/mcp/mcp_service.go b/apps/jan-api-gateway/application/app/domain/mcp/mcp_service.go
deleted file mode 100644
index 2d164522..00000000
--- a/apps/jan-api-gateway/application/app/domain/mcp/mcp_service.go
+++ /dev/null
@@ -1,76 +0,0 @@
-package mcp
-
-import (
-	"reflect"
-	"strings"
-
-	mcpgo "github.com/mark3labs/mcp-go/mcp"
-)
-
-func ReflectToMCPOptions(description string, v interface{}) []mcpgo.ToolOption {
-	t := reflect.TypeOf(v)
-	if t.Kind() == reflect.Ptr {
-		t = t.Elem()
-	}
-
-	var opts []mcpgo.ToolOption
-	opts = append(opts, mcpgo.WithDescription(description))
-	for i := 0; i < t.NumField(); i++ {
-		f := t.Field(i)
-
-		// Get JSON field name
-		jsonTag := f.Tag.Get("json")
-		if jsonTag == "" || jsonTag == "-" {
-			continue
-		}
-		name := strings.Split(jsonTag, ",")[0]
-
-		// Parse jsonschema tag
-		jsSchema := f.Tag.Get("jsonschema")
-		required := strings.Contains(jsSchema, "required")
-		desc := extractDescription(jsSchema)
-
-		// Determine mcpgo arg type based on Go type
-		baseType := f.Type
-		if baseType.Kind() == reflect.Ptr {
-			baseType = baseType.Elem()
-		}
-
-		var arg mcpgo.ToolOption
-		switch baseType.Kind() {
-		case reflect.String:
-			if required {
-				arg = mcpgo.WithString(name, mcpgo.Required(), mcpgo.Description(desc))
-			} else {
-				arg = mcpgo.WithString(name, mcpgo.Description(desc))
-			}
-		case reflect.Int:
-			if required {
-				arg = mcpgo.WithNumber(name, mcpgo.Required(), mcpgo.Description(desc))
-			} else {
-				arg = mcpgo.WithNumber(name, mcpgo.Description(desc))
-			}
-		case reflect.Bool:
-			if required {
-				arg = mcpgo.WithBoolean(name, mcpgo.Required(), mcpgo.Description(desc))
-			} else {
-				arg = mcpgo.WithBoolean(name, mcpgo.Description(desc))
-			}
-		default:
-			continue
-		}
-		opts = append(opts, arg)
-	}
-
-	return opts
-}
-
-func extractDescription(tag string) string {
-	parts := strings.Split(tag, ",")
-	for _, p := range parts {
-		if strings.HasPrefix(p, "description=") {
-			return strings.TrimPrefix(p, "description=")
-		}
-	}
-	return ""
-}
diff --git a/apps/jan-api-gateway/application/app/domain/mcp/serpermcp/serper_domain.go b/apps/jan-api-gateway/application/app/domain/mcp/serpermcp/serper_domain.go
deleted file mode 100644
index 6c6ffdef..00000000
--- a/apps/jan-api-gateway/application/app/domain/mcp/serpermcp/serper_domain.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package serpermcp
-
-type TBSTimeRange string
-
-const (
-	TBSAny       TBSTimeRange = ""
-	TBSPastHour  TBSTimeRange = "qdr:h"
-	TBSPastDay   TBSTimeRange = "qdr:d"
-	TBSPastWeek  TBSTimeRange = "qdr:w"
-	TBSPastMonth TBSTimeRange = "qdr:m"
-	TBSPastYear  TBSTimeRange = "qdr:y"
-)
-
-type SearchRequest struct {
-	Q           string        `json:"q"`
-	GL          *string       `json:"gl,omitempty"`
-	HL          *string       `json:"hl,omitempty"`
-	Location    *string       `json:"location,omitempty"`
-	Num         *int          `json:"num,omitempty"`
-	Page        *int          `json:"page,omitempty"`
-	Autocorrect *bool         `json:"autocorrect,omitempty"`
-	TBS         *TBSTimeRange `json:"tbs,omitempty"`
-}
-
-type SearchResponse struct {
-	SearchParameters map[string]interface{}   `json:"searchParameters"`
-	Organic          []map[string]interface{} `json:"organic"`
-	KnowledgeGraph   map[string]interface{}   `json:"knowledgeGraph,omitempty"`
-	Images           []map[string]interface{} `json:"images,omitempty"`
-	News             []map[string]interface{} `json:"news,omitempty"`
-	AnswerBox        map[string]interface{}   `json:"answerBox,omitempty"`
-}
-
-type FetchWebpageRequest struct {
-	Url             string `json:"url"`
-	IncludeMarkdown *bool  `json:"includeMarkdown,omitempty"`
-}
-
-type FetchWebpageResponse struct {
-	Text     string                 `json:"text"`
-	Metadata map[string]interface{} `json:"metadata"`
-}
diff --git a/apps/jan-api-gateway/application/app/domain/mcp/serpermcp/serper_service.go b/apps/jan-api-gateway/application/app/domain/mcp/serpermcp/serper_service.go
deleted file mode 100644
index e103b26b..00000000
--- a/apps/jan-api-gateway/application/app/domain/mcp/serpermcp/serper_service.go
+++ /dev/null
@@ -1,60 +0,0 @@
-package serpermcp
-
-import (
-	"context"
-
-	"menlo.ai/jan-api-gateway/app/utils/httpclients/serper"
-)
-
-type SerperService struct {
-	SerperClient *serper.SerperClient
-}
-
-func NewSerperService() *SerperService {
-	return &SerperService{
-		SerperClient: serper.NewSerperClient(),
-	}
-}
-
-func (s *SerperService) Search(ctx context.Context, query SearchRequest) (*SearchResponse, error) {
-	var tbs *serper.TBSTimeRange
-	request := serper.SearchRequest{
-		Q:           query.Q,
-		GL:          query.GL,
-		HL:          query.HL,
-		Location:    query.Location,
-		Num:         query.Num,
-		Page:        query.Page,
-		Autocorrect: query.Autocorrect,
-		TBS:         tbs,
-	}
-	resp, err := s.SerperClient.Search(ctx, request)
-	if err != nil {
-		return nil, err
-	}
-
-	return &SearchResponse{
-		SearchParameters: resp.SearchParameters,
-		Organic:          resp.Organic,
-		KnowledgeGraph:   resp.KnowledgeGraph,
-		Images:           resp.Images,
-		News:             resp.News,
-		AnswerBox:        resp.AnswerBox,
-	}, nil
-}
-
-func (s *SerperService) FetchWebpage(ctx context.Context, query FetchWebpageRequest) (*FetchWebpageResponse, error) {
-	request := serper.FetchWebpageRequest{
-		Url:             query.Url,
-		IncludeMarkdown: query.IncludeMarkdown,
-	}
-	resp, err := s.SerperClient.FetchWebpage(ctx, request)
-	if err != nil {
-		return nil, err
-	}
-
-	return &FetchWebpageResponse{
-		Text:     resp.Text,
-		Metadata: resp.Metadata,
-	}, nil
-}
diff --git a/apps/jan-api-gateway/application/app/domain/organization/organization.go b/apps/jan-api-gateway/application/app/domain/organization/organization.go
deleted file mode 100644
index c9915a80..00000000
--- a/apps/jan-api-gateway/application/app/domain/organization/organization.go
+++ /dev/null
@@ -1,55 +0,0 @@
-package organization
-
-import (
-	"context"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-)
-
-type Organization struct {
-	ID        uint
-	Name      string
-	PublicID  string
-	CreatedAt time.Time
-	UpdatedAt time.Time
-	Enabled   bool
-}
-
-type OrganizationMemberRole string
-
-const (
-	OrganizationMemberRoleOwner  OrganizationMemberRole = "owner"
-	OrganizationMemberRoleReader OrganizationMemberRole = "reader"
-)
-
-type OrganizationMember struct {
-	ID             uint
-	UserID         uint
-	OrganizationID uint
-	Role           OrganizationMemberRole
-	CreatedAt      time.Time
-}
-
-type OrganizationFilter struct {
-	PublicID *string
-	Enabled  *bool
-}
-
-type OrganizationMemberFilter struct {
-	UserID         *uint
-	OrganizationID *uint
-	Role           *string
-}
-
-type OrganizationRepository interface {
-	Create(ctx context.Context, o *Organization) error
-	Update(ctx context.Context, o *Organization) error
-	DeleteByID(ctx context.Context, id uint) error
-	FindByID(ctx context.Context, id uint) (*Organization, error)
-	FindByPublicID(ctx context.Context, publicID string) (*Organization, error)
-	FindByFilter(ctx context.Context, filter OrganizationFilter, pagination *query.Pagination) ([]*Organization, error)
-	Count(ctx context.Context, filter OrganizationFilter) (int64, error)
-	AddMember(ctx context.Context, m *OrganizationMember) error
-	FindMemberByFilter(ctx context.Context, filter OrganizationMemberFilter, pagination *query.Pagination) ([]*OrganizationMember, error)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/organization/organization_service.go b/apps/jan-api-gateway/application/app/domain/organization/organization_service.go
deleted file mode 100644
index b2057dd4..00000000
--- a/apps/jan-api-gateway/application/app/domain/organization/organization_service.go
+++ /dev/null
@@ -1,146 +0,0 @@
-package organization
-
-import (
-	"context"
-	"fmt"
-	"sync"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-// OrganizationService provides business logic for managing organizations.
-type OrganizationService struct {
-	// The service has a dependency on the repository interface.
-	repo OrganizationRepository
-}
-
-// NewService is the constructor for OrganizationService.
-// It injects the repository dependency.
-func NewService(repo OrganizationRepository) *OrganizationService {
-	return &OrganizationService{
-		repo: repo,
-	}
-}
-
-var DEFAULT_ORGANIZATION_ONCE sync.Once
-var DEFAULT_ORGANIZATION *Organization
-
-func UpdateDefaultOrganization(o *Organization) {
-	DEFAULT_ORGANIZATION_ONCE.Do(func() {
-		DEFAULT_ORGANIZATION = o
-	})
-}
-
-func (s *OrganizationService) createPublicID() (string, error) {
-	return idgen.GenerateSecureID("org", 16)
-}
-
-// CreateOrganizationWithPublicID creates a new organization and automatically
-// assigns a unique public ID before saving it to the repository.
-func (s *OrganizationService) CreateOrganizationWithPublicID(ctx context.Context, o *Organization) (*Organization, error) {
-	publicID, err := s.createPublicID()
-	if err != nil {
-		return nil, err
-	}
-	o.PublicID = publicID
-	if err := s.repo.Create(ctx, o); err != nil {
-		return nil, err
-	}
-	return o, nil
-}
-
-// UpdateOrganization updates an existing organization.
-func (s *OrganizationService) UpdateOrganization(ctx context.Context, o *Organization) (*Organization, error) {
-	// Basic validation could be added here before calling the repository.
-	if o.ID == 0 {
-		return nil, fmt.Errorf("cannot update organization with an ID of 0")
-	}
-	if err := s.repo.Update(ctx, o); err != nil {
-		return nil, fmt.Errorf("failed to update organization: %w", err)
-	}
-	return o, nil
-}
-
-// DeleteOrganizationByID deletes an organization by its ID.
-func (s *OrganizationService) DeleteOrganizationByID(ctx context.Context, id uint) error {
-	if err := s.repo.DeleteByID(ctx, id); err != nil {
-		return fmt.Errorf("failed to delete organization by ID: %w", err)
-	}
-	return nil
-}
-
-// FindOrganizationByID finds an organization by its unique ID.
-func (s *OrganizationService) FindOrganizationByID(ctx context.Context, id uint) (*Organization, error) {
-	return s.repo.FindByID(ctx, id)
-}
-
-// FindOrganizationByPublicID finds an organization by its unique public ID.
-func (s *OrganizationService) FindOrganizationByPublicID(ctx context.Context, publicID string) (*Organization, error) {
-	return s.repo.FindByPublicID(ctx, publicID)
-}
-
-// FindOrganizations retrieves a list of organizations based on a filter and pagination.
-func (s *OrganizationService) FindOrganizations(ctx context.Context, filter OrganizationFilter, pagination *query.Pagination) ([]*Organization, error) {
-	return s.repo.FindByFilter(ctx, filter, pagination)
-}
-
-func (s *OrganizationService) FindOneByFilter(ctx context.Context, filter OrganizationFilter) (*Organization, error) {
-	orgEntities, err := s.repo.FindByFilter(ctx, filter, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(orgEntities) == 0 {
-		return nil, nil
-	}
-	if len(orgEntities) != 1 {
-		return nil, fmt.Errorf("no records found")
-	}
-	return orgEntities[0], nil
-}
-
-// CountOrganizations counts the number of organizations matching a given filter.
-func (s *OrganizationService) CountOrganizations(ctx context.Context, filter OrganizationFilter) (int64, error) {
-	return s.repo.Count(ctx, filter)
-}
-
-// CountOrganizations counts the number of organizations matching a given filter.
-func (s *OrganizationService) AddMember(ctx context.Context, m *OrganizationMember) error {
-	return s.repo.AddMember(ctx, m)
-}
-
-func (s *OrganizationService) FindMembersByFilter(ctx context.Context, f OrganizationMemberFilter, p *query.Pagination) ([]*OrganizationMember, error) {
-	return s.repo.FindMemberByFilter(ctx, f, p)
-}
-
-func (s *OrganizationService) FindOneMemberByFilter(ctx context.Context, f OrganizationMemberFilter) (*OrganizationMember, error) {
-	entities, err := s.repo.FindMemberByFilter(ctx, f, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(entities) == 0 {
-		return nil, nil
-	}
-	if len(entities) != 1 {
-		return nil, fmt.Errorf("no records")
-	}
-	return entities[0], err
-}
-
-func (s *OrganizationService) FindOrCreateDefaultOrganization(ctx context.Context) (*Organization, error) {
-	orgEntity, err := s.FindOneByFilter(ctx, OrganizationFilter{
-		Enabled: ptr.ToBool(true),
-	})
-	if err != nil {
-		return nil, err
-	}
-	if orgEntity != nil {
-		return orgEntity, nil
-	}
-
-	return s.CreateOrganizationWithPublicID(ctx, &Organization{
-		Name:    "Default Organization",
-		Enabled: true,
-	})
-}
diff --git a/apps/jan-api-gateway/application/app/domain/project/project.go b/apps/jan-api-gateway/application/app/domain/project/project.go
deleted file mode 100644
index a238862e..00000000
--- a/apps/jan-api-gateway/application/app/domain/project/project.go
+++ /dev/null
@@ -1,72 +0,0 @@
-package project
-
-import (
-	"context"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-)
-
-type Project struct {
-	ID             uint
-	Name           string
-	PublicID       string
-	Status         string
-	OrganizationID uint
-	CreatedAt      time.Time
-	UpdatedAt      time.Time
-	ArchivedAt     *time.Time
-	IsDefault      bool
-}
-
-type ProjectMember struct {
-	ID        uint
-	UserID    uint
-	ProjectID uint
-	Role      string
-}
-
-type ProjectFilter struct {
-	PublicID       *string
-	Status         *string
-	OrganizationID *uint
-	Archived       *bool
-	PublicIDs      *[]string
-	MemberID       *uint
-}
-
-type ProjectMemberFilter struct {
-	UserID    *uint
-	ProjectID *uint
-	Role      *string
-}
-
-type ProjectStatus string
-
-const (
-	ProjectStatusActive   ProjectStatus = "active"
-	ProjectStatusArchived ProjectStatus = "archived"
-)
-
-type ProjectMemberRole string
-
-const (
-	ProjectMemberRoleOwner  ProjectMemberRole = "owner"
-	ProjectMemberRoleMember ProjectMemberRole = "member"
-)
-
-type ProjectRepository interface {
-	Create(ctx context.Context, p *Project) error
-	Update(ctx context.Context, p *Project) error
-	DeleteByID(ctx context.Context, id uint) error
-
-	FindByID(ctx context.Context, id uint) (*Project, error)
-	FindByPublicID(ctx context.Context, publicID string) (*Project, error)
-	FindByFilter(ctx context.Context, filter ProjectFilter, p *query.Pagination) ([]*Project, error)
-	Count(ctx context.Context, filter ProjectFilter) (int64, error)
-
-	AddMember(ctx context.Context, m *ProjectMember) error
-	RemoveMember(ctx context.Context, projectID, userID uint) error
-	FindMembersByFilter(ctx context.Context, filter ProjectMemberFilter, p *query.Pagination) ([]*ProjectMember, error)
-	UpdateMemberRole(ctx context.Context, projectID, userID uint, role string) error
-}
diff --git a/apps/jan-api-gateway/application/app/domain/project/project_service.go b/apps/jan-api-gateway/application/app/domain/project/project_service.go
deleted file mode 100644
index 554e9fce..00000000
--- a/apps/jan-api-gateway/application/app/domain/project/project_service.go
+++ /dev/null
@@ -1,114 +0,0 @@
-package project
-
-import (
-	"context"
-	"fmt"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-)
-
-// ProjectService provides business logic for managing projects.
-type ProjectService struct {
-	// The service has a dependency on the repository interface.
-	repo ProjectRepository
-}
-
-// NewService is the constructor for ProjectService.
-// It injects the repository dependency.
-func NewService(repo ProjectRepository) *ProjectService {
-	return &ProjectService{
-		repo: repo,
-	}
-}
-
-func (s *ProjectService) createPublicID() (string, error) {
-	return idgen.GenerateSecureID("proj", 16)
-}
-
-// CreateProjectWithPublicID creates a new project and automatically
-// assigns a unique public ID before saving it to the repository.
-func (s *ProjectService) CreateProjectWithPublicID(ctx context.Context, p *Project) (*Project, error) {
-	publicID, err := s.createPublicID()
-	if err != nil {
-		return nil, err
-	}
-	p.PublicID = publicID
-
-	if err := s.repo.Create(ctx, p); err != nil {
-		return nil, fmt.Errorf("failed to create project in repository: %w", err)
-	}
-	return p, nil
-}
-
-// UpdateProject updates an existing project.
-func (s *ProjectService) UpdateProject(ctx context.Context, p *Project) (*Project, error) {
-	// Basic validation could be added here before calling the repository.
-	if p.ID == 0 {
-		return nil, fmt.Errorf("cannot update project with an ID of 0")
-	}
-	if err := s.repo.Update(ctx, p); err != nil {
-		return nil, fmt.Errorf("failed to update project: %w", err)
-	}
-	return p, nil
-}
-
-// DeleteProjectByID deletes a project by its ID.
-func (s *ProjectService) DeleteProjectByID(ctx context.Context, id uint) error {
-	if err := s.repo.DeleteByID(ctx, id); err != nil {
-		return fmt.Errorf("failed to delete project by ID: %w", err)
-	}
-	return nil
-}
-
-// FindProjectByID finds a project by its unique ID.
-func (s *ProjectService) FindProjectByID(ctx context.Context, id uint) (*Project, error) {
-	return s.repo.FindByID(ctx, id)
-}
-
-// FindProjectByPublicID finds a project by its unique public ID.
-func (s *ProjectService) FindProjectByPublicID(ctx context.Context, publicID string) (*Project, error) {
-	return s.repo.FindByPublicID(ctx, publicID)
-}
-
-// FindProjects retrieves a list of projects based on a filter and pagination.
-func (s *ProjectService) Find(ctx context.Context, filter ProjectFilter, pagination *query.Pagination) ([]*Project, error) {
-	return s.repo.FindByFilter(ctx, filter, pagination)
-}
-
-func (s *ProjectService) FindOne(ctx context.Context, filter ProjectFilter) (*Project, error) {
-	projectEntities, err := s.repo.FindByFilter(ctx, filter, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(projectEntities) == 0 {
-		return nil, nil
-	}
-	if len(projectEntities) != 1 {
-		return nil, err
-	}
-	return projectEntities[0], nil
-}
-
-func (s *ProjectService) FindOneMemberByFilter(ctx context.Context, filter ProjectMemberFilter) (*ProjectMember, error) {
-	memberEntities, err := s.repo.FindMembersByFilter(ctx, filter, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(memberEntities) == 0 {
-		return nil, nil
-	}
-	if len(memberEntities) != 1 {
-		return nil, err
-	}
-	return memberEntities[0], nil
-}
-
-// CountProjects counts the number of projects matching a given filter.
-func (s *ProjectService) CountProjects(ctx context.Context, filter ProjectFilter) (int64, error) {
-	return s.repo.Count(ctx, filter)
-}
-
-func (s *ProjectService) AddMember(ctx context.Context, member *ProjectMember) error {
-	return s.repo.AddMember(ctx, member)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/query/query.go b/apps/jan-api-gateway/application/app/domain/query/query.go
deleted file mode 100644
index bcb66f17..00000000
--- a/apps/jan-api-gateway/application/app/domain/query/query.go
+++ /dev/null
@@ -1,64 +0,0 @@
-package query
-
-import (
-	"fmt"
-	"strconv"
-
-	"github.com/gin-gonic/gin"
-)
-
-type Pagination struct {
-	Limit  *int
-	Offset *int
-	After  *uint
-	Order  string
-}
-
-func GetCursorPaginationFromQuery(reqCtx *gin.Context, findByLastID func(string) (*uint, error)) (*Pagination, error) {
-	limitStr := reqCtx.DefaultQuery("limit", "20")
-	offsetStr := reqCtx.Query("offset")
-	order := reqCtx.DefaultQuery("order", "asc")
-	lastStr := reqCtx.DefaultQuery("last", "")
-
-	var limit *int
-	if limitStr != "" {
-		limitInt, err := strconv.Atoi(limitStr)
-		if err != nil || limitInt < 1 {
-			return nil, fmt.Errorf("invalid limit number")
-		}
-		limit = &limitInt
-	}
-
-	var offset *int
-	var after *uint
-	if offsetStr != "" {
-		offsetInt, err := strconv.Atoi(offsetStr)
-		if err != nil {
-			return nil, fmt.Errorf("invalid offset number")
-		}
-		offset = &offsetInt
-	} else if lastStr != "" {
-		lastID, err := findByLastID(lastStr)
-		if err != nil {
-			return nil, fmt.Errorf("invalid offset number")
-		}
-		after = lastID
-	}
-
-	if order != "asc" && order != "desc" {
-		return nil, fmt.Errorf("invalid order")
-	}
-
-	return &Pagination{
-		Limit:  limit,
-		Offset: offset,
-		Order:  order,
-		After:  after,
-	}, nil
-}
-
-func GetPaginationFromQuery(reqCtx *gin.Context) (*Pagination, error) {
-	return GetCursorPaginationFromQuery(reqCtx, func(s string) (*uint, error) {
-		return nil, fmt.Errorf("invalid query parameter: last")
-	})
-}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response.go b/apps/jan-api-gateway/application/app/domain/response/response.go
deleted file mode 100644
index f6bae5d8..00000000
--- a/apps/jan-api-gateway/application/app/domain/response/response.go
+++ /dev/null
@@ -1,312 +0,0 @@
-package response
-
-import (
-	"context"
-	"encoding/json"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-)
-
-// Response represents a model response stored in the database
-type Response struct {
-	ID                 uint
-	PublicID           string
-	UserID             uint
-	ConversationID     *uint
-	PreviousResponseID *string // Public ID of the previous response
-	Model              string
-	Status             ResponseStatus
-	Input              string  // JSON string of the input
-	Output             *string // JSON string of the output
-	SystemPrompt       *string
-	MaxTokens          *int
-	Temperature        *float64
-	TopP               *float64
-	TopK               *int
-	RepetitionPenalty  *float64
-	Seed               *int
-	Stop               *string // JSON string of stop sequences
-	PresencePenalty    *float64
-	FrequencyPenalty   *float64
-	LogitBias          *string // JSON string of logit bias
-	ResponseFormat     *string // JSON string of response format
-	Tools              *string // JSON string of tools
-	ToolChoice         *string // JSON string of tool choice
-	Metadata           *string // JSON string of metadata
-	Stream             *bool
-	Background         *bool
-	Timeout            *int
-	User               *string
-	Usage              *string // JSON string of usage statistics
-	Error              *string // JSON string of error details
-	CompletedAt        *time.Time
-	CancelledAt        *time.Time
-	FailedAt           *time.Time
-	CreatedAt          time.Time
-	UpdatedAt          time.Time
-	Items              []conversation.Item // Items that belong to this response
-}
-
-// ResponseStatus represents the status of a response
-type ResponseStatus string
-
-const (
-	ResponseStatusPending   ResponseStatus = "pending"
-	ResponseStatusRunning   ResponseStatus = "running"
-	ResponseStatusCompleted ResponseStatus = "completed"
-	ResponseStatusCancelled ResponseStatus = "cancelled"
-	ResponseStatusFailed    ResponseStatus = "failed"
-)
-
-// ResponseFilter represents filters for querying responses
-type ResponseFilter struct {
-	PublicID       *string
-	UserID         *uint
-	ConversationID *uint
-	Model          *string
-	Status         *ResponseStatus
-	CreatedAfter   *time.Time
-	CreatedBefore  *time.Time
-}
-
-// ResponseRepository defines the interface for response data operations
-type ResponseRepository interface {
-	Create(ctx context.Context, r *Response) error
-	Update(ctx context.Context, r *Response) error
-	DeleteByID(ctx context.Context, id uint) error
-	FindByID(ctx context.Context, id uint) (*Response, error)
-	FindByPublicID(ctx context.Context, publicID string) (*Response, error)
-	FindByFilter(ctx context.Context, filter ResponseFilter, pagination *query.Pagination) ([]*Response, error)
-	Count(ctx context.Context, filter ResponseFilter) (int64, error)
-	FindByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*Response, error)
-	FindByConversationID(ctx context.Context, conversationID uint, pagination *query.Pagination) ([]*Response, error)
-}
-
-// ResponseParams represents parameters for creating a response
-type ResponseParams struct {
-	MaxTokens         *int
-	Temperature       *float64
-	TopP              *float64
-	TopK              *int
-	RepetitionPenalty *float64
-	Seed              *int
-	Stop              []string
-	PresencePenalty   *float64
-	FrequencyPenalty  *float64
-	LogitBias         map[string]float64
-	ResponseFormat    any
-	Tools             any
-	ToolChoice        any
-	Metadata          map[string]any
-	Stream            *bool
-	Background        *bool
-	Timeout           *int
-	User              *string
-}
-
-// NewResponse creates a new Response object with the given parameters
-func NewResponse(userID uint, conversationID *uint, model, input string, systemPrompt *string, params *ResponseParams) *Response {
-	response := &Response{
-		UserID:         userID,
-		ConversationID: conversationID,
-		Model:          model,
-		Input:          input,
-		SystemPrompt:   systemPrompt,
-		Status:         ResponseStatusPending,
-	}
-
-	// Apply response parameters
-	if params != nil {
-		response.MaxTokens = params.MaxTokens
-		response.Temperature = params.Temperature
-		response.TopP = params.TopP
-		response.TopK = params.TopK
-		response.RepetitionPenalty = params.RepetitionPenalty
-		response.Seed = params.Seed
-		response.PresencePenalty = params.PresencePenalty
-		response.FrequencyPenalty = params.FrequencyPenalty
-		response.Stream = params.Stream
-		response.Background = params.Background
-		response.Timeout = params.Timeout
-		response.User = params.User
-
-		// Convert complex fields to JSON strings
-		if params.Stop != nil {
-			if stopJSON, err := json.Marshal(params.Stop); err == nil {
-				stopStr := string(stopJSON)
-				if stopStr != "[]" && stopStr != "{}" {
-					response.Stop = &stopStr
-				}
-			}
-		}
-
-		if params.LogitBias != nil {
-			if logitBiasJSON, err := json.Marshal(params.LogitBias); err == nil {
-				logitBiasStr := string(logitBiasJSON)
-				if logitBiasStr != "[]" && logitBiasStr != "{}" {
-					response.LogitBias = &logitBiasStr
-				}
-			}
-		}
-
-		if params.ResponseFormat != nil {
-			if responseFormatJSON, err := json.Marshal(params.ResponseFormat); err == nil {
-				responseFormatStr := string(responseFormatJSON)
-				if responseFormatStr != "[]" && responseFormatStr != "{}" {
-					response.ResponseFormat = &responseFormatStr
-				}
-			}
-		}
-
-		if params.Tools != nil {
-			if toolsJSON, err := json.Marshal(params.Tools); err == nil {
-				toolsStr := string(toolsJSON)
-				if toolsStr != "[]" && toolsStr != "{}" {
-					response.Tools = &toolsStr
-				}
-			}
-		}
-
-		if params.ToolChoice != nil {
-			if toolChoiceJSON, err := json.Marshal(params.ToolChoice); err == nil {
-				toolChoiceStr := string(toolChoiceJSON)
-				if toolChoiceStr != "[]" && toolChoiceStr != "{}" {
-					response.ToolChoice = &toolChoiceStr
-				}
-			}
-		}
-
-		if params.Metadata != nil {
-			if metadataJSON, err := json.Marshal(params.Metadata); err == nil {
-				metadataStr := string(metadataJSON)
-				if metadataStr != "[]" && metadataStr != "{}" {
-					response.Metadata = &metadataStr
-				}
-			}
-		}
-	}
-
-	return response
-}
-
-// ResponseUpdates represents multiple updates to be applied to a response
-type ResponseUpdates struct {
-	Status *string `json:"status,omitempty"`
-	Output any     `json:"output,omitempty"`
-	Usage  any     `json:"usage,omitempty"`
-	Error  any     `json:"error,omitempty"`
-}
-
-// ApplyResponseUpdates applies multiple updates to a response object (no DB access)
-func ApplyResponseUpdates(response *Response, updates *ResponseUpdates) *common.Error {
-	// Update status if provided
-	if updates.Status != nil {
-		UpdateResponseStatusOnObject(response, ResponseStatus(*updates.Status))
-	}
-
-	// Update output if provided
-	if updates.Output != nil {
-		if err := UpdateResponseOutputOnObject(response, updates.Output); err != nil {
-			return err
-		}
-	}
-
-	// Update usage if provided
-	if updates.Usage != nil {
-		if err := UpdateResponseUsageOnObject(response, updates.Usage); err != nil {
-			return err
-		}
-	}
-
-	// Update error if provided
-	if updates.Error != nil {
-		if err := UpdateResponseErrorOnObject(response, updates.Error); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-// UpdateResponseStatusOnObject updates the status on a response object (no DB access)
-func UpdateResponseStatusOnObject(response *Response, status ResponseStatus) {
-	response.Status = status
-	response.UpdatedAt = time.Now()
-
-	// Set completion timestamps based on status
-	now := time.Now()
-	switch status {
-	case ResponseStatusCompleted:
-		response.CompletedAt = &now
-	case ResponseStatusCancelled:
-		response.CancelledAt = &now
-	case ResponseStatusFailed:
-		response.FailedAt = &now
-	}
-}
-
-// UpdateResponseOutputOnObject updates the output on a response object (no DB access)
-func UpdateResponseOutputOnObject(response *Response, output any) *common.Error {
-	// Convert output to JSON string
-	outputJSON, err := json.Marshal(output)
-	if err != nil {
-		return common.NewError(err, "s9t0u1v2-w3x4-5678-stuv-901234567890")
-	}
-
-	outputStr := string(outputJSON)
-	// For JSON columns, use null for empty arrays/objects
-	if outputStr == "[]" || outputStr == "{}" {
-		response.Output = nil
-	} else {
-		response.Output = &outputStr
-	}
-	response.UpdatedAt = time.Now()
-
-	return nil
-}
-
-// UpdateResponseUsageOnObject updates the usage statistics on a response object (no DB access)
-func UpdateResponseUsageOnObject(response *Response, usage any) *common.Error {
-	// Convert usage to JSON string
-	usageJSON, err := json.Marshal(usage)
-	if err != nil {
-		return common.NewError(err, "w3x4y5z6-a7b8-9012-wxyz-345678901234")
-	}
-
-	usageStr := string(usageJSON)
-	// For JSON columns, use null for empty arrays/objects
-	if usageStr == "[]" || usageStr == "{}" {
-		response.Usage = nil
-	} else {
-		response.Usage = &usageStr
-	}
-	response.UpdatedAt = time.Now()
-
-	return nil
-}
-
-// UpdateResponseErrorOnObject updates the error information on a response object (no DB access)
-func UpdateResponseErrorOnObject(response *Response, error any) *common.Error {
-	// Convert error to JSON string
-	errorJSON, err := json.Marshal(error)
-	if err != nil {
-		return common.NewError(err, "a7b8c9d0-e1f2-3456-abcd-789012345678")
-	}
-
-	errorStr := string(errorJSON)
-	// For JSON columns, use null for empty arrays/objects
-	if errorStr == "[]" || errorStr == "{}" {
-		response.Error = nil
-	} else {
-		response.Error = &errorStr
-	}
-	response.Status = ResponseStatusFailed
-	response.UpdatedAt = time.Now()
-	now := time.Now()
-	response.FailedAt = &now
-
-	return nil
-}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_model_nonstream_service.go b/apps/jan-api-gateway/application/app/domain/response/response_model_nonstream_service.go
deleted file mode 100644
index cfdbfcc7..00000000
--- a/apps/jan-api-gateway/application/app/domain/response/response_model_nonstream_service.go
+++ /dev/null
@@ -1,213 +0,0 @@
-package response
-
-import (
-	"context"
-	"net/http"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-const (
-	// DefaultTimeout is the default timeout for non-streaming requests
-	DefaultTimeout = 120 * time.Second
-)
-
-// NonStreamModelService handles non-streaming response requests
-type NonStreamModelService struct {
-	*ResponseModelService
-}
-
-// NewNonStreamModelService creates a new NonStreamModelService instance
-func NewNonStreamModelService(responseModelService *ResponseModelService) *NonStreamModelService {
-	return &NonStreamModelService{
-		ResponseModelService: responseModelService,
-	}
-}
-
-// CreateNonStreamResponse handles the business logic for creating a non-streaming response
-func (h *NonStreamModelService) CreateNonStreamResponseHandler(reqCtx *gin.Context, request *requesttypes.CreateResponseRequest, key string, conv *conversation.Conversation, responseEntity *Response, chatCompletionRequest *openai.ChatCompletionRequest) {
-
-	result, err := h.CreateNonStreamResponse(reqCtx, request, key, conv, responseEntity, chatCompletionRequest)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(
-			http.StatusBadRequest,
-			responsetypes.ErrorResponse{
-				Code:  err.GetCode(),
-				Error: err.Error(),
-			})
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, result)
-}
-
-// doCreateNonStreamResponse performs the business logic for creating a non-streaming response
-func (h *NonStreamModelService) CreateNonStreamResponse(reqCtx *gin.Context, request *requesttypes.CreateResponseRequest, key string, conv *conversation.Conversation, responseEntity *Response, chatCompletionRequest *openai.ChatCompletionRequest) (responsetypes.Response, *common.Error) {
-	// Process with Jan inference client for non-streaming with timeout
-	janInferenceClient := janinference.NewJanInferenceClient(reqCtx)
-	ctx, cancel := context.WithTimeout(reqCtx.Request.Context(), DefaultTimeout)
-	defer cancel()
-	chatResponse, err := janInferenceClient.CreateChatCompletion(ctx, key, *chatCompletionRequest)
-	if err != nil {
-		return responsetypes.Response{}, common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
-	}
-
-	// Process reasoning content
-	var processedResponse *openai.ChatCompletionResponse = chatResponse
-
-	// Append assistant's response to conversation (only if conversation exists)
-	if conv != nil && len(processedResponse.Choices) > 0 && processedResponse.Choices[0].Message.Content != "" {
-		assistantMessage := openai.ChatCompletionMessage{
-			Role:    openai.ChatMessageRoleAssistant,
-			Content: processedResponse.Choices[0].Message.Content,
-		}
-		success, err := h.responseService.AppendMessagesToConversation(reqCtx, conv, []openai.ChatCompletionMessage{assistantMessage}, &responseEntity.ID)
-		if !success {
-			// Log error but don't fail the response
-			logger.GetLogger().Errorf("Failed to append assistant response to conversation: %s - %s", err.GetCode(), err.Error())
-		}
-	}
-
-	// Convert chat completion response to response format
-	responseData := h.convertFromChatCompletionResponse(processedResponse, request, conv, responseEntity)
-
-	// Update response with all fields at once (optimized to prevent N+1 queries)
-	updates := &ResponseUpdates{
-		Status: ptr.ToString(string(ResponseStatusCompleted)),
-		Output: responseData.Output,
-		Usage:  responseData.Usage,
-	}
-	success, updateErr := h.responseService.UpdateResponseFields(reqCtx, responseEntity.ID, updates)
-	if !success {
-		// Log error but don't fail the request since response is already generated
-		logger.GetLogger().Errorf("Failed to update response fields: %s - %s\n", updateErr.GetCode(), updateErr.Error())
-	}
-
-	return responseData, nil
-}
-
-// convertFromChatCompletionResponse converts a ChatCompletionResponse to a Response
-func (h *NonStreamModelService) convertFromChatCompletionResponse(chatResp *openai.ChatCompletionResponse, req *requesttypes.CreateResponseRequest, conv *conversation.Conversation, responseEntity *Response) responsetypes.Response {
-
-	// Extract the content and reasoning from the first choice
-	var outputText string
-	var reasoningContent string
-
-	if len(chatResp.Choices) > 0 {
-		choice := chatResp.Choices[0]
-		outputText = choice.Message.Content
-
-		// Extract reasoning content if present
-		if choice.Message.ReasoningContent != "" {
-			reasoningContent = choice.Message.ReasoningContent
-		}
-	}
-
-	// Convert input back to the original format for response
-	var responseInput any
-	switch v := req.Input.(type) {
-	case string:
-		responseInput = v
-	case []any:
-		responseInput = v
-	default:
-		responseInput = req.Input
-	}
-
-	// Create output using proper ResponseOutput structure
-	var output []responsetypes.ResponseOutput
-
-	// Add reasoning content if present
-	if reasoningContent != "" {
-		output = append(output, responsetypes.ResponseOutput{
-			Type: responsetypes.OutputTypeReasoning,
-			Reasoning: &responsetypes.ReasoningOutput{
-				Task:   "reasoning",
-				Result: reasoningContent,
-				Steps:  []responsetypes.ReasoningStep{},
-			},
-		})
-	}
-
-	// Add text content if present
-	if outputText != "" {
-		output = append(output, responsetypes.ResponseOutput{
-			Type: responsetypes.OutputTypeText,
-			Text: &responsetypes.TextOutput{
-				Value:       outputText,
-				Annotations: []responsetypes.Annotation{},
-			},
-		})
-	}
-
-	// Create usage information using proper DetailedUsage struct
-	usage := &responsetypes.DetailedUsage{
-		InputTokens:  chatResp.Usage.PromptTokens,
-		OutputTokens: chatResp.Usage.CompletionTokens,
-		TotalTokens:  chatResp.Usage.TotalTokens,
-		InputTokensDetails: &responsetypes.TokenDetails{
-			CachedTokens: 0,
-		},
-		OutputTokensDetails: &responsetypes.TokenDetails{
-			ReasoningTokens: 0,
-		},
-	}
-
-	// Create conversation info
-	var conversationInfo *responsetypes.ConversationInfo
-	if conv != nil {
-		conversationInfo = &responsetypes.ConversationInfo{
-			ID: conv.PublicID,
-		}
-	}
-
-	response := responsetypes.Response{
-		ID:           responseEntity.PublicID,
-		Object:       "response",
-		Created:      chatResp.Created,
-		Model:        chatResp.Model,
-		Status:       responsetypes.ResponseStatusCompleted,
-		Input:        responseInput,
-		Output:       output,
-		Usage:        usage,
-		Conversation: conversationInfo,
-		// Add other OpenAI response fields
-		Error:              nil,
-		IncompleteDetails:  nil,
-		Instructions:       nil,
-		MaxOutputTokens:    req.MaxTokens,
-		ParallelToolCalls:  false,
-		PreviousResponseID: nil,
-		Reasoning: &responsetypes.Reasoning{
-			Effort: nil,
-			Summary: func() *string {
-				if reasoningContent != "" {
-					return &reasoningContent
-				}
-				return nil
-			}(),
-		},
-		Store:       true,
-		Temperature: req.Temperature,
-		Text: &responsetypes.TextFormat{
-			Format: &responsetypes.FormatType{
-				Type: "text",
-			},
-		},
-		TopP:       req.TopP,
-		Truncation: "disabled",
-		User:       nil,
-		Metadata:   req.Metadata,
-	}
-
-	return response
-}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_model_service.go b/apps/jan-api-gateway/application/app/domain/response/response_model_service.go
deleted file mode 100644
index 119a2453..00000000
--- a/apps/jan-api-gateway/application/app/domain/response/response_model_service.go
+++ /dev/null
@@ -1,399 +0,0 @@
-package response
-
-import (
-	"context"
-	"encoding/json"
-	"net/http"
-	"strconv"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	inferencemodelregistry "menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-// ResponseCreationResult represents the result of creating a response
-type ResponseCreationResult struct {
-	Response              *Response
-	Conversation          *conversation.Conversation
-	ChatCompletionRequest *openai.ChatCompletionRequest
-	APIKey                string
-	IsStreaming           bool
-}
-
-// ResponseModelService handles the business logic for response API endpoints
-type ResponseModelService struct {
-	UserService           *user.UserService
-	authService           *auth.AuthService
-	apikeyService         *apikey.ApiKeyService
-	conversationService   *conversation.ConversationService
-	responseService       *ResponseService
-	streamModelService    *StreamModelService
-	nonStreamModelService *NonStreamModelService
-	modelRegistry         *inferencemodelregistry.InferenceModelRegistry
-}
-
-// NewResponseModelService creates a new ResponseModelService instance
-func NewResponseModelService(
-	userService *user.UserService,
-	authService *auth.AuthService,
-	apikeyService *apikey.ApiKeyService,
-	conversationService *conversation.ConversationService,
-	responseService *ResponseService,
-	modelRegistry *inferencemodelregistry.InferenceModelRegistry,
-) *ResponseModelService {
-	responseModelService := &ResponseModelService{
-		UserService:         userService,
-		authService:         authService,
-		apikeyService:       apikeyService,
-		conversationService: conversationService,
-		responseService:     responseService,
-		modelRegistry:       modelRegistry,
-	}
-
-	// Initialize specialized handlers
-	responseModelService.streamModelService = NewStreamModelService(responseModelService)
-	responseModelService.nonStreamModelService = NewNonStreamModelService(responseModelService)
-
-	return responseModelService
-}
-
-// CreateResponse handles the business logic for creating a response
-// Returns domain objects and business logic results, no HTTP concerns
-func (h *ResponseModelService) CreateResponse(ctx context.Context, userID uint, request *requesttypes.CreateResponseRequest) (*ResponseCreationResult, *common.Error) {
-	// Validate the request
-	success, err := ValidateCreateResponseRequest(request)
-	if !success {
-		return nil, err
-	}
-
-	// TODO add the logic to get the API key for the user
-	key := ""
-
-	// Check if model exists in registry
-	mToE := h.modelRegistry.GetModelToEndpoints(ctx)
-	endpoints, ok := mToE[request.Model]
-	if !ok {
-		return nil, common.NewErrorWithMessage("Model validation error", "h8i9j0k1-l2m3-4567-hijk-890123456789")
-	}
-
-	// Convert response request to chat completion request using domain service
-	chatCompletionRequest := h.responseService.ConvertToChatCompletionRequest(request)
-	if chatCompletionRequest == nil {
-		return nil, common.NewErrorWithMessage("Input validation error", "i9j0k1l2-m3n4-5678-ijkl-901234567890")
-	}
-
-	// Check if model endpoint exists
-	janInferenceClient := janinference.NewJanInferenceClient(ctx)
-	endpointExists := false
-	for _, endpoint := range endpoints {
-		if endpoint == janInferenceClient.BaseURL {
-			endpointExists = true
-			break
-		}
-	}
-
-	if !endpointExists {
-		return nil, common.NewErrorWithMessage("Model validation error", "h8i9j0k1-l2m3-4567-hijk-890123456789")
-	}
-
-	// Handle conversation logic using domain service
-	conversation, err := h.responseService.HandleConversation(ctx, userID, request)
-	if err != nil {
-		return nil, err
-	}
-
-	// If previous_response_id is provided, prepend conversation history to input messages
-	if request.PreviousResponseID != nil && *request.PreviousResponseID != "" {
-		conversationMessages, err := h.responseService.ConvertConversationItemsToMessages(ctx, conversation)
-		if err != nil {
-			return nil, err
-		}
-		// Prepend conversation history to the input messages
-		chatCompletionRequest.Messages = append(conversationMessages, chatCompletionRequest.Messages...)
-	}
-
-	// Create response parameters
-	responseParams := &ResponseParams{
-		MaxTokens:         request.MaxTokens,
-		Temperature:       request.Temperature,
-		TopP:              request.TopP,
-		TopK:              request.TopK,
-		RepetitionPenalty: request.RepetitionPenalty,
-		Seed:              request.Seed,
-		Stop:              request.Stop,
-		PresencePenalty:   request.PresencePenalty,
-		FrequencyPenalty:  request.FrequencyPenalty,
-		LogitBias:         request.LogitBias,
-		ResponseFormat:    request.ResponseFormat,
-		Metadata:          request.Metadata,
-		Stream:            request.Stream,
-		Background:        request.Background,
-		Timeout:           request.Timeout,
-		User:              request.User,
-	}
-
-	// Create response record in database
-	var conversationID *uint
-	if conversation != nil {
-		conversationID = &conversation.ID
-	}
-
-	// Convert input to JSON string
-	inputJSON, jsonErr := json.Marshal(request.Input)
-	if jsonErr != nil {
-		return nil, common.NewError(jsonErr, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
-	}
-
-	// Build Response object from parameters
-	response := NewResponse(userID, conversationID, request.Model, string(inputJSON), request.SystemPrompt, responseParams)
-
-	responseEntity, err := h.responseService.CreateResponse(ctx, response)
-	if err != nil {
-		return nil, err
-	}
-
-	// Append input messages to conversation (only if conversation exists)
-	if conversation != nil {
-		success, err := h.responseService.AppendMessagesToConversation(ctx, conversation, chatCompletionRequest.Messages, &responseEntity.ID)
-		if !success {
-			return nil, err
-		}
-	}
-
-	// Return the result for the interface layer to handle
-	isStreaming := request.Stream != nil && *request.Stream
-	return &ResponseCreationResult{
-		Response:              responseEntity,
-		Conversation:          conversation,
-		ChatCompletionRequest: chatCompletionRequest,
-		APIKey:                key,
-		IsStreaming:           isStreaming,
-	}, nil
-}
-
-// handleConversation handles conversation creation or loading based on the request
-
-// GetResponse handles the business logic for getting a response
-func (h *ResponseModelService) GetResponseHandler(reqCtx *gin.Context) {
-	// Get response from middleware context
-	responseEntity, ok := GetResponseFromContext(reqCtx)
-	if !ok {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "a1b2c3d4-e5f6-7890-abcd-ef1234567890", "response not found in context")
-		return
-	}
-
-	result, err := h.GetResponse(responseEntity)
-	if err != nil {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
-		return
-	}
-
-	h.sendSuccessResponse(reqCtx, result)
-}
-
-// doGetResponse performs the business logic for getting a response
-func (h *ResponseModelService) GetResponse(responseEntity *Response) (responsetypes.Response, *common.Error) {
-	// Convert domain response to API response using domain service
-	apiResponse := h.responseService.ConvertDomainResponseToAPIResponse(responseEntity)
-	return apiResponse, nil
-}
-
-// DeleteResponse handles the business logic for deleting a response
-func (h *ResponseModelService) DeleteResponseHandler(reqCtx *gin.Context) {
-	// Get response from middleware context
-	responseEntity, ok := GetResponseFromContext(reqCtx)
-	if !ok {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "b2c3d4e5-f6g7-8901-bcde-f23456789012", "response not found in context")
-		return
-	}
-
-	result, err := h.DeleteResponse(reqCtx, responseEntity)
-	if err != nil {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
-		return
-	}
-
-	h.sendSuccessResponse(reqCtx, result)
-}
-
-// doDeleteResponse performs the business logic for deleting a response
-func (h *ResponseModelService) DeleteResponse(reqCtx *gin.Context, responseEntity *Response) (responsetypes.Response, *common.Error) {
-	// Delete the response from database
-	success, err := h.responseService.DeleteResponse(reqCtx, responseEntity.ID)
-	if !success {
-		return responsetypes.Response{}, err
-	}
-
-	// Return the deleted response data
-	deletedResponse := responsetypes.Response{
-		ID:          responseEntity.PublicID,
-		Object:      "response",
-		Created:     responseEntity.CreatedAt.Unix(),
-		Model:       responseEntity.Model,
-		Status:      responsetypes.ResponseStatusCancelled,
-		CancelledAt: ptr.ToInt64(time.Now().Unix()),
-	}
-
-	return deletedResponse, nil
-}
-
-// CancelResponse handles the business logic for cancelling a response
-func (h *ResponseModelService) CancelResponseHandler(reqCtx *gin.Context) {
-	// Get response from middleware context
-	responseEntity, ok := GetResponseFromContext(reqCtx)
-	if !ok {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "d4e5f6g7-h8i9-0123-defg-456789012345", "response not found in context")
-		return
-	}
-
-	result, err := h.CancelResponse(responseEntity)
-	if err != nil {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
-		return
-	}
-
-	h.sendSuccessResponse(reqCtx, result)
-}
-
-// doCancelResponse performs the business logic for cancelling a response
-func (h *ResponseModelService) CancelResponse(responseEntity *Response) (responsetypes.Response, *common.Error) {
-	// TODO: Implement actual cancellation logic
-	// For now, return the response with cancelled status
-	mockResponse := responsetypes.Response{
-		ID:          responseEntity.PublicID,
-		Object:      "response",
-		Created:     responseEntity.CreatedAt.Unix(),
-		Model:       responseEntity.Model,
-		Status:      responsetypes.ResponseStatusCancelled,
-		CancelledAt: ptr.ToInt64(time.Now().Unix()),
-	}
-
-	return mockResponse, nil
-}
-
-// ListInputItems handles the business logic for listing input items
-func (h *ResponseModelService) ListInputItemsHandler(reqCtx *gin.Context) {
-	// Get response from middleware context
-	responseEntity, ok := GetResponseFromContext(reqCtx)
-	if !ok {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, "e5f6g7h8-i9j0-1234-efgh-567890123456", "response not found in context")
-		return
-	}
-
-	result, err := h.ListInputItems(reqCtx, responseEntity)
-	if err != nil {
-		h.sendErrorResponse(reqCtx, http.StatusBadRequest, err.GetCode(), err.Error())
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, result)
-}
-
-// doListInputItems performs the business logic for listing input items
-func (h *ResponseModelService) ListInputItems(reqCtx *gin.Context, responseEntity *Response) (responsetypes.OpenAIListResponse[responsetypes.InputItem], *common.Error) {
-	// Parse pagination parameters
-	limit := 20 // default limit
-	if limitStr := reqCtx.Query("limit"); limitStr != "" {
-		if parsedLimit, err := strconv.Atoi(limitStr); err == nil && parsedLimit > 0 && parsedLimit <= 100 {
-			limit = parsedLimit
-		}
-	}
-
-	// Get input items for the response (only user role messages)
-	userRole := conversation.ItemRole("user")
-	items, err := h.responseService.GetItemsForResponse(reqCtx, responseEntity.ID, &userRole)
-	if err != nil {
-		return responsetypes.OpenAIListResponse[responsetypes.InputItem]{}, err
-	}
-
-	// Convert conversation items to input items using domain service
-	inputItems := make([]responsetypes.InputItem, 0, len(items))
-	for _, item := range items {
-		inputItem := h.responseService.ConvertConversationItemToInputItem(item)
-		inputItems = append(inputItems, inputItem)
-	}
-
-	// Apply pagination (simple implementation - in production you'd want cursor-based pagination)
-	after := reqCtx.Query("after")
-	before := reqCtx.Query("before")
-
-	var paginatedItems []responsetypes.InputItem
-	var hasMore bool
-
-	if after != "" {
-		// Find items after the specified ID
-		found := false
-		for _, item := range inputItems {
-			if found {
-				paginatedItems = append(paginatedItems, item)
-				if len(paginatedItems) >= limit {
-					break
-				}
-			}
-			if item.ID == after {
-				found = true
-			}
-		}
-	} else if before != "" {
-		// Find items before the specified ID
-		for _, item := range inputItems {
-			if item.ID == before {
-				break
-			}
-			paginatedItems = append(paginatedItems, item)
-			if len(paginatedItems) >= limit {
-				break
-			}
-		}
-	} else {
-		// No pagination, return first N items
-		if len(inputItems) > limit {
-			paginatedItems = inputItems[:limit]
-			hasMore = true
-		} else {
-			paginatedItems = inputItems
-		}
-	}
-
-	// Set pagination metadata
-	var firstID, lastID *string
-	if len(paginatedItems) > 0 {
-		firstID = &paginatedItems[0].ID
-		lastID = &paginatedItems[len(paginatedItems)-1].ID
-	}
-
-	status := responsetypes.ResponseCodeOk
-	objectType := responsetypes.ObjectTypeList
-
-	return responsetypes.OpenAIListResponse[responsetypes.InputItem]{
-		JanStatus: &status,
-		Object:    &objectType,
-		HasMore:   &hasMore,
-		FirstID:   firstID,
-		LastID:    lastID,
-		T:         paginatedItems,
-	}, nil
-}
-
-// sendErrorResponse sends a standardized error response
-func (h *ResponseModelService) sendErrorResponse(reqCtx *gin.Context, statusCode int, errorCode, errorMessage string) {
-	reqCtx.AbortWithStatusJSON(statusCode, responsetypes.ErrorResponse{
-		Code:  errorCode,
-		Error: errorMessage,
-	})
-}
-
-// sendSuccessResponse sends a standardized success response
-func (h *ResponseModelService) sendSuccessResponse(reqCtx *gin.Context, data any) {
-	reqCtx.JSON(http.StatusOK, data.(responsetypes.Response))
-}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_model_stream_service.go b/apps/jan-api-gateway/application/app/domain/response/response_model_stream_service.go
deleted file mode 100644
index 4545765a..00000000
--- a/apps/jan-api-gateway/application/app/domain/response/response_model_stream_service.go
+++ /dev/null
@@ -1,765 +0,0 @@
-package response
-
-import (
-	"bufio"
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-// StreamModelService handles streaming response requests
-type StreamModelService struct {
-	*ResponseModelService
-}
-
-// NewStreamModelService creates a new StreamModelService instance
-func NewStreamModelService(responseModelService *ResponseModelService) *StreamModelService {
-	return &StreamModelService{
-		ResponseModelService: responseModelService,
-	}
-}
-
-// Constants for streaming configuration
-const (
-	RequestTimeout    = 120 * time.Second
-	MinWordsPerChunk  = 6
-	DataPrefix        = "data: "
-	DoneMarker        = "[DONE]"
-	SSEEventFormat    = "event: %s\ndata: %s\n\n"
-	SSEDataFormat     = "data: %s\n\n"
-	ChannelBufferSize = 100
-	ErrorBufferSize   = 10
-)
-
-// validateRequest validates the incoming request
-func (h *StreamModelService) validateRequest(request *requesttypes.CreateResponseRequest) (bool, *common.Error) {
-	if request.Model == "" {
-		return false, common.NewErrorWithMessage("Model is required", "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
-	}
-	if request.Input == nil {
-		return false, common.NewErrorWithMessage("Input is required", "b2c3d4e5-f6g7-8901-bcde-f23456789012")
-	}
-	return true, nil
-}
-
-// checkContextCancellation checks if context was cancelled and sends error to channel
-func (h *StreamModelService) checkContextCancellation(ctx context.Context, errChan chan<- error) bool {
-	select {
-	case <-ctx.Done():
-		errChan <- ctx.Err()
-		return true
-	default:
-		return false
-	}
-}
-
-// marshalAndSendEvent marshals data and sends it to the data channel with proper error handling
-func (h *StreamModelService) marshalAndSendEvent(dataChan chan<- string, eventType string, data any) {
-	eventJSON, err := json.Marshal(data)
-	if err != nil {
-		logger.GetLogger().Errorf("Failed to marshal event: %v", err)
-		return
-	}
-	dataChan <- fmt.Sprintf(SSEEventFormat, eventType, string(eventJSON))
-}
-
-// logStreamingMetrics logs streaming completion metrics
-func (h *StreamModelService) logStreamingMetrics(responseID string, startTime time.Time, wordCount int) {
-	duration := time.Since(startTime)
-	logger.GetLogger().Infof("Streaming completed - ID: %s, Duration: %v, Words: %d",
-		responseID, duration, wordCount)
-}
-
-// createTextDeltaEvent creates a text delta event
-func (h *StreamModelService) createTextDeltaEvent(itemID string, sequenceNumber int, delta string) responsetypes.ResponseOutputTextDeltaEvent {
-	return responsetypes.ResponseOutputTextDeltaEvent{
-		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-			Type:           "response.output_text.delta",
-			SequenceNumber: sequenceNumber,
-		},
-		ItemID:       itemID,
-		OutputIndex:  0,
-		ContentIndex: 0,
-		Delta:        delta,
-		Logprobs:     []responsetypes.Logprob{},
-		Obfuscation:  fmt.Sprintf("%x", time.Now().UnixNano())[:10], // Simple obfuscation
-	}
-}
-
-// CreateStreamResponse handles the business logic for creating a streaming response
-func (h *StreamModelService) CreateStreamResponse(reqCtx *gin.Context, request *requesttypes.CreateResponseRequest, key string, conv *conversation.Conversation, responseEntity *Response, chatCompletionRequest *openai.ChatCompletionRequest) {
-	// Validate request
-	success, err := h.validateRequest(request)
-	if !success {
-		reqCtx.JSON(http.StatusBadRequest, responsetypes.ErrorResponse{
-			Code:  err.GetCode(),
-			Error: err.GetMessage(),
-		})
-		return
-	}
-
-	// Add timeout context
-	ctx, cancel := context.WithTimeout(reqCtx.Request.Context(), RequestTimeout)
-	defer cancel()
-
-	// Use ctx for long-running operations
-	reqCtx.Request = reqCtx.Request.WithContext(ctx)
-
-	// Set up streaming headers (matching completion API format)
-	reqCtx.Header("Content-Type", "text/event-stream")
-	reqCtx.Header("Cache-Control", "no-cache")
-	reqCtx.Header("Connection", "keep-alive")
-	reqCtx.Header("Access-Control-Allow-Origin", "*")
-	reqCtx.Header("Access-Control-Allow-Headers", "Cache-Control")
-
-	// Use the public ID from the response entity
-	responseID := responseEntity.PublicID
-
-	// Create conversation info
-	var conversationInfo *responsetypes.ConversationInfo
-	if conv != nil {
-		conversationInfo = &responsetypes.ConversationInfo{
-			ID: conv.PublicID,
-		}
-	}
-
-	// Convert input back to the original format for response
-	var responseInput any
-	switch v := request.Input.(type) {
-	case string:
-		responseInput = v
-	case []any:
-		responseInput = v
-	default:
-		responseInput = request.Input
-	}
-
-	// Create initial response object
-	response := responsetypes.Response{
-		ID:           responseID,
-		Object:       "response",
-		Created:      time.Now().Unix(),
-		Model:        request.Model,
-		Status:       responsetypes.ResponseStatusRunning,
-		Input:        responseInput,
-		Conversation: conversationInfo,
-		Stream:       ptr.ToBool(true),
-		Temperature:  request.Temperature,
-		TopP:         request.TopP,
-		MaxTokens:    request.MaxTokens,
-		Metadata:     request.Metadata,
-	}
-
-	// Emit response.created event
-	h.emitStreamEvent(reqCtx, "response.created", responsetypes.ResponseCreatedEvent{
-		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-			Type:           "response.created",
-			SequenceNumber: 0,
-		},
-		Response: response,
-	})
-
-	// Note: User messages are already added to conversation by the main response handler
-	// No need to add them again here to avoid duplication
-
-	// Process with Jan inference client for streaming
-	janInferenceClient := janinference.NewJanInferenceClient(reqCtx)
-	streamErr := h.processStreamingResponse(reqCtx, janInferenceClient, key, *chatCompletionRequest, responseID, conv)
-	if streamErr != nil {
-		// Check if context was cancelled (timeout)
-		if reqCtx.Request.Context().Err() == context.DeadlineExceeded {
-			h.emitStreamEvent(reqCtx, "response.error", responsetypes.ResponseErrorEvent{
-				Event:      "response.error",
-				Created:    time.Now().Unix(),
-				ResponseID: responseID,
-				Data: responsetypes.ResponseError{
-					Code: "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
-				},
-			})
-		} else if reqCtx.Request.Context().Err() == context.Canceled {
-			h.emitStreamEvent(reqCtx, "response.error", responsetypes.ResponseErrorEvent{
-				Event:      "response.error",
-				Created:    time.Now().Unix(),
-				ResponseID: responseID,
-				Data: responsetypes.ResponseError{
-					Code: "b2c3d4e5-f6g7-8901-bcde-f23456789012",
-				},
-			})
-		} else {
-			h.emitStreamEvent(reqCtx, "response.error", responsetypes.ResponseErrorEvent{
-				Event:      "response.error",
-				Created:    time.Now().Unix(),
-				ResponseID: responseID,
-				Data: responsetypes.ResponseError{
-					Code: "c3af973c-eada-4e8b-96d9-e92546588cd3",
-				},
-			})
-		}
-		return
-	}
-
-	// Emit response.completed event
-	response.Status = responsetypes.ResponseStatusCompleted
-	h.emitStreamEvent(reqCtx, "response.completed", responsetypes.ResponseCompletedEvent{
-		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-			Type:           "response.completed",
-			SequenceNumber: 9999, // High number to indicate completion
-		},
-		Response: response,
-	})
-}
-
-// emitStreamEvent emits a streaming event (matching completion API SSE format)
-func (h *StreamModelService) emitStreamEvent(reqCtx *gin.Context, eventType string, data any) {
-	// Marshal the data directly without wrapping
-	eventJSON, err := json.Marshal(data)
-	if err != nil {
-		logger.GetLogger().Errorf("Failed to marshal streaming event: %v", err)
-		return
-	}
-
-	// Use proper SSE format
-	reqCtx.Writer.Write([]byte(fmt.Sprintf(SSEEventFormat, eventType, string(eventJSON))))
-	reqCtx.Writer.Flush()
-}
-
-// processStreamingResponse processes the streaming response from Jan inference using two channels
-func (h *StreamModelService) processStreamingResponse(reqCtx *gin.Context, _ *janinference.JanInferenceClient, _ string, request openai.ChatCompletionRequest, responseID string, conv *conversation.Conversation) error {
-	// Create buffered channels for data and errors
-	dataChan := make(chan string, ChannelBufferSize)
-	errChan := make(chan error, ErrorBufferSize)
-
-	var wg sync.WaitGroup
-	wg.Add(1)
-
-	// Start streaming in a goroutine
-	go h.streamResponseToChannel(reqCtx, request, dataChan, errChan, responseID, conv, &wg)
-
-	// Wait for streaming to complete and close channels
-	go func() {
-		wg.Wait()
-		close(dataChan)
-		close(errChan)
-	}()
-
-	// Process data and errors from channels
-	for {
-		select {
-		case line, ok := <-dataChan:
-			if !ok {
-				return nil
-			}
-			_, err := reqCtx.Writer.Write([]byte(line))
-			if err != nil {
-				reqCtx.AbortWithStatusJSON(
-					http.StatusBadRequest,
-					responsetypes.ErrorResponse{
-						Code: "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4",
-					})
-				return err
-			}
-			reqCtx.Writer.Flush()
-		case err := <-errChan:
-			if err != nil {
-				reqCtx.AbortWithStatusJSON(
-					http.StatusBadRequest,
-					responsetypes.ErrorResponse{
-						Code: "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4",
-					})
-				return err
-			}
-		}
-	}
-}
-
-// OpenAIStreamData represents the structure of OpenAI streaming data
-type OpenAIStreamData struct {
-	Choices []struct {
-		Delta struct {
-			Content          string `json:"content"`
-			ReasoningContent string `json:"reasoning_content"`
-		} `json:"delta"`
-	} `json:"choices"`
-}
-
-// parseOpenAIStreamData parses OpenAI streaming data and extracts content
-func (h *StreamModelService) parseOpenAIStreamData(jsonStr string) string {
-	var data OpenAIStreamData
-	if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
-		return ""
-	}
-
-	// Check if choices array is empty to prevent panic
-	if len(data.Choices) == 0 {
-		return ""
-	}
-
-	// Use reasoning_content if content is empty (jan-v1-4b model format)
-	content := data.Choices[0].Delta.Content
-	if content == "" {
-		content = data.Choices[0].Delta.ReasoningContent
-	}
-
-	return content
-}
-
-// extractContentFromOpenAIStream extracts content from OpenAI streaming format
-func (h *StreamModelService) extractContentFromOpenAIStream(chunk string) string {
-	// Format 1: data: {"choices":[{"delta":{"content":"chunk"}}]}
-	if len(chunk) >= 6 && chunk[:6] == DataPrefix {
-		return h.parseOpenAIStreamData(chunk[6:])
-	}
-
-	// Format 2: Direct JSON without "data: " prefix
-	if content := h.parseOpenAIStreamData(chunk); content != "" {
-		return content
-	}
-
-	// Format 3: Simple content string (fallback)
-	if len(chunk) > 0 && chunk[0] == '"' && chunk[len(chunk)-1] == '"' {
-		var content string
-		if err := json.Unmarshal([]byte(chunk), &content); err == nil {
-			return content
-		}
-	}
-
-	return ""
-}
-
-// extractReasoningContentFromOpenAIStream extracts reasoning content from OpenAI streaming format
-func (h *StreamModelService) extractReasoningContentFromOpenAIStream(chunk string) string {
-	// Format 1: data: {"choices":[{"delta":{"reasoning_content":"chunk"}}]}
-	if len(chunk) >= 6 && chunk[:6] == DataPrefix {
-		return h.parseOpenAIStreamReasoningData(chunk[6:])
-	}
-
-	// Format 2: Direct JSON without "data: " prefix
-	if reasoningContent := h.parseOpenAIStreamReasoningData(chunk); reasoningContent != "" {
-		return reasoningContent
-	}
-
-	return ""
-}
-
-// parseOpenAIStreamReasoningData parses OpenAI streaming data and extracts reasoning content
-func (h *StreamModelService) parseOpenAIStreamReasoningData(jsonStr string) string {
-	var data OpenAIStreamData
-	if err := json.Unmarshal([]byte(jsonStr), &data); err != nil {
-		return ""
-	}
-
-	// Check if choices array is empty to prevent panic
-	if len(data.Choices) == 0 {
-		return ""
-	}
-
-	// Extract reasoning content
-	return data.Choices[0].Delta.ReasoningContent
-}
-
-// streamResponseToChannel handles the streaming response and sends data/errors to channels
-func (h *StreamModelService) streamResponseToChannel(reqCtx *gin.Context, request openai.ChatCompletionRequest, dataChan chan<- string, errChan chan<- error, responseID string, conv *conversation.Conversation, wg *sync.WaitGroup) {
-	defer wg.Done()
-
-	startTime := time.Now()
-
-	// Generate item ID for the message
-	itemID, _ := idgen.GenerateSecureID("msg", 42)
-	sequenceNumber := 1
-
-	// Emit response.in_progress event
-	inProgressEvent := responsetypes.ResponseInProgressEvent{
-		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-			Type:           "response.in_progress",
-			SequenceNumber: sequenceNumber,
-		},
-		Response: map[string]any{
-			"id":     responseID,
-			"status": "in_progress",
-		},
-	}
-	eventJSON, _ := json.Marshal(inProgressEvent)
-	dataChan <- fmt.Sprintf("event: response.in_progress\ndata: %s\n\n", string(eventJSON))
-	sequenceNumber++
-
-	// Emit response.output_item.added event
-	outputItemAddedEvent := responsetypes.ResponseOutputItemAddedEvent{
-		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-			Type:           "response.output_item.added",
-			SequenceNumber: sequenceNumber,
-		},
-		OutputIndex: 0,
-		Item: responsetypes.ResponseOutputItem{
-			ID:      itemID,
-			Type:    "message",
-			Status:  string(conversation.ItemStatusInProgress),
-			Content: []responsetypes.ResponseContentPart{},
-			Role:    "assistant",
-		},
-	}
-	eventJSON, _ = json.Marshal(outputItemAddedEvent)
-	dataChan <- fmt.Sprintf("event: response.output_item.added\ndata: %s\n\n", string(eventJSON))
-	sequenceNumber++
-
-	// Emit response.content_part.added event
-	contentPartAddedEvent := responsetypes.ResponseContentPartAddedEvent{
-		BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-			Type:           "response.content_part.added",
-			SequenceNumber: sequenceNumber,
-		},
-		ItemID:       itemID,
-		OutputIndex:  0,
-		ContentIndex: 0,
-		Part: responsetypes.ResponseContentPart{
-			Type:        "output_text",
-			Annotations: []responsetypes.Annotation{},
-			Logprobs:    []responsetypes.Logprob{},
-			Text:        "",
-		},
-	}
-	eventJSON, _ = json.Marshal(contentPartAddedEvent)
-	dataChan <- fmt.Sprintf("event: response.content_part.added\ndata: %s\n\n", string(eventJSON))
-	sequenceNumber++
-
-	// Create a custom streaming client that processes OpenAI streaming format
-	req := janinference.JanInferenceRestyClient.R().SetBody(request)
-	resp, err := req.
-		SetContext(reqCtx.Request.Context()).
-		SetDoNotParseResponse(true).
-		Post("/v1/chat/completions")
-	if err != nil {
-		errChan <- err
-		return
-	}
-	defer resp.RawResponse.Body.Close()
-
-	// Buffer for accumulating content chunks
-	var contentBuffer strings.Builder
-	var fullResponse strings.Builder
-
-	// Buffer for accumulating reasoning content chunks
-	var reasoningBuffer strings.Builder
-	var fullReasoningResponse strings.Builder
-	var reasoningItemID string
-	var reasoningSequenceNumber int
-	var hasReasoningContent bool
-	var reasoningComplete bool
-
-	// Process the stream line by line
-	scanner := bufio.NewScanner(resp.RawResponse.Body)
-	for scanner.Scan() {
-		// Check if context was cancelled
-		if h.checkContextCancellation(reqCtx, errChan) {
-			return
-		}
-
-		line := scanner.Text()
-		if strings.HasPrefix(line, DataPrefix) {
-			data := strings.TrimPrefix(line, DataPrefix)
-			if data == DoneMarker {
-				break
-			}
-
-			// Extract content from OpenAI streaming format
-			content := h.extractContentFromOpenAIStream(data)
-
-			// Handle content - buffer until reasoning is complete
-			if content != "" {
-				contentBuffer.WriteString(content)
-				fullResponse.WriteString(content)
-
-				// Only send content if reasoning is complete or there's no reasoning content
-				if reasoningComplete || !hasReasoningContent {
-					// Check if we have enough words to send
-					bufferedContent := contentBuffer.String()
-					words := strings.Fields(bufferedContent)
-
-					if len(words) >= MinWordsPerChunk {
-						// Create delta event using helper method
-						deltaEvent := h.createTextDeltaEvent(itemID, sequenceNumber, bufferedContent)
-						h.marshalAndSendEvent(dataChan, "response.output_text.delta", deltaEvent)
-						sequenceNumber++
-						// Clear the buffer
-						contentBuffer.Reset()
-					}
-				}
-			}
-
-			// Handle reasoning content separately
-			reasoningContent := h.extractReasoningContentFromOpenAIStream(data)
-			if reasoningContent != "" {
-				// Initialize reasoning item if not already done
-				if !hasReasoningContent {
-					reasoningItemID = fmt.Sprintf("rs_%d", time.Now().UnixNano())
-					reasoningSequenceNumber = sequenceNumber
-					hasReasoningContent = true
-
-					// Emit response.output_item.added event for reasoning
-					reasoningItemAddedEvent := responsetypes.ResponseOutputItemAddedEvent{
-						BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-							Type:           "response.output_item.added",
-							SequenceNumber: reasoningSequenceNumber,
-						},
-						OutputIndex: 0,
-						Item: responsetypes.ResponseOutputItem{
-							ID:      reasoningItemID,
-							Type:    "reasoning",
-							Status:  string(conversation.ItemStatusInProgress),
-							Content: []responsetypes.ResponseContentPart{},
-							Role:    "assistant",
-						},
-					}
-					eventJSON, _ := json.Marshal(reasoningItemAddedEvent)
-					dataChan <- fmt.Sprintf("event: response.output_item.added\ndata: %s\n\n", string(eventJSON))
-					reasoningSequenceNumber++
-
-					// Emit response.reasoning_summary_part.added event
-					reasoningSummaryPartAddedEvent := responsetypes.ResponseReasoningSummaryPartAddedEvent{
-						BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-							Type:           "response.reasoning_summary_part.added",
-							SequenceNumber: reasoningSequenceNumber,
-						},
-						ItemID:       reasoningItemID,
-						OutputIndex:  0,
-						SummaryIndex: 0,
-						Part: struct {
-							Type string `json:"type"`
-							Text string `json:"text"`
-						}{
-							Type: "summary_text",
-							Text: "",
-						},
-					}
-					eventJSON, _ = json.Marshal(reasoningSummaryPartAddedEvent)
-					dataChan <- fmt.Sprintf("event: response.reasoning_summary_part.added\ndata: %s\n\n", string(eventJSON))
-					reasoningSequenceNumber++
-				}
-
-				reasoningBuffer.WriteString(reasoningContent)
-				fullReasoningResponse.WriteString(reasoningContent)
-
-				// Check if we have enough words to send reasoning content
-				bufferedReasoningContent := reasoningBuffer.String()
-				reasoningWords := strings.Fields(bufferedReasoningContent)
-
-				if len(reasoningWords) >= MinWordsPerChunk {
-					// Emit reasoning summary text delta event
-					reasoningSummaryTextDeltaEvent := responsetypes.ResponseReasoningSummaryTextDeltaEvent{
-						BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-							Type:           "response.reasoning_summary_text.delta",
-							SequenceNumber: reasoningSequenceNumber,
-						},
-						ItemID:       reasoningItemID,
-						OutputIndex:  0,
-						SummaryIndex: 0,
-						Delta:        bufferedReasoningContent,
-						Obfuscation:  fmt.Sprintf("%x", time.Now().UnixNano())[:10], // Simple obfuscation
-					}
-					eventJSON, _ := json.Marshal(reasoningSummaryTextDeltaEvent)
-					dataChan <- fmt.Sprintf("event: response.reasoning_summary_text.delta\ndata: %s\n\n", string(eventJSON))
-					reasoningSequenceNumber++
-					// Clear the reasoning buffer
-					reasoningBuffer.Reset()
-				}
-			}
-
-		}
-	}
-
-	// Send any remaining buffered reasoning content
-	if hasReasoningContent && reasoningBuffer.Len() > 0 {
-		reasoningSummaryTextDeltaEvent := responsetypes.ResponseReasoningSummaryTextDeltaEvent{
-			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-				Type:           "response.reasoning_summary_text.delta",
-				SequenceNumber: reasoningSequenceNumber,
-			},
-			ItemID:       reasoningItemID,
-			OutputIndex:  0,
-			SummaryIndex: 0,
-			Delta:        reasoningBuffer.String(),
-			Obfuscation:  fmt.Sprintf("%x", time.Now().UnixNano())[:10], // Simple obfuscation
-		}
-		eventJSON, _ := json.Marshal(reasoningSummaryTextDeltaEvent)
-		dataChan <- fmt.Sprintf("event: response.reasoning_summary_text.delta\ndata: %s\n\n", string(eventJSON))
-		reasoningSequenceNumber++
-	}
-
-	// Handle reasoning completion events
-	if hasReasoningContent && fullReasoningResponse.Len() > 0 {
-		// Emit reasoning summary text done event
-		reasoningSummaryTextDoneEvent := responsetypes.ResponseReasoningSummaryTextDoneEvent{
-			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-				Type:           "response.reasoning_summary_text.done",
-				SequenceNumber: reasoningSequenceNumber,
-			},
-			ItemID:       reasoningItemID,
-			OutputIndex:  0,
-			SummaryIndex: 0,
-			Text:         fullReasoningResponse.String(),
-		}
-		eventJSON, _ := json.Marshal(reasoningSummaryTextDoneEvent)
-		dataChan <- fmt.Sprintf("event: response.reasoning_summary_text.done\ndata: %s\n\n", string(eventJSON))
-		reasoningSequenceNumber++
-
-		// Emit reasoning summary part done event
-		reasoningSummaryPartDoneEvent := responsetypes.ResponseReasoningSummaryPartDoneEvent{
-			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-				Type:           "response.reasoning_summary_part.done",
-				SequenceNumber: reasoningSequenceNumber,
-			},
-			ItemID:       reasoningItemID,
-			OutputIndex:  0,
-			SummaryIndex: 0,
-			Part: struct {
-				Type string `json:"type"`
-				Text string `json:"text"`
-			}{
-				Type: "summary_text",
-				Text: fullReasoningResponse.String(),
-			},
-		}
-		eventJSON, _ = json.Marshal(reasoningSummaryPartDoneEvent)
-		dataChan <- fmt.Sprintf("event: response.reasoning_summary_part.done\ndata: %s\n\n", string(eventJSON))
-		reasoningSequenceNumber++
-
-		// Mark reasoning as complete
-		reasoningComplete = true
-	}
-
-	// Send any remaining buffered content (only once, after reasoning is complete or if there's no reasoning content)
-	if (reasoningComplete || !hasReasoningContent) && contentBuffer.Len() > 0 {
-		deltaEvent := h.createTextDeltaEvent(itemID, sequenceNumber, contentBuffer.String())
-		h.marshalAndSendEvent(dataChan, "response.output_text.delta", deltaEvent)
-		sequenceNumber++
-		contentBuffer.Reset()
-	}
-
-	// Append assistant's complete response to conversation
-	if fullResponse.Len() > 0 && conv != nil {
-		assistantMessage := openai.ChatCompletionMessage{
-			Role:    openai.ChatMessageRoleAssistant,
-			Content: fullResponse.String(),
-		}
-		// Get response entity to get the internal ID
-		responseEntity, err := h.responseService.GetResponseByPublicID(reqCtx, responseID)
-		if err == nil && responseEntity != nil {
-			success, err := h.responseService.AppendMessagesToConversation(reqCtx, conv, []openai.ChatCompletionMessage{assistantMessage}, &responseEntity.ID)
-			if !success {
-				// Log error but don't fail the response
-				logger.GetLogger().Errorf("Failed to append assistant response to conversation: %s - %s", err.GetCode(), err.Error())
-			}
-		}
-	}
-
-	// Emit text done event
-	if fullResponse.Len() > 0 {
-		doneEvent := responsetypes.ResponseOutputTextDoneEvent{
-			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-				Type:           "response.output_text.done",
-				SequenceNumber: sequenceNumber,
-			},
-			ItemID:       itemID,
-			OutputIndex:  0,
-			ContentIndex: 0,
-			Text:         fullResponse.String(),
-			Logprobs:     []responsetypes.Logprob{},
-		}
-		eventJSON, _ := json.Marshal(doneEvent)
-		dataChan <- fmt.Sprintf("event: response.output_text.done\ndata: %s\n\n", string(eventJSON))
-		sequenceNumber++
-
-		// Emit response.content_part.done event
-		contentPartDoneEvent := responsetypes.ResponseContentPartDoneEvent{
-			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-				Type:           "response.content_part.done",
-				SequenceNumber: sequenceNumber,
-			},
-			ItemID:       itemID,
-			OutputIndex:  0,
-			ContentIndex: 0,
-			Part: responsetypes.ResponseContentPart{
-				Type:        "output_text",
-				Annotations: []responsetypes.Annotation{},
-				Logprobs:    []responsetypes.Logprob{},
-				Text:        fullResponse.String(),
-			},
-		}
-		eventJSON, _ = json.Marshal(contentPartDoneEvent)
-		dataChan <- fmt.Sprintf("event: response.content_part.done\ndata: %s\n\n", string(eventJSON))
-		sequenceNumber++
-
-		// Emit response.output_item.done event
-		outputItemDoneEvent := responsetypes.ResponseOutputItemDoneEvent{
-			BaseStreamingEvent: responsetypes.BaseStreamingEvent{
-				Type:           "response.output_item.done",
-				SequenceNumber: sequenceNumber,
-			},
-			OutputIndex: 0,
-			Item: responsetypes.ResponseOutputItem{
-				ID:     itemID,
-				Type:   "message",
-				Status: string(conversation.ItemStatusCompleted),
-				Content: []responsetypes.ResponseContentPart{
-					{
-						Type:        "output_text",
-						Annotations: []responsetypes.Annotation{},
-						Logprobs:    []responsetypes.Logprob{},
-						Text:        fullResponse.String(),
-					},
-				},
-				Role: "assistant",
-			},
-		}
-		eventJSON, _ = json.Marshal(outputItemDoneEvent)
-		dataChan <- fmt.Sprintf("event: response.output_item.done\ndata: %s\n\n", string(eventJSON))
-		sequenceNumber++
-	}
-
-	// Send [DONE] to close the stream
-	dataChan <- fmt.Sprintf(SSEDataFormat, DoneMarker)
-
-	// Update response status to completed and save output
-	// Get response entity by public ID to update status
-	responseEntity, getErr := h.responseService.GetResponseByPublicID(reqCtx, responseID)
-	if getErr == nil && responseEntity != nil {
-		// Prepare output data
-		outputData := map[string]any{
-			"type": "text",
-			"text": map[string]any{
-				"value": fullResponse.String(),
-			},
-		}
-
-		// Update response with all fields at once (optimized to prevent N+1 queries)
-		updates := &ResponseUpdates{
-			Status: ptr.ToString(string(ResponseStatusCompleted)),
-			Output: outputData,
-		}
-		success, updateErr := h.responseService.UpdateResponseFields(reqCtx, responseEntity.ID, updates)
-		if !success {
-			// Log error but don't fail the request since streaming is already complete
-			fmt.Printf("Failed to update response fields: %s - %s\n", updateErr.GetCode(), updateErr.Error())
-		}
-	} else {
-		fmt.Printf("Failed to get response entity for status update: %s - %s\n", getErr.GetCode(), getErr.Error())
-	}
-
-	// Log streaming metrics
-	wordCount := len(strings.Fields(fullResponse.String()))
-	h.logStreamingMetrics(responseID, startTime, wordCount)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_service.go b/apps/jan-api-gateway/application/app/domain/response/response_service.go
deleted file mode 100644
index 0f6e40f9..00000000
--- a/apps/jan-api-gateway/application/app/domain/response/response_service.go
+++ /dev/null
@@ -1,729 +0,0 @@
-package response
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-	responsetypes "menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-// ResponseService handles business logic for responses
-type ResponseService struct {
-	responseRepo        ResponseRepository
-	itemRepo            conversation.ItemRepository
-	conversationService *conversation.ConversationService
-}
-
-// ResponseContextKey represents context keys for responses
-type ResponseContextKey string
-
-const (
-	ResponseContextKeyPublicID ResponseContextKey = "response_id"
-	ResponseContextEntity      ResponseContextKey = "ResponseContextEntity"
-
-	// ClientCreatedRootConversationID is the special conversation ID that indicates a new conversation should be created
-	ClientCreatedRootConversationID = "client-created-root"
-)
-
-// NewResponseService creates a new response service
-func NewResponseService(responseRepo ResponseRepository, itemRepo conversation.ItemRepository, conversationService *conversation.ConversationService) *ResponseService {
-	return &ResponseService{
-		responseRepo:        responseRepo,
-		itemRepo:            itemRepo,
-		conversationService: conversationService,
-	}
-}
-
-// CreateResponse creates a new response using a Response domain object
-func (s *ResponseService) CreateResponse(ctx context.Context, response *Response) (*Response, *common.Error) {
-	return s.CreateResponseWithPrevious(ctx, response, nil)
-}
-
-// CreateResponseWithPrevious creates a new response, optionally linking to a previous response
-func (s *ResponseService) CreateResponseWithPrevious(ctx context.Context, response *Response, previousResponseID *string) (*Response, *common.Error) {
-	// Handle previous_response_id logic
-	if previousResponseID != nil {
-		// Load the previous response
-		previousResponse, err := s.responseRepo.FindByPublicID(ctx, *previousResponseID)
-		if err != nil {
-			return nil, common.NewError(err, "b2c3d4e5-f6g7-8901-bcde-f23456789012")
-		}
-		if previousResponse == nil {
-			return nil, common.NewErrorWithMessage("Previous response not found", "c3d4e5f6-g7h8-9012-cdef-345678901234")
-		}
-
-		// Validate that the previous response belongs to the same user
-		if previousResponse.UserID != response.UserID {
-			return nil, common.NewErrorWithMessage("Previous response does not belong to the current user", "d4e5f6g7-h8i9-0123-defg-456789012345")
-		}
-
-		// Use the previous response's conversation ID
-		response.ConversationID = previousResponse.ConversationID
-		if response.ConversationID == nil {
-			return nil, common.NewErrorWithMessage("Previous response does not belong to any conversation", "e5f6g7h8-i9j0-1234-efgh-567890123456")
-		}
-	}
-
-	// Set the previous response ID
-	response.PreviousResponseID = previousResponseID
-
-	// Generate public ID if not already set
-	if response.PublicID == "" {
-		publicID, err := idgen.GenerateSecureID("resp", 42)
-		if err != nil {
-			return nil, common.NewError(err, "f6g7h8i9-j0k1-2345-fghi-678901234567")
-		}
-		response.PublicID = publicID
-	}
-
-	// Set default values
-	if response.Status == "" {
-		response.Status = ResponseStatusPending
-	}
-
-	// Validate required fields
-	if response.UserID == 0 {
-		return nil, common.NewErrorWithMessage("UserID is required", "m3n4o5p6-q7r8-9012-mnop-345678901234")
-	}
-	if response.Model == "" {
-		return nil, common.NewErrorWithMessage("Model is required", "n4o5p6q7-r8s9-0123-nopq-456789012345")
-	}
-	if response.Input == "" {
-		return nil, common.NewErrorWithMessage("Input is required", "o5p6q7r8-s9t0-1234-opqr-567890123456")
-	}
-
-	if err := s.responseRepo.Create(ctx, response); err != nil {
-		return nil, common.NewError(err, "m3n4o5p6-q7r8-9012-mnop-345678901234")
-	}
-
-	return response, nil
-}
-
-// UpdateResponseStatus updates the status of a response
-func (s *ResponseService) UpdateResponseStatus(ctx context.Context, responseID uint, status ResponseStatus) (bool, *common.Error) {
-	response, err := s.responseRepo.FindByID(ctx, responseID)
-	if err != nil {
-		return false, common.NewError(err, "n4o5p6q7-r8s9-0123-nopq-456789012345")
-	}
-	if response == nil {
-		return false, common.NewErrorWithMessage("Response not found", "o5p6q7r8-s9t0-1234-opqr-567890123456")
-	}
-
-	// Update the response object
-	UpdateResponseStatusOnObject(response, status)
-
-	// Save to database
-	if err := s.responseRepo.Update(ctx, response); err != nil {
-		return false, common.NewError(err, "p6q7r8s9-t0u1-2345-pqrs-678901234567")
-	}
-
-	return true, nil
-}
-
-// UpdateResponseOutput updates the output of a response
-func (s *ResponseService) UpdateResponseOutput(ctx context.Context, responseID uint, output any) (bool, *common.Error) {
-	response, err := s.responseRepo.FindByID(ctx, responseID)
-	if err != nil {
-		return false, common.NewError(err, "q7r8s9t0-u1v2-3456-qrst-789012345678")
-	}
-	if response == nil {
-		return false, common.NewErrorWithMessage("Response not found", "r8s9t0u1-v2w3-4567-rstu-890123456789")
-	}
-
-	// Update the response object
-	if err := UpdateResponseOutputOnObject(response, output); err != nil {
-		return false, err
-	}
-
-	// Save to database
-	if err := s.responseRepo.Update(ctx, response); err != nil {
-		return false, common.NewError(err, "t0u1v2w3-x4y5-6789-tuvw-012345678901")
-	}
-
-	return true, nil
-}
-
-// UpdateResponseUsage updates the usage statistics of a response
-func (s *ResponseService) UpdateResponseUsage(ctx context.Context, responseID uint, usage any) (bool, *common.Error) {
-	response, err := s.responseRepo.FindByID(ctx, responseID)
-	if err != nil {
-		return false, common.NewError(err, "u1v2w3x4-y5z6-7890-uvwx-123456789012")
-	}
-	if response == nil {
-		return false, common.NewErrorWithMessage("Response not found", "v2w3x4y5-z6a7-8901-vwxy-234567890123")
-	}
-
-	// Update the response object
-	if err := UpdateResponseUsageOnObject(response, usage); err != nil {
-		return false, err
-	}
-
-	// Save to database
-	if err := s.responseRepo.Update(ctx, response); err != nil {
-		return false, common.NewError(err, "x4y5z6a7-b8c9-0123-xyza-456789012345")
-	}
-
-	return true, nil
-}
-
-// UpdateResponseError updates the error information of a response
-func (s *ResponseService) UpdateResponseError(ctx context.Context, responseID uint, error any) (bool, *common.Error) {
-	response, err := s.responseRepo.FindByID(ctx, responseID)
-	if err != nil {
-		return false, common.NewError(err, "y5z6a7b8-c9d0-1234-yzab-567890123456")
-	}
-	if response == nil {
-		return false, common.NewErrorWithMessage("Response not found", "z6a7b8c9-d0e1-2345-zabc-678901234567")
-	}
-
-	// Update the response object
-	if err := UpdateResponseErrorOnObject(response, error); err != nil {
-		return false, err
-	}
-
-	// Save to database
-	if err := s.responseRepo.Update(ctx, response); err != nil {
-		return false, common.NewError(err, "b8c9d0e1-f2g3-4567-bcde-890123456789")
-	}
-
-	return true, nil
-}
-
-// UpdateResponseFields updates multiple fields on a response object and saves it once (optimized for N+1 prevention)
-func (s *ResponseService) UpdateResponseFields(ctx context.Context, responseID uint, updates *ResponseUpdates) (bool, *common.Error) {
-	response, err := s.responseRepo.FindByID(ctx, responseID)
-	if err != nil {
-		return false, common.NewError(err, "c9d0e1f2-g3h4-5678-cdef-901234567890")
-	}
-	if response == nil {
-		return false, common.NewErrorWithMessage("Response not found", "d0e1f2g3-h4i5-6789-defg-012345678901")
-	}
-
-	// Apply all updates to the response object
-	if err := ApplyResponseUpdates(response, updates); err != nil {
-		return false, err
-	}
-
-	// Save to database once
-	if err := s.responseRepo.Update(ctx, response); err != nil {
-		return false, common.NewError(err, "e1f2g3h4-i5j6-7890-efgh-123456789012")
-	}
-
-	return true, nil
-}
-
-// GetResponseByPublicID gets a response by public ID
-func (s *ResponseService) GetResponseByPublicID(ctx context.Context, publicID string) (*Response, *common.Error) {
-	response, err := s.responseRepo.FindByPublicID(ctx, publicID)
-	if err != nil {
-		return nil, common.NewError(err, "c9d0e1f2-g3h4-5678-cdef-901234567890")
-	}
-	return response, nil
-}
-
-// GetResponsesByUserID gets responses for a specific user
-func (s *ResponseService) GetResponsesByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*Response, *common.Error) {
-	responses, err := s.responseRepo.FindByUserID(ctx, userID, pagination)
-	if err != nil {
-		return nil, common.NewError(err, "d0e1f2g3-h4i5-6789-defg-012345678901")
-	}
-	return responses, nil
-}
-
-// GetResponsesByConversationID gets responses for a specific conversation
-func (s *ResponseService) GetResponsesByConversationID(ctx context.Context, conversationID uint, pagination *query.Pagination) ([]*Response, *common.Error) {
-	responses, err := s.responseRepo.FindByConversationID(ctx, conversationID, pagination)
-	if err != nil {
-		return nil, common.NewError(err, "e1f2g3h4-i5j6-7890-efgh-123456789012")
-	}
-	return responses, nil
-}
-
-// DeleteResponse deletes a response
-func (s *ResponseService) DeleteResponse(ctx context.Context, responseID uint) (bool, *common.Error) {
-	if err := s.responseRepo.DeleteByID(ctx, responseID); err != nil {
-		return false, common.NewError(err, "f2g3h4i5-j6k7-8901-fghi-234567890123")
-	}
-	return true, nil
-}
-
-// CreateItemsForResponse creates items for a specific response
-func (s *ResponseService) CreateItemsForResponse(ctx context.Context, responseID uint, conversationID uint, items []*conversation.Item) ([]*conversation.Item, *common.Error) {
-	response, err := s.responseRepo.FindByID(ctx, responseID)
-	if err != nil {
-		return nil, common.NewError(err, "g3h4i5j6-k7l8-9012-ghij-345678901234")
-	}
-	if response == nil {
-		return nil, common.NewErrorWithMessage("Response not found", "h4i5j6k7-l8m9-0123-hijk-456789012345")
-	}
-
-	// Validate that the response belongs to the specified conversation
-	if response.ConversationID == nil || *response.ConversationID != conversationID {
-		return nil, common.NewErrorWithMessage("Response does not belong to the specified conversation", "i5j6k7l8-m9n0-1234-ijkl-567890123456")
-	}
-
-	var createdItems []*conversation.Item
-	for _, itemData := range items {
-		// Generate public ID for the item
-		publicID, err := idgen.GenerateSecureID("msg", 42)
-		if err != nil {
-			return nil, common.NewError(err, "j6k7l8m9-n0o1-2345-jklm-678901234567")
-		}
-
-		item := conversation.NewItem(
-			publicID,
-			itemData.Type,
-			*itemData.Role,
-			itemData.Content,
-			conversationID,
-			&responseID,
-		)
-
-		if err := s.itemRepo.Create(ctx, item); err != nil {
-			return nil, common.NewError(err, "k7l8m9n0-o1p2-3456-klmn-789012345678")
-		}
-
-		createdItems = append(createdItems, item)
-	}
-
-	return createdItems, nil
-}
-
-// GetItemsForResponse gets items that belong to a specific response, optionally filtered by role
-func (s *ResponseService) GetItemsForResponse(ctx context.Context, responseID uint, itemRole *conversation.ItemRole) ([]*conversation.Item, *common.Error) {
-	response, err := s.responseRepo.FindByID(ctx, responseID)
-	if err != nil {
-		return nil, common.NewError(err, "l8m9n0o1-p2q3-4567-lmno-890123456789")
-	}
-	if response == nil {
-		return nil, common.NewErrorWithMessage("Response not found", "m9n0o1p2-q3r4-5678-mnop-901234567890")
-	}
-
-	// Create filter for database query
-	filter := conversation.ItemFilter{
-		ConversationID: response.ConversationID,
-		ResponseID:     &responseID,
-		Role:           itemRole,
-	}
-
-	// Get items using database filter (more efficient than in-memory filtering)
-	items, err := s.itemRepo.FindByFilter(ctx, filter, nil)
-	if err != nil {
-		return nil, common.NewError(err, "n0o1p2q3-r4s5-6789-nopq-012345678901")
-	}
-
-	return items, nil
-}
-
-// CreateResponseFromRequest creates a response from an API request structure
-func (s *ResponseService) CreateResponseFromRequest(ctx context.Context, userID uint, req *ResponseRequest) (*Response, *common.Error) {
-	// Convert input to JSON string
-	inputJSON, jsonErr := json.Marshal(req.Input)
-	if jsonErr != nil {
-		return nil, common.NewError(jsonErr, "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
-	}
-
-	// Build Response object from request
-	response := &Response{
-		UserID:             userID,
-		ConversationID:     nil, // Will be set by CreateResponseWithPrevious if previousResponseID is provided
-		PreviousResponseID: req.PreviousResponseID,
-		Model:              req.Model,
-		Input:              string(inputJSON),
-		SystemPrompt:       nil,
-		Status:             ResponseStatusPending,
-		Stream:             req.Stream,
-	}
-
-	// Create the response with previous_response_id handling
-	return s.CreateResponseWithPrevious(ctx, response, req.PreviousResponseID)
-}
-
-// ResponseRequest represents the API request structure for creating a response
-type ResponseRequest struct {
-	Model              string  `json:"model"`
-	PreviousResponseID *string `json:"previous_response_id,omitempty"`
-	Input              any     `json:"input"`
-	Stream             *bool   `json:"stream,omitempty"`
-}
-
-// GetResponseMiddleWare creates middleware to load response by public ID and set it in context
-func (s *ResponseService) GetResponseMiddleWare() gin.HandlerFunc {
-	return func(reqCtx *gin.Context) {
-		ctx := reqCtx.Request.Context()
-		publicID := reqCtx.Param(string(ResponseContextKeyPublicID))
-		if publicID == "" {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responsetypes.ErrorResponse{
-				Code:  "r8s9t0u1-v2w3-4567-rstu-890123456789",
-				Error: "missing response public ID",
-			})
-			return
-		}
-		user, ok := auth.GetUserFromContext(reqCtx)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responsetypes.ErrorResponse{
-				Code: "s9t0u1v2-w3x4-5678-stuv-901234567890",
-			})
-			return
-		}
-		entities, err := s.responseRepo.FindByFilter(ctx, ResponseFilter{
-			PublicID: &publicID,
-			UserID:   &user.ID,
-		}, nil)
-
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responsetypes.ErrorResponse{
-				Code:  "t0u1v2w3-x4y5-6789-tuvw-012345678901",
-				Error: err.Error(),
-			})
-			return
-		}
-
-		if len(entities) == 0 {
-			reqCtx.AbortWithStatusJSON(http.StatusNotFound, responsetypes.ErrorResponse{
-				Code: "u1v2w3x4-y5z6-7890-uvwx-123456789012",
-			})
-			return
-		}
-
-		SetResponseFromContext(reqCtx, entities[0])
-		reqCtx.Next()
-	}
-}
-
-// SetResponseFromContext sets a response in the gin context
-func SetResponseFromContext(reqCtx *gin.Context, resp *Response) {
-	reqCtx.Set(string(ResponseContextEntity), resp)
-}
-
-// GetResponseFromContext gets a response from the gin context
-func GetResponseFromContext(reqCtx *gin.Context) (*Response, bool) {
-	resp, ok := reqCtx.Get(string(ResponseContextEntity))
-	if !ok {
-		return nil, false
-	}
-	response, ok := resp.(*Response)
-	return response, ok
-}
-
-// ProcessResponseRequest processes a response request and returns the appropriate handler
-func (s *ResponseService) ProcessResponseRequest(ctx context.Context, userID uint, req *ResponseRequest) (*Response, *common.Error) {
-	// Create response from request
-	responseEntity, err := s.CreateResponseFromRequest(ctx, userID, req)
-	if err != nil {
-		return nil, err
-	}
-
-	return responseEntity, nil
-}
-
-// ConvertDomainResponseToAPIResponse converts a domain response to API response format
-func (s *ResponseService) ConvertDomainResponseToAPIResponse(responseEntity *Response) responsetypes.Response {
-	apiResponse := responsetypes.Response{
-		ID:      responseEntity.PublicID,
-		Object:  "response",
-		Created: responseEntity.CreatedAt.Unix(),
-		Model:   responseEntity.Model,
-		Status:  responsetypes.ResponseStatus(responseEntity.Status),
-		Input:   responseEntity.Input,
-	}
-
-	// Add conversation if exists
-	if responseEntity.ConversationID != nil {
-		apiResponse.Conversation = &responsetypes.ConversationInfo{
-			ID: fmt.Sprintf("conv_%d", *responseEntity.ConversationID),
-		}
-	}
-
-	// Add timestamps
-	if responseEntity.CompletedAt != nil {
-		apiResponse.CompletedAt = ptr.ToInt64(responseEntity.CompletedAt.Unix())
-	}
-	if responseEntity.CancelledAt != nil {
-		apiResponse.CancelledAt = ptr.ToInt64(responseEntity.CancelledAt.Unix())
-	}
-	if responseEntity.FailedAt != nil {
-		apiResponse.FailedAt = ptr.ToInt64(responseEntity.FailedAt.Unix())
-	}
-
-	// Parse output if exists
-	if responseEntity.Output != nil {
-		var output any
-		if err := json.Unmarshal([]byte(*responseEntity.Output), &output); err == nil {
-			apiResponse.Output = output
-		}
-	}
-
-	// Parse usage if exists
-	if responseEntity.Usage != nil {
-		var usage responsetypes.DetailedUsage
-		if err := json.Unmarshal([]byte(*responseEntity.Usage), &usage); err == nil {
-			apiResponse.Usage = &usage
-		}
-	}
-
-	// Parse error if exists
-	if responseEntity.Error != nil {
-		var errorData responsetypes.ResponseError
-		if err := json.Unmarshal([]byte(*responseEntity.Error), &errorData); err == nil {
-			apiResponse.Error = &errorData
-		}
-	}
-
-	return apiResponse
-}
-
-// ConvertConversationItemToInputItem converts a conversation item to input item format
-func (s *ResponseService) ConvertConversationItemToInputItem(item *conversation.Item) responsetypes.InputItem {
-	inputItem := responsetypes.InputItem{
-		ID:      item.PublicID,
-		Object:  "input_item",
-		Created: item.CreatedAt.Unix(),
-		Type:    requesttypes.InputType(item.Type),
-	}
-
-	if len(item.Content) > 0 {
-		for _, content := range item.Content {
-			if content.Type == "text" && content.Text != nil {
-				inputItem.Text = &content.Text.Value
-				break
-			} else if content.Type == "input_text" && content.InputText != nil {
-				inputItem.Text = content.InputText
-				break
-			}
-		}
-	}
-
-	return inputItem
-}
-
-// HandleConversation handles conversation creation and management for responses
-func (s *ResponseService) HandleConversation(ctx context.Context, userID uint, request *requesttypes.CreateResponseRequest) (*conversation.Conversation, *common.Error) {
-	// If store is explicitly set to false, don't create or use any conversation
-	if request.Store != nil && !*request.Store {
-		return nil, nil
-	}
-
-	// If previous_response_id is provided, load the conversation from the previous response
-	if request.PreviousResponseID != nil && *request.PreviousResponseID != "" {
-		// Load the previous response
-		previousResponse, err := s.GetResponseByPublicID(ctx, *request.PreviousResponseID)
-		if err != nil {
-			return nil, err
-		}
-		if previousResponse == nil {
-			return nil, common.NewErrorWithMessage("Previous response not found", "o1p2q3r4-s5t6-7890-opqr-123456789012")
-		}
-
-		// Validate that the previous response belongs to the same user
-		if previousResponse.UserID != userID {
-			return nil, common.NewErrorWithMessage("Previous response does not belong to the current user", "p2q3r4s5-t6u7-8901-pqrs-234567890123")
-		}
-
-		// Load the conversation from the previous response
-		if previousResponse.ConversationID == nil {
-			return nil, common.NewErrorWithMessage("Previous response does not belong to any conversation", "q3r4s5t6-u7v8-9012-qrst-345678901234")
-		}
-
-		conv, err := s.conversationService.GetConversationByID(ctx, *previousResponse.ConversationID)
-		if err != nil {
-			return nil, err
-		}
-		return conv, nil
-	}
-
-	// Check if conversation is specified and not 'client-created-root'
-	if request.Conversation != nil && *request.Conversation != "" && *request.Conversation != ClientCreatedRootConversationID {
-		// Load existing conversation
-		conv, err := s.conversationService.GetConversationByPublicIDAndUserID(ctx, *request.Conversation, userID)
-		if err != nil {
-			return nil, err
-		}
-		return conv, nil
-	}
-
-	// Create new conversation
-	conv, err := s.conversationService.CreateConversation(ctx, userID, nil, true, nil)
-	if err != nil {
-		return nil, err
-	}
-
-	return conv, nil
-}
-
-// AppendMessagesToConversation appends messages to a conversation
-func (s *ResponseService) AppendMessagesToConversation(ctx context.Context, conv *conversation.Conversation, messages []openai.ChatCompletionMessage, responseID *uint) (bool, *common.Error) {
-	// Convert OpenAI messages to conversation items
-	items := make([]*conversation.Item, 0, len(messages))
-	for _, msg := range messages {
-		// Generate public ID for the item
-		publicID, err := idgen.GenerateSecureID("msg", 42)
-		if err != nil {
-			return false, common.NewErrorWithMessage("Failed to generate item ID", "u7v8w9x0-y1z2-3456-uvwx-789012345678")
-		}
-
-		// Convert role
-		var role conversation.ItemRole
-		switch msg.Role {
-		case openai.ChatMessageRoleSystem:
-			role = conversation.ItemRoleSystem
-		case openai.ChatMessageRoleUser:
-			role = conversation.ItemRoleUser
-		case openai.ChatMessageRoleAssistant:
-			role = conversation.ItemRoleAssistant
-		default:
-			role = conversation.ItemRoleUser
-		}
-
-		// Convert content
-		content := make([]conversation.Content, 0, len(msg.MultiContent))
-		for _, contentPart := range msg.MultiContent {
-			if contentPart.Type == openai.ChatMessagePartTypeText {
-				content = append(content, conversation.NewTextContent(contentPart.Text))
-			}
-		}
-
-		// If no multi-content, use simple text content
-		if len(content) == 0 && msg.Content != "" {
-			content = append(content, conversation.NewTextContent(msg.Content))
-		}
-
-		item := conversation.NewItem(
-			publicID,
-			conversation.ItemTypeMessage,
-			role,
-			content,
-			conv.ID,
-			responseID,
-		)
-
-		items = append(items, item)
-	}
-
-	// Add items to conversation
-	if len(items) > 0 {
-		_, err := s.conversationService.AddMultipleItems(ctx, conv, conv.UserID, items)
-		if err != nil {
-			return false, err
-		}
-	}
-
-	return true, nil
-}
-
-// ConvertToChatCompletionRequest converts a response request to OpenAI chat completion request
-func (s *ResponseService) ConvertToChatCompletionRequest(req *requesttypes.CreateResponseRequest) *openai.ChatCompletionRequest {
-	chatReq := &openai.ChatCompletionRequest{
-		Model:    req.Model,
-		Messages: make([]openai.ChatCompletionMessage, 0),
-	}
-
-	// Add system message if provided
-	if req.SystemPrompt != nil && *req.SystemPrompt != "" {
-		chatReq.Messages = append(chatReq.Messages, openai.ChatCompletionMessage{
-			Role:    openai.ChatMessageRoleSystem,
-			Content: *req.SystemPrompt,
-		})
-	}
-
-	// Add user input as message
-	if req.Input != nil {
-		// Try to parse input as JSON array of messages first
-		var messages []openai.ChatCompletionMessage
-		if err := json.Unmarshal([]byte(fmt.Sprintf("%v", req.Input)), &messages); err == nil {
-			// Input is an array of messages
-			chatReq.Messages = append(chatReq.Messages, messages...)
-		} else {
-			// Input is a single string message
-			chatReq.Messages = append(chatReq.Messages, openai.ChatCompletionMessage{
-				Role:    openai.ChatMessageRoleUser,
-				Content: fmt.Sprintf("%v", req.Input),
-			})
-		}
-	}
-
-	// Set optional parameters
-	if req.MaxTokens != nil {
-		chatReq.MaxTokens = *req.MaxTokens
-	}
-	if req.Temperature != nil {
-		chatReq.Temperature = float32(*req.Temperature)
-	}
-	if req.TopP != nil {
-		chatReq.TopP = float32(*req.TopP)
-	}
-	if req.Stop != nil {
-		chatReq.Stop = req.Stop
-	}
-	if req.PresencePenalty != nil {
-		chatReq.PresencePenalty = float32(*req.PresencePenalty)
-	}
-	if req.FrequencyPenalty != nil {
-		chatReq.FrequencyPenalty = float32(*req.FrequencyPenalty)
-	}
-	if req.User != nil {
-		chatReq.User = *req.User
-	}
-
-	return chatReq
-}
-
-// ConvertConversationItemsToMessages converts conversation items to OpenAI chat completion messages
-func (s *ResponseService) ConvertConversationItemsToMessages(ctx context.Context, conv *conversation.Conversation) ([]openai.ChatCompletionMessage, *common.Error) {
-	// Load conversation with items
-	convWithItems, err := s.conversationService.GetConversationByPublicIDAndUserID(ctx, conv.PublicID, conv.UserID)
-	if err != nil {
-		return nil, err
-	}
-
-	// Convert items to messages
-	messages := make([]openai.ChatCompletionMessage, 0, len(convWithItems.Items))
-	for _, item := range convWithItems.Items {
-		// Skip items that don't have a role or content
-		if item.Role == nil || len(item.Content) == 0 {
-			continue
-		}
-
-		// Convert conversation role to OpenAI role
-		var openaiRole string
-		switch *item.Role {
-		case conversation.ItemRoleSystem:
-			openaiRole = openai.ChatMessageRoleSystem
-		case conversation.ItemRoleUser:
-			openaiRole = openai.ChatMessageRoleUser
-		case conversation.ItemRoleAssistant:
-			openaiRole = openai.ChatMessageRoleAssistant
-		default:
-			openaiRole = openai.ChatMessageRoleUser
-		}
-
-		// Extract text content from the item
-		var content string
-		for _, contentPart := range item.Content {
-			if contentPart.Type == "text" && contentPart.Text != nil {
-				content += contentPart.Text.Value
-			}
-		}
-
-		// Only add message if it has content
-		if content != "" {
-			messages = append(messages, openai.ChatCompletionMessage{
-				Role:    openaiRole,
-				Content: content,
-			})
-		}
-	}
-
-	return messages, nil
-}
diff --git a/apps/jan-api-gateway/application/app/domain/response/response_validator.go b/apps/jan-api-gateway/application/app/domain/response/response_validator.go
deleted file mode 100644
index 365850f7..00000000
--- a/apps/jan-api-gateway/application/app/domain/response/response_validator.go
+++ /dev/null
@@ -1,816 +0,0 @@
-package response
-
-import (
-	"fmt"
-	"strings"
-
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-)
-
-// ValidationError represents a validation error
-type ValidationError struct {
-	Field   string `json:"field"`
-	Message string `json:"message"`
-}
-
-// ValidationErrors represents multiple validation errors
-type ValidationErrors struct {
-	Errors []ValidationError `json:"errors"`
-}
-
-// ValidateCreateResponseRequest validates a CreateResponseRequest
-func ValidateCreateResponseRequest(req *requesttypes.CreateResponseRequest) (bool, *common.Error) {
-	// Validate model
-	if req.Model == "" {
-		return false, common.NewErrorWithMessage("model is required", "a1b2c3d4-e5f6-7890-abcd-ef1234567890")
-	}
-
-	// Validate input
-	if err := validateInput(req.Input); err != nil {
-		return false, common.NewErrorWithMessage("input validation error", "b2c3d4e5-f6g7-8901-bcde-f23456789012")
-	}
-
-	// Validate temperature
-	if req.Temperature != nil {
-		if *req.Temperature < 0 || *req.Temperature > 2 {
-			return false, common.NewErrorWithMessage("temperature must be between 0 and 2", "c3d4e5f6-g7h8-9012-cdef-345678901234")
-		}
-	}
-
-	// Validate top_p
-	if req.TopP != nil {
-		if *req.TopP < 0 || *req.TopP > 1 {
-			return false, common.NewErrorWithMessage("top_p must be between 0 and 1", "d4e5f6g7-h8i9-0123-defg-456789012345")
-		}
-	}
-
-	// Validate top_k
-	if req.TopK != nil {
-		if *req.TopK < 1 {
-			return false, common.NewErrorWithMessage("top_k must be greater than 0", "e5f6g7h8-i9j0-1234-efgh-567890123456")
-		}
-	}
-
-	// Validate repetition_penalty
-	if req.RepetitionPenalty != nil {
-		if *req.RepetitionPenalty < 0 || *req.RepetitionPenalty > 2 {
-			return false, common.NewErrorWithMessage("repetition_penalty must be between 0 and 2", "f6g7h8i9-j0k1-2345-fghi-678901234567")
-		}
-	}
-
-	// Validate presence_penalty
-	if req.PresencePenalty != nil {
-		if *req.PresencePenalty < -2 || *req.PresencePenalty > 2 {
-			return false, common.NewErrorWithMessage("presence_penalty must be between -2 and 2", "g7h8i9j0-k1l2-3456-ghij-789012345678")
-		}
-	}
-
-	// Validate frequency_penalty
-	if req.FrequencyPenalty != nil {
-		if *req.FrequencyPenalty < -2 || *req.FrequencyPenalty > 2 {
-			return false, common.NewErrorWithMessage("frequency_penalty must be between -2 and 2", "h8i9j0k1-l2m3-4567-hijk-890123456789")
-		}
-	}
-
-	// Validate max_tokens
-	if req.MaxTokens != nil {
-		if *req.MaxTokens < 1 {
-			return false, common.NewErrorWithMessage("max_tokens must be greater than 0", "i9j0k1l2-m3n4-5678-ijkl-901234567890")
-		}
-	}
-
-	// Validate timeout
-	if req.Timeout != nil {
-		if *req.Timeout < 1 {
-			return false, common.NewErrorWithMessage("timeout must be greater than 0", "j0k1l2m3-n4o5-6789-jklm-012345678901")
-		}
-	}
-
-	// Validate response_format
-	if req.ResponseFormat != nil {
-		if err := validateResponseFormat(req.ResponseFormat); err != nil {
-			return false, common.NewErrorWithMessage("response_format validation error", "k1l2m3n4-o5p6-7890-klmn-123456789012")
-		}
-	}
-
-	// Validate tools
-	if req.Tools != nil {
-		if err := validateTools(req.Tools); err != nil {
-			return false, common.NewErrorWithMessage("tools validation error", "l2m3n4o5-p6q7-8901-lmno-234567890123")
-		}
-	}
-
-	// Validate tool_choice
-	if req.ToolChoice != nil {
-		if err := validateToolChoice(req.ToolChoice); err != nil {
-			return false, common.NewErrorWithMessage("tool_choice validation error", "m3n4o5p6-q7r8-9012-mnop-345678901234")
-		}
-	}
-
-	return true, nil
-}
-
-// validateInput validates the input field (can be string, array of strings, or structured CreateResponseInput)
-func validateInput(input any) *[]ValidationError {
-	var errors []ValidationError
-
-	if input == nil {
-		errors = append(errors, ValidationError{
-			Field:   "input",
-			Message: "input is required",
-		})
-		return &errors
-	}
-
-	switch v := input.(type) {
-	case string:
-		if v == "" {
-			errors = append(errors, ValidationError{
-				Field:   "input",
-				Message: "input string cannot be empty",
-			})
-		}
-	case []any:
-		if len(v) == 0 {
-			errors = append(errors, ValidationError{
-				Field:   "input",
-				Message: "input array cannot be empty",
-			})
-		}
-		for i, item := range v {
-			switch itemVal := item.(type) {
-			case string:
-				if itemVal == "" {
-					errors = append(errors, ValidationError{
-						Field:   fmt.Sprintf("input[%d]", i),
-						Message: "input array string items cannot be empty",
-					})
-				}
-			case map[string]any:
-				// Validate message object format
-				if err := validateMessageObject(itemVal, i); err != nil {
-					errors = append(errors, *err...)
-				}
-			default:
-				errors = append(errors, ValidationError{
-					Field:   fmt.Sprintf("input[%d]", i),
-					Message: "input array items must be strings or message objects with 'role' and 'content'",
-				})
-			}
-		}
-	case map[string]any:
-		// Check if this is a structured CreateResponseInput object
-		if structuredInput := convertToCreateResponseInput(v); structuredInput != nil {
-			// Delegate to structured input validation
-			if err := validateCreateResponseInput(structuredInput); err != nil {
-				errors = append(errors, *err...)
-			}
-		} else {
-			// Treat as a single message object
-			if err := validateMessageObject(v, 0); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	default:
-		errors = append(errors, ValidationError{
-			Field:   "input",
-			Message: "input must be a string, array of strings/message objects, or structured input object",
-		})
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// convertToCreateResponseInput attempts to convert a map to CreateResponseInput
-// Returns nil if the map doesn't represent a structured input
-func convertToCreateResponseInput(inputMap map[string]any) *requesttypes.CreateResponseInput {
-	// Check if this looks like a structured input by looking for a 'type' field
-	typeField, hasType := inputMap["type"]
-	if !hasType {
-		return nil
-	}
-
-	typeStr, ok := typeField.(string)
-	if !ok {
-		return nil
-	}
-
-	// Check if it's a valid input type
-	switch requesttypes.InputType(typeStr) {
-	case requesttypes.InputTypeText,
-		requesttypes.InputTypeImage,
-		requesttypes.InputTypeFile,
-		requesttypes.InputTypeWebSearch,
-		requesttypes.InputTypeFileSearch,
-		requesttypes.InputTypeStreaming,
-		requesttypes.InputTypeFunctionCalls,
-		requesttypes.InputTypeReasoning:
-		// This looks like a structured input, create the object
-		structuredInput := &requesttypes.CreateResponseInput{
-			Type: requesttypes.InputType(typeStr),
-		}
-
-		// Extract type-specific fields based on the input type
-		switch requesttypes.InputType(typeStr) {
-		case requesttypes.InputTypeText:
-			if text, ok := inputMap["text"].(string); ok {
-				structuredInput.Text = &text
-			}
-		case requesttypes.InputTypeImage:
-			if imageData, ok := inputMap["image"].(map[string]any); ok {
-				imageInput := &requesttypes.ImageInput{}
-				if url, ok := imageData["url"].(string); ok {
-					imageInput.URL = &url
-				}
-				if data, ok := imageData["data"].(string); ok {
-					imageInput.Data = &data
-				}
-				if detail, ok := imageData["detail"].(string); ok {
-					imageInput.Detail = &detail
-				}
-				structuredInput.Image = imageInput
-			}
-		case requesttypes.InputTypeFile:
-			if fileData, ok := inputMap["file"].(map[string]any); ok {
-				if fileID, ok := fileData["file_id"].(string); ok {
-					structuredInput.File = &requesttypes.FileInput{
-						FileID: fileID,
-					}
-				}
-			}
-		case requesttypes.InputTypeWebSearch:
-			if webSearchData, ok := inputMap["web_search"].(map[string]any); ok {
-				if query, ok := webSearchData["query"].(string); ok {
-					webSearchInput := &requesttypes.WebSearchInput{
-						Query: query,
-					}
-					if maxResults, ok := webSearchData["max_results"].(float64); ok {
-						maxResultsInt := int(maxResults)
-						webSearchInput.MaxResults = &maxResultsInt
-					}
-					if searchEngine, ok := webSearchData["search_engine"].(string); ok {
-						webSearchInput.SearchEngine = &searchEngine
-					}
-					structuredInput.WebSearch = webSearchInput
-				}
-			}
-		case requesttypes.InputTypeFileSearch:
-			if fileSearchData, ok := inputMap["file_search"].(map[string]any); ok {
-				if query, ok := fileSearchData["query"].(string); ok {
-					fileSearchInput := &requesttypes.FileSearchInput{
-						Query: query,
-					}
-					if fileIDs, ok := fileSearchData["file_ids"].([]any); ok {
-						fileSearchInput.FileIDs = make([]string, len(fileIDs))
-						for i, id := range fileIDs {
-							if idStr, ok := id.(string); ok {
-								fileSearchInput.FileIDs[i] = idStr
-							}
-						}
-					}
-					if maxResults, ok := fileSearchData["max_results"].(float64); ok {
-						maxResultsInt := int(maxResults)
-						fileSearchInput.MaxResults = &maxResultsInt
-					}
-					structuredInput.FileSearch = fileSearchInput
-				}
-			}
-		case requesttypes.InputTypeStreaming:
-			if streamingData, ok := inputMap["streaming"].(map[string]any); ok {
-				if url, ok := streamingData["url"].(string); ok {
-					streamingInput := &requesttypes.StreamingInput{
-						URL: url,
-					}
-					if method, ok := streamingData["method"].(string); ok {
-						streamingInput.Method = &method
-					}
-					if body, ok := streamingData["body"].(string); ok {
-						streamingInput.Body = &body
-					}
-					if headers, ok := streamingData["headers"].(map[string]any); ok {
-						streamingInput.Headers = make(map[string]string)
-						for k, v := range headers {
-							if vStr, ok := v.(string); ok {
-								streamingInput.Headers[k] = vStr
-							}
-						}
-					}
-					structuredInput.Streaming = streamingInput
-				}
-			}
-		case requesttypes.InputTypeFunctionCalls:
-			if functionCallsData, ok := inputMap["function_calls"].(map[string]any); ok {
-				if calls, ok := functionCallsData["calls"].([]any); ok {
-					functionCallsInput := &requesttypes.FunctionCallsInput{
-						Calls: make([]requesttypes.FunctionCall, len(calls)),
-					}
-					for i, call := range calls {
-						if callData, ok := call.(map[string]any); ok {
-							if name, ok := callData["name"].(string); ok {
-								functionCallsInput.Calls[i] = requesttypes.FunctionCall{
-									Name: name,
-								}
-								if args, ok := callData["arguments"].(map[string]any); ok {
-									functionCallsInput.Calls[i].Arguments = args
-								}
-							}
-						}
-					}
-					structuredInput.FunctionCalls = functionCallsInput
-				}
-			}
-		case requesttypes.InputTypeReasoning:
-			if reasoningData, ok := inputMap["reasoning"].(map[string]any); ok {
-				if task, ok := reasoningData["task"].(string); ok {
-					reasoningInput := &requesttypes.ReasoningInput{
-						Task: task,
-					}
-					if context, ok := reasoningData["context"].(string); ok {
-						reasoningInput.Context = &context
-					}
-					structuredInput.Reasoning = reasoningInput
-				}
-			}
-		}
-
-		return structuredInput
-	default:
-		return nil
-	}
-}
-
-// validateMessageObject validates a message object in the input array
-func validateMessageObject(msg map[string]any, index int) *[]ValidationError {
-	var errors []ValidationError
-
-	// Check for required role field
-	role, hasRole := msg["role"]
-	if !hasRole {
-		errors = append(errors, ValidationError{
-			Field:   fmt.Sprintf("input[%d].role", index),
-			Message: "role is required for message objects",
-		})
-	} else if roleStr, ok := role.(string); !ok || roleStr == "" {
-		errors = append(errors, ValidationError{
-			Field:   fmt.Sprintf("input[%d].role", index),
-			Message: "role must be a non-empty string",
-		})
-	} else if roleStr != "system" && roleStr != "user" && roleStr != "assistant" {
-		errors = append(errors, ValidationError{
-			Field:   fmt.Sprintf("input[%d].role", index),
-			Message: "role must be one of: system, user, assistant",
-		})
-	}
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateCreateResponseInput validates a CreateResponseInput (legacy function for backward compatibility)
-func validateCreateResponseInput(input *requesttypes.CreateResponseInput) *[]ValidationError {
-	var errors []ValidationError
-
-	// Validate type
-	if input.Type == "" {
-		errors = append(errors, ValidationError{
-			Field:   "input.type",
-			Message: "input.type is required",
-		})
-		return &errors
-	}
-
-	// Validate type-specific fields
-	switch input.Type {
-	case requesttypes.InputTypeText:
-		if input.Text == nil || *input.Text == "" {
-			errors = append(errors, ValidationError{
-				Field:   "input.text",
-				Message: "input.text is required for text type",
-			})
-		}
-	case requesttypes.InputTypeImage:
-		if input.Image == nil {
-			errors = append(errors, ValidationError{
-				Field:   "input.image",
-				Message: "input.image is required for image type",
-			})
-		} else {
-			if err := validateImageInput(input.Image); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	case requesttypes.InputTypeFile:
-		if input.File == nil {
-			errors = append(errors, ValidationError{
-				Field:   "input.file",
-				Message: "input.file is required for file type",
-			})
-		} else {
-			if err := validateFileInput(input.File); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	case requesttypes.InputTypeWebSearch:
-		if input.WebSearch == nil {
-			errors = append(errors, ValidationError{
-				Field:   "input.web_search",
-				Message: "input.web_search is required for web_search type",
-			})
-		} else {
-			if err := validateWebSearchInput(input.WebSearch); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	case requesttypes.InputTypeFileSearch:
-		if input.FileSearch == nil {
-			errors = append(errors, ValidationError{
-				Field:   "input.file_search",
-				Message: "input.file_search is required for file_search type",
-			})
-		} else {
-			if err := validateFileSearchInput(input.FileSearch); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	case requesttypes.InputTypeStreaming:
-		if input.Streaming == nil {
-			errors = append(errors, ValidationError{
-				Field:   "input.streaming",
-				Message: "input.streaming is required for streaming type",
-			})
-		} else {
-			if err := validateStreamingInput(input.Streaming); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	case requesttypes.InputTypeFunctionCalls:
-		if input.FunctionCalls == nil {
-			errors = append(errors, ValidationError{
-				Field:   "input.function_calls",
-				Message: "input.function_calls is required for function_calls type",
-			})
-		} else {
-			if err := validateFunctionCallsInput(input.FunctionCalls); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	case requesttypes.InputTypeReasoning:
-		if input.Reasoning == nil {
-			errors = append(errors, ValidationError{
-				Field:   "input.reasoning",
-				Message: "input.reasoning is required for reasoning type",
-			})
-		} else {
-			if err := validateReasoningInput(input.Reasoning); err != nil {
-				errors = append(errors, *err...)
-			}
-		}
-	default:
-		errors = append(errors, ValidationError{
-			Field:   "input.type",
-			Message: fmt.Sprintf("invalid input type: %s", input.Type),
-		})
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateImageInput validates an ImageInput
-func validateImageInput(image *requesttypes.ImageInput) *[]ValidationError {
-	var errors []ValidationError
-
-	// Either URL or data must be provided
-	if image.URL == nil && image.Data == nil {
-		errors = append(errors, ValidationError{
-			Field:   "input.image",
-			Message: "either url or data must be provided for image input",
-		})
-	}
-
-	// Both URL and data cannot be provided
-	if image.URL != nil && image.Data != nil {
-		errors = append(errors, ValidationError{
-			Field:   "input.image",
-			Message: "either url or data must be provided, not both",
-		})
-	}
-
-	// Validate URL if provided
-	if image.URL != nil && *image.URL != "" {
-		if !strings.HasPrefix(*image.URL, "http://") && !strings.HasPrefix(*image.URL, "https://") {
-			errors = append(errors, ValidationError{
-				Field:   "input.image.url",
-				Message: "url must be a valid HTTP or HTTPS URL",
-			})
-		}
-	}
-
-	// Validate data if provided
-	if image.Data != nil && *image.Data != "" {
-		if !strings.HasPrefix(*image.Data, "data:image/") {
-			errors = append(errors, ValidationError{
-				Field:   "input.image.data",
-				Message: "data must be a valid base64-encoded image with data URL format",
-			})
-		}
-	}
-
-	// Validate detail if provided
-	if image.Detail != nil {
-		if *image.Detail != "low" && *image.Detail != "high" && *image.Detail != "auto" {
-			errors = append(errors, ValidationError{
-				Field:   "input.image.detail",
-				Message: "detail must be one of: low, high, auto",
-			})
-		}
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateFileInput validates a FileInput
-func validateFileInput(file *requesttypes.FileInput) *[]ValidationError {
-	var errors []ValidationError
-
-	if file.FileID == "" {
-		errors = append(errors, ValidationError{
-			Field:   "input.file.file_id",
-			Message: "file_id is required",
-		})
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateWebSearchInput validates a WebSearchInput
-func validateWebSearchInput(webSearch *requesttypes.WebSearchInput) *[]ValidationError {
-	var errors []ValidationError
-
-	if webSearch.Query == "" {
-		errors = append(errors, ValidationError{
-			Field:   "input.web_search.query",
-			Message: "query is required",
-		})
-	}
-
-	if webSearch.MaxResults != nil {
-		if *webSearch.MaxResults < 1 || *webSearch.MaxResults > 20 {
-			errors = append(errors, ValidationError{
-				Field:   "input.web_search.max_results",
-				Message: "max_results must be between 1 and 20",
-			})
-		}
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateFileSearchInput validates a FileSearchInput
-func validateFileSearchInput(fileSearch *requesttypes.FileSearchInput) *[]ValidationError {
-	var errors []ValidationError
-
-	if fileSearch.Query == "" {
-		errors = append(errors, ValidationError{
-			Field:   "input.file_search.query",
-			Message: "query is required",
-		})
-	}
-
-	if len(fileSearch.FileIDs) == 0 {
-		errors = append(errors, ValidationError{
-			Field:   "input.file_search.file_ids",
-			Message: "file_ids is required and cannot be empty",
-		})
-	}
-
-	if fileSearch.MaxResults != nil {
-		if *fileSearch.MaxResults < 1 || *fileSearch.MaxResults > 20 {
-			errors = append(errors, ValidationError{
-				Field:   "input.file_search.max_results",
-				Message: "max_results must be between 1 and 20",
-			})
-		}
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateStreamingInput validates a StreamingInput
-func validateStreamingInput(streaming *requesttypes.StreamingInput) *[]ValidationError {
-	var errors []ValidationError
-
-	if streaming.URL == "" {
-		errors = append(errors, ValidationError{
-			Field:   "input.streaming.url",
-			Message: "url is required",
-		})
-	} else if !strings.HasPrefix(streaming.URL, "http://") && !strings.HasPrefix(streaming.URL, "https://") {
-		errors = append(errors, ValidationError{
-			Field:   "input.streaming.url",
-			Message: "url must be a valid HTTP or HTTPS URL",
-		})
-	}
-
-	if streaming.Method != nil {
-		method := strings.ToUpper(*streaming.Method)
-		if method != "GET" && method != "POST" && method != "PUT" && method != "DELETE" && method != "PATCH" {
-			errors = append(errors, ValidationError{
-				Field:   "input.streaming.method",
-				Message: "method must be one of: GET, POST, PUT, DELETE, PATCH",
-			})
-		}
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateFunctionCallsInput validates a FunctionCallsInput
-func validateFunctionCallsInput(functionCalls *requesttypes.FunctionCallsInput) *[]ValidationError {
-	var errors []ValidationError
-
-	if len(functionCalls.Calls) == 0 {
-		errors = append(errors, ValidationError{
-			Field:   "input.function_calls.calls",
-			Message: "calls is required and cannot be empty",
-		})
-	}
-
-	for i, call := range functionCalls.Calls {
-		if call.Name == "" {
-			errors = append(errors, ValidationError{
-				Field:   fmt.Sprintf("input.function_calls.calls[%d].name", i),
-				Message: "name is required",
-			})
-		}
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateReasoningInput validates a ReasoningInput
-func validateReasoningInput(reasoning *requesttypes.ReasoningInput) *[]ValidationError {
-	var errors []ValidationError
-
-	if reasoning.Task == "" {
-		errors = append(errors, ValidationError{
-			Field:   "input.reasoning.task",
-			Message: "task is required",
-		})
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateResponseFormat validates a ResponseFormat
-func validateResponseFormat(format *requesttypes.ResponseFormat) *[]ValidationError {
-	var errors []ValidationError
-
-	if format.Type == "" {
-		errors = append(errors, ValidationError{
-			Field:   "response_format.type",
-			Message: "type is required",
-		})
-	} else if format.Type != "text" && format.Type != "json_object" {
-		errors = append(errors, ValidationError{
-			Field:   "response_format.type",
-			Message: "type must be one of: text, json_object",
-		})
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateTools validates a slice of Tools
-func validateTools(tools []requesttypes.Tool) *[]ValidationError {
-	var errors []ValidationError
-
-	for i, tool := range tools {
-		if tool.Type == "" {
-			errors = append(errors, ValidationError{
-				Field:   fmt.Sprintf("tools[%d].type", i),
-				Message: "type is required",
-			})
-		} else if tool.Type != "function" {
-			errors = append(errors, ValidationError{
-				Field:   fmt.Sprintf("tools[%d].type", i),
-				Message: "type must be 'function'",
-			})
-		}
-
-		if tool.Type == "function" && tool.Function == nil {
-			errors = append(errors, ValidationError{
-				Field:   fmt.Sprintf("tools[%d].function", i),
-				Message: "function is required for function type tools",
-			})
-		}
-
-		if tool.Function != nil {
-			if tool.Function.Name == "" {
-				errors = append(errors, ValidationError{
-					Field:   fmt.Sprintf("tools[%d].function.name", i),
-					Message: "function name is required",
-				})
-			}
-		}
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// validateToolChoice validates a ToolChoice
-func validateToolChoice(choice *requesttypes.ToolChoice) *[]ValidationError {
-	var errors []ValidationError
-
-	if choice.Type == "" {
-		errors = append(errors, ValidationError{
-			Field:   "tool_choice.type",
-			Message: "type is required",
-		})
-	} else if choice.Type != "none" && choice.Type != "auto" && choice.Type != "function" {
-		errors = append(errors, ValidationError{
-			Field:   "tool_choice.type",
-			Message: "type must be one of: none, auto, function",
-		})
-	}
-
-	if choice.Type == "function" && choice.Function == nil {
-		errors = append(errors, ValidationError{
-			Field:   "tool_choice.function",
-			Message: "function is required for function type tool choice",
-		})
-	}
-
-	if choice.Function != nil {
-		if choice.Function.Name == "" {
-			errors = append(errors, ValidationError{
-				Field:   "tool_choice.function.name",
-				Message: "function name is required",
-			})
-		}
-	}
-
-	if len(errors) > 0 {
-		return &errors
-	}
-
-	return nil
-}
-
-// ValidateResponseID validates a response ID
-func ValidateResponseID(responseID string) *ValidationError {
-	if responseID == "" {
-		return &ValidationError{
-			Field:   "response_id",
-			Message: "response_id is required",
-		}
-	}
-
-	return nil
-}
diff --git a/apps/jan-api-gateway/application/app/domain/service_provider.go b/apps/jan-api-gateway/application/app/domain/service_provider.go
deleted file mode 100644
index 4b45bccf..00000000
--- a/apps/jan-api-gateway/application/app/domain/service_provider.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package domain
-
-import (
-	"github.com/google/wire"
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/cron"
-	"menlo.ai/jan-api-gateway/app/domain/invite"
-	"menlo.ai/jan-api-gateway/app/domain/mcp/serpermcp"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/project"
-	"menlo.ai/jan-api-gateway/app/domain/response"
-	"menlo.ai/jan-api-gateway/app/domain/user"
-)
-
-var ServiceProvider = wire.NewSet(
-	auth.NewAuthService,
-	invite.NewInviteService,
-	organization.NewService,
-	project.NewService,
-	apikey.NewService,
-	user.NewService,
-	conversation.NewService,
-	response.NewResponseService,
-	response.NewResponseModelService,
-	response.NewStreamModelService,
-	response.NewNonStreamModelService,
-	serpermcp.NewSerperService,
-	cron.NewService,
-)
diff --git a/apps/jan-api-gateway/application/app/domain/user/service.go b/apps/jan-api-gateway/application/app/domain/user/service.go
deleted file mode 100644
index 8208ecd7..00000000
--- a/apps/jan-api-gateway/application/app/domain/user/service.go
+++ /dev/null
@@ -1,119 +0,0 @@
-package user
-
-import (
-	"encoding/json"
-	"fmt"
-	"time"
-
-	"golang.org/x/net/context"
-	"menlo.ai/jan-api-gateway/app/infrastructure/cache"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-)
-
-const (
-	// UserCacheTTL is the TTL for cached user lookups
-	UserCacheTTL = 15 * time.Minute
-)
-
-type UserService struct {
-	userrepo UserRepository
-	cache    *cache.RedisCacheService
-}
-
-func NewService(userrepo UserRepository, cacheService *cache.RedisCacheService) *UserService {
-	return &UserService{
-		userrepo: userrepo,
-		cache:    cacheService,
-	}
-}
-
-func (s *UserService) RegisterUser(ctx context.Context, user *User) (*User, error) {
-	publicId, err := s.generatePublicID()
-	if err != nil {
-		return nil, err
-	}
-	user.PublicID = publicId
-	if err := s.userrepo.Create(ctx, user); err != nil {
-		return nil, err
-	}
-	return user, nil
-}
-
-func (s *UserService) UpdateUser(ctx context.Context, user *User) (*User, error) {
-	if err := s.userrepo.Update(ctx, user); err != nil {
-		return nil, err
-	}
-
-	if user.PublicID != "" {
-		cacheKey := fmt.Sprintf(cache.UserByPublicIDKey, user.PublicID)
-		if cacheErr := s.cache.Unlink(ctx, cacheKey); cacheErr != nil {
-			logger.GetLogger().Errorf("failed to invalidate cache for user %s: %v", user.PublicID, cacheErr)
-		}
-	}
-
-	return user, nil
-}
-
-func (s *UserService) FindByEmail(ctx context.Context, email string) (*User, error) {
-	users, err := s.userrepo.FindByFilter(ctx, UserFilter{
-		Email: &email,
-	}, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(users) == 0 {
-		return nil, nil
-	}
-	if len(users) != 1 {
-		return nil, fmt.Errorf("invalid email")
-	}
-	return users[0], nil
-}
-
-func (s *UserService) FindByFilter(ctx context.Context, filter UserFilter) ([]*User, error) {
-	return s.userrepo.FindByFilter(ctx, filter, nil)
-}
-
-func (s *UserService) FindByID(ctx context.Context, id uint) (*User, error) {
-	return s.userrepo.FindByID(ctx, id)
-}
-
-func (s *UserService) FindByPublicID(ctx context.Context, publicID string) (*User, error) {
-	// Create cache key
-	cacheKey := fmt.Sprintf(cache.UserByPublicIDKey, publicID)
-
-	// Try to get from cache first
-	cachedUserJSON, err := s.cache.Get(ctx, cacheKey)
-	if err == nil && cachedUserJSON != "" {
-		var cachedUser User
-		if jsonErr := json.Unmarshal([]byte(cachedUserJSON), &cachedUser); jsonErr == nil {
-			return &cachedUser, nil
-		}
-	}
-
-	// Cache miss or error - fetch from database
-	userEntities, err := s.userrepo.FindByFilter(ctx, UserFilter{PublicID: &publicID}, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(userEntities) != 1 {
-		return nil, fmt.Errorf("user does not exist")
-	}
-
-	user := userEntities[0]
-
-	// Cache the result for future requests
-	if userJSON, jsonErr := json.Marshal(user); jsonErr == nil {
-		if cacheErr := s.cache.Set(ctx, cacheKey, string(userJSON), UserCacheTTL); cacheErr != nil {
-			// Log cache error but don't fail the request
-			logger.GetLogger().Errorf("failed to cache user %s: %v", publicID, cacheErr)
-		}
-	}
-
-	return user, nil
-}
-
-func (s *UserService) generatePublicID() (string, error) {
-	return idgen.GenerateSecureID("user", 24)
-}
diff --git a/apps/jan-api-gateway/application/app/domain/user/user.go b/apps/jan-api-gateway/application/app/domain/user/user.go
deleted file mode 100644
index 90582ade..00000000
--- a/apps/jan-api-gateway/application/app/domain/user/user.go
+++ /dev/null
@@ -1,35 +0,0 @@
-package user
-
-import (
-	"context"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-)
-
-type UserPlatformType string
-
-type User struct {
-	ID        uint
-	Name      string
-	Email     string
-	Enabled   bool
-	PublicID  string
-	CreatedAt time.Time
-	IsGuest   bool
-}
-
-type UserFilter struct {
-	Email          *string
-	Enabled        *bool
-	PublicID       *string
-	OrganizationId *uint
-}
-
-type UserRepository interface {
-	Create(ctx context.Context, u *User) error
-	Update(ctx context.Context, u *User) error
-	FindFirst(ctx context.Context, filter UserFilter) (*User, error)
-	FindByFilter(ctx context.Context, filter UserFilter, p *query.Pagination) ([]*User, error)
-	FindByID(ctx context.Context, id uint) (*User, error)
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/cache/constants.go b/apps/jan-api-gateway/application/app/infrastructure/cache/constants.go
deleted file mode 100644
index 63f96cef..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/cache/constants.go
+++ /dev/null
@@ -1,19 +0,0 @@
-package cache
-
-// Cache key constants
-const (
-	// CacheVersion is the API version prefix for cache keys
-	CacheVersion = "v1"
-
-	// ModelsCacheKey is the cache key for the models list
-	ModelsCacheKey = CacheVersion + ":models:list"
-
-	// RegistryEndpointModelsKey is the cache key for endpoint to models mapping
-	RegistryEndpointModelsKey = CacheVersion + ":registry:endpoint_models"
-
-	// RegistryModelEndpointsKey is the cache key for model to endpoints mapping
-	RegistryModelEndpointsKey = CacheVersion + ":registry:model_endpoints"
-
-	// UserByPublicIDKey is the cache key template for user lookups by public ID
-	UserByPublicIDKey = CacheVersion + ":user:public_id:%s"
-)
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/database.go b/apps/jan-api-gateway/application/app/infrastructure/database/database.go
deleted file mode 100644
index aba55521..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/database.go
+++ /dev/null
@@ -1,51 +0,0 @@
-package database
-
-import (
-	"gorm.io/driver/postgres"
-	"gorm.io/gorm"
-	"gorm.io/gorm/schema"
-	"gorm.io/plugin/dbresolver"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-var SchemaRegistry []interface{}
-
-func RegisterSchemaForAutoMigrate(models ...interface{}) {
-	SchemaRegistry = append(SchemaRegistry, models...)
-}
-
-var DB *gorm.DB
-
-func NewDB() (*gorm.DB, error) {
-	db, err := gorm.Open(postgres.Open(environment_variables.EnvironmentVariables.DB_POSTGRESQL_WRITE_DSN), &gorm.Config{
-		NamingStrategy: schema.NamingStrategy{
-			SingularTable: true,
-		},
-	})
-	if err != nil {
-		logger.GetLogger().
-			WithField("error_code", "5c16fb53-d98c-4fc6-8bb4-9abd3c0b9e88").
-			Fatalf("unable to connect to database: %v", err)
-		return nil, err
-	}
-	err = db.Use(dbresolver.Register(dbresolver.Config{
-		Replicas: []gorm.Dialector{postgres.Open(
-			environment_variables.EnvironmentVariables.DB_POSTGRESQL_READ1_DSN,
-		)},
-		Policy: dbresolver.RandomPolicy{},
-	}))
-	if err != nil {
-		logger.GetLogger().
-			WithField("error_code", "9fab4b2e-1d70-4a4e-928a-5e81c7ee06de").
-			Fatalf("unable to connect to setup replica: %v", err)
-		return nil, err
-	}
-	DB = db
-	return DB, nil
-}
-
-func Migration() error {
-	migrator := NewDBMigrator(DB)
-	return migrator.Migrate()
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/apikey.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/apikey.go
deleted file mode 100644
index 0efba086..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/apikey.go
+++ /dev/null
@@ -1,70 +0,0 @@
-package dbschema
-
-import (
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-)
-
-func init() {
-	database.RegisterSchemaForAutoMigrate(ApiKey{})
-}
-
-type ApiKey struct {
-	BaseModel
-	PublicID      string `gorm:"size:128;uniqueIndex;not null"`
-	KeyHash       string `gorm:"size:128;uniqueIndex;not null"`
-	PlaintextHint string `gorm:"size:16"`
-	Description   string `gorm:"size:255"`
-	Enabled       bool   `gorm:"default:true;index"`
-
-	ApikeyType     string `gorm:"size:32;index;not null"` // "admin","project","service","organization","ephemeral"
-	OwnerPublicID  string `gorm:"type:varchar(50);not null"`
-	OrganizationID *uint  `gorm:"index"`
-	ProjectID      *uint  `gorm:"index"`
-
-	Permissions string     `gorm:"type:json"`
-	ExpiresAt   *time.Time `gorm:"type:timestamp"`
-	LastUsedAt  *time.Time `gorm:"type:timestamp"`
-}
-
-func NewSchemaApiKey(a *apikey.ApiKey) *ApiKey {
-	return &ApiKey{
-		BaseModel: BaseModel{
-			ID: a.ID,
-		},
-		PublicID:       a.PublicID,
-		KeyHash:        a.KeyHash,
-		PlaintextHint:  a.PlaintextHint,
-		Description:    a.Description,
-		Enabled:        a.Enabled,
-		ApikeyType:     a.ApikeyType,
-		OwnerPublicID:  a.OwnerPublicID,
-		ProjectID:      a.ProjectID,
-		OrganizationID: a.OrganizationID,
-		Permissions:    a.Permissions,
-		ExpiresAt:      a.ExpiresAt,
-		LastUsedAt:     a.LastUsedAt,
-	}
-}
-
-func (a *ApiKey) EtoD() *apikey.ApiKey {
-	return &apikey.ApiKey{
-		ID:             a.ID,
-		PublicID:       a.PublicID,
-		KeyHash:        a.KeyHash,
-		PlaintextHint:  a.PlaintextHint,
-		Description:    a.Description,
-		Enabled:        a.Enabled,
-		ApikeyType:     a.ApikeyType,
-		OwnerPublicID:  a.OwnerPublicID,
-		ProjectID:      a.ProjectID,
-		OrganizationID: a.OrganizationID,
-		Permissions:    a.Permissions,
-		ExpiresAt:      a.ExpiresAt,
-		CreatedAt:      a.CreatedAt,
-		UpdatedAt:      a.UpdatedAt,
-		LastUsedAt:     a.LastUsedAt,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/conversation.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/conversation.go
deleted file mode 100644
index 40f145c3..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/conversation.go
+++ /dev/null
@@ -1,150 +0,0 @@
-package dbschema
-
-import (
-	"encoding/json"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-func init() {
-	database.RegisterSchemaForAutoMigrate(Conversation{})
-	database.RegisterSchemaForAutoMigrate(Item{})
-}
-
-type Conversation struct {
-	BaseModel
-	PublicID  string `gorm:"type:varchar(50);uniqueIndex;not null"`
-	Title     string `gorm:"type:varchar(255)"`
-	UserID    uint   `gorm:"not null;index"`
-	Status    string `gorm:"type:varchar(20);not null;default:'active';index"`
-	Metadata  string `gorm:"type:text"`
-	IsPrivate bool   `gorm:"not null;default:true;index"`
-	Items     []Item `gorm:"foreignKey:ConversationID"`
-	User      User   `gorm:"foreignKey:UserID"`
-}
-
-type Item struct {
-	BaseModel
-	PublicID          string       `gorm:"type:varchar(50);uniqueIndex;not null"`
-	ConversationID    uint         `gorm:"not null;index"`
-	ResponseID        *uint        `gorm:"index"`
-	Type              string       `gorm:"type:varchar(50);not null;index"`
-	Role              string       `gorm:"type:varchar(20);index"`
-	Content           string       `gorm:"type:text"`
-	Status            string       `gorm:"type:varchar(50);index"`
-	IncompleteAt      *time.Time   `gorm:"type:timestamp"`
-	IncompleteDetails string       `gorm:"type:text"`
-	CompletedAt       *time.Time   `gorm:"type:timestamp"`
-	Conversation      Conversation `gorm:"foreignKey:ConversationID"`
-	Response          *Response    `gorm:"foreignKey:ResponseID"`
-}
-
-func NewSchemaConversation(c *conversation.Conversation) *Conversation {
-	var metadataJSON string
-	if c.Metadata != nil {
-		metadataBytes, err := json.Marshal(c.Metadata)
-		if err != nil {
-			metadataJSON = "{}"
-		} else {
-			metadataJSON = string(metadataBytes)
-		}
-	}
-
-	return &Conversation{
-		BaseModel: BaseModel{
-			ID: c.ID,
-		},
-		PublicID:  c.PublicID,
-		Title:     ptr.FromString(c.Title),
-		UserID:    c.UserID,
-		Status:    string(c.Status),
-		Metadata:  metadataJSON,
-		IsPrivate: c.IsPrivate,
-	}
-}
-
-func (c *Conversation) EtoD() *conversation.Conversation {
-	var metadata map[string]string
-	if c.Metadata != "" {
-		json.Unmarshal([]byte(c.Metadata), &metadata)
-	}
-
-	title := ptr.ToString(c.Title)
-
-	return &conversation.Conversation{
-		ID:        c.ID,
-		PublicID:  c.PublicID,
-		Title:     title,
-		UserID:    c.UserID,
-		Status:    conversation.ConversationStatus(c.Status),
-		Metadata:  metadata,
-		IsPrivate: c.IsPrivate,
-		CreatedAt: c.CreatedAt,
-		UpdatedAt: c.UpdatedAt,
-	}
-}
-
-func NewSchemaItem(i *conversation.Item) *Item {
-	// Convert Content slice to JSON string for storage
-	var contentJSON string
-	if i.Content != nil {
-		contentBytes, _ := json.Marshal(i.Content)
-		contentJSON = string(contentBytes)
-	}
-
-	// Convert IncompleteDetails to JSON string
-	var incompleteDetailsJSON string
-	if i.IncompleteDetails != nil {
-		incompleteDetailsBytes, _ := json.Marshal(i.IncompleteDetails)
-		incompleteDetailsJSON = string(incompleteDetailsBytes)
-	}
-
-	return &Item{
-		BaseModel: BaseModel{
-			ID: i.ID,
-		},
-		PublicID:          i.PublicID,
-		ConversationID:    i.ConversationID,
-		ResponseID:        i.ResponseID,
-		Type:              string(i.Type),
-		Role:              string(*i.Role),
-		Content:           contentJSON,
-		Status:            string(*i.Status),
-		IncompleteAt:      i.IncompleteAt,
-		IncompleteDetails: incompleteDetailsJSON,
-		CompletedAt:       i.CompletedAt,
-	}
-}
-
-func (i *Item) EtoD() *conversation.Item {
-	// Parse Content JSON back to slice
-	var content []conversation.Content
-	if i.Content != "" {
-		json.Unmarshal([]byte(i.Content), &content)
-	}
-
-	// Parse IncompleteDetails JSON
-	var incompleteDetails *conversation.IncompleteDetails
-	if i.IncompleteDetails != "" {
-		incompleteDetails = &conversation.IncompleteDetails{}
-		json.Unmarshal([]byte(i.IncompleteDetails), incompleteDetails)
-	}
-
-	return &conversation.Item{
-		ID:                i.ID,
-		PublicID:          i.PublicID, // Add PublicID field
-		Type:              conversation.ItemType(i.Type),
-		Role:              (*conversation.ItemRole)(ptr.ToString(i.Role)),
-		Content:           content,
-		Status:            (*conversation.ItemStatus)(ptr.ToString(i.Status)),
-		IncompleteAt:      i.IncompleteAt,
-		IncompleteDetails: incompleteDetails,
-		CompletedAt:       i.CompletedAt,
-		ConversationID:    i.ConversationID,
-		ResponseID:        i.ResponseID,
-		CreatedAt:         i.CreatedAt,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/invite.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/invite.go
deleted file mode 100644
index 13eb6f4b..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/invite.go
+++ /dev/null
@@ -1,60 +0,0 @@
-package dbschema
-
-import (
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/invite"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-)
-
-func init() {
-	database.RegisterSchemaForAutoMigrate(Invite{})
-}
-
-type Invite struct {
-	BaseModel
-	PublicID       string `gorm:"size:64;not null;uniqueIndex"`
-	Email          string `gorm:"size:128;not null"`
-	Role           string `gorm:"type:varchar(20);not null"`
-	Status         string `gorm:"type:varchar(20);not null;index"`
-	InvitedAt      time.Time
-	ExpiresAt      time.Time
-	AcceptedAt     *time.Time
-	Secrets        *string `gorm:"type:text"`
-	Projects       string  `gorm:"type:jsonb"`
-	OrganizationID uint    `gorm:"not null;index"`
-}
-
-func NewSchemaInvite(i *invite.Invite) *Invite {
-	return &Invite{
-		BaseModel: BaseModel{
-			ID: i.ID,
-		},
-		PublicID:       i.PublicID,
-		Email:          i.Email,
-		Role:           i.Role,
-		Status:         i.Status,
-		InvitedAt:      i.InvitedAt,
-		ExpiresAt:      i.ExpiresAt,
-		AcceptedAt:     i.AcceptedAt,
-		Secrets:        i.Secrets,
-		Projects:       i.Projects,
-		OrganizationID: i.OrganizationID,
-	}
-}
-
-func (i *Invite) EtoD() *invite.Invite {
-	return &invite.Invite{
-		ID:             i.ID,
-		PublicID:       i.PublicID,
-		Email:          i.Email,
-		Role:           i.Role,
-		Status:         i.Status,
-		InvitedAt:      i.InvitedAt,
-		ExpiresAt:      i.ExpiresAt,
-		AcceptedAt:     i.AcceptedAt,
-		Secrets:        i.Secrets,
-		Projects:       i.Projects,
-		OrganizationID: i.OrganizationID,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/organization.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/organization.go
deleted file mode 100644
index ae159617..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/organization.go
+++ /dev/null
@@ -1,69 +0,0 @@
-package dbschema
-
-import (
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-)
-
-func init() {
-	database.RegisterSchemaForAutoMigrate(Organization{})
-	database.RegisterSchemaForAutoMigrate(OrganizationMember{})
-}
-
-type Organization struct {
-	BaseModel
-	Name     string               `gorm:"size:128;not null"`
-	PublicID string               `gorm:"size:64;not null;uniqueIndex"`
-	Enabled  bool                 `gorm:"default:true;index"`
-	Members  []OrganizationMember `gorm:"foreignKey:OrganizationID"`
-}
-
-type OrganizationMember struct {
-	BaseModel
-	UserID         uint   `gorm:"not null;index:idx_user_org,unique"`
-	OrganizationID uint   `gorm:"not null;index:idx_user_org,unique"`
-	Role           string `gorm:"type:varchar(20);not null"`
-}
-
-func NewSchemaOrganization(o *organization.Organization) *Organization {
-	return &Organization{
-		BaseModel: BaseModel{
-			ID: o.ID,
-		},
-		Name:     o.Name,
-		PublicID: o.PublicID,
-		Enabled:  o.Enabled,
-	}
-}
-
-func NewSchemaOrganizationMember(o *organization.OrganizationMember) *OrganizationMember {
-	return &OrganizationMember{
-		BaseModel: BaseModel{
-			ID: o.ID,
-		},
-		UserID:         o.UserID,
-		OrganizationID: o.OrganizationID,
-		Role:           string(o.Role),
-	}
-}
-
-func (o *Organization) EtoD() *organization.Organization {
-	return &organization.Organization{
-		ID:        o.ID,
-		Name:      o.Name,
-		PublicID:  o.PublicID,
-		Enabled:   o.Enabled,
-		CreatedAt: o.CreatedAt,
-		UpdatedAt: o.UpdatedAt,
-	}
-}
-
-func (o *OrganizationMember) EtoD() *organization.OrganizationMember {
-	return &organization.OrganizationMember{
-		ID:             o.ID,
-		UserID:         o.UserID,
-		OrganizationID: o.OrganizationID,
-		Role:           organization.OrganizationMemberRole(o.Role),
-		CreatedAt:      o.CreatedAt,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/project.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/project.go
deleted file mode 100644
index 1ea1e162..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/project.go
+++ /dev/null
@@ -1,83 +0,0 @@
-package dbschema
-
-import (
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/project"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-)
-
-func init() {
-	database.RegisterSchemaForAutoMigrate(Project{})
-	database.RegisterSchemaForAutoMigrate(ProjectMember{})
-}
-
-type Project struct {
-	BaseModel
-	Name           string          `gorm:"size:128;not null"`
-	PublicID       string          `gorm:"type:varchar(50);uniqueIndex;not null"`
-	Status         string          `gorm:"type:varchar(20);not null;default:'active';index"`
-	OrganizationID uint            `gorm:"not null;index"`
-	ArchivedAt     *time.Time      `gorm:"column:archived_at;index"`
-	Members        []ProjectMember `gorm:"foreignKey:ProjectID"`
-}
-
-type ProjectMemberRole string
-
-const (
-	ProjectMemberRoleOwner  ProjectMemberRole = "owner"
-	ProjectMemberRoleMember ProjectMemberRole = "member"
-)
-
-type ProjectMember struct {
-	BaseModel
-	UserID    uint   `gorm:"not null;index:idx_user_proj,unique"`
-	ProjectID uint   `gorm:"not null;index:idx_user_proj,unique"`
-	Role      string `gorm:"type:varchar(20);not null"`
-}
-
-func (p *ProjectMember) EtoD() *project.ProjectMember {
-	return &project.ProjectMember{
-		ID:        p.ID,
-		UserID:    p.UserID,
-		ProjectID: p.ProjectID,
-		Role:      p.Role,
-	}
-}
-
-func NewSchemaProject(p *project.Project) *Project {
-	return &Project{
-		BaseModel: BaseModel{
-			ID: p.ID,
-		},
-		Name:           p.Name,
-		PublicID:       p.PublicID,
-		Status:         p.Status,
-		ArchivedAt:     p.ArchivedAt,
-		OrganizationID: p.OrganizationID,
-	}
-}
-
-func (p *Project) EtoD() *project.Project {
-	return &project.Project{
-		ID:             p.ID,
-		Name:           p.Name,
-		PublicID:       p.PublicID,
-		ArchivedAt:     p.ArchivedAt,
-		Status:         p.Status,
-		OrganizationID: p.OrganizationID,
-		CreatedAt:      p.CreatedAt,
-		UpdatedAt:      p.UpdatedAt,
-	}
-}
-
-func NewSchemaProjectMember(p *project.ProjectMember) *ProjectMember {
-	return &ProjectMember{
-		BaseModel: BaseModel{
-			ID: p.ID,
-		},
-		UserID:    p.UserID,
-		ProjectID: p.ProjectID,
-		Role:      p.Role,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/response.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/response.go
deleted file mode 100644
index 4907df3e..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/response.go
+++ /dev/null
@@ -1,141 +0,0 @@
-package dbschema
-
-import (
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/domain/response"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-)
-
-// Response represents the response table in the database
-type Response struct {
-	BaseModel
-	PublicID           string  `gorm:"size:255;not null;uniqueIndex"`
-	UserID             uint    `gorm:"not null;index"`
-	ConversationID     *uint   `gorm:"index"`
-	PreviousResponseID *string `gorm:"size:255;index"`
-	Model              string  `gorm:"size:255;not null;index"`
-	Status             string  `gorm:"size:50;not null;default:'pending';index"`
-	Input              string  `gorm:"type:text;not null"`
-	Output             *string `gorm:"type:text"`
-	SystemPrompt       *string `gorm:"type:text"`
-	MaxTokens          *int
-	Temperature        *float64
-	TopP               *float64
-	TopK               *int
-	RepetitionPenalty  *float64
-	Seed               *int
-	Stop               *string `gorm:"type:text"`
-	PresencePenalty    *float64
-	FrequencyPenalty   *float64
-	LogitBias          *string `gorm:"type:text"`
-	ResponseFormat     *string `gorm:"type:text"`
-	Tools              *string `gorm:"type:text"`
-	ToolChoice         *string `gorm:"type:text"`
-	Metadata           *string `gorm:"type:text"`
-	Stream             *bool
-	Background         *bool
-	Timeout            *int
-	User               *string `gorm:"size:255"`
-	Usage              *string `gorm:"type:text"`
-	Error              *string `gorm:"type:text"`
-	CompletedAt        *time.Time
-	CancelledAt        *time.Time
-	FailedAt           *time.Time
-
-	// Relationships
-	UserEntity   User          `gorm:"foreignKey:UserID;references:ID"`
-	Conversation *Conversation `gorm:"foreignKey:ConversationID;references:ID"`
-	Items        []Item        `gorm:"foreignKey:ResponseID;references:ID"`
-}
-
-// TableName returns the table name for the Response model
-func (Response) TableName() string {
-	return "responses"
-}
-
-func init() {
-	database.RegisterSchemaForAutoMigrate(Response{})
-}
-
-// NewSchemaResponse converts domain Response to database Response
-func NewSchemaResponse(r *response.Response) *Response {
-	return &Response{
-		BaseModel: BaseModel{
-			ID: r.ID,
-		},
-		PublicID:           r.PublicID,
-		UserID:             r.UserID,
-		ConversationID:     r.ConversationID,
-		PreviousResponseID: r.PreviousResponseID,
-		Model:              r.Model,
-		Status:             string(r.Status),
-		Input:              r.Input,
-		Output:             r.Output,
-		SystemPrompt:       r.SystemPrompt,
-		MaxTokens:          r.MaxTokens,
-		Temperature:        r.Temperature,
-		TopP:               r.TopP,
-		TopK:               r.TopK,
-		RepetitionPenalty:  r.RepetitionPenalty,
-		Seed:               r.Seed,
-		Stop:               r.Stop,
-		PresencePenalty:    r.PresencePenalty,
-		FrequencyPenalty:   r.FrequencyPenalty,
-		LogitBias:          r.LogitBias,
-		ResponseFormat:     r.ResponseFormat,
-		Tools:              r.Tools,
-		ToolChoice:         r.ToolChoice,
-		Metadata:           r.Metadata,
-		Stream:             r.Stream,
-		Background:         r.Background,
-		Timeout:            r.Timeout,
-		User:               r.User,
-		Usage:              r.Usage,
-		Error:              r.Error,
-		CompletedAt:        r.CompletedAt,
-		CancelledAt:        r.CancelledAt,
-		FailedAt:           r.FailedAt,
-	}
-}
-
-// EtoD converts database Response to domain Response
-func (r *Response) EtoD() *response.Response {
-	return &response.Response{
-		ID:                 r.ID,
-		PublicID:           r.PublicID,
-		UserID:             r.UserID,
-		ConversationID:     r.ConversationID,
-		PreviousResponseID: r.PreviousResponseID,
-		Model:              r.Model,
-		Status:             response.ResponseStatus(r.Status),
-		Input:              r.Input,
-		Output:             r.Output,
-		SystemPrompt:       r.SystemPrompt,
-		MaxTokens:          r.MaxTokens,
-		Temperature:        r.Temperature,
-		TopP:               r.TopP,
-		TopK:               r.TopK,
-		RepetitionPenalty:  r.RepetitionPenalty,
-		Seed:               r.Seed,
-		Stop:               r.Stop,
-		PresencePenalty:    r.PresencePenalty,
-		FrequencyPenalty:   r.FrequencyPenalty,
-		LogitBias:          r.LogitBias,
-		ResponseFormat:     r.ResponseFormat,
-		Tools:              r.Tools,
-		ToolChoice:         r.ToolChoice,
-		Metadata:           r.Metadata,
-		Stream:             r.Stream,
-		Background:         r.Background,
-		Timeout:            r.Timeout,
-		User:               r.User,
-		Usage:              r.Usage,
-		Error:              r.Error,
-		CompletedAt:        r.CompletedAt,
-		CancelledAt:        r.CancelledAt,
-		FailedAt:           r.FailedAt,
-		CreatedAt:          r.CreatedAt,
-		UpdatedAt:          r.UpdatedAt,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/user.go b/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/user.go
deleted file mode 100644
index 50c40708..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/user.go
+++ /dev/null
@@ -1,46 +0,0 @@
-package dbschema
-
-import (
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-)
-
-func init() {
-	database.RegisterSchemaForAutoMigrate(User{})
-}
-
-type User struct {
-	BaseModel
-	Name          string `gorm:"type:varchar(100);not null"`
-	Email         string `gorm:"type:varchar(255);uniqueIndex;not null"`
-	PublicID      string `gorm:"type:varchar(50);uniqueIndex;not null"`
-	Enabled       bool
-	Organizations []OrganizationMember `gorm:"foreignKey:UserID"`
-	Projects      []ProjectMember      `gorm:"foreignKey:UserID"`
-	IsGuest       bool
-}
-
-func NewSchemaUser(u *user.User) *User {
-	return &User{
-		BaseModel: BaseModel{
-			ID: u.ID,
-		},
-		Name:     u.Name,
-		Email:    u.Email,
-		Enabled:  u.Enabled,
-		PublicID: u.PublicID,
-		IsGuest:  u.IsGuest,
-	}
-}
-
-func (u *User) EtoD() *user.User {
-	return &user.User{
-		ID:        u.ID,
-		Name:      u.Name,
-		Email:     u.Email,
-		Enabled:   u.Enabled,
-		PublicID:  u.PublicID,
-		CreatedAt: u.CreatedAt,
-		IsGuest:   u.IsGuest,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/api_keys.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/api_keys.gen.go
deleted file mode 100644
index 542b09f9..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/api_keys.gen.go
+++ /dev/null
@@ -1,443 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newApiKey(db *gorm.DB, opts ...gen.DOOption) apiKey {
-	_apiKey := apiKey{}
-
-	_apiKey.apiKeyDo.UseDB(db, opts...)
-	_apiKey.apiKeyDo.UseModel(&dbschema.ApiKey{})
-
-	tableName := _apiKey.apiKeyDo.TableName()
-	_apiKey.ALL = field.NewAsterisk(tableName)
-	_apiKey.ID = field.NewUint(tableName, "id")
-	_apiKey.CreatedAt = field.NewTime(tableName, "created_at")
-	_apiKey.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_apiKey.DeletedAt = field.NewField(tableName, "deleted_at")
-	_apiKey.PublicID = field.NewString(tableName, "public_id")
-	_apiKey.KeyHash = field.NewString(tableName, "key_hash")
-	_apiKey.PlaintextHint = field.NewString(tableName, "plaintext_hint")
-	_apiKey.Description = field.NewString(tableName, "description")
-	_apiKey.Enabled = field.NewBool(tableName, "enabled")
-	_apiKey.ApikeyType = field.NewString(tableName, "apikey_type")
-	_apiKey.OwnerPublicID = field.NewString(tableName, "owner_public_id")
-	_apiKey.OrganizationID = field.NewUint(tableName, "organization_id")
-	_apiKey.ProjectID = field.NewUint(tableName, "project_id")
-	_apiKey.Permissions = field.NewString(tableName, "permissions")
-	_apiKey.ExpiresAt = field.NewTime(tableName, "expires_at")
-	_apiKey.LastUsedAt = field.NewTime(tableName, "last_used_at")
-
-	_apiKey.fillFieldMap()
-
-	return _apiKey
-}
-
-type apiKey struct {
-	apiKeyDo
-
-	ALL            field.Asterisk
-	ID             field.Uint
-	CreatedAt      field.Time
-	UpdatedAt      field.Time
-	DeletedAt      field.Field
-	PublicID       field.String
-	KeyHash        field.String
-	PlaintextHint  field.String
-	Description    field.String
-	Enabled        field.Bool
-	ApikeyType     field.String
-	OwnerPublicID  field.String
-	OrganizationID field.Uint
-	ProjectID      field.Uint
-	Permissions    field.String
-	ExpiresAt      field.Time
-	LastUsedAt     field.Time
-
-	fieldMap map[string]field.Expr
-}
-
-func (a apiKey) Table(newTableName string) *apiKey {
-	a.apiKeyDo.UseTable(newTableName)
-	return a.updateTableName(newTableName)
-}
-
-func (a apiKey) As(alias string) *apiKey {
-	a.apiKeyDo.DO = *(a.apiKeyDo.As(alias).(*gen.DO))
-	return a.updateTableName(alias)
-}
-
-func (a *apiKey) updateTableName(table string) *apiKey {
-	a.ALL = field.NewAsterisk(table)
-	a.ID = field.NewUint(table, "id")
-	a.CreatedAt = field.NewTime(table, "created_at")
-	a.UpdatedAt = field.NewTime(table, "updated_at")
-	a.DeletedAt = field.NewField(table, "deleted_at")
-	a.PublicID = field.NewString(table, "public_id")
-	a.KeyHash = field.NewString(table, "key_hash")
-	a.PlaintextHint = field.NewString(table, "plaintext_hint")
-	a.Description = field.NewString(table, "description")
-	a.Enabled = field.NewBool(table, "enabled")
-	a.ApikeyType = field.NewString(table, "apikey_type")
-	a.OwnerPublicID = field.NewString(table, "owner_public_id")
-	a.OrganizationID = field.NewUint(table, "organization_id")
-	a.ProjectID = field.NewUint(table, "project_id")
-	a.Permissions = field.NewString(table, "permissions")
-	a.ExpiresAt = field.NewTime(table, "expires_at")
-	a.LastUsedAt = field.NewTime(table, "last_used_at")
-
-	a.fillFieldMap()
-
-	return a
-}
-
-func (a *apiKey) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := a.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (a *apiKey) fillFieldMap() {
-	a.fieldMap = make(map[string]field.Expr, 16)
-	a.fieldMap["id"] = a.ID
-	a.fieldMap["created_at"] = a.CreatedAt
-	a.fieldMap["updated_at"] = a.UpdatedAt
-	a.fieldMap["deleted_at"] = a.DeletedAt
-	a.fieldMap["public_id"] = a.PublicID
-	a.fieldMap["key_hash"] = a.KeyHash
-	a.fieldMap["plaintext_hint"] = a.PlaintextHint
-	a.fieldMap["description"] = a.Description
-	a.fieldMap["enabled"] = a.Enabled
-	a.fieldMap["apikey_type"] = a.ApikeyType
-	a.fieldMap["owner_public_id"] = a.OwnerPublicID
-	a.fieldMap["organization_id"] = a.OrganizationID
-	a.fieldMap["project_id"] = a.ProjectID
-	a.fieldMap["permissions"] = a.Permissions
-	a.fieldMap["expires_at"] = a.ExpiresAt
-	a.fieldMap["last_used_at"] = a.LastUsedAt
-}
-
-func (a apiKey) clone(db *gorm.DB) apiKey {
-	a.apiKeyDo.ReplaceConnPool(db.Statement.ConnPool)
-	return a
-}
-
-func (a apiKey) replaceDB(db *gorm.DB) apiKey {
-	a.apiKeyDo.ReplaceDB(db)
-	return a
-}
-
-type apiKeyDo struct{ gen.DO }
-
-type IApiKeyDo interface {
-	gen.SubQuery
-	Debug() IApiKeyDo
-	WithContext(ctx context.Context) IApiKeyDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IApiKeyDo
-	WriteDB() IApiKeyDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IApiKeyDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IApiKeyDo
-	Not(conds ...gen.Condition) IApiKeyDo
-	Or(conds ...gen.Condition) IApiKeyDo
-	Select(conds ...field.Expr) IApiKeyDo
-	Where(conds ...gen.Condition) IApiKeyDo
-	Order(conds ...field.Expr) IApiKeyDo
-	Distinct(cols ...field.Expr) IApiKeyDo
-	Omit(cols ...field.Expr) IApiKeyDo
-	Join(table schema.Tabler, on ...field.Expr) IApiKeyDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IApiKeyDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IApiKeyDo
-	Group(cols ...field.Expr) IApiKeyDo
-	Having(conds ...gen.Condition) IApiKeyDo
-	Limit(limit int) IApiKeyDo
-	Offset(offset int) IApiKeyDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IApiKeyDo
-	Unscoped() IApiKeyDo
-	Create(values ...*dbschema.ApiKey) error
-	CreateInBatches(values []*dbschema.ApiKey, batchSize int) error
-	Save(values ...*dbschema.ApiKey) error
-	First() (*dbschema.ApiKey, error)
-	Take() (*dbschema.ApiKey, error)
-	Last() (*dbschema.ApiKey, error)
-	Find() ([]*dbschema.ApiKey, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ApiKey, err error)
-	FindInBatches(result *[]*dbschema.ApiKey, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.ApiKey) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IApiKeyDo
-	Assign(attrs ...field.AssignExpr) IApiKeyDo
-	Joins(fields ...field.RelationField) IApiKeyDo
-	Preload(fields ...field.RelationField) IApiKeyDo
-	FirstOrInit() (*dbschema.ApiKey, error)
-	FirstOrCreate() (*dbschema.ApiKey, error)
-	FindByPage(offset int, limit int) (result []*dbschema.ApiKey, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IApiKeyDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (a apiKeyDo) Debug() IApiKeyDo {
-	return a.withDO(a.DO.Debug())
-}
-
-func (a apiKeyDo) WithContext(ctx context.Context) IApiKeyDo {
-	return a.withDO(a.DO.WithContext(ctx))
-}
-
-func (a apiKeyDo) ReadDB() IApiKeyDo {
-	return a.Clauses(dbresolver.Read)
-}
-
-func (a apiKeyDo) WriteDB() IApiKeyDo {
-	return a.Clauses(dbresolver.Write)
-}
-
-func (a apiKeyDo) Session(config *gorm.Session) IApiKeyDo {
-	return a.withDO(a.DO.Session(config))
-}
-
-func (a apiKeyDo) Clauses(conds ...clause.Expression) IApiKeyDo {
-	return a.withDO(a.DO.Clauses(conds...))
-}
-
-func (a apiKeyDo) Returning(value interface{}, columns ...string) IApiKeyDo {
-	return a.withDO(a.DO.Returning(value, columns...))
-}
-
-func (a apiKeyDo) Not(conds ...gen.Condition) IApiKeyDo {
-	return a.withDO(a.DO.Not(conds...))
-}
-
-func (a apiKeyDo) Or(conds ...gen.Condition) IApiKeyDo {
-	return a.withDO(a.DO.Or(conds...))
-}
-
-func (a apiKeyDo) Select(conds ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.Select(conds...))
-}
-
-func (a apiKeyDo) Where(conds ...gen.Condition) IApiKeyDo {
-	return a.withDO(a.DO.Where(conds...))
-}
-
-func (a apiKeyDo) Order(conds ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.Order(conds...))
-}
-
-func (a apiKeyDo) Distinct(cols ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.Distinct(cols...))
-}
-
-func (a apiKeyDo) Omit(cols ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.Omit(cols...))
-}
-
-func (a apiKeyDo) Join(table schema.Tabler, on ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.Join(table, on...))
-}
-
-func (a apiKeyDo) LeftJoin(table schema.Tabler, on ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.LeftJoin(table, on...))
-}
-
-func (a apiKeyDo) RightJoin(table schema.Tabler, on ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.RightJoin(table, on...))
-}
-
-func (a apiKeyDo) Group(cols ...field.Expr) IApiKeyDo {
-	return a.withDO(a.DO.Group(cols...))
-}
-
-func (a apiKeyDo) Having(conds ...gen.Condition) IApiKeyDo {
-	return a.withDO(a.DO.Having(conds...))
-}
-
-func (a apiKeyDo) Limit(limit int) IApiKeyDo {
-	return a.withDO(a.DO.Limit(limit))
-}
-
-func (a apiKeyDo) Offset(offset int) IApiKeyDo {
-	return a.withDO(a.DO.Offset(offset))
-}
-
-func (a apiKeyDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IApiKeyDo {
-	return a.withDO(a.DO.Scopes(funcs...))
-}
-
-func (a apiKeyDo) Unscoped() IApiKeyDo {
-	return a.withDO(a.DO.Unscoped())
-}
-
-func (a apiKeyDo) Create(values ...*dbschema.ApiKey) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return a.DO.Create(values)
-}
-
-func (a apiKeyDo) CreateInBatches(values []*dbschema.ApiKey, batchSize int) error {
-	return a.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (a apiKeyDo) Save(values ...*dbschema.ApiKey) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return a.DO.Save(values)
-}
-
-func (a apiKeyDo) First() (*dbschema.ApiKey, error) {
-	if result, err := a.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ApiKey), nil
-	}
-}
-
-func (a apiKeyDo) Take() (*dbschema.ApiKey, error) {
-	if result, err := a.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ApiKey), nil
-	}
-}
-
-func (a apiKeyDo) Last() (*dbschema.ApiKey, error) {
-	if result, err := a.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ApiKey), nil
-	}
-}
-
-func (a apiKeyDo) Find() ([]*dbschema.ApiKey, error) {
-	result, err := a.DO.Find()
-	return result.([]*dbschema.ApiKey), err
-}
-
-func (a apiKeyDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ApiKey, err error) {
-	buf := make([]*dbschema.ApiKey, 0, batchSize)
-	err = a.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (a apiKeyDo) FindInBatches(result *[]*dbschema.ApiKey, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return a.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (a apiKeyDo) Attrs(attrs ...field.AssignExpr) IApiKeyDo {
-	return a.withDO(a.DO.Attrs(attrs...))
-}
-
-func (a apiKeyDo) Assign(attrs ...field.AssignExpr) IApiKeyDo {
-	return a.withDO(a.DO.Assign(attrs...))
-}
-
-func (a apiKeyDo) Joins(fields ...field.RelationField) IApiKeyDo {
-	for _, _f := range fields {
-		a = *a.withDO(a.DO.Joins(_f))
-	}
-	return &a
-}
-
-func (a apiKeyDo) Preload(fields ...field.RelationField) IApiKeyDo {
-	for _, _f := range fields {
-		a = *a.withDO(a.DO.Preload(_f))
-	}
-	return &a
-}
-
-func (a apiKeyDo) FirstOrInit() (*dbschema.ApiKey, error) {
-	if result, err := a.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ApiKey), nil
-	}
-}
-
-func (a apiKeyDo) FirstOrCreate() (*dbschema.ApiKey, error) {
-	if result, err := a.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ApiKey), nil
-	}
-}
-
-func (a apiKeyDo) FindByPage(offset int, limit int) (result []*dbschema.ApiKey, count int64, err error) {
-	result, err = a.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = a.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (a apiKeyDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = a.Count()
-	if err != nil {
-		return
-	}
-
-	err = a.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (a apiKeyDo) Scan(result interface{}) (err error) {
-	return a.DO.Scan(result)
-}
-
-func (a apiKeyDo) Delete(models ...*dbschema.ApiKey) (result gen.ResultInfo, err error) {
-	return a.DO.Delete(models)
-}
-
-func (a *apiKeyDo) withDO(do gen.Dao) *apiKeyDo {
-	a.DO = *do.(*gen.DO)
-	return a
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/gen.go
deleted file mode 100644
index 7924d13c..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/gen.go
+++ /dev/null
@@ -1,175 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-
-	"gorm.io/gen"
-
-	"gorm.io/plugin/dbresolver"
-)
-
-var (
-	Q                  = new(Query)
-	ApiKey             *apiKey
-	Conversation       *conversation
-	Invite             *invite
-	Item               *item
-	Organization       *organization
-	OrganizationMember *organizationMember
-	Project            *project
-	ProjectMember      *projectMember
-	Response           *response
-	User               *user
-)
-
-func SetDefault(db *gorm.DB, opts ...gen.DOOption) {
-	*Q = *Use(db, opts...)
-	ApiKey = &Q.ApiKey
-	Conversation = &Q.Conversation
-	Invite = &Q.Invite
-	Item = &Q.Item
-	Organization = &Q.Organization
-	OrganizationMember = &Q.OrganizationMember
-	Project = &Q.Project
-	ProjectMember = &Q.ProjectMember
-	Response = &Q.Response
-	User = &Q.User
-}
-
-func Use(db *gorm.DB, opts ...gen.DOOption) *Query {
-	return &Query{
-		db:                 db,
-		ApiKey:             newApiKey(db, opts...),
-		Conversation:       newConversation(db, opts...),
-		Invite:             newInvite(db, opts...),
-		Item:               newItem(db, opts...),
-		Organization:       newOrganization(db, opts...),
-		OrganizationMember: newOrganizationMember(db, opts...),
-		Project:            newProject(db, opts...),
-		ProjectMember:      newProjectMember(db, opts...),
-		Response:           newResponse(db, opts...),
-		User:               newUser(db, opts...),
-	}
-}
-
-type Query struct {
-	db *gorm.DB
-
-	ApiKey             apiKey
-	Conversation       conversation
-	Invite             invite
-	Item               item
-	Organization       organization
-	OrganizationMember organizationMember
-	Project            project
-	ProjectMember      projectMember
-	Response           response
-	User               user
-}
-
-func (q *Query) Available() bool { return q.db != nil }
-
-func (q *Query) clone(db *gorm.DB) *Query {
-	return &Query{
-		db:                 db,
-		ApiKey:             q.ApiKey.clone(db),
-		Conversation:       q.Conversation.clone(db),
-		Invite:             q.Invite.clone(db),
-		Item:               q.Item.clone(db),
-		Organization:       q.Organization.clone(db),
-		OrganizationMember: q.OrganizationMember.clone(db),
-		Project:            q.Project.clone(db),
-		ProjectMember:      q.ProjectMember.clone(db),
-		Response:           q.Response.clone(db),
-		User:               q.User.clone(db),
-	}
-}
-
-func (q *Query) ReadDB() *Query {
-	return q.ReplaceDB(q.db.Clauses(dbresolver.Read))
-}
-
-func (q *Query) WriteDB() *Query {
-	return q.ReplaceDB(q.db.Clauses(dbresolver.Write))
-}
-
-func (q *Query) ReplaceDB(db *gorm.DB) *Query {
-	return &Query{
-		db:                 db,
-		ApiKey:             q.ApiKey.replaceDB(db),
-		Conversation:       q.Conversation.replaceDB(db),
-		Invite:             q.Invite.replaceDB(db),
-		Item:               q.Item.replaceDB(db),
-		Organization:       q.Organization.replaceDB(db),
-		OrganizationMember: q.OrganizationMember.replaceDB(db),
-		Project:            q.Project.replaceDB(db),
-		ProjectMember:      q.ProjectMember.replaceDB(db),
-		Response:           q.Response.replaceDB(db),
-		User:               q.User.replaceDB(db),
-	}
-}
-
-type queryCtx struct {
-	ApiKey             IApiKeyDo
-	Conversation       IConversationDo
-	Invite             IInviteDo
-	Item               IItemDo
-	Organization       IOrganizationDo
-	OrganizationMember IOrganizationMemberDo
-	Project            IProjectDo
-	ProjectMember      IProjectMemberDo
-	Response           IResponseDo
-	User               IUserDo
-}
-
-func (q *Query) WithContext(ctx context.Context) *queryCtx {
-	return &queryCtx{
-		ApiKey:             q.ApiKey.WithContext(ctx),
-		Conversation:       q.Conversation.WithContext(ctx),
-		Invite:             q.Invite.WithContext(ctx),
-		Item:               q.Item.WithContext(ctx),
-		Organization:       q.Organization.WithContext(ctx),
-		OrganizationMember: q.OrganizationMember.WithContext(ctx),
-		Project:            q.Project.WithContext(ctx),
-		ProjectMember:      q.ProjectMember.WithContext(ctx),
-		Response:           q.Response.WithContext(ctx),
-		User:               q.User.WithContext(ctx),
-	}
-}
-
-func (q *Query) Transaction(fc func(tx *Query) error, opts ...*sql.TxOptions) error {
-	return q.db.Transaction(func(tx *gorm.DB) error { return fc(q.clone(tx)) }, opts...)
-}
-
-func (q *Query) Begin(opts ...*sql.TxOptions) *QueryTx {
-	tx := q.db.Begin(opts...)
-	return &QueryTx{Query: q.clone(tx), Error: tx.Error}
-}
-
-type QueryTx struct {
-	*Query
-	Error error
-}
-
-func (q *QueryTx) Commit() error {
-	return q.db.Commit().Error
-}
-
-func (q *QueryTx) Rollback() error {
-	return q.db.Rollback().Error
-}
-
-func (q *QueryTx) SavePoint(name string) error {
-	return q.db.SavePoint(name).Error
-}
-
-func (q *QueryTx) RollbackTo(name string) error {
-	return q.db.RollbackTo(name).Error
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/invites.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/invites.gen.go
deleted file mode 100644
index 5dc00e17..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/invites.gen.go
+++ /dev/null
@@ -1,435 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newInvite(db *gorm.DB, opts ...gen.DOOption) invite {
-	_invite := invite{}
-
-	_invite.inviteDo.UseDB(db, opts...)
-	_invite.inviteDo.UseModel(&dbschema.Invite{})
-
-	tableName := _invite.inviteDo.TableName()
-	_invite.ALL = field.NewAsterisk(tableName)
-	_invite.ID = field.NewUint(tableName, "id")
-	_invite.CreatedAt = field.NewTime(tableName, "created_at")
-	_invite.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_invite.DeletedAt = field.NewField(tableName, "deleted_at")
-	_invite.PublicID = field.NewString(tableName, "public_id")
-	_invite.Email = field.NewString(tableName, "email")
-	_invite.Role = field.NewString(tableName, "role")
-	_invite.Status = field.NewString(tableName, "status")
-	_invite.InvitedAt = field.NewTime(tableName, "invited_at")
-	_invite.ExpiresAt = field.NewTime(tableName, "expires_at")
-	_invite.AcceptedAt = field.NewTime(tableName, "accepted_at")
-	_invite.Secrets = field.NewString(tableName, "secrets")
-	_invite.Projects = field.NewString(tableName, "projects")
-	_invite.OrganizationID = field.NewUint(tableName, "organization_id")
-
-	_invite.fillFieldMap()
-
-	return _invite
-}
-
-type invite struct {
-	inviteDo
-
-	ALL            field.Asterisk
-	ID             field.Uint
-	CreatedAt      field.Time
-	UpdatedAt      field.Time
-	DeletedAt      field.Field
-	PublicID       field.String
-	Email          field.String
-	Role           field.String
-	Status         field.String
-	InvitedAt      field.Time
-	ExpiresAt      field.Time
-	AcceptedAt     field.Time
-	Secrets        field.String
-	Projects       field.String
-	OrganizationID field.Uint
-
-	fieldMap map[string]field.Expr
-}
-
-func (i invite) Table(newTableName string) *invite {
-	i.inviteDo.UseTable(newTableName)
-	return i.updateTableName(newTableName)
-}
-
-func (i invite) As(alias string) *invite {
-	i.inviteDo.DO = *(i.inviteDo.As(alias).(*gen.DO))
-	return i.updateTableName(alias)
-}
-
-func (i *invite) updateTableName(table string) *invite {
-	i.ALL = field.NewAsterisk(table)
-	i.ID = field.NewUint(table, "id")
-	i.CreatedAt = field.NewTime(table, "created_at")
-	i.UpdatedAt = field.NewTime(table, "updated_at")
-	i.DeletedAt = field.NewField(table, "deleted_at")
-	i.PublicID = field.NewString(table, "public_id")
-	i.Email = field.NewString(table, "email")
-	i.Role = field.NewString(table, "role")
-	i.Status = field.NewString(table, "status")
-	i.InvitedAt = field.NewTime(table, "invited_at")
-	i.ExpiresAt = field.NewTime(table, "expires_at")
-	i.AcceptedAt = field.NewTime(table, "accepted_at")
-	i.Secrets = field.NewString(table, "secrets")
-	i.Projects = field.NewString(table, "projects")
-	i.OrganizationID = field.NewUint(table, "organization_id")
-
-	i.fillFieldMap()
-
-	return i
-}
-
-func (i *invite) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := i.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (i *invite) fillFieldMap() {
-	i.fieldMap = make(map[string]field.Expr, 14)
-	i.fieldMap["id"] = i.ID
-	i.fieldMap["created_at"] = i.CreatedAt
-	i.fieldMap["updated_at"] = i.UpdatedAt
-	i.fieldMap["deleted_at"] = i.DeletedAt
-	i.fieldMap["public_id"] = i.PublicID
-	i.fieldMap["email"] = i.Email
-	i.fieldMap["role"] = i.Role
-	i.fieldMap["status"] = i.Status
-	i.fieldMap["invited_at"] = i.InvitedAt
-	i.fieldMap["expires_at"] = i.ExpiresAt
-	i.fieldMap["accepted_at"] = i.AcceptedAt
-	i.fieldMap["secrets"] = i.Secrets
-	i.fieldMap["projects"] = i.Projects
-	i.fieldMap["organization_id"] = i.OrganizationID
-}
-
-func (i invite) clone(db *gorm.DB) invite {
-	i.inviteDo.ReplaceConnPool(db.Statement.ConnPool)
-	return i
-}
-
-func (i invite) replaceDB(db *gorm.DB) invite {
-	i.inviteDo.ReplaceDB(db)
-	return i
-}
-
-type inviteDo struct{ gen.DO }
-
-type IInviteDo interface {
-	gen.SubQuery
-	Debug() IInviteDo
-	WithContext(ctx context.Context) IInviteDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IInviteDo
-	WriteDB() IInviteDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IInviteDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IInviteDo
-	Not(conds ...gen.Condition) IInviteDo
-	Or(conds ...gen.Condition) IInviteDo
-	Select(conds ...field.Expr) IInviteDo
-	Where(conds ...gen.Condition) IInviteDo
-	Order(conds ...field.Expr) IInviteDo
-	Distinct(cols ...field.Expr) IInviteDo
-	Omit(cols ...field.Expr) IInviteDo
-	Join(table schema.Tabler, on ...field.Expr) IInviteDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IInviteDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IInviteDo
-	Group(cols ...field.Expr) IInviteDo
-	Having(conds ...gen.Condition) IInviteDo
-	Limit(limit int) IInviteDo
-	Offset(offset int) IInviteDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IInviteDo
-	Unscoped() IInviteDo
-	Create(values ...*dbschema.Invite) error
-	CreateInBatches(values []*dbschema.Invite, batchSize int) error
-	Save(values ...*dbschema.Invite) error
-	First() (*dbschema.Invite, error)
-	Take() (*dbschema.Invite, error)
-	Last() (*dbschema.Invite, error)
-	Find() ([]*dbschema.Invite, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Invite, err error)
-	FindInBatches(result *[]*dbschema.Invite, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.Invite) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IInviteDo
-	Assign(attrs ...field.AssignExpr) IInviteDo
-	Joins(fields ...field.RelationField) IInviteDo
-	Preload(fields ...field.RelationField) IInviteDo
-	FirstOrInit() (*dbschema.Invite, error)
-	FirstOrCreate() (*dbschema.Invite, error)
-	FindByPage(offset int, limit int) (result []*dbschema.Invite, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IInviteDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (i inviteDo) Debug() IInviteDo {
-	return i.withDO(i.DO.Debug())
-}
-
-func (i inviteDo) WithContext(ctx context.Context) IInviteDo {
-	return i.withDO(i.DO.WithContext(ctx))
-}
-
-func (i inviteDo) ReadDB() IInviteDo {
-	return i.Clauses(dbresolver.Read)
-}
-
-func (i inviteDo) WriteDB() IInviteDo {
-	return i.Clauses(dbresolver.Write)
-}
-
-func (i inviteDo) Session(config *gorm.Session) IInviteDo {
-	return i.withDO(i.DO.Session(config))
-}
-
-func (i inviteDo) Clauses(conds ...clause.Expression) IInviteDo {
-	return i.withDO(i.DO.Clauses(conds...))
-}
-
-func (i inviteDo) Returning(value interface{}, columns ...string) IInviteDo {
-	return i.withDO(i.DO.Returning(value, columns...))
-}
-
-func (i inviteDo) Not(conds ...gen.Condition) IInviteDo {
-	return i.withDO(i.DO.Not(conds...))
-}
-
-func (i inviteDo) Or(conds ...gen.Condition) IInviteDo {
-	return i.withDO(i.DO.Or(conds...))
-}
-
-func (i inviteDo) Select(conds ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.Select(conds...))
-}
-
-func (i inviteDo) Where(conds ...gen.Condition) IInviteDo {
-	return i.withDO(i.DO.Where(conds...))
-}
-
-func (i inviteDo) Order(conds ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.Order(conds...))
-}
-
-func (i inviteDo) Distinct(cols ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.Distinct(cols...))
-}
-
-func (i inviteDo) Omit(cols ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.Omit(cols...))
-}
-
-func (i inviteDo) Join(table schema.Tabler, on ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.Join(table, on...))
-}
-
-func (i inviteDo) LeftJoin(table schema.Tabler, on ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.LeftJoin(table, on...))
-}
-
-func (i inviteDo) RightJoin(table schema.Tabler, on ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.RightJoin(table, on...))
-}
-
-func (i inviteDo) Group(cols ...field.Expr) IInviteDo {
-	return i.withDO(i.DO.Group(cols...))
-}
-
-func (i inviteDo) Having(conds ...gen.Condition) IInviteDo {
-	return i.withDO(i.DO.Having(conds...))
-}
-
-func (i inviteDo) Limit(limit int) IInviteDo {
-	return i.withDO(i.DO.Limit(limit))
-}
-
-func (i inviteDo) Offset(offset int) IInviteDo {
-	return i.withDO(i.DO.Offset(offset))
-}
-
-func (i inviteDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IInviteDo {
-	return i.withDO(i.DO.Scopes(funcs...))
-}
-
-func (i inviteDo) Unscoped() IInviteDo {
-	return i.withDO(i.DO.Unscoped())
-}
-
-func (i inviteDo) Create(values ...*dbschema.Invite) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return i.DO.Create(values)
-}
-
-func (i inviteDo) CreateInBatches(values []*dbschema.Invite, batchSize int) error {
-	return i.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (i inviteDo) Save(values ...*dbschema.Invite) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return i.DO.Save(values)
-}
-
-func (i inviteDo) First() (*dbschema.Invite, error) {
-	if result, err := i.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Invite), nil
-	}
-}
-
-func (i inviteDo) Take() (*dbschema.Invite, error) {
-	if result, err := i.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Invite), nil
-	}
-}
-
-func (i inviteDo) Last() (*dbschema.Invite, error) {
-	if result, err := i.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Invite), nil
-	}
-}
-
-func (i inviteDo) Find() ([]*dbschema.Invite, error) {
-	result, err := i.DO.Find()
-	return result.([]*dbschema.Invite), err
-}
-
-func (i inviteDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Invite, err error) {
-	buf := make([]*dbschema.Invite, 0, batchSize)
-	err = i.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (i inviteDo) FindInBatches(result *[]*dbschema.Invite, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return i.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (i inviteDo) Attrs(attrs ...field.AssignExpr) IInviteDo {
-	return i.withDO(i.DO.Attrs(attrs...))
-}
-
-func (i inviteDo) Assign(attrs ...field.AssignExpr) IInviteDo {
-	return i.withDO(i.DO.Assign(attrs...))
-}
-
-func (i inviteDo) Joins(fields ...field.RelationField) IInviteDo {
-	for _, _f := range fields {
-		i = *i.withDO(i.DO.Joins(_f))
-	}
-	return &i
-}
-
-func (i inviteDo) Preload(fields ...field.RelationField) IInviteDo {
-	for _, _f := range fields {
-		i = *i.withDO(i.DO.Preload(_f))
-	}
-	return &i
-}
-
-func (i inviteDo) FirstOrInit() (*dbschema.Invite, error) {
-	if result, err := i.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Invite), nil
-	}
-}
-
-func (i inviteDo) FirstOrCreate() (*dbschema.Invite, error) {
-	if result, err := i.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Invite), nil
-	}
-}
-
-func (i inviteDo) FindByPage(offset int, limit int) (result []*dbschema.Invite, count int64, err error) {
-	result, err = i.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = i.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (i inviteDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = i.Count()
-	if err != nil {
-		return
-	}
-
-	err = i.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (i inviteDo) Scan(result interface{}) (err error) {
-	return i.DO.Scan(result)
-}
-
-func (i inviteDo) Delete(models ...*dbschema.Invite) (result gen.ResultInfo, err error) {
-	return i.DO.Delete(models)
-}
-
-func (i *inviteDo) withDO(do gen.Dao) *inviteDo {
-	i.DO = *do.(*gen.DO)
-	return i
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/items.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/items.gen.go
deleted file mode 100644
index 0f495980..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/items.gen.go
+++ /dev/null
@@ -1,721 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newItem(db *gorm.DB, opts ...gen.DOOption) item {
-	_item := item{}
-
-	_item.itemDo.UseDB(db, opts...)
-	_item.itemDo.UseModel(&dbschema.Item{})
-
-	tableName := _item.itemDo.TableName()
-	_item.ALL = field.NewAsterisk(tableName)
-	_item.ID = field.NewUint(tableName, "id")
-	_item.CreatedAt = field.NewTime(tableName, "created_at")
-	_item.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_item.DeletedAt = field.NewField(tableName, "deleted_at")
-	_item.PublicID = field.NewString(tableName, "public_id")
-	_item.ConversationID = field.NewUint(tableName, "conversation_id")
-	_item.ResponseID = field.NewUint(tableName, "response_id")
-	_item.Type = field.NewString(tableName, "type")
-	_item.Role = field.NewString(tableName, "role")
-	_item.Content = field.NewString(tableName, "content")
-	_item.Status = field.NewString(tableName, "status")
-	_item.IncompleteAt = field.NewTime(tableName, "incomplete_at")
-	_item.IncompleteDetails = field.NewString(tableName, "incomplete_details")
-	_item.CompletedAt = field.NewTime(tableName, "completed_at")
-	_item.Conversation = itemBelongsToConversation{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Conversation", "dbschema.Conversation"),
-		User: struct {
-			field.RelationField
-			Organizations struct {
-				field.RelationField
-			}
-			Projects struct {
-				field.RelationField
-			}
-		}{
-			RelationField: field.NewRelation("Conversation.User", "dbschema.User"),
-			Organizations: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Conversation.User.Organizations", "dbschema.OrganizationMember"),
-			},
-			Projects: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Conversation.User.Projects", "dbschema.ProjectMember"),
-			},
-		},
-		Items: struct {
-			field.RelationField
-			Conversation struct {
-				field.RelationField
-			}
-			Response struct {
-				field.RelationField
-				UserEntity struct {
-					field.RelationField
-				}
-				Conversation struct {
-					field.RelationField
-				}
-				Items struct {
-					field.RelationField
-				}
-			}
-		}{
-			RelationField: field.NewRelation("Conversation.Items", "dbschema.Item"),
-			Conversation: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Conversation.Items.Conversation", "dbschema.Conversation"),
-			},
-			Response: struct {
-				field.RelationField
-				UserEntity struct {
-					field.RelationField
-				}
-				Conversation struct {
-					field.RelationField
-				}
-				Items struct {
-					field.RelationField
-				}
-			}{
-				RelationField: field.NewRelation("Conversation.Items.Response", "dbschema.Response"),
-				UserEntity: struct {
-					field.RelationField
-				}{
-					RelationField: field.NewRelation("Conversation.Items.Response.UserEntity", "dbschema.User"),
-				},
-				Conversation: struct {
-					field.RelationField
-				}{
-					RelationField: field.NewRelation("Conversation.Items.Response.Conversation", "dbschema.Conversation"),
-				},
-				Items: struct {
-					field.RelationField
-				}{
-					RelationField: field.NewRelation("Conversation.Items.Response.Items", "dbschema.Item"),
-				},
-			},
-		},
-	}
-
-	_item.Response = itemBelongsToResponse{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Response", "dbschema.Response"),
-	}
-
-	_item.fillFieldMap()
-
-	return _item
-}
-
-type item struct {
-	itemDo
-
-	ALL               field.Asterisk
-	ID                field.Uint
-	CreatedAt         field.Time
-	UpdatedAt         field.Time
-	DeletedAt         field.Field
-	PublicID          field.String
-	ConversationID    field.Uint
-	ResponseID        field.Uint
-	Type              field.String
-	Role              field.String
-	Content           field.String
-	Status            field.String
-	IncompleteAt      field.Time
-	IncompleteDetails field.String
-	CompletedAt       field.Time
-	Conversation      itemBelongsToConversation
-
-	Response itemBelongsToResponse
-
-	fieldMap map[string]field.Expr
-}
-
-func (i item) Table(newTableName string) *item {
-	i.itemDo.UseTable(newTableName)
-	return i.updateTableName(newTableName)
-}
-
-func (i item) As(alias string) *item {
-	i.itemDo.DO = *(i.itemDo.As(alias).(*gen.DO))
-	return i.updateTableName(alias)
-}
-
-func (i *item) updateTableName(table string) *item {
-	i.ALL = field.NewAsterisk(table)
-	i.ID = field.NewUint(table, "id")
-	i.CreatedAt = field.NewTime(table, "created_at")
-	i.UpdatedAt = field.NewTime(table, "updated_at")
-	i.DeletedAt = field.NewField(table, "deleted_at")
-	i.PublicID = field.NewString(table, "public_id")
-	i.ConversationID = field.NewUint(table, "conversation_id")
-	i.ResponseID = field.NewUint(table, "response_id")
-	i.Type = field.NewString(table, "type")
-	i.Role = field.NewString(table, "role")
-	i.Content = field.NewString(table, "content")
-	i.Status = field.NewString(table, "status")
-	i.IncompleteAt = field.NewTime(table, "incomplete_at")
-	i.IncompleteDetails = field.NewString(table, "incomplete_details")
-	i.CompletedAt = field.NewTime(table, "completed_at")
-
-	i.fillFieldMap()
-
-	return i
-}
-
-func (i *item) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := i.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (i *item) fillFieldMap() {
-	i.fieldMap = make(map[string]field.Expr, 16)
-	i.fieldMap["id"] = i.ID
-	i.fieldMap["created_at"] = i.CreatedAt
-	i.fieldMap["updated_at"] = i.UpdatedAt
-	i.fieldMap["deleted_at"] = i.DeletedAt
-	i.fieldMap["public_id"] = i.PublicID
-	i.fieldMap["conversation_id"] = i.ConversationID
-	i.fieldMap["response_id"] = i.ResponseID
-	i.fieldMap["type"] = i.Type
-	i.fieldMap["role"] = i.Role
-	i.fieldMap["content"] = i.Content
-	i.fieldMap["status"] = i.Status
-	i.fieldMap["incomplete_at"] = i.IncompleteAt
-	i.fieldMap["incomplete_details"] = i.IncompleteDetails
-	i.fieldMap["completed_at"] = i.CompletedAt
-
-}
-
-func (i item) clone(db *gorm.DB) item {
-	i.itemDo.ReplaceConnPool(db.Statement.ConnPool)
-	i.Conversation.db = db.Session(&gorm.Session{Initialized: true})
-	i.Conversation.db.Statement.ConnPool = db.Statement.ConnPool
-	i.Response.db = db.Session(&gorm.Session{Initialized: true})
-	i.Response.db.Statement.ConnPool = db.Statement.ConnPool
-	return i
-}
-
-func (i item) replaceDB(db *gorm.DB) item {
-	i.itemDo.ReplaceDB(db)
-	i.Conversation.db = db.Session(&gorm.Session{})
-	i.Response.db = db.Session(&gorm.Session{})
-	return i
-}
-
-type itemBelongsToConversation struct {
-	db *gorm.DB
-
-	field.RelationField
-
-	User struct {
-		field.RelationField
-		Organizations struct {
-			field.RelationField
-		}
-		Projects struct {
-			field.RelationField
-		}
-	}
-	Items struct {
-		field.RelationField
-		Conversation struct {
-			field.RelationField
-		}
-		Response struct {
-			field.RelationField
-			UserEntity struct {
-				field.RelationField
-			}
-			Conversation struct {
-				field.RelationField
-			}
-			Items struct {
-				field.RelationField
-			}
-		}
-	}
-}
-
-func (a itemBelongsToConversation) Where(conds ...field.Expr) *itemBelongsToConversation {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a itemBelongsToConversation) WithContext(ctx context.Context) *itemBelongsToConversation {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a itemBelongsToConversation) Session(session *gorm.Session) *itemBelongsToConversation {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a itemBelongsToConversation) Model(m *dbschema.Item) *itemBelongsToConversationTx {
-	return &itemBelongsToConversationTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a itemBelongsToConversation) Unscoped() *itemBelongsToConversation {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type itemBelongsToConversationTx struct{ tx *gorm.Association }
-
-func (a itemBelongsToConversationTx) Find() (result *dbschema.Conversation, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a itemBelongsToConversationTx) Append(values ...*dbschema.Conversation) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a itemBelongsToConversationTx) Replace(values ...*dbschema.Conversation) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a itemBelongsToConversationTx) Delete(values ...*dbschema.Conversation) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a itemBelongsToConversationTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a itemBelongsToConversationTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a itemBelongsToConversationTx) Unscoped() *itemBelongsToConversationTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type itemBelongsToResponse struct {
-	db *gorm.DB
-
-	field.RelationField
-}
-
-func (a itemBelongsToResponse) Where(conds ...field.Expr) *itemBelongsToResponse {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a itemBelongsToResponse) WithContext(ctx context.Context) *itemBelongsToResponse {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a itemBelongsToResponse) Session(session *gorm.Session) *itemBelongsToResponse {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a itemBelongsToResponse) Model(m *dbschema.Item) *itemBelongsToResponseTx {
-	return &itemBelongsToResponseTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a itemBelongsToResponse) Unscoped() *itemBelongsToResponse {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type itemBelongsToResponseTx struct{ tx *gorm.Association }
-
-func (a itemBelongsToResponseTx) Find() (result *dbschema.Response, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a itemBelongsToResponseTx) Append(values ...*dbschema.Response) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a itemBelongsToResponseTx) Replace(values ...*dbschema.Response) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a itemBelongsToResponseTx) Delete(values ...*dbschema.Response) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a itemBelongsToResponseTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a itemBelongsToResponseTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a itemBelongsToResponseTx) Unscoped() *itemBelongsToResponseTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type itemDo struct{ gen.DO }
-
-type IItemDo interface {
-	gen.SubQuery
-	Debug() IItemDo
-	WithContext(ctx context.Context) IItemDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IItemDo
-	WriteDB() IItemDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IItemDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IItemDo
-	Not(conds ...gen.Condition) IItemDo
-	Or(conds ...gen.Condition) IItemDo
-	Select(conds ...field.Expr) IItemDo
-	Where(conds ...gen.Condition) IItemDo
-	Order(conds ...field.Expr) IItemDo
-	Distinct(cols ...field.Expr) IItemDo
-	Omit(cols ...field.Expr) IItemDo
-	Join(table schema.Tabler, on ...field.Expr) IItemDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IItemDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IItemDo
-	Group(cols ...field.Expr) IItemDo
-	Having(conds ...gen.Condition) IItemDo
-	Limit(limit int) IItemDo
-	Offset(offset int) IItemDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IItemDo
-	Unscoped() IItemDo
-	Create(values ...*dbschema.Item) error
-	CreateInBatches(values []*dbschema.Item, batchSize int) error
-	Save(values ...*dbschema.Item) error
-	First() (*dbschema.Item, error)
-	Take() (*dbschema.Item, error)
-	Last() (*dbschema.Item, error)
-	Find() ([]*dbschema.Item, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Item, err error)
-	FindInBatches(result *[]*dbschema.Item, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.Item) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IItemDo
-	Assign(attrs ...field.AssignExpr) IItemDo
-	Joins(fields ...field.RelationField) IItemDo
-	Preload(fields ...field.RelationField) IItemDo
-	FirstOrInit() (*dbschema.Item, error)
-	FirstOrCreate() (*dbschema.Item, error)
-	FindByPage(offset int, limit int) (result []*dbschema.Item, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IItemDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (i itemDo) Debug() IItemDo {
-	return i.withDO(i.DO.Debug())
-}
-
-func (i itemDo) WithContext(ctx context.Context) IItemDo {
-	return i.withDO(i.DO.WithContext(ctx))
-}
-
-func (i itemDo) ReadDB() IItemDo {
-	return i.Clauses(dbresolver.Read)
-}
-
-func (i itemDo) WriteDB() IItemDo {
-	return i.Clauses(dbresolver.Write)
-}
-
-func (i itemDo) Session(config *gorm.Session) IItemDo {
-	return i.withDO(i.DO.Session(config))
-}
-
-func (i itemDo) Clauses(conds ...clause.Expression) IItemDo {
-	return i.withDO(i.DO.Clauses(conds...))
-}
-
-func (i itemDo) Returning(value interface{}, columns ...string) IItemDo {
-	return i.withDO(i.DO.Returning(value, columns...))
-}
-
-func (i itemDo) Not(conds ...gen.Condition) IItemDo {
-	return i.withDO(i.DO.Not(conds...))
-}
-
-func (i itemDo) Or(conds ...gen.Condition) IItemDo {
-	return i.withDO(i.DO.Or(conds...))
-}
-
-func (i itemDo) Select(conds ...field.Expr) IItemDo {
-	return i.withDO(i.DO.Select(conds...))
-}
-
-func (i itemDo) Where(conds ...gen.Condition) IItemDo {
-	return i.withDO(i.DO.Where(conds...))
-}
-
-func (i itemDo) Order(conds ...field.Expr) IItemDo {
-	return i.withDO(i.DO.Order(conds...))
-}
-
-func (i itemDo) Distinct(cols ...field.Expr) IItemDo {
-	return i.withDO(i.DO.Distinct(cols...))
-}
-
-func (i itemDo) Omit(cols ...field.Expr) IItemDo {
-	return i.withDO(i.DO.Omit(cols...))
-}
-
-func (i itemDo) Join(table schema.Tabler, on ...field.Expr) IItemDo {
-	return i.withDO(i.DO.Join(table, on...))
-}
-
-func (i itemDo) LeftJoin(table schema.Tabler, on ...field.Expr) IItemDo {
-	return i.withDO(i.DO.LeftJoin(table, on...))
-}
-
-func (i itemDo) RightJoin(table schema.Tabler, on ...field.Expr) IItemDo {
-	return i.withDO(i.DO.RightJoin(table, on...))
-}
-
-func (i itemDo) Group(cols ...field.Expr) IItemDo {
-	return i.withDO(i.DO.Group(cols...))
-}
-
-func (i itemDo) Having(conds ...gen.Condition) IItemDo {
-	return i.withDO(i.DO.Having(conds...))
-}
-
-func (i itemDo) Limit(limit int) IItemDo {
-	return i.withDO(i.DO.Limit(limit))
-}
-
-func (i itemDo) Offset(offset int) IItemDo {
-	return i.withDO(i.DO.Offset(offset))
-}
-
-func (i itemDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IItemDo {
-	return i.withDO(i.DO.Scopes(funcs...))
-}
-
-func (i itemDo) Unscoped() IItemDo {
-	return i.withDO(i.DO.Unscoped())
-}
-
-func (i itemDo) Create(values ...*dbschema.Item) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return i.DO.Create(values)
-}
-
-func (i itemDo) CreateInBatches(values []*dbschema.Item, batchSize int) error {
-	return i.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (i itemDo) Save(values ...*dbschema.Item) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return i.DO.Save(values)
-}
-
-func (i itemDo) First() (*dbschema.Item, error) {
-	if result, err := i.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Item), nil
-	}
-}
-
-func (i itemDo) Take() (*dbschema.Item, error) {
-	if result, err := i.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Item), nil
-	}
-}
-
-func (i itemDo) Last() (*dbschema.Item, error) {
-	if result, err := i.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Item), nil
-	}
-}
-
-func (i itemDo) Find() ([]*dbschema.Item, error) {
-	result, err := i.DO.Find()
-	return result.([]*dbschema.Item), err
-}
-
-func (i itemDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Item, err error) {
-	buf := make([]*dbschema.Item, 0, batchSize)
-	err = i.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (i itemDo) FindInBatches(result *[]*dbschema.Item, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return i.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (i itemDo) Attrs(attrs ...field.AssignExpr) IItemDo {
-	return i.withDO(i.DO.Attrs(attrs...))
-}
-
-func (i itemDo) Assign(attrs ...field.AssignExpr) IItemDo {
-	return i.withDO(i.DO.Assign(attrs...))
-}
-
-func (i itemDo) Joins(fields ...field.RelationField) IItemDo {
-	for _, _f := range fields {
-		i = *i.withDO(i.DO.Joins(_f))
-	}
-	return &i
-}
-
-func (i itemDo) Preload(fields ...field.RelationField) IItemDo {
-	for _, _f := range fields {
-		i = *i.withDO(i.DO.Preload(_f))
-	}
-	return &i
-}
-
-func (i itemDo) FirstOrInit() (*dbschema.Item, error) {
-	if result, err := i.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Item), nil
-	}
-}
-
-func (i itemDo) FirstOrCreate() (*dbschema.Item, error) {
-	if result, err := i.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Item), nil
-	}
-}
-
-func (i itemDo) FindByPage(offset int, limit int) (result []*dbschema.Item, count int64, err error) {
-	result, err = i.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = i.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (i itemDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = i.Count()
-	if err != nil {
-		return
-	}
-
-	err = i.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (i itemDo) Scan(result interface{}) (err error) {
-	return i.DO.Scan(result)
-}
-
-func (i itemDo) Delete(models ...*dbschema.Item) (result gen.ResultInfo, err error) {
-	return i.DO.Delete(models)
-}
-
-func (i *itemDo) withDO(do gen.Dao) *itemDo {
-	i.DO = *do.(*gen.DO)
-	return i
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/organization_members.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/organization_members.gen.go
deleted file mode 100644
index 9566487c..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/organization_members.gen.go
+++ /dev/null
@@ -1,407 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newOrganizationMember(db *gorm.DB, opts ...gen.DOOption) organizationMember {
-	_organizationMember := organizationMember{}
-
-	_organizationMember.organizationMemberDo.UseDB(db, opts...)
-	_organizationMember.organizationMemberDo.UseModel(&dbschema.OrganizationMember{})
-
-	tableName := _organizationMember.organizationMemberDo.TableName()
-	_organizationMember.ALL = field.NewAsterisk(tableName)
-	_organizationMember.ID = field.NewUint(tableName, "id")
-	_organizationMember.CreatedAt = field.NewTime(tableName, "created_at")
-	_organizationMember.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_organizationMember.DeletedAt = field.NewField(tableName, "deleted_at")
-	_organizationMember.UserID = field.NewUint(tableName, "user_id")
-	_organizationMember.OrganizationID = field.NewUint(tableName, "organization_id")
-	_organizationMember.Role = field.NewString(tableName, "role")
-
-	_organizationMember.fillFieldMap()
-
-	return _organizationMember
-}
-
-type organizationMember struct {
-	organizationMemberDo
-
-	ALL            field.Asterisk
-	ID             field.Uint
-	CreatedAt      field.Time
-	UpdatedAt      field.Time
-	DeletedAt      field.Field
-	UserID         field.Uint
-	OrganizationID field.Uint
-	Role           field.String
-
-	fieldMap map[string]field.Expr
-}
-
-func (o organizationMember) Table(newTableName string) *organizationMember {
-	o.organizationMemberDo.UseTable(newTableName)
-	return o.updateTableName(newTableName)
-}
-
-func (o organizationMember) As(alias string) *organizationMember {
-	o.organizationMemberDo.DO = *(o.organizationMemberDo.As(alias).(*gen.DO))
-	return o.updateTableName(alias)
-}
-
-func (o *organizationMember) updateTableName(table string) *organizationMember {
-	o.ALL = field.NewAsterisk(table)
-	o.ID = field.NewUint(table, "id")
-	o.CreatedAt = field.NewTime(table, "created_at")
-	o.UpdatedAt = field.NewTime(table, "updated_at")
-	o.DeletedAt = field.NewField(table, "deleted_at")
-	o.UserID = field.NewUint(table, "user_id")
-	o.OrganizationID = field.NewUint(table, "organization_id")
-	o.Role = field.NewString(table, "role")
-
-	o.fillFieldMap()
-
-	return o
-}
-
-func (o *organizationMember) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := o.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (o *organizationMember) fillFieldMap() {
-	o.fieldMap = make(map[string]field.Expr, 7)
-	o.fieldMap["id"] = o.ID
-	o.fieldMap["created_at"] = o.CreatedAt
-	o.fieldMap["updated_at"] = o.UpdatedAt
-	o.fieldMap["deleted_at"] = o.DeletedAt
-	o.fieldMap["user_id"] = o.UserID
-	o.fieldMap["organization_id"] = o.OrganizationID
-	o.fieldMap["role"] = o.Role
-}
-
-func (o organizationMember) clone(db *gorm.DB) organizationMember {
-	o.organizationMemberDo.ReplaceConnPool(db.Statement.ConnPool)
-	return o
-}
-
-func (o organizationMember) replaceDB(db *gorm.DB) organizationMember {
-	o.organizationMemberDo.ReplaceDB(db)
-	return o
-}
-
-type organizationMemberDo struct{ gen.DO }
-
-type IOrganizationMemberDo interface {
-	gen.SubQuery
-	Debug() IOrganizationMemberDo
-	WithContext(ctx context.Context) IOrganizationMemberDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IOrganizationMemberDo
-	WriteDB() IOrganizationMemberDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IOrganizationMemberDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IOrganizationMemberDo
-	Not(conds ...gen.Condition) IOrganizationMemberDo
-	Or(conds ...gen.Condition) IOrganizationMemberDo
-	Select(conds ...field.Expr) IOrganizationMemberDo
-	Where(conds ...gen.Condition) IOrganizationMemberDo
-	Order(conds ...field.Expr) IOrganizationMemberDo
-	Distinct(cols ...field.Expr) IOrganizationMemberDo
-	Omit(cols ...field.Expr) IOrganizationMemberDo
-	Join(table schema.Tabler, on ...field.Expr) IOrganizationMemberDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IOrganizationMemberDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IOrganizationMemberDo
-	Group(cols ...field.Expr) IOrganizationMemberDo
-	Having(conds ...gen.Condition) IOrganizationMemberDo
-	Limit(limit int) IOrganizationMemberDo
-	Offset(offset int) IOrganizationMemberDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IOrganizationMemberDo
-	Unscoped() IOrganizationMemberDo
-	Create(values ...*dbschema.OrganizationMember) error
-	CreateInBatches(values []*dbschema.OrganizationMember, batchSize int) error
-	Save(values ...*dbschema.OrganizationMember) error
-	First() (*dbschema.OrganizationMember, error)
-	Take() (*dbschema.OrganizationMember, error)
-	Last() (*dbschema.OrganizationMember, error)
-	Find() ([]*dbschema.OrganizationMember, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.OrganizationMember, err error)
-	FindInBatches(result *[]*dbschema.OrganizationMember, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.OrganizationMember) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IOrganizationMemberDo
-	Assign(attrs ...field.AssignExpr) IOrganizationMemberDo
-	Joins(fields ...field.RelationField) IOrganizationMemberDo
-	Preload(fields ...field.RelationField) IOrganizationMemberDo
-	FirstOrInit() (*dbschema.OrganizationMember, error)
-	FirstOrCreate() (*dbschema.OrganizationMember, error)
-	FindByPage(offset int, limit int) (result []*dbschema.OrganizationMember, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IOrganizationMemberDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (o organizationMemberDo) Debug() IOrganizationMemberDo {
-	return o.withDO(o.DO.Debug())
-}
-
-func (o organizationMemberDo) WithContext(ctx context.Context) IOrganizationMemberDo {
-	return o.withDO(o.DO.WithContext(ctx))
-}
-
-func (o organizationMemberDo) ReadDB() IOrganizationMemberDo {
-	return o.Clauses(dbresolver.Read)
-}
-
-func (o organizationMemberDo) WriteDB() IOrganizationMemberDo {
-	return o.Clauses(dbresolver.Write)
-}
-
-func (o organizationMemberDo) Session(config *gorm.Session) IOrganizationMemberDo {
-	return o.withDO(o.DO.Session(config))
-}
-
-func (o organizationMemberDo) Clauses(conds ...clause.Expression) IOrganizationMemberDo {
-	return o.withDO(o.DO.Clauses(conds...))
-}
-
-func (o organizationMemberDo) Returning(value interface{}, columns ...string) IOrganizationMemberDo {
-	return o.withDO(o.DO.Returning(value, columns...))
-}
-
-func (o organizationMemberDo) Not(conds ...gen.Condition) IOrganizationMemberDo {
-	return o.withDO(o.DO.Not(conds...))
-}
-
-func (o organizationMemberDo) Or(conds ...gen.Condition) IOrganizationMemberDo {
-	return o.withDO(o.DO.Or(conds...))
-}
-
-func (o organizationMemberDo) Select(conds ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Select(conds...))
-}
-
-func (o organizationMemberDo) Where(conds ...gen.Condition) IOrganizationMemberDo {
-	return o.withDO(o.DO.Where(conds...))
-}
-
-func (o organizationMemberDo) Order(conds ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Order(conds...))
-}
-
-func (o organizationMemberDo) Distinct(cols ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Distinct(cols...))
-}
-
-func (o organizationMemberDo) Omit(cols ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Omit(cols...))
-}
-
-func (o organizationMemberDo) Join(table schema.Tabler, on ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Join(table, on...))
-}
-
-func (o organizationMemberDo) LeftJoin(table schema.Tabler, on ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.LeftJoin(table, on...))
-}
-
-func (o organizationMemberDo) RightJoin(table schema.Tabler, on ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.RightJoin(table, on...))
-}
-
-func (o organizationMemberDo) Group(cols ...field.Expr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Group(cols...))
-}
-
-func (o organizationMemberDo) Having(conds ...gen.Condition) IOrganizationMemberDo {
-	return o.withDO(o.DO.Having(conds...))
-}
-
-func (o organizationMemberDo) Limit(limit int) IOrganizationMemberDo {
-	return o.withDO(o.DO.Limit(limit))
-}
-
-func (o organizationMemberDo) Offset(offset int) IOrganizationMemberDo {
-	return o.withDO(o.DO.Offset(offset))
-}
-
-func (o organizationMemberDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IOrganizationMemberDo {
-	return o.withDO(o.DO.Scopes(funcs...))
-}
-
-func (o organizationMemberDo) Unscoped() IOrganizationMemberDo {
-	return o.withDO(o.DO.Unscoped())
-}
-
-func (o organizationMemberDo) Create(values ...*dbschema.OrganizationMember) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return o.DO.Create(values)
-}
-
-func (o organizationMemberDo) CreateInBatches(values []*dbschema.OrganizationMember, batchSize int) error {
-	return o.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (o organizationMemberDo) Save(values ...*dbschema.OrganizationMember) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return o.DO.Save(values)
-}
-
-func (o organizationMemberDo) First() (*dbschema.OrganizationMember, error) {
-	if result, err := o.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.OrganizationMember), nil
-	}
-}
-
-func (o organizationMemberDo) Take() (*dbschema.OrganizationMember, error) {
-	if result, err := o.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.OrganizationMember), nil
-	}
-}
-
-func (o organizationMemberDo) Last() (*dbschema.OrganizationMember, error) {
-	if result, err := o.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.OrganizationMember), nil
-	}
-}
-
-func (o organizationMemberDo) Find() ([]*dbschema.OrganizationMember, error) {
-	result, err := o.DO.Find()
-	return result.([]*dbschema.OrganizationMember), err
-}
-
-func (o organizationMemberDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.OrganizationMember, err error) {
-	buf := make([]*dbschema.OrganizationMember, 0, batchSize)
-	err = o.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (o organizationMemberDo) FindInBatches(result *[]*dbschema.OrganizationMember, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return o.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (o organizationMemberDo) Attrs(attrs ...field.AssignExpr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Attrs(attrs...))
-}
-
-func (o organizationMemberDo) Assign(attrs ...field.AssignExpr) IOrganizationMemberDo {
-	return o.withDO(o.DO.Assign(attrs...))
-}
-
-func (o organizationMemberDo) Joins(fields ...field.RelationField) IOrganizationMemberDo {
-	for _, _f := range fields {
-		o = *o.withDO(o.DO.Joins(_f))
-	}
-	return &o
-}
-
-func (o organizationMemberDo) Preload(fields ...field.RelationField) IOrganizationMemberDo {
-	for _, _f := range fields {
-		o = *o.withDO(o.DO.Preload(_f))
-	}
-	return &o
-}
-
-func (o organizationMemberDo) FirstOrInit() (*dbschema.OrganizationMember, error) {
-	if result, err := o.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.OrganizationMember), nil
-	}
-}
-
-func (o organizationMemberDo) FirstOrCreate() (*dbschema.OrganizationMember, error) {
-	if result, err := o.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.OrganizationMember), nil
-	}
-}
-
-func (o organizationMemberDo) FindByPage(offset int, limit int) (result []*dbschema.OrganizationMember, count int64, err error) {
-	result, err = o.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = o.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (o organizationMemberDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = o.Count()
-	if err != nil {
-		return
-	}
-
-	err = o.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (o organizationMemberDo) Scan(result interface{}) (err error) {
-	return o.DO.Scan(result)
-}
-
-func (o organizationMemberDo) Delete(models ...*dbschema.OrganizationMember) (result gen.ResultInfo, err error) {
-	return o.DO.Delete(models)
-}
-
-func (o *organizationMemberDo) withDO(do gen.Dao) *organizationMemberDo {
-	o.DO = *do.(*gen.DO)
-	return o
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/organizations.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/organizations.gen.go
deleted file mode 100644
index 2f66a35c..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/organizations.gen.go
+++ /dev/null
@@ -1,498 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newOrganization(db *gorm.DB, opts ...gen.DOOption) organization {
-	_organization := organization{}
-
-	_organization.organizationDo.UseDB(db, opts...)
-	_organization.organizationDo.UseModel(&dbschema.Organization{})
-
-	tableName := _organization.organizationDo.TableName()
-	_organization.ALL = field.NewAsterisk(tableName)
-	_organization.ID = field.NewUint(tableName, "id")
-	_organization.CreatedAt = field.NewTime(tableName, "created_at")
-	_organization.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_organization.DeletedAt = field.NewField(tableName, "deleted_at")
-	_organization.Name = field.NewString(tableName, "name")
-	_organization.PublicID = field.NewString(tableName, "public_id")
-	_organization.Enabled = field.NewBool(tableName, "enabled")
-	_organization.Members = organizationHasManyMembers{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Members", "dbschema.OrganizationMember"),
-	}
-
-	_organization.fillFieldMap()
-
-	return _organization
-}
-
-type organization struct {
-	organizationDo
-
-	ALL       field.Asterisk
-	ID        field.Uint
-	CreatedAt field.Time
-	UpdatedAt field.Time
-	DeletedAt field.Field
-	Name      field.String
-	PublicID  field.String
-	Enabled   field.Bool
-	Members   organizationHasManyMembers
-
-	fieldMap map[string]field.Expr
-}
-
-func (o organization) Table(newTableName string) *organization {
-	o.organizationDo.UseTable(newTableName)
-	return o.updateTableName(newTableName)
-}
-
-func (o organization) As(alias string) *organization {
-	o.organizationDo.DO = *(o.organizationDo.As(alias).(*gen.DO))
-	return o.updateTableName(alias)
-}
-
-func (o *organization) updateTableName(table string) *organization {
-	o.ALL = field.NewAsterisk(table)
-	o.ID = field.NewUint(table, "id")
-	o.CreatedAt = field.NewTime(table, "created_at")
-	o.UpdatedAt = field.NewTime(table, "updated_at")
-	o.DeletedAt = field.NewField(table, "deleted_at")
-	o.Name = field.NewString(table, "name")
-	o.PublicID = field.NewString(table, "public_id")
-	o.Enabled = field.NewBool(table, "enabled")
-
-	o.fillFieldMap()
-
-	return o
-}
-
-func (o *organization) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := o.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (o *organization) fillFieldMap() {
-	o.fieldMap = make(map[string]field.Expr, 8)
-	o.fieldMap["id"] = o.ID
-	o.fieldMap["created_at"] = o.CreatedAt
-	o.fieldMap["updated_at"] = o.UpdatedAt
-	o.fieldMap["deleted_at"] = o.DeletedAt
-	o.fieldMap["name"] = o.Name
-	o.fieldMap["public_id"] = o.PublicID
-	o.fieldMap["enabled"] = o.Enabled
-
-}
-
-func (o organization) clone(db *gorm.DB) organization {
-	o.organizationDo.ReplaceConnPool(db.Statement.ConnPool)
-	o.Members.db = db.Session(&gorm.Session{Initialized: true})
-	o.Members.db.Statement.ConnPool = db.Statement.ConnPool
-	return o
-}
-
-func (o organization) replaceDB(db *gorm.DB) organization {
-	o.organizationDo.ReplaceDB(db)
-	o.Members.db = db.Session(&gorm.Session{})
-	return o
-}
-
-type organizationHasManyMembers struct {
-	db *gorm.DB
-
-	field.RelationField
-}
-
-func (a organizationHasManyMembers) Where(conds ...field.Expr) *organizationHasManyMembers {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a organizationHasManyMembers) WithContext(ctx context.Context) *organizationHasManyMembers {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a organizationHasManyMembers) Session(session *gorm.Session) *organizationHasManyMembers {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a organizationHasManyMembers) Model(m *dbschema.Organization) *organizationHasManyMembersTx {
-	return &organizationHasManyMembersTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a organizationHasManyMembers) Unscoped() *organizationHasManyMembers {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type organizationHasManyMembersTx struct{ tx *gorm.Association }
-
-func (a organizationHasManyMembersTx) Find() (result []*dbschema.OrganizationMember, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a organizationHasManyMembersTx) Append(values ...*dbschema.OrganizationMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a organizationHasManyMembersTx) Replace(values ...*dbschema.OrganizationMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a organizationHasManyMembersTx) Delete(values ...*dbschema.OrganizationMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a organizationHasManyMembersTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a organizationHasManyMembersTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a organizationHasManyMembersTx) Unscoped() *organizationHasManyMembersTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type organizationDo struct{ gen.DO }
-
-type IOrganizationDo interface {
-	gen.SubQuery
-	Debug() IOrganizationDo
-	WithContext(ctx context.Context) IOrganizationDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IOrganizationDo
-	WriteDB() IOrganizationDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IOrganizationDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IOrganizationDo
-	Not(conds ...gen.Condition) IOrganizationDo
-	Or(conds ...gen.Condition) IOrganizationDo
-	Select(conds ...field.Expr) IOrganizationDo
-	Where(conds ...gen.Condition) IOrganizationDo
-	Order(conds ...field.Expr) IOrganizationDo
-	Distinct(cols ...field.Expr) IOrganizationDo
-	Omit(cols ...field.Expr) IOrganizationDo
-	Join(table schema.Tabler, on ...field.Expr) IOrganizationDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IOrganizationDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IOrganizationDo
-	Group(cols ...field.Expr) IOrganizationDo
-	Having(conds ...gen.Condition) IOrganizationDo
-	Limit(limit int) IOrganizationDo
-	Offset(offset int) IOrganizationDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IOrganizationDo
-	Unscoped() IOrganizationDo
-	Create(values ...*dbschema.Organization) error
-	CreateInBatches(values []*dbschema.Organization, batchSize int) error
-	Save(values ...*dbschema.Organization) error
-	First() (*dbschema.Organization, error)
-	Take() (*dbschema.Organization, error)
-	Last() (*dbschema.Organization, error)
-	Find() ([]*dbschema.Organization, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Organization, err error)
-	FindInBatches(result *[]*dbschema.Organization, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.Organization) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IOrganizationDo
-	Assign(attrs ...field.AssignExpr) IOrganizationDo
-	Joins(fields ...field.RelationField) IOrganizationDo
-	Preload(fields ...field.RelationField) IOrganizationDo
-	FirstOrInit() (*dbschema.Organization, error)
-	FirstOrCreate() (*dbschema.Organization, error)
-	FindByPage(offset int, limit int) (result []*dbschema.Organization, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IOrganizationDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (o organizationDo) Debug() IOrganizationDo {
-	return o.withDO(o.DO.Debug())
-}
-
-func (o organizationDo) WithContext(ctx context.Context) IOrganizationDo {
-	return o.withDO(o.DO.WithContext(ctx))
-}
-
-func (o organizationDo) ReadDB() IOrganizationDo {
-	return o.Clauses(dbresolver.Read)
-}
-
-func (o organizationDo) WriteDB() IOrganizationDo {
-	return o.Clauses(dbresolver.Write)
-}
-
-func (o organizationDo) Session(config *gorm.Session) IOrganizationDo {
-	return o.withDO(o.DO.Session(config))
-}
-
-func (o organizationDo) Clauses(conds ...clause.Expression) IOrganizationDo {
-	return o.withDO(o.DO.Clauses(conds...))
-}
-
-func (o organizationDo) Returning(value interface{}, columns ...string) IOrganizationDo {
-	return o.withDO(o.DO.Returning(value, columns...))
-}
-
-func (o organizationDo) Not(conds ...gen.Condition) IOrganizationDo {
-	return o.withDO(o.DO.Not(conds...))
-}
-
-func (o organizationDo) Or(conds ...gen.Condition) IOrganizationDo {
-	return o.withDO(o.DO.Or(conds...))
-}
-
-func (o organizationDo) Select(conds ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.Select(conds...))
-}
-
-func (o organizationDo) Where(conds ...gen.Condition) IOrganizationDo {
-	return o.withDO(o.DO.Where(conds...))
-}
-
-func (o organizationDo) Order(conds ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.Order(conds...))
-}
-
-func (o organizationDo) Distinct(cols ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.Distinct(cols...))
-}
-
-func (o organizationDo) Omit(cols ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.Omit(cols...))
-}
-
-func (o organizationDo) Join(table schema.Tabler, on ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.Join(table, on...))
-}
-
-func (o organizationDo) LeftJoin(table schema.Tabler, on ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.LeftJoin(table, on...))
-}
-
-func (o organizationDo) RightJoin(table schema.Tabler, on ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.RightJoin(table, on...))
-}
-
-func (o organizationDo) Group(cols ...field.Expr) IOrganizationDo {
-	return o.withDO(o.DO.Group(cols...))
-}
-
-func (o organizationDo) Having(conds ...gen.Condition) IOrganizationDo {
-	return o.withDO(o.DO.Having(conds...))
-}
-
-func (o organizationDo) Limit(limit int) IOrganizationDo {
-	return o.withDO(o.DO.Limit(limit))
-}
-
-func (o organizationDo) Offset(offset int) IOrganizationDo {
-	return o.withDO(o.DO.Offset(offset))
-}
-
-func (o organizationDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IOrganizationDo {
-	return o.withDO(o.DO.Scopes(funcs...))
-}
-
-func (o organizationDo) Unscoped() IOrganizationDo {
-	return o.withDO(o.DO.Unscoped())
-}
-
-func (o organizationDo) Create(values ...*dbschema.Organization) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return o.DO.Create(values)
-}
-
-func (o organizationDo) CreateInBatches(values []*dbschema.Organization, batchSize int) error {
-	return o.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (o organizationDo) Save(values ...*dbschema.Organization) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return o.DO.Save(values)
-}
-
-func (o organizationDo) First() (*dbschema.Organization, error) {
-	if result, err := o.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Organization), nil
-	}
-}
-
-func (o organizationDo) Take() (*dbschema.Organization, error) {
-	if result, err := o.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Organization), nil
-	}
-}
-
-func (o organizationDo) Last() (*dbschema.Organization, error) {
-	if result, err := o.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Organization), nil
-	}
-}
-
-func (o organizationDo) Find() ([]*dbschema.Organization, error) {
-	result, err := o.DO.Find()
-	return result.([]*dbschema.Organization), err
-}
-
-func (o organizationDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Organization, err error) {
-	buf := make([]*dbschema.Organization, 0, batchSize)
-	err = o.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (o organizationDo) FindInBatches(result *[]*dbschema.Organization, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return o.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (o organizationDo) Attrs(attrs ...field.AssignExpr) IOrganizationDo {
-	return o.withDO(o.DO.Attrs(attrs...))
-}
-
-func (o organizationDo) Assign(attrs ...field.AssignExpr) IOrganizationDo {
-	return o.withDO(o.DO.Assign(attrs...))
-}
-
-func (o organizationDo) Joins(fields ...field.RelationField) IOrganizationDo {
-	for _, _f := range fields {
-		o = *o.withDO(o.DO.Joins(_f))
-	}
-	return &o
-}
-
-func (o organizationDo) Preload(fields ...field.RelationField) IOrganizationDo {
-	for _, _f := range fields {
-		o = *o.withDO(o.DO.Preload(_f))
-	}
-	return &o
-}
-
-func (o organizationDo) FirstOrInit() (*dbschema.Organization, error) {
-	if result, err := o.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Organization), nil
-	}
-}
-
-func (o organizationDo) FirstOrCreate() (*dbschema.Organization, error) {
-	if result, err := o.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Organization), nil
-	}
-}
-
-func (o organizationDo) FindByPage(offset int, limit int) (result []*dbschema.Organization, count int64, err error) {
-	result, err = o.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = o.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (o organizationDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = o.Count()
-	if err != nil {
-		return
-	}
-
-	err = o.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (o organizationDo) Scan(result interface{}) (err error) {
-	return o.DO.Scan(result)
-}
-
-func (o organizationDo) Delete(models ...*dbschema.Organization) (result gen.ResultInfo, err error) {
-	return o.DO.Delete(models)
-}
-
-func (o *organizationDo) withDO(do gen.Dao) *organizationDo {
-	o.DO = *do.(*gen.DO)
-	return o
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/project_members.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/project_members.gen.go
deleted file mode 100644
index afcafcad..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/project_members.gen.go
+++ /dev/null
@@ -1,407 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newProjectMember(db *gorm.DB, opts ...gen.DOOption) projectMember {
-	_projectMember := projectMember{}
-
-	_projectMember.projectMemberDo.UseDB(db, opts...)
-	_projectMember.projectMemberDo.UseModel(&dbschema.ProjectMember{})
-
-	tableName := _projectMember.projectMemberDo.TableName()
-	_projectMember.ALL = field.NewAsterisk(tableName)
-	_projectMember.ID = field.NewUint(tableName, "id")
-	_projectMember.CreatedAt = field.NewTime(tableName, "created_at")
-	_projectMember.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_projectMember.DeletedAt = field.NewField(tableName, "deleted_at")
-	_projectMember.UserID = field.NewUint(tableName, "user_id")
-	_projectMember.ProjectID = field.NewUint(tableName, "project_id")
-	_projectMember.Role = field.NewString(tableName, "role")
-
-	_projectMember.fillFieldMap()
-
-	return _projectMember
-}
-
-type projectMember struct {
-	projectMemberDo
-
-	ALL       field.Asterisk
-	ID        field.Uint
-	CreatedAt field.Time
-	UpdatedAt field.Time
-	DeletedAt field.Field
-	UserID    field.Uint
-	ProjectID field.Uint
-	Role      field.String
-
-	fieldMap map[string]field.Expr
-}
-
-func (p projectMember) Table(newTableName string) *projectMember {
-	p.projectMemberDo.UseTable(newTableName)
-	return p.updateTableName(newTableName)
-}
-
-func (p projectMember) As(alias string) *projectMember {
-	p.projectMemberDo.DO = *(p.projectMemberDo.As(alias).(*gen.DO))
-	return p.updateTableName(alias)
-}
-
-func (p *projectMember) updateTableName(table string) *projectMember {
-	p.ALL = field.NewAsterisk(table)
-	p.ID = field.NewUint(table, "id")
-	p.CreatedAt = field.NewTime(table, "created_at")
-	p.UpdatedAt = field.NewTime(table, "updated_at")
-	p.DeletedAt = field.NewField(table, "deleted_at")
-	p.UserID = field.NewUint(table, "user_id")
-	p.ProjectID = field.NewUint(table, "project_id")
-	p.Role = field.NewString(table, "role")
-
-	p.fillFieldMap()
-
-	return p
-}
-
-func (p *projectMember) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := p.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (p *projectMember) fillFieldMap() {
-	p.fieldMap = make(map[string]field.Expr, 7)
-	p.fieldMap["id"] = p.ID
-	p.fieldMap["created_at"] = p.CreatedAt
-	p.fieldMap["updated_at"] = p.UpdatedAt
-	p.fieldMap["deleted_at"] = p.DeletedAt
-	p.fieldMap["user_id"] = p.UserID
-	p.fieldMap["project_id"] = p.ProjectID
-	p.fieldMap["role"] = p.Role
-}
-
-func (p projectMember) clone(db *gorm.DB) projectMember {
-	p.projectMemberDo.ReplaceConnPool(db.Statement.ConnPool)
-	return p
-}
-
-func (p projectMember) replaceDB(db *gorm.DB) projectMember {
-	p.projectMemberDo.ReplaceDB(db)
-	return p
-}
-
-type projectMemberDo struct{ gen.DO }
-
-type IProjectMemberDo interface {
-	gen.SubQuery
-	Debug() IProjectMemberDo
-	WithContext(ctx context.Context) IProjectMemberDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IProjectMemberDo
-	WriteDB() IProjectMemberDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IProjectMemberDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IProjectMemberDo
-	Not(conds ...gen.Condition) IProjectMemberDo
-	Or(conds ...gen.Condition) IProjectMemberDo
-	Select(conds ...field.Expr) IProjectMemberDo
-	Where(conds ...gen.Condition) IProjectMemberDo
-	Order(conds ...field.Expr) IProjectMemberDo
-	Distinct(cols ...field.Expr) IProjectMemberDo
-	Omit(cols ...field.Expr) IProjectMemberDo
-	Join(table schema.Tabler, on ...field.Expr) IProjectMemberDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IProjectMemberDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IProjectMemberDo
-	Group(cols ...field.Expr) IProjectMemberDo
-	Having(conds ...gen.Condition) IProjectMemberDo
-	Limit(limit int) IProjectMemberDo
-	Offset(offset int) IProjectMemberDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IProjectMemberDo
-	Unscoped() IProjectMemberDo
-	Create(values ...*dbschema.ProjectMember) error
-	CreateInBatches(values []*dbschema.ProjectMember, batchSize int) error
-	Save(values ...*dbschema.ProjectMember) error
-	First() (*dbschema.ProjectMember, error)
-	Take() (*dbschema.ProjectMember, error)
-	Last() (*dbschema.ProjectMember, error)
-	Find() ([]*dbschema.ProjectMember, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ProjectMember, err error)
-	FindInBatches(result *[]*dbschema.ProjectMember, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.ProjectMember) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IProjectMemberDo
-	Assign(attrs ...field.AssignExpr) IProjectMemberDo
-	Joins(fields ...field.RelationField) IProjectMemberDo
-	Preload(fields ...field.RelationField) IProjectMemberDo
-	FirstOrInit() (*dbschema.ProjectMember, error)
-	FirstOrCreate() (*dbschema.ProjectMember, error)
-	FindByPage(offset int, limit int) (result []*dbschema.ProjectMember, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IProjectMemberDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (p projectMemberDo) Debug() IProjectMemberDo {
-	return p.withDO(p.DO.Debug())
-}
-
-func (p projectMemberDo) WithContext(ctx context.Context) IProjectMemberDo {
-	return p.withDO(p.DO.WithContext(ctx))
-}
-
-func (p projectMemberDo) ReadDB() IProjectMemberDo {
-	return p.Clauses(dbresolver.Read)
-}
-
-func (p projectMemberDo) WriteDB() IProjectMemberDo {
-	return p.Clauses(dbresolver.Write)
-}
-
-func (p projectMemberDo) Session(config *gorm.Session) IProjectMemberDo {
-	return p.withDO(p.DO.Session(config))
-}
-
-func (p projectMemberDo) Clauses(conds ...clause.Expression) IProjectMemberDo {
-	return p.withDO(p.DO.Clauses(conds...))
-}
-
-func (p projectMemberDo) Returning(value interface{}, columns ...string) IProjectMemberDo {
-	return p.withDO(p.DO.Returning(value, columns...))
-}
-
-func (p projectMemberDo) Not(conds ...gen.Condition) IProjectMemberDo {
-	return p.withDO(p.DO.Not(conds...))
-}
-
-func (p projectMemberDo) Or(conds ...gen.Condition) IProjectMemberDo {
-	return p.withDO(p.DO.Or(conds...))
-}
-
-func (p projectMemberDo) Select(conds ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.Select(conds...))
-}
-
-func (p projectMemberDo) Where(conds ...gen.Condition) IProjectMemberDo {
-	return p.withDO(p.DO.Where(conds...))
-}
-
-func (p projectMemberDo) Order(conds ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.Order(conds...))
-}
-
-func (p projectMemberDo) Distinct(cols ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.Distinct(cols...))
-}
-
-func (p projectMemberDo) Omit(cols ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.Omit(cols...))
-}
-
-func (p projectMemberDo) Join(table schema.Tabler, on ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.Join(table, on...))
-}
-
-func (p projectMemberDo) LeftJoin(table schema.Tabler, on ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.LeftJoin(table, on...))
-}
-
-func (p projectMemberDo) RightJoin(table schema.Tabler, on ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.RightJoin(table, on...))
-}
-
-func (p projectMemberDo) Group(cols ...field.Expr) IProjectMemberDo {
-	return p.withDO(p.DO.Group(cols...))
-}
-
-func (p projectMemberDo) Having(conds ...gen.Condition) IProjectMemberDo {
-	return p.withDO(p.DO.Having(conds...))
-}
-
-func (p projectMemberDo) Limit(limit int) IProjectMemberDo {
-	return p.withDO(p.DO.Limit(limit))
-}
-
-func (p projectMemberDo) Offset(offset int) IProjectMemberDo {
-	return p.withDO(p.DO.Offset(offset))
-}
-
-func (p projectMemberDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IProjectMemberDo {
-	return p.withDO(p.DO.Scopes(funcs...))
-}
-
-func (p projectMemberDo) Unscoped() IProjectMemberDo {
-	return p.withDO(p.DO.Unscoped())
-}
-
-func (p projectMemberDo) Create(values ...*dbschema.ProjectMember) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return p.DO.Create(values)
-}
-
-func (p projectMemberDo) CreateInBatches(values []*dbschema.ProjectMember, batchSize int) error {
-	return p.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (p projectMemberDo) Save(values ...*dbschema.ProjectMember) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return p.DO.Save(values)
-}
-
-func (p projectMemberDo) First() (*dbschema.ProjectMember, error) {
-	if result, err := p.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ProjectMember), nil
-	}
-}
-
-func (p projectMemberDo) Take() (*dbschema.ProjectMember, error) {
-	if result, err := p.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ProjectMember), nil
-	}
-}
-
-func (p projectMemberDo) Last() (*dbschema.ProjectMember, error) {
-	if result, err := p.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ProjectMember), nil
-	}
-}
-
-func (p projectMemberDo) Find() ([]*dbschema.ProjectMember, error) {
-	result, err := p.DO.Find()
-	return result.([]*dbschema.ProjectMember), err
-}
-
-func (p projectMemberDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ProjectMember, err error) {
-	buf := make([]*dbschema.ProjectMember, 0, batchSize)
-	err = p.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (p projectMemberDo) FindInBatches(result *[]*dbschema.ProjectMember, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return p.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (p projectMemberDo) Attrs(attrs ...field.AssignExpr) IProjectMemberDo {
-	return p.withDO(p.DO.Attrs(attrs...))
-}
-
-func (p projectMemberDo) Assign(attrs ...field.AssignExpr) IProjectMemberDo {
-	return p.withDO(p.DO.Assign(attrs...))
-}
-
-func (p projectMemberDo) Joins(fields ...field.RelationField) IProjectMemberDo {
-	for _, _f := range fields {
-		p = *p.withDO(p.DO.Joins(_f))
-	}
-	return &p
-}
-
-func (p projectMemberDo) Preload(fields ...field.RelationField) IProjectMemberDo {
-	for _, _f := range fields {
-		p = *p.withDO(p.DO.Preload(_f))
-	}
-	return &p
-}
-
-func (p projectMemberDo) FirstOrInit() (*dbschema.ProjectMember, error) {
-	if result, err := p.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ProjectMember), nil
-	}
-}
-
-func (p projectMemberDo) FirstOrCreate() (*dbschema.ProjectMember, error) {
-	if result, err := p.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.ProjectMember), nil
-	}
-}
-
-func (p projectMemberDo) FindByPage(offset int, limit int) (result []*dbschema.ProjectMember, count int64, err error) {
-	result, err = p.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = p.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (p projectMemberDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = p.Count()
-	if err != nil {
-		return
-	}
-
-	err = p.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (p projectMemberDo) Scan(result interface{}) (err error) {
-	return p.DO.Scan(result)
-}
-
-func (p projectMemberDo) Delete(models ...*dbschema.ProjectMember) (result gen.ResultInfo, err error) {
-	return p.DO.Delete(models)
-}
-
-func (p *projectMemberDo) withDO(do gen.Dao) *projectMemberDo {
-	p.DO = *do.(*gen.DO)
-	return p
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/projects.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/projects.gen.go
deleted file mode 100644
index c6846e2a..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/projects.gen.go
+++ /dev/null
@@ -1,506 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newProject(db *gorm.DB, opts ...gen.DOOption) project {
-	_project := project{}
-
-	_project.projectDo.UseDB(db, opts...)
-	_project.projectDo.UseModel(&dbschema.Project{})
-
-	tableName := _project.projectDo.TableName()
-	_project.ALL = field.NewAsterisk(tableName)
-	_project.ID = field.NewUint(tableName, "id")
-	_project.CreatedAt = field.NewTime(tableName, "created_at")
-	_project.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_project.DeletedAt = field.NewField(tableName, "deleted_at")
-	_project.Name = field.NewString(tableName, "name")
-	_project.PublicID = field.NewString(tableName, "public_id")
-	_project.Status = field.NewString(tableName, "status")
-	_project.OrganizationID = field.NewUint(tableName, "organization_id")
-	_project.ArchivedAt = field.NewTime(tableName, "archived_at")
-	_project.Members = projectHasManyMembers{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Members", "dbschema.ProjectMember"),
-	}
-
-	_project.fillFieldMap()
-
-	return _project
-}
-
-type project struct {
-	projectDo
-
-	ALL            field.Asterisk
-	ID             field.Uint
-	CreatedAt      field.Time
-	UpdatedAt      field.Time
-	DeletedAt      field.Field
-	Name           field.String
-	PublicID       field.String
-	Status         field.String
-	OrganizationID field.Uint
-	ArchivedAt     field.Time
-	Members        projectHasManyMembers
-
-	fieldMap map[string]field.Expr
-}
-
-func (p project) Table(newTableName string) *project {
-	p.projectDo.UseTable(newTableName)
-	return p.updateTableName(newTableName)
-}
-
-func (p project) As(alias string) *project {
-	p.projectDo.DO = *(p.projectDo.As(alias).(*gen.DO))
-	return p.updateTableName(alias)
-}
-
-func (p *project) updateTableName(table string) *project {
-	p.ALL = field.NewAsterisk(table)
-	p.ID = field.NewUint(table, "id")
-	p.CreatedAt = field.NewTime(table, "created_at")
-	p.UpdatedAt = field.NewTime(table, "updated_at")
-	p.DeletedAt = field.NewField(table, "deleted_at")
-	p.Name = field.NewString(table, "name")
-	p.PublicID = field.NewString(table, "public_id")
-	p.Status = field.NewString(table, "status")
-	p.OrganizationID = field.NewUint(table, "organization_id")
-	p.ArchivedAt = field.NewTime(table, "archived_at")
-
-	p.fillFieldMap()
-
-	return p
-}
-
-func (p *project) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := p.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (p *project) fillFieldMap() {
-	p.fieldMap = make(map[string]field.Expr, 10)
-	p.fieldMap["id"] = p.ID
-	p.fieldMap["created_at"] = p.CreatedAt
-	p.fieldMap["updated_at"] = p.UpdatedAt
-	p.fieldMap["deleted_at"] = p.DeletedAt
-	p.fieldMap["name"] = p.Name
-	p.fieldMap["public_id"] = p.PublicID
-	p.fieldMap["status"] = p.Status
-	p.fieldMap["organization_id"] = p.OrganizationID
-	p.fieldMap["archived_at"] = p.ArchivedAt
-
-}
-
-func (p project) clone(db *gorm.DB) project {
-	p.projectDo.ReplaceConnPool(db.Statement.ConnPool)
-	p.Members.db = db.Session(&gorm.Session{Initialized: true})
-	p.Members.db.Statement.ConnPool = db.Statement.ConnPool
-	return p
-}
-
-func (p project) replaceDB(db *gorm.DB) project {
-	p.projectDo.ReplaceDB(db)
-	p.Members.db = db.Session(&gorm.Session{})
-	return p
-}
-
-type projectHasManyMembers struct {
-	db *gorm.DB
-
-	field.RelationField
-}
-
-func (a projectHasManyMembers) Where(conds ...field.Expr) *projectHasManyMembers {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a projectHasManyMembers) WithContext(ctx context.Context) *projectHasManyMembers {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a projectHasManyMembers) Session(session *gorm.Session) *projectHasManyMembers {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a projectHasManyMembers) Model(m *dbschema.Project) *projectHasManyMembersTx {
-	return &projectHasManyMembersTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a projectHasManyMembers) Unscoped() *projectHasManyMembers {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type projectHasManyMembersTx struct{ tx *gorm.Association }
-
-func (a projectHasManyMembersTx) Find() (result []*dbschema.ProjectMember, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a projectHasManyMembersTx) Append(values ...*dbschema.ProjectMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a projectHasManyMembersTx) Replace(values ...*dbschema.ProjectMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a projectHasManyMembersTx) Delete(values ...*dbschema.ProjectMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a projectHasManyMembersTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a projectHasManyMembersTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a projectHasManyMembersTx) Unscoped() *projectHasManyMembersTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type projectDo struct{ gen.DO }
-
-type IProjectDo interface {
-	gen.SubQuery
-	Debug() IProjectDo
-	WithContext(ctx context.Context) IProjectDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IProjectDo
-	WriteDB() IProjectDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IProjectDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IProjectDo
-	Not(conds ...gen.Condition) IProjectDo
-	Or(conds ...gen.Condition) IProjectDo
-	Select(conds ...field.Expr) IProjectDo
-	Where(conds ...gen.Condition) IProjectDo
-	Order(conds ...field.Expr) IProjectDo
-	Distinct(cols ...field.Expr) IProjectDo
-	Omit(cols ...field.Expr) IProjectDo
-	Join(table schema.Tabler, on ...field.Expr) IProjectDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IProjectDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IProjectDo
-	Group(cols ...field.Expr) IProjectDo
-	Having(conds ...gen.Condition) IProjectDo
-	Limit(limit int) IProjectDo
-	Offset(offset int) IProjectDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IProjectDo
-	Unscoped() IProjectDo
-	Create(values ...*dbschema.Project) error
-	CreateInBatches(values []*dbschema.Project, batchSize int) error
-	Save(values ...*dbschema.Project) error
-	First() (*dbschema.Project, error)
-	Take() (*dbschema.Project, error)
-	Last() (*dbschema.Project, error)
-	Find() ([]*dbschema.Project, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Project, err error)
-	FindInBatches(result *[]*dbschema.Project, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.Project) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IProjectDo
-	Assign(attrs ...field.AssignExpr) IProjectDo
-	Joins(fields ...field.RelationField) IProjectDo
-	Preload(fields ...field.RelationField) IProjectDo
-	FirstOrInit() (*dbschema.Project, error)
-	FirstOrCreate() (*dbschema.Project, error)
-	FindByPage(offset int, limit int) (result []*dbschema.Project, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IProjectDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (p projectDo) Debug() IProjectDo {
-	return p.withDO(p.DO.Debug())
-}
-
-func (p projectDo) WithContext(ctx context.Context) IProjectDo {
-	return p.withDO(p.DO.WithContext(ctx))
-}
-
-func (p projectDo) ReadDB() IProjectDo {
-	return p.Clauses(dbresolver.Read)
-}
-
-func (p projectDo) WriteDB() IProjectDo {
-	return p.Clauses(dbresolver.Write)
-}
-
-func (p projectDo) Session(config *gorm.Session) IProjectDo {
-	return p.withDO(p.DO.Session(config))
-}
-
-func (p projectDo) Clauses(conds ...clause.Expression) IProjectDo {
-	return p.withDO(p.DO.Clauses(conds...))
-}
-
-func (p projectDo) Returning(value interface{}, columns ...string) IProjectDo {
-	return p.withDO(p.DO.Returning(value, columns...))
-}
-
-func (p projectDo) Not(conds ...gen.Condition) IProjectDo {
-	return p.withDO(p.DO.Not(conds...))
-}
-
-func (p projectDo) Or(conds ...gen.Condition) IProjectDo {
-	return p.withDO(p.DO.Or(conds...))
-}
-
-func (p projectDo) Select(conds ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.Select(conds...))
-}
-
-func (p projectDo) Where(conds ...gen.Condition) IProjectDo {
-	return p.withDO(p.DO.Where(conds...))
-}
-
-func (p projectDo) Order(conds ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.Order(conds...))
-}
-
-func (p projectDo) Distinct(cols ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.Distinct(cols...))
-}
-
-func (p projectDo) Omit(cols ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.Omit(cols...))
-}
-
-func (p projectDo) Join(table schema.Tabler, on ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.Join(table, on...))
-}
-
-func (p projectDo) LeftJoin(table schema.Tabler, on ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.LeftJoin(table, on...))
-}
-
-func (p projectDo) RightJoin(table schema.Tabler, on ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.RightJoin(table, on...))
-}
-
-func (p projectDo) Group(cols ...field.Expr) IProjectDo {
-	return p.withDO(p.DO.Group(cols...))
-}
-
-func (p projectDo) Having(conds ...gen.Condition) IProjectDo {
-	return p.withDO(p.DO.Having(conds...))
-}
-
-func (p projectDo) Limit(limit int) IProjectDo {
-	return p.withDO(p.DO.Limit(limit))
-}
-
-func (p projectDo) Offset(offset int) IProjectDo {
-	return p.withDO(p.DO.Offset(offset))
-}
-
-func (p projectDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IProjectDo {
-	return p.withDO(p.DO.Scopes(funcs...))
-}
-
-func (p projectDo) Unscoped() IProjectDo {
-	return p.withDO(p.DO.Unscoped())
-}
-
-func (p projectDo) Create(values ...*dbschema.Project) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return p.DO.Create(values)
-}
-
-func (p projectDo) CreateInBatches(values []*dbschema.Project, batchSize int) error {
-	return p.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (p projectDo) Save(values ...*dbschema.Project) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return p.DO.Save(values)
-}
-
-func (p projectDo) First() (*dbschema.Project, error) {
-	if result, err := p.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Project), nil
-	}
-}
-
-func (p projectDo) Take() (*dbschema.Project, error) {
-	if result, err := p.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Project), nil
-	}
-}
-
-func (p projectDo) Last() (*dbschema.Project, error) {
-	if result, err := p.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Project), nil
-	}
-}
-
-func (p projectDo) Find() ([]*dbschema.Project, error) {
-	result, err := p.DO.Find()
-	return result.([]*dbschema.Project), err
-}
-
-func (p projectDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Project, err error) {
-	buf := make([]*dbschema.Project, 0, batchSize)
-	err = p.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (p projectDo) FindInBatches(result *[]*dbschema.Project, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return p.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (p projectDo) Attrs(attrs ...field.AssignExpr) IProjectDo {
-	return p.withDO(p.DO.Attrs(attrs...))
-}
-
-func (p projectDo) Assign(attrs ...field.AssignExpr) IProjectDo {
-	return p.withDO(p.DO.Assign(attrs...))
-}
-
-func (p projectDo) Joins(fields ...field.RelationField) IProjectDo {
-	for _, _f := range fields {
-		p = *p.withDO(p.DO.Joins(_f))
-	}
-	return &p
-}
-
-func (p projectDo) Preload(fields ...field.RelationField) IProjectDo {
-	for _, _f := range fields {
-		p = *p.withDO(p.DO.Preload(_f))
-	}
-	return &p
-}
-
-func (p projectDo) FirstOrInit() (*dbschema.Project, error) {
-	if result, err := p.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Project), nil
-	}
-}
-
-func (p projectDo) FirstOrCreate() (*dbschema.Project, error) {
-	if result, err := p.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Project), nil
-	}
-}
-
-func (p projectDo) FindByPage(offset int, limit int) (result []*dbschema.Project, count int64, err error) {
-	result, err = p.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = p.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (p projectDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = p.Count()
-	if err != nil {
-		return
-	}
-
-	err = p.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (p projectDo) Scan(result interface{}) (err error) {
-	return p.DO.Scan(result)
-}
-
-func (p projectDo) Delete(models ...*dbschema.Project) (result gen.ResultInfo, err error) {
-	return p.DO.Delete(models)
-}
-
-func (p *projectDo) withDO(do gen.Dao) *projectDo {
-	p.DO = *do.(*gen.DO)
-	return p
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/responses.gen.go b/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/responses.gen.go
deleted file mode 100644
index 2f2d8f40..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/responses.gen.go
+++ /dev/null
@@ -1,898 +0,0 @@
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-// Code generated by gorm.io/gen. DO NOT EDIT.
-
-package gormgen
-
-import (
-	"context"
-	"database/sql"
-
-	"gorm.io/gorm"
-	"gorm.io/gorm/clause"
-	"gorm.io/gorm/schema"
-
-	"gorm.io/gen"
-	"gorm.io/gen/field"
-
-	"gorm.io/plugin/dbresolver"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-)
-
-func newResponse(db *gorm.DB, opts ...gen.DOOption) response {
-	_response := response{}
-
-	_response.responseDo.UseDB(db, opts...)
-	_response.responseDo.UseModel(&dbschema.Response{})
-
-	tableName := _response.responseDo.TableName()
-	_response.ALL = field.NewAsterisk(tableName)
-	_response.ID = field.NewUint(tableName, "id")
-	_response.CreatedAt = field.NewTime(tableName, "created_at")
-	_response.UpdatedAt = field.NewTime(tableName, "updated_at")
-	_response.DeletedAt = field.NewField(tableName, "deleted_at")
-	_response.PublicID = field.NewString(tableName, "public_id")
-	_response.UserID = field.NewUint(tableName, "user_id")
-	_response.ConversationID = field.NewUint(tableName, "conversation_id")
-	_response.PreviousResponseID = field.NewString(tableName, "previous_response_id")
-	_response.Model = field.NewString(tableName, "model")
-	_response.Status = field.NewString(tableName, "status")
-	_response.Input = field.NewString(tableName, "input")
-	_response.Output = field.NewString(tableName, "output")
-	_response.SystemPrompt = field.NewString(tableName, "system_prompt")
-	_response.MaxTokens = field.NewInt(tableName, "max_tokens")
-	_response.Temperature = field.NewFloat64(tableName, "temperature")
-	_response.TopP = field.NewFloat64(tableName, "top_p")
-	_response.TopK = field.NewInt(tableName, "top_k")
-	_response.RepetitionPenalty = field.NewFloat64(tableName, "repetition_penalty")
-	_response.Seed = field.NewInt(tableName, "seed")
-	_response.Stop = field.NewString(tableName, "stop")
-	_response.PresencePenalty = field.NewFloat64(tableName, "presence_penalty")
-	_response.FrequencyPenalty = field.NewFloat64(tableName, "frequency_penalty")
-	_response.LogitBias = field.NewString(tableName, "logit_bias")
-	_response.ResponseFormat = field.NewString(tableName, "response_format")
-	_response.Tools = field.NewString(tableName, "tools")
-	_response.ToolChoice = field.NewString(tableName, "tool_choice")
-	_response.Metadata = field.NewString(tableName, "metadata")
-	_response.Stream = field.NewBool(tableName, "stream")
-	_response.Background = field.NewBool(tableName, "background")
-	_response.Timeout = field.NewInt(tableName, "timeout")
-	_response.User = field.NewString(tableName, "user")
-	_response.Usage = field.NewString(tableName, "usage")
-	_response.Error = field.NewString(tableName, "error")
-	_response.CompletedAt = field.NewTime(tableName, "completed_at")
-	_response.CancelledAt = field.NewTime(tableName, "cancelled_at")
-	_response.FailedAt = field.NewTime(tableName, "failed_at")
-	_response.Items = responseHasManyItems{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Items", "dbschema.Item"),
-		Conversation: struct {
-			field.RelationField
-			User struct {
-				field.RelationField
-				Organizations struct {
-					field.RelationField
-				}
-				Projects struct {
-					field.RelationField
-				}
-			}
-			Items struct {
-				field.RelationField
-			}
-		}{
-			RelationField: field.NewRelation("Items.Conversation", "dbschema.Conversation"),
-			User: struct {
-				field.RelationField
-				Organizations struct {
-					field.RelationField
-				}
-				Projects struct {
-					field.RelationField
-				}
-			}{
-				RelationField: field.NewRelation("Items.Conversation.User", "dbschema.User"),
-				Organizations: struct {
-					field.RelationField
-				}{
-					RelationField: field.NewRelation("Items.Conversation.User.Organizations", "dbschema.OrganizationMember"),
-				},
-				Projects: struct {
-					field.RelationField
-				}{
-					RelationField: field.NewRelation("Items.Conversation.User.Projects", "dbschema.ProjectMember"),
-				},
-			},
-			Items: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Items.Conversation.Items", "dbschema.Item"),
-			},
-		},
-		Response: struct {
-			field.RelationField
-			UserEntity struct {
-				field.RelationField
-			}
-			Conversation struct {
-				field.RelationField
-			}
-			Items struct {
-				field.RelationField
-			}
-		}{
-			RelationField: field.NewRelation("Items.Response", "dbschema.Response"),
-			UserEntity: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Items.Response.UserEntity", "dbschema.User"),
-			},
-			Conversation: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Items.Response.Conversation", "dbschema.Conversation"),
-			},
-			Items: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Items.Response.Items", "dbschema.Item"),
-			},
-		},
-	}
-
-	_response.UserEntity = responseBelongsToUserEntity{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("UserEntity", "dbschema.User"),
-	}
-
-	_response.Conversation = responseBelongsToConversation{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Conversation", "dbschema.Conversation"),
-	}
-
-	_response.fillFieldMap()
-
-	return _response
-}
-
-type response struct {
-	responseDo
-
-	ALL                field.Asterisk
-	ID                 field.Uint
-	CreatedAt          field.Time
-	UpdatedAt          field.Time
-	DeletedAt          field.Field
-	PublicID           field.String
-	UserID             field.Uint
-	ConversationID     field.Uint
-	PreviousResponseID field.String
-	Model              field.String
-	Status             field.String
-	Input              field.String
-	Output             field.String
-	SystemPrompt       field.String
-	MaxTokens          field.Int
-	Temperature        field.Float64
-	TopP               field.Float64
-	TopK               field.Int
-	RepetitionPenalty  field.Float64
-	Seed               field.Int
-	Stop               field.String
-	PresencePenalty    field.Float64
-	FrequencyPenalty   field.Float64
-	LogitBias          field.String
-	ResponseFormat     field.String
-	Tools              field.String
-	ToolChoice         field.String
-	Metadata           field.String
-	Stream             field.Bool
-	Background         field.Bool
-	Timeout            field.Int
-	User               field.String
-	Usage              field.String
-	Error              field.String
-	CompletedAt        field.Time
-	CancelledAt        field.Time
-	FailedAt           field.Time
-	Items              responseHasManyItems
-
-	UserEntity responseBelongsToUserEntity
-
-	Conversation responseBelongsToConversation
-
-	fieldMap map[string]field.Expr
-}
-
-func (r response) Table(newTableName string) *response {
-	r.responseDo.UseTable(newTableName)
-	return r.updateTableName(newTableName)
-}
-
-func (r response) As(alias string) *response {
-	r.responseDo.DO = *(r.responseDo.As(alias).(*gen.DO))
-	return r.updateTableName(alias)
-}
-
-func (r *response) updateTableName(table string) *response {
-	r.ALL = field.NewAsterisk(table)
-	r.ID = field.NewUint(table, "id")
-	r.CreatedAt = field.NewTime(table, "created_at")
-	r.UpdatedAt = field.NewTime(table, "updated_at")
-	r.DeletedAt = field.NewField(table, "deleted_at")
-	r.PublicID = field.NewString(table, "public_id")
-	r.UserID = field.NewUint(table, "user_id")
-	r.ConversationID = field.NewUint(table, "conversation_id")
-	r.PreviousResponseID = field.NewString(table, "previous_response_id")
-	r.Model = field.NewString(table, "model")
-	r.Status = field.NewString(table, "status")
-	r.Input = field.NewString(table, "input")
-	r.Output = field.NewString(table, "output")
-	r.SystemPrompt = field.NewString(table, "system_prompt")
-	r.MaxTokens = field.NewInt(table, "max_tokens")
-	r.Temperature = field.NewFloat64(table, "temperature")
-	r.TopP = field.NewFloat64(table, "top_p")
-	r.TopK = field.NewInt(table, "top_k")
-	r.RepetitionPenalty = field.NewFloat64(table, "repetition_penalty")
-	r.Seed = field.NewInt(table, "seed")
-	r.Stop = field.NewString(table, "stop")
-	r.PresencePenalty = field.NewFloat64(table, "presence_penalty")
-	r.FrequencyPenalty = field.NewFloat64(table, "frequency_penalty")
-	r.LogitBias = field.NewString(table, "logit_bias")
-	r.ResponseFormat = field.NewString(table, "response_format")
-	r.Tools = field.NewString(table, "tools")
-	r.ToolChoice = field.NewString(table, "tool_choice")
-	r.Metadata = field.NewString(table, "metadata")
-	r.Stream = field.NewBool(table, "stream")
-	r.Background = field.NewBool(table, "background")
-	r.Timeout = field.NewInt(table, "timeout")
-	r.User = field.NewString(table, "user")
-	r.Usage = field.NewString(table, "usage")
-	r.Error = field.NewString(table, "error")
-	r.CompletedAt = field.NewTime(table, "completed_at")
-	r.CancelledAt = field.NewTime(table, "cancelled_at")
-	r.FailedAt = field.NewTime(table, "failed_at")
-
-	r.fillFieldMap()
-
-	return r
-}
-
-func (r *response) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
-	_f, ok := r.fieldMap[fieldName]
-	if !ok || _f == nil {
-		return nil, false
-	}
-	_oe, ok := _f.(field.OrderExpr)
-	return _oe, ok
-}
-
-func (r *response) fillFieldMap() {
-	r.fieldMap = make(map[string]field.Expr, 39)
-	r.fieldMap["id"] = r.ID
-	r.fieldMap["created_at"] = r.CreatedAt
-	r.fieldMap["updated_at"] = r.UpdatedAt
-	r.fieldMap["deleted_at"] = r.DeletedAt
-	r.fieldMap["public_id"] = r.PublicID
-	r.fieldMap["user_id"] = r.UserID
-	r.fieldMap["conversation_id"] = r.ConversationID
-	r.fieldMap["previous_response_id"] = r.PreviousResponseID
-	r.fieldMap["model"] = r.Model
-	r.fieldMap["status"] = r.Status
-	r.fieldMap["input"] = r.Input
-	r.fieldMap["output"] = r.Output
-	r.fieldMap["system_prompt"] = r.SystemPrompt
-	r.fieldMap["max_tokens"] = r.MaxTokens
-	r.fieldMap["temperature"] = r.Temperature
-	r.fieldMap["top_p"] = r.TopP
-	r.fieldMap["top_k"] = r.TopK
-	r.fieldMap["repetition_penalty"] = r.RepetitionPenalty
-	r.fieldMap["seed"] = r.Seed
-	r.fieldMap["stop"] = r.Stop
-	r.fieldMap["presence_penalty"] = r.PresencePenalty
-	r.fieldMap["frequency_penalty"] = r.FrequencyPenalty
-	r.fieldMap["logit_bias"] = r.LogitBias
-	r.fieldMap["response_format"] = r.ResponseFormat
-	r.fieldMap["tools"] = r.Tools
-	r.fieldMap["tool_choice"] = r.ToolChoice
-	r.fieldMap["metadata"] = r.Metadata
-	r.fieldMap["stream"] = r.Stream
-	r.fieldMap["background"] = r.Background
-	r.fieldMap["timeout"] = r.Timeout
-	r.fieldMap["user"] = r.User
-	r.fieldMap["usage"] = r.Usage
-	r.fieldMap["error"] = r.Error
-	r.fieldMap["completed_at"] = r.CompletedAt
-	r.fieldMap["cancelled_at"] = r.CancelledAt
-	r.fieldMap["failed_at"] = r.FailedAt
-
-}
-
-func (r response) clone(db *gorm.DB) response {
-	r.responseDo.ReplaceConnPool(db.Statement.ConnPool)
-	r.Items.db = db.Session(&gorm.Session{Initialized: true})
-	r.Items.db.Statement.ConnPool = db.Statement.ConnPool
-	r.UserEntity.db = db.Session(&gorm.Session{Initialized: true})
-	r.UserEntity.db.Statement.ConnPool = db.Statement.ConnPool
-	r.Conversation.db = db.Session(&gorm.Session{Initialized: true})
-	r.Conversation.db.Statement.ConnPool = db.Statement.ConnPool
-	return r
-}
-
-func (r response) replaceDB(db *gorm.DB) response {
-	r.responseDo.ReplaceDB(db)
-	r.Items.db = db.Session(&gorm.Session{})
-	r.UserEntity.db = db.Session(&gorm.Session{})
-	r.Conversation.db = db.Session(&gorm.Session{})
-	return r
-}
-
-type responseHasManyItems struct {
-	db *gorm.DB
-
-	field.RelationField
-
-	Conversation struct {
-		field.RelationField
-		User struct {
-			field.RelationField
-			Organizations struct {
-				field.RelationField
-			}
-			Projects struct {
-				field.RelationField
-			}
-		}
-		Items struct {
-			field.RelationField
-		}
-	}
-	Response struct {
-		field.RelationField
-		UserEntity struct {
-			field.RelationField
-		}
-		Conversation struct {
-			field.RelationField
-		}
-		Items struct {
-			field.RelationField
-		}
-	}
-}
-
-func (a responseHasManyItems) Where(conds ...field.Expr) *responseHasManyItems {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a responseHasManyItems) WithContext(ctx context.Context) *responseHasManyItems {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a responseHasManyItems) Session(session *gorm.Session) *responseHasManyItems {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a responseHasManyItems) Model(m *dbschema.Response) *responseHasManyItemsTx {
-	return &responseHasManyItemsTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a responseHasManyItems) Unscoped() *responseHasManyItems {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type responseHasManyItemsTx struct{ tx *gorm.Association }
-
-func (a responseHasManyItemsTx) Find() (result []*dbschema.Item, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a responseHasManyItemsTx) Append(values ...*dbschema.Item) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a responseHasManyItemsTx) Replace(values ...*dbschema.Item) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a responseHasManyItemsTx) Delete(values ...*dbschema.Item) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a responseHasManyItemsTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a responseHasManyItemsTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a responseHasManyItemsTx) Unscoped() *responseHasManyItemsTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type responseBelongsToUserEntity struct {
-	db *gorm.DB
-
-	field.RelationField
-}
-
-func (a responseBelongsToUserEntity) Where(conds ...field.Expr) *responseBelongsToUserEntity {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a responseBelongsToUserEntity) WithContext(ctx context.Context) *responseBelongsToUserEntity {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a responseBelongsToUserEntity) Session(session *gorm.Session) *responseBelongsToUserEntity {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a responseBelongsToUserEntity) Model(m *dbschema.Response) *responseBelongsToUserEntityTx {
-	return &responseBelongsToUserEntityTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a responseBelongsToUserEntity) Unscoped() *responseBelongsToUserEntity {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type responseBelongsToUserEntityTx struct{ tx *gorm.Association }
-
-func (a responseBelongsToUserEntityTx) Find() (result *dbschema.User, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a responseBelongsToUserEntityTx) Append(values ...*dbschema.User) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a responseBelongsToUserEntityTx) Replace(values ...*dbschema.User) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a responseBelongsToUserEntityTx) Delete(values ...*dbschema.User) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a responseBelongsToUserEntityTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a responseBelongsToUserEntityTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a responseBelongsToUserEntityTx) Unscoped() *responseBelongsToUserEntityTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type responseBelongsToConversation struct {
-	db *gorm.DB
-
-	field.RelationField
-}
-
-func (a responseBelongsToConversation) Where(conds ...field.Expr) *responseBelongsToConversation {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a responseBelongsToConversation) WithContext(ctx context.Context) *responseBelongsToConversation {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a responseBelongsToConversation) Session(session *gorm.Session) *responseBelongsToConversation {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a responseBelongsToConversation) Model(m *dbschema.Response) *responseBelongsToConversationTx {
-	return &responseBelongsToConversationTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a responseBelongsToConversation) Unscoped() *responseBelongsToConversation {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type responseBelongsToConversationTx struct{ tx *gorm.Association }
-
-func (a responseBelongsToConversationTx) Find() (result *dbschema.Conversation, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a responseBelongsToConversationTx) Append(values ...*dbschema.Conversation) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a responseBelongsToConversationTx) Replace(values ...*dbschema.Conversation) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a responseBelongsToConversationTx) Delete(values ...*dbschema.Conversation) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a responseBelongsToConversationTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a responseBelongsToConversationTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a responseBelongsToConversationTx) Unscoped() *responseBelongsToConversationTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type responseDo struct{ gen.DO }
-
-type IResponseDo interface {
-	gen.SubQuery
-	Debug() IResponseDo
-	WithContext(ctx context.Context) IResponseDo
-	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
-	ReplaceDB(db *gorm.DB)
-	ReadDB() IResponseDo
-	WriteDB() IResponseDo
-	As(alias string) gen.Dao
-	Session(config *gorm.Session) IResponseDo
-	Columns(cols ...field.Expr) gen.Columns
-	Clauses(conds ...clause.Expression) IResponseDo
-	Not(conds ...gen.Condition) IResponseDo
-	Or(conds ...gen.Condition) IResponseDo
-	Select(conds ...field.Expr) IResponseDo
-	Where(conds ...gen.Condition) IResponseDo
-	Order(conds ...field.Expr) IResponseDo
-	Distinct(cols ...field.Expr) IResponseDo
-	Omit(cols ...field.Expr) IResponseDo
-	Join(table schema.Tabler, on ...field.Expr) IResponseDo
-	LeftJoin(table schema.Tabler, on ...field.Expr) IResponseDo
-	RightJoin(table schema.Tabler, on ...field.Expr) IResponseDo
-	Group(cols ...field.Expr) IResponseDo
-	Having(conds ...gen.Condition) IResponseDo
-	Limit(limit int) IResponseDo
-	Offset(offset int) IResponseDo
-	Count() (count int64, err error)
-	Scopes(funcs ...func(gen.Dao) gen.Dao) IResponseDo
-	Unscoped() IResponseDo
-	Create(values ...*dbschema.Response) error
-	CreateInBatches(values []*dbschema.Response, batchSize int) error
-	Save(values ...*dbschema.Response) error
-	First() (*dbschema.Response, error)
-	Take() (*dbschema.Response, error)
-	Last() (*dbschema.Response, error)
-	Find() ([]*dbschema.Response, error)
-	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Response, err error)
-	FindInBatches(result *[]*dbschema.Response, batchSize int, fc func(tx gen.Dao, batch int) error) error
-	Pluck(column field.Expr, dest interface{}) error
-	Delete(...*dbschema.Response) (info gen.ResultInfo, err error)
-	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	Updates(value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
-	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
-	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
-	UpdateFrom(q gen.SubQuery) gen.Dao
-	Attrs(attrs ...field.AssignExpr) IResponseDo
-	Assign(attrs ...field.AssignExpr) IResponseDo
-	Joins(fields ...field.RelationField) IResponseDo
-	Preload(fields ...field.RelationField) IResponseDo
-	FirstOrInit() (*dbschema.Response, error)
-	FirstOrCreate() (*dbschema.Response, error)
-	FindByPage(offset int, limit int) (result []*dbschema.Response, count int64, err error)
-	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
-	Rows() (*sql.Rows, error)
-	Row() *sql.Row
-	Scan(result interface{}) (err error)
-	Returning(value interface{}, columns ...string) IResponseDo
-	UnderlyingDB() *gorm.DB
-	schema.Tabler
-}
-
-func (r responseDo) Debug() IResponseDo {
-	return r.withDO(r.DO.Debug())
-}
-
-func (r responseDo) WithContext(ctx context.Context) IResponseDo {
-	return r.withDO(r.DO.WithContext(ctx))
-}
-
-func (r responseDo) ReadDB() IResponseDo {
-	return r.Clauses(dbresolver.Read)
-}
-
-func (r responseDo) WriteDB() IResponseDo {
-	return r.Clauses(dbresolver.Write)
-}
-
-func (r responseDo) Session(config *gorm.Session) IResponseDo {
-	return r.withDO(r.DO.Session(config))
-}
-
-func (r responseDo) Clauses(conds ...clause.Expression) IResponseDo {
-	return r.withDO(r.DO.Clauses(conds...))
-}
-
-func (r responseDo) Returning(value interface{}, columns ...string) IResponseDo {
-	return r.withDO(r.DO.Returning(value, columns...))
-}
-
-func (r responseDo) Not(conds ...gen.Condition) IResponseDo {
-	return r.withDO(r.DO.Not(conds...))
-}
-
-func (r responseDo) Or(conds ...gen.Condition) IResponseDo {
-	return r.withDO(r.DO.Or(conds...))
-}
-
-func (r responseDo) Select(conds ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.Select(conds...))
-}
-
-func (r responseDo) Where(conds ...gen.Condition) IResponseDo {
-	return r.withDO(r.DO.Where(conds...))
-}
-
-func (r responseDo) Order(conds ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.Order(conds...))
-}
-
-func (r responseDo) Distinct(cols ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.Distinct(cols...))
-}
-
-func (r responseDo) Omit(cols ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.Omit(cols...))
-}
-
-func (r responseDo) Join(table schema.Tabler, on ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.Join(table, on...))
-}
-
-func (r responseDo) LeftJoin(table schema.Tabler, on ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.LeftJoin(table, on...))
-}
-
-func (r responseDo) RightJoin(table schema.Tabler, on ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.RightJoin(table, on...))
-}
-
-func (r responseDo) Group(cols ...field.Expr) IResponseDo {
-	return r.withDO(r.DO.Group(cols...))
-}
-
-func (r responseDo) Having(conds ...gen.Condition) IResponseDo {
-	return r.withDO(r.DO.Having(conds...))
-}
-
-func (r responseDo) Limit(limit int) IResponseDo {
-	return r.withDO(r.DO.Limit(limit))
-}
-
-func (r responseDo) Offset(offset int) IResponseDo {
-	return r.withDO(r.DO.Offset(offset))
-}
-
-func (r responseDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IResponseDo {
-	return r.withDO(r.DO.Scopes(funcs...))
-}
-
-func (r responseDo) Unscoped() IResponseDo {
-	return r.withDO(r.DO.Unscoped())
-}
-
-func (r responseDo) Create(values ...*dbschema.Response) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return r.DO.Create(values)
-}
-
-func (r responseDo) CreateInBatches(values []*dbschema.Response, batchSize int) error {
-	return r.DO.CreateInBatches(values, batchSize)
-}
-
-// Save : !!! underlying implementation is different with GORM
-// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
-func (r responseDo) Save(values ...*dbschema.Response) error {
-	if len(values) == 0 {
-		return nil
-	}
-	return r.DO.Save(values)
-}
-
-func (r responseDo) First() (*dbschema.Response, error) {
-	if result, err := r.DO.First(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Response), nil
-	}
-}
-
-func (r responseDo) Take() (*dbschema.Response, error) {
-	if result, err := r.DO.Take(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Response), nil
-	}
-}
-
-func (r responseDo) Last() (*dbschema.Response, error) {
-	if result, err := r.DO.Last(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Response), nil
-	}
-}
-
-func (r responseDo) Find() ([]*dbschema.Response, error) {
-	result, err := r.DO.Find()
-	return result.([]*dbschema.Response), err
-}
-
-func (r responseDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Response, err error) {
-	buf := make([]*dbschema.Response, 0, batchSize)
-	err = r.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
-		defer func() { results = append(results, buf...) }()
-		return fc(tx, batch)
-	})
-	return results, err
-}
-
-func (r responseDo) FindInBatches(result *[]*dbschema.Response, batchSize int, fc func(tx gen.Dao, batch int) error) error {
-	return r.DO.FindInBatches(result, batchSize, fc)
-}
-
-func (r responseDo) Attrs(attrs ...field.AssignExpr) IResponseDo {
-	return r.withDO(r.DO.Attrs(attrs...))
-}
-
-func (r responseDo) Assign(attrs ...field.AssignExpr) IResponseDo {
-	return r.withDO(r.DO.Assign(attrs...))
-}
-
-func (r responseDo) Joins(fields ...field.RelationField) IResponseDo {
-	for _, _f := range fields {
-		r = *r.withDO(r.DO.Joins(_f))
-	}
-	return &r
-}
-
-func (r responseDo) Preload(fields ...field.RelationField) IResponseDo {
-	for _, _f := range fields {
-		r = *r.withDO(r.DO.Preload(_f))
-	}
-	return &r
-}
-
-func (r responseDo) FirstOrInit() (*dbschema.Response, error) {
-	if result, err := r.DO.FirstOrInit(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Response), nil
-	}
-}
-
-func (r responseDo) FirstOrCreate() (*dbschema.Response, error) {
-	if result, err := r.DO.FirstOrCreate(); err != nil {
-		return nil, err
-	} else {
-		return result.(*dbschema.Response), nil
-	}
-}
-
-func (r responseDo) FindByPage(offset int, limit int) (result []*dbschema.Response, count int64, err error) {
-	result, err = r.Offset(offset).Limit(limit).Find()
-	if err != nil {
-		return
-	}
-
-	if size := len(result); 0 < limit && 0 < size && size < limit {
-		count = int64(size + offset)
-		return
-	}
-
-	count, err = r.Offset(-1).Limit(-1).Count()
-	return
-}
-
-func (r responseDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
-	count, err = r.Count()
-	if err != nil {
-		return
-	}
-
-	err = r.Offset(offset).Limit(limit).Scan(result)
-	return
-}
-
-func (r responseDo) Scan(result interface{}) (err error) {
-	return r.DO.Scan(result)
-}
-
-func (r responseDo) Delete(models ...*dbschema.Response) (result gen.ResultInfo, err error) {
-	return r.DO.Delete(models)
-}
-
-func (r *responseDo) withDO(do gen.Dao) *responseDo {
-	r.DO = *do.(*gen.DO)
-	return r
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/migration.go b/apps/jan-api-gateway/application/app/infrastructure/database/migration.go
deleted file mode 100644
index 3d512298..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/migration.go
+++ /dev/null
@@ -1,171 +0,0 @@
-package database
-
-import (
-	"context"
-	"fmt"
-
-	"gorm.io/gorm"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-)
-
-type DatabaseMigration struct {
-	gorm.Model
-	Version string `gorm:"not null;uniqueIndex"`
-}
-
-type SchemaVersion struct {
-	Migrations []string `json:"migrations"`
-}
-
-func NewSchemaVersion() SchemaVersion {
-	sv := SchemaVersion{
-		// Consider supporting semantic versioning, such as:
-		// ```
-		// Version {
-		//   ReleaseVersion: "v0.0.3",
-		//   DbVersion: 2
-		// }
-		// ```
-		Migrations: []string{
-			"000001",
-			"000002",
-		},
-	}
-	return sv
-}
-
-type DBMigrator struct {
-	db *gorm.DB
-}
-
-func NewDBMigrator(db *gorm.DB) *DBMigrator {
-	return &DBMigrator{
-		db: db,
-	}
-}
-
-func (d *DBMigrator) initialize() error {
-	db := d.db
-	var reset bool
-	var record DatabaseMigration
-
-	hasTable := db.Migrator().HasTable("database_migration")
-	if hasTable {
-		result := db.Limit(1).Find(&record)
-		if result.Error != nil && result.Error != gorm.ErrRecordNotFound {
-			return fmt.Errorf("failed to query migration records: %w", result.Error)
-		}
-		if result.RowsAffected == 0 {
-			reset = true
-		}
-	} else {
-		reset = true
-	}
-
-	if reset {
-		// Still experiencing a race condition here, need to consult with DevOps regarding deployment strategy.
-		if err := db.Exec("DROP SCHEMA IF EXISTS public CASCADE;").Error; err != nil {
-			return fmt.Errorf("failed to drop public schema: %w", err)
-		}
-		if err := db.Exec("CREATE SCHEMA public;").Error; err != nil {
-			return fmt.Errorf("failed to create public schema: %w", err)
-		}
-		if err := db.AutoMigrate(&DatabaseMigration{}); err != nil {
-			return fmt.Errorf("failed to create 'database_migration' table: %w", err)
-		}
-
-		initialRecord := DatabaseMigration{Version: "000000"}
-		if err := db.Create(&initialRecord).Error; err != nil {
-			return fmt.Errorf("failed to insert initial migration record: %w", err)
-		}
-	}
-
-	return nil
-}
-
-func (d *DBMigrator) lockVersion(ctx context.Context, tx *gorm.DB) (DatabaseMigration, error) {
-	var m DatabaseMigration
-
-	if err := tx.WithContext(ctx).
-		Raw("SELECT id, version FROM database_migration ORDER BY id LIMIT 1").
-		Scan(&m).Error; err != nil {
-		return m, err
-	}
-
-	if m.ID == 0 {
-		return m, fmt.Errorf("no row found in database_migration")
-	}
-
-	if err := tx.WithContext(ctx).
-		Raw("SELECT id, version FROM database_migration WHERE id = ? FOR UPDATE", m.ID).
-		Scan(&m).Error; err != nil {
-		return m, err
-	}
-
-	return m, nil
-}
-
-func (d *DBMigrator) Migrate() (err error) {
-	if err = d.initialize(); err != nil {
-		return err
-	}
-	for _, model := range SchemaRegistry {
-		err = d.db.AutoMigrate(model)
-		if err != nil {
-			logger.GetLogger().
-				WithField("error_code", "75333e43-8157-4f0a-8e34-aa34e6e7c285").
-				Fatalf("failed to auto migrate schema: %T, error: %v", model, err)
-			return err
-		}
-	}
-	return nil
-}
-
-// func (d *DBMigrator) Migrate() (err error) {
-// 	if err = d.initialize(); err != nil {
-// 		return err
-// 	}
-// 	migrations := NewSchemaVersion().Migrations
-// 	ctx := context.Background()
-// 	db := d.db
-// 	tx := db.WithContext(ctx).Begin()
-// 	// select for update
-// 	currentVersion, err := d.lockVersion(ctx, tx)
-// 	if err != nil {
-// 		return
-// 	}
-// 	_, filename, _, ok := runtime.Caller(0)
-// 	if !ok {
-// 		return fmt.Errorf("da75e6a4-af0e-46a0-8cf8-569263651443")
-// 	}
-// 	migrationSqlFolder := filepath.Join(filepath.Dir(filename), "migrationsqls")
-
-// 	updated := false
-// 	for _, migrationVersion := range migrations {
-// 		if currentVersion.Version >= migrationVersion {
-// 			continue
-// 		}
-// 		// get version sql file
-// 		sqlFile := filepath.Join(migrationSqlFolder, fmt.Sprintf("%s.sql", migrationVersion))
-// 		content, err := os.ReadFile(sqlFile)
-// 		if err != nil {
-// 			return err
-// 		}
-
-// 		fileContentAsString := string(content)
-// 		sqlCommands := strings.Split(fileContentAsString, ";")
-// 		for _, command := range sqlCommands {
-// 			db.Exec(command)
-// 		}
-// 		updated = true
-// 	}
-// 	if updated {
-// 		currentVersion.Version = migrations[len(migrations)-1]
-// 		if err := tx.Save(currentVersion).Error; err != nil {
-// 			tx.Rollback()
-// 			return err
-// 		}
-// 	}
-// 	tx.Commit()
-// 	return nil
-// }
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/migrationsqls/000001.sql b/apps/jan-api-gateway/application/app/infrastructure/database/migrationsqls/000001.sql
deleted file mode 100644
index 85a77f81..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/migrationsqls/000001.sql
+++ /dev/null
@@ -1,177 +0,0 @@
--- Create "api_key" table
-CREATE TABLE "public"."api_key" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "public_id" character varying(128) NOT NULL,
-  "key_hash" character varying(128) NOT NULL,
-  "plaintext_hint" character varying(16) NULL,
-  "description" character varying(255) NULL,
-  "enabled" boolean NULL DEFAULT true,
-  "apikey_type" character varying(32) NOT NULL,
-  "owner_public_id" character varying(50) NOT NULL,
-  "organization_id" bigint NULL,
-  "project_id" bigint NULL,
-  "permissions" json NULL,
-  "expires_at" timestamp NULL,
-  "last_used_at" timestamp NULL,
-  PRIMARY KEY ("id")
-);
--- Create index "idx_api_key_apikey_type" to table: "api_key"
-CREATE INDEX "idx_api_key_apikey_type" ON "public"."api_key" ("apikey_type");
--- Create index "idx_api_key_deleted_at" to table: "api_key"
-CREATE INDEX "idx_api_key_deleted_at" ON "public"."api_key" ("deleted_at");
--- Create index "idx_api_key_enabled" to table: "api_key"
-CREATE INDEX "idx_api_key_enabled" ON "public"."api_key" ("enabled");
--- Create index "idx_api_key_key_hash" to table: "api_key"
-CREATE UNIQUE INDEX "idx_api_key_key_hash" ON "public"."api_key" ("key_hash");
--- Create index "idx_api_key_organization_id" to table: "api_key"
-CREATE INDEX "idx_api_key_organization_id" ON "public"."api_key" ("organization_id");
--- Create index "idx_api_key_owner_public_id" to table: "api_key"
-CREATE UNIQUE INDEX "idx_api_key_owner_public_id" ON "public"."api_key" ("owner_public_id");
--- Create index "idx_api_key_project_id" to table: "api_key"
-CREATE INDEX "idx_api_key_project_id" ON "public"."api_key" ("project_id");
--- Create index "idx_api_key_public_id" to table: "api_key"
-CREATE UNIQUE INDEX "idx_api_key_public_id" ON "public"."api_key" ("public_id");
--- Create "user" table
-CREATE TABLE "public"."user" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "name" character varying(100) NOT NULL,
-  "email" character varying(255) NOT NULL,
-  "public_id" character varying(50) NOT NULL,
-  "enabled" boolean NULL,
-  PRIMARY KEY ("id")
-);
--- Create index "idx_user_deleted_at" to table: "user"
-CREATE INDEX "idx_user_deleted_at" ON "public"."user" ("deleted_at");
--- Create index "idx_user_email" to table: "user"
-CREATE UNIQUE INDEX "idx_user_email" ON "public"."user" ("email");
--- Create index "idx_user_public_id" to table: "user"
-CREATE UNIQUE INDEX "idx_user_public_id" ON "public"."user" ("public_id");
--- Create "conversation" table
-CREATE TABLE "public"."conversation" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "public_id" character varying(50) NOT NULL,
-  "title" character varying(255) NULL,
-  "user_id" bigint NOT NULL,
-  "status" character varying(20) NOT NULL DEFAULT 'active',
-  "metadata" text NULL,
-  "is_private" boolean NOT NULL DEFAULT true,
-  PRIMARY KEY ("id"),
-  CONSTRAINT "fk_conversation_user" FOREIGN KEY ("user_id") REFERENCES "public"."user" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION
-);
--- Create index "idx_conversation_deleted_at" to table: "conversation"
-CREATE INDEX "idx_conversation_deleted_at" ON "public"."conversation" ("deleted_at");
--- Create index "idx_conversation_public_id" to table: "conversation"
-CREATE UNIQUE INDEX "idx_conversation_public_id" ON "public"."conversation" ("public_id");
--- Create index "idx_conversation_user_id" to table: "conversation"
-CREATE INDEX "idx_conversation_user_id" ON "public"."conversation" ("user_id");
--- Create "item" table
-CREATE TABLE "public"."item" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "public_id" character varying(50) NOT NULL,
-  "conversation_id" bigint NOT NULL,
-  "type" character varying(50) NOT NULL,
-  "role" character varying(20) NULL,
-  "content" text NULL,
-  "status" character varying(50) NULL,
-  "incomplete_at" bigint NULL,
-  "incomplete_details" text NULL,
-  "completed_at" bigint NULL,
-  PRIMARY KEY ("id"),
-  CONSTRAINT "fk_conversation_items" FOREIGN KEY ("conversation_id") REFERENCES "public"."conversation" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION
-);
--- Create index "idx_item_conversation_id" to table: "item"
-CREATE INDEX "idx_item_conversation_id" ON "public"."item" ("conversation_id");
--- Create index "idx_item_deleted_at" to table: "item"
-CREATE INDEX "idx_item_deleted_at" ON "public"."item" ("deleted_at");
--- Create index "idx_item_public_id" to table: "item"
-CREATE UNIQUE INDEX "idx_item_public_id" ON "public"."item" ("public_id");
--- Create "organization" table
-CREATE TABLE "public"."organization" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "name" character varying(128) NOT NULL,
-  "public_id" character varying(64) NOT NULL,
-  "enabled" boolean NULL DEFAULT true,
-  "owner_id" bigint NOT NULL,
-  PRIMARY KEY ("id"),
-  CONSTRAINT "fk_organization_owner" FOREIGN KEY ("owner_id") REFERENCES "public"."user" ("id") ON UPDATE CASCADE ON DELETE SET NULL
-);
--- Create index "idx_organization_deleted_at" to table: "organization"
-CREATE INDEX "idx_organization_deleted_at" ON "public"."organization" ("deleted_at");
--- Create index "idx_organization_enabled" to table: "organization"
-CREATE INDEX "idx_organization_enabled" ON "public"."organization" ("enabled");
--- Create index "idx_organization_name" to table: "organization"
-CREATE UNIQUE INDEX "idx_organization_name" ON "public"."organization" ("name");
--- Create index "idx_organization_owner_id" to table: "organization"
-CREATE INDEX "idx_organization_owner_id" ON "public"."organization" ("owner_id");
--- Create index "idx_organization_public_id" to table: "organization"
-CREATE UNIQUE INDEX "idx_organization_public_id" ON "public"."organization" ("public_id");
--- Create "organization_member" table
-CREATE TABLE "public"."organization_member" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "user_id" bigint NOT NULL,
-  "organization_id" bigint NOT NULL,
-  "role" character varying(20) NOT NULL,
-  PRIMARY KEY ("id", "user_id", "organization_id"),
-  CONSTRAINT "fk_organization_members" FOREIGN KEY ("organization_id") REFERENCES "public"."organization" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION,
-  CONSTRAINT "fk_user_organizations" FOREIGN KEY ("user_id") REFERENCES "public"."user" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION
-);
--- Create index "idx_organization_member_deleted_at" to table: "organization_member"
-CREATE INDEX "idx_organization_member_deleted_at" ON "public"."organization_member" ("deleted_at");
--- Create "project" table
-CREATE TABLE "public"."project" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "name" character varying(128) NOT NULL,
-  "public_id" character varying(50) NOT NULL,
-  "status" character varying(20) NOT NULL DEFAULT 'active',
-  "organization_id" bigint NOT NULL,
-  "archived_at" timestamptz NULL,
-  PRIMARY KEY ("id")
-);
--- Create index "idx_project_archived_at" to table: "project"
-CREATE INDEX "idx_project_archived_at" ON "public"."project" ("archived_at");
--- Create index "idx_project_deleted_at" to table: "project"
-CREATE INDEX "idx_project_deleted_at" ON "public"."project" ("deleted_at");
--- Create index "idx_project_name" to table: "project"
-CREATE UNIQUE INDEX "idx_project_name" ON "public"."project" ("name");
--- Create index "idx_project_organization_id" to table: "project"
-CREATE INDEX "idx_project_organization_id" ON "public"."project" ("organization_id");
--- Create index "idx_project_public_id" to table: "project"
-CREATE UNIQUE INDEX "idx_project_public_id" ON "public"."project" ("public_id");
--- Create index "idx_project_status" to table: "project"
-CREATE INDEX "idx_project_status" ON "public"."project" ("status");
--- Create "project_member" table
-CREATE TABLE "public"."project_member" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "user_id" bigint NOT NULL,
-  "project_id" bigint NOT NULL,
-  "role" character varying(20) NOT NULL,
-  PRIMARY KEY ("id", "user_id", "project_id"),
-  CONSTRAINT "fk_project_members" FOREIGN KEY ("project_id") REFERENCES "public"."project" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION,
-  CONSTRAINT "fk_user_projects" FOREIGN KEY ("user_id") REFERENCES "public"."user" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION
-);
--- Create index "idx_project_member_deleted_at" to table: "project_member"
-CREATE INDEX "idx_project_member_deleted_at" ON "public"."project_member" ("deleted_at");
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/migrationsqls/000002.sql b/apps/jan-api-gateway/application/app/infrastructure/database/migrationsqls/000002.sql
deleted file mode 100644
index f7d443c8..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/migrationsqls/000002.sql
+++ /dev/null
@@ -1,106 +0,0 @@
--- Drop index "idx_api_key_owner_public_id" from table: "api_key"
-DROP INDEX "public"."idx_api_key_owner_public_id";
--- Create index "idx_conversation_is_private" to table: "conversation"
-CREATE INDEX "idx_conversation_is_private" ON "public"."conversation" ("is_private");
--- Create index "idx_conversation_status" to table: "conversation"
-CREATE INDEX "idx_conversation_status" ON "public"."conversation" ("status");
--- Drop index "idx_organization_name" from table: "organization"
-DROP INDEX "public"."idx_organization_name";
--- Modify "organization_member" table
-ALTER TABLE "public"."organization_member" ADD COLUMN "is_primary" boolean NULL DEFAULT false;
--- Drop index "idx_project_name" from table: "project"
-DROP INDEX "public"."idx_project_name";
--- Create "invite" table
-CREATE TABLE "public"."invite" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "public_id" character varying(64) NOT NULL,
-  "email" character varying(128) NOT NULL,
-  "role" character varying(20) NOT NULL,
-  "status" character varying(20) NOT NULL,
-  "invited_at" timestamptz NULL,
-  "expires_at" timestamptz NULL,
-  "accepted_at" timestamptz NULL,
-  "secrets" text NULL,
-  "projects" jsonb NULL,
-  "organization_id" bigint NOT NULL,
-  PRIMARY KEY ("id")
-);
--- Create index "idx_invite_deleted_at" to table: "invite"
-CREATE INDEX "idx_invite_deleted_at" ON "public"."invite" ("deleted_at");
--- Create index "idx_invite_organization_id" to table: "invite"
-CREATE INDEX "idx_invite_organization_id" ON "public"."invite" ("organization_id");
--- Create index "idx_invite_public_id" to table: "invite"
-CREATE UNIQUE INDEX "idx_invite_public_id" ON "public"."invite" ("public_id");
--- Create index "idx_invite_status" to table: "invite"
-CREATE INDEX "idx_invite_status" ON "public"."invite" ("status");
--- Modify "user" table
-ALTER TABLE "public"."user" ADD COLUMN "is_guest" boolean NULL;
--- Create "responses" table
-CREATE TABLE "public"."responses" (
-  "id" bigserial NOT NULL,
-  "created_at" timestamptz NULL,
-  "updated_at" timestamptz NULL,
-  "deleted_at" timestamptz NULL,
-  "public_id" character varying(255) NOT NULL,
-  "user_id" bigint NOT NULL,
-  "conversation_id" bigint NULL,
-  "previous_response_id" character varying(255) NULL,
-  "model" character varying(255) NOT NULL,
-  "status" character varying(50) NOT NULL DEFAULT 'pending',
-  "input" text NOT NULL,
-  "output" text NULL,
-  "system_prompt" text NULL,
-  "max_tokens" bigint NULL,
-  "temperature" numeric NULL,
-  "top_p" numeric NULL,
-  "top_k" bigint NULL,
-  "repetition_penalty" numeric NULL,
-  "seed" bigint NULL,
-  "stop" text NULL,
-  "presence_penalty" numeric NULL,
-  "frequency_penalty" numeric NULL,
-  "logit_bias" text NULL,
-  "response_format" text NULL,
-  "tools" text NULL,
-  "tool_choice" text NULL,
-  "metadata" text NULL,
-  "stream" boolean NULL,
-  "background" boolean NULL,
-  "timeout" bigint NULL,
-  "user" character varying(255) NULL,
-  "usage" text NULL,
-  "error" text NULL,
-  "completed_at" timestamptz NULL,
-  "cancelled_at" timestamptz NULL,
-  "failed_at" timestamptz NULL,
-  PRIMARY KEY ("id"),
-  CONSTRAINT "fk_responses_conversation" FOREIGN KEY ("conversation_id") REFERENCES "public"."conversation" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION,
-  CONSTRAINT "fk_responses_user_entity" FOREIGN KEY ("user_id") REFERENCES "public"."user" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION
-);
--- Create index "idx_responses_conversation_id" to table: "responses"
-CREATE INDEX "idx_responses_conversation_id" ON "public"."responses" ("conversation_id");
--- Create index "idx_responses_deleted_at" to table: "responses"
-CREATE INDEX "idx_responses_deleted_at" ON "public"."responses" ("deleted_at");
--- Create index "idx_responses_model" to table: "responses"
-CREATE INDEX "idx_responses_model" ON "public"."responses" ("model");
--- Create index "idx_responses_previous_response_id" to table: "responses"
-CREATE INDEX "idx_responses_previous_response_id" ON "public"."responses" ("previous_response_id");
--- Create index "idx_responses_public_id" to table: "responses"
-CREATE UNIQUE INDEX "idx_responses_public_id" ON "public"."responses" ("public_id");
--- Create index "idx_responses_status" to table: "responses"
-CREATE INDEX "idx_responses_status" ON "public"."responses" ("status");
--- Create index "idx_responses_user_id" to table: "responses"
-CREATE INDEX "idx_responses_user_id" ON "public"."responses" ("user_id");
--- Modify "item" table
-ALTER TABLE "public"."item" ALTER COLUMN "incomplete_at" TYPE timestamp, ALTER COLUMN "completed_at" TYPE timestamp, ADD COLUMN "response_id" bigint NULL, ADD CONSTRAINT "fk_responses_items" FOREIGN KEY ("response_id") REFERENCES "public"."responses" ("id") ON UPDATE NO ACTION ON DELETE NO ACTION;
--- Create index "idx_item_response_id" to table: "item"
-CREATE INDEX "idx_item_response_id" ON "public"."item" ("response_id");
--- Create index "idx_item_role" to table: "item"
-CREATE INDEX "idx_item_role" ON "public"."item" ("role");
--- Create index "idx_item_status" to table: "item"
-CREATE INDEX "idx_item_status" ON "public"."item" ("status");
--- Create index "idx_item_type" to table: "item"
-CREATE INDEX "idx_item_type" ON "public"."item" ("type");
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/apikeyrepo/apikey_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/apikeyrepo/apikey_repository.go
deleted file mode 100644
index 4c4d529c..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/apikeyrepo/apikey_repository.go
+++ /dev/null
@@ -1,138 +0,0 @@
-package apikeyrepo
-
-import (
-	"context"
-	"fmt"
-
-	domain "menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type ApiKeyGormRepository struct {
-	db *transaction.Database
-}
-
-// Count implements apikey.ApiKeyRepository.
-func (repo *ApiKeyGormRepository) Count(ctx context.Context, filter domain.ApiKeyFilter) (int64, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.WithContext(ctx).ApiKey
-	sql = repo.applyFilter(query, sql, filter)
-	return sql.Count()
-}
-
-// Create implements apikey.ApiKeyRepository.
-func (repo *ApiKeyGormRepository) Create(ctx context.Context, a *domain.ApiKey) error {
-	model := dbschema.NewSchemaApiKey(a)
-	query := repo.db.GetQuery(ctx)
-	err := query.ApiKey.WithContext(ctx).Create(model)
-	if err != nil {
-		return err
-	}
-	a.ID = model.ID
-	return nil
-}
-
-// DeleteByID implements apikey.ApiKeyRepository.
-func (repo *ApiKeyGormRepository) DeleteByID(ctx context.Context, id uint) error {
-	return repo.db.GetTx(ctx).Delete(&dbschema.ApiKey{}, id).Error
-}
-
-// FindByID implements apikey.ApiKeyRepository.
-func (repo *ApiKeyGormRepository) FindByID(ctx context.Context, id uint) (*domain.ApiKey, error) {
-	query := repo.db.GetQuery(ctx)
-	model, err := query.ApiKey.WithContext(ctx).Where(query.ApiKey.ID.Eq(id)).First()
-	if err != nil {
-		return nil, err
-	}
-	return model.EtoD(), nil
-}
-
-// FindByKeyHash implements apikey.ApiKeyRepository.
-func (repo *ApiKeyGormRepository) FindByKeyHash(ctx context.Context, keyHash string) (*domain.ApiKey, error) {
-	query := repo.db.GetQuery(ctx)
-	model, err := query.ApiKey.WithContext(ctx).Where(query.ApiKey.KeyHash.Eq(keyHash)).First()
-	if err != nil {
-		return nil, err
-	}
-	return model.EtoD(), nil
-}
-
-// Update implements apikey.ApiKeyRepository.
-func (repo *ApiKeyGormRepository) Update(ctx context.Context, u *domain.ApiKey) error {
-	query := repo.db.GetQuery(ctx)
-	apiKey := dbschema.NewSchemaApiKey(u)
-	return query.ApiKey.WithContext(ctx).Save(apiKey)
-}
-
-// FindOneFilter implements apikey.ApiKeyRepository.
-func (repo *ApiKeyGormRepository) FindOneByFilter(ctx context.Context, filter domain.ApiKeyFilter) (*domain.ApiKey, error) {
-	entities, err := repo.FindByFilter(ctx, filter, nil)
-	if err != nil {
-		return nil, err
-	}
-	if len(entities) != 1 {
-		return nil, fmt.Errorf("no records")
-	}
-	return entities[0], err
-}
-
-func (repo *ApiKeyGormRepository) FindByFilter(ctx context.Context, filter domain.ApiKeyFilter, p *query.Pagination) ([]*domain.ApiKey, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.WithContext(ctx).ApiKey
-	sql = repo.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.ApiKey.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.ApiKey.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.ApiKey.ID.Desc())
-		} else {
-			// Default to ascending order
-			sql = sql.Order(query.ApiKey.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.ApiKey) *domain.ApiKey {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-func (repo *ApiKeyGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IApiKeyDo, filter domain.ApiKeyFilter) gormgen.IApiKeyDo {
-	if filter.ApikeyType != nil {
-		sql = sql.Where(query.ApiKey.ApikeyType.Eq(*filter.ApikeyType))
-	}
-	if filter.OwnerPublicID != nil {
-		sql = sql.Where(query.ApiKey.OwnerPublicID.Eq(*filter.OwnerPublicID))
-	}
-	if filter.OrganizationID != nil {
-		sql = sql.Where(query.ApiKey.OrganizationID.Eq(*filter.OrganizationID))
-	}
-	if filter.PublicID != nil {
-		sql = sql.Where(query.ApiKey.PublicID.Eq(*filter.PublicID))
-	}
-	if filter.ProjectID != nil {
-		sql = sql.Where(query.ApiKey.ProjectID.Eq(*filter.ProjectID))
-	}
-	return sql
-}
-
-func NewApiKeyGormRepository(db *transaction.Database) domain.ApiKeyRepository {
-	return &ApiKeyGormRepository{
-		db: db,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/conversationrepo/conversation_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/conversationrepo/conversation_repository.go
deleted file mode 100644
index 612f1f0d..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/conversationrepo/conversation_repository.go
+++ /dev/null
@@ -1,181 +0,0 @@
-package conversationrepo
-
-import (
-	"context"
-	"strings"
-
-	domain "menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type ConversationGormRepository struct {
-	db *transaction.Database
-}
-
-var _ domain.ConversationRepository = (*ConversationGormRepository)(nil)
-
-func NewConversationGormRepository(db *transaction.Database) domain.ConversationRepository {
-	return &ConversationGormRepository{
-		db: db,
-	}
-}
-
-func (r *ConversationGormRepository) Create(ctx context.Context, conversation *domain.Conversation) error {
-	model := dbschema.NewSchemaConversation(conversation)
-	if err := r.db.GetQuery(ctx).Conversation.WithContext(ctx).Create(model); err != nil {
-		return err
-	}
-	conversation.ID = model.ID
-	return nil
-}
-
-func (r *ConversationGormRepository) FindByID(ctx context.Context, id uint) (*domain.Conversation, error) {
-	query := r.db.GetQuery(ctx)
-	model, err := query.Conversation.WithContext(ctx).Where(query.Conversation.ID.Eq(id)).First()
-	if err != nil {
-		return nil, err
-	}
-
-	return model.EtoD(), nil
-}
-
-func (r *ConversationGormRepository) FindByPublicID(ctx context.Context, publicID string) (*domain.Conversation, error) {
-	query := r.db.GetQuery(ctx)
-	model, err := query.Conversation.WithContext(ctx).Where(query.Conversation.PublicID.Eq(publicID)).First()
-	if err != nil {
-		return nil, err
-	}
-
-	return model.EtoD(), nil
-}
-
-func (r *ConversationGormRepository) Update(ctx context.Context, conversation *domain.Conversation) error {
-	model := dbschema.NewSchemaConversation(conversation)
-	model.ID = conversation.ID
-
-	query := r.db.GetQuery(ctx)
-	_, err := query.Conversation.WithContext(ctx).Where(query.Conversation.ID.Eq(conversation.ID)).Updates(model)
-	return err
-}
-
-func (r *ConversationGormRepository) Delete(ctx context.Context, id uint) error {
-	query := r.db.GetQuery(ctx)
-	_, err := query.Conversation.WithContext(ctx).Where(query.Conversation.ID.Eq(id)).Delete()
-	return err
-}
-
-func (r *ConversationGormRepository) AddItem(ctx context.Context, conversationID uint, item *domain.Item) error {
-	model := dbschema.NewSchemaItem(item)
-	model.ConversationID = conversationID
-
-	if err := r.db.GetQuery(ctx).Item.WithContext(ctx).Create(model); err != nil {
-		return err
-	}
-	item.ID = model.ID
-	return nil
-}
-
-func (r *ConversationGormRepository) SearchItems(ctx context.Context, conversationID uint, query string) ([]*domain.Item, error) {
-	searchTerm := "%" + strings.ToLower(query) + "%"
-
-	gormQuery := r.db.GetQuery(ctx)
-	models, err := gormQuery.Item.WithContext(ctx).
-		Where(gormQuery.Item.ConversationID.Eq(conversationID)).
-		Where(gormQuery.Item.Content.Like(searchTerm)).
-		Order(gormQuery.Item.CreatedAt.Asc()).
-		Find()
-
-	if err != nil {
-		return nil, err
-	}
-
-	items := make([]*domain.Item, len(models))
-	for i, model := range models {
-		items[i] = model.EtoD()
-	}
-
-	return items, nil
-}
-
-// BulkAddItems adds multiple items to a conversation in a single transaction
-func (r *ConversationGormRepository) BulkAddItems(ctx context.Context, conversationID uint, items []*domain.Item) error {
-	if len(items) == 0 {
-		return nil
-	}
-
-	models := make([]*dbschema.Item, len(items))
-	for i, item := range items {
-		model := dbschema.NewSchemaItem(item)
-		model.ConversationID = conversationID
-		models[i] = model
-	}
-
-	// Use batch insert for better performance
-	query := r.db.GetQuery(ctx)
-	if err := query.Item.WithContext(ctx).CreateInBatches(models, 100); err != nil {
-		return err
-	}
-
-	// Update the items with their assigned IDs
-	for i, model := range models {
-		items[i].ID = model.ID
-	}
-
-	return nil
-}
-
-func (repo *ConversationGormRepository) FindByFilter(ctx context.Context, filter domain.ConversationFilter, p *query.Pagination) ([]*domain.Conversation, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.Conversation.WithContext(ctx)
-	sql = repo.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Conversation.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Conversation.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Conversation.ID.Desc())
-		} else {
-			sql = sql.Order(query.Conversation.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.Conversation) *domain.Conversation {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-func (repo *ConversationGormRepository) applyFilter(
-	query *gormgen.Query,
-	sql gormgen.IConversationDo,
-	filter domain.ConversationFilter,
-) gormgen.IConversationDo {
-	if filter.PublicID != nil {
-		sql = sql.Where(query.Conversation.PublicID.Eq(*filter.PublicID))
-	}
-	if filter.UserID != nil {
-		sql = sql.Where(query.Conversation.UserID.Eq(*filter.UserID))
-	}
-	return sql
-}
-
-func (repo *ConversationGormRepository) Count(ctx context.Context, filter domain.ConversationFilter) (int64, error) {
-	query := repo.db.GetQuery(ctx)
-	q := query.Conversation.WithContext(ctx)
-	q = repo.applyFilter(query, q, filter)
-	return q.Count()
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/inviterepo/invite_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/inviterepo/invite_repository.go
deleted file mode 100644
index ec36899c..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/inviterepo/invite_repository.go
+++ /dev/null
@@ -1,96 +0,0 @@
-package inviterepo
-
-import (
-	"context"
-
-	domain "menlo.ai/jan-api-gateway/app/domain/invite"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type InviteGormRepository struct {
-	db *transaction.Database
-}
-
-var _ domain.InviteRepository = (*InviteGormRepository)(nil)
-
-func (repo *InviteGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IInviteDo, filter domain.InvitesFilter) gormgen.IInviteDo {
-	if filter.PublicID != nil {
-		sql = sql.Where(query.Invite.PublicID.Eq(*filter.PublicID))
-	}
-	if filter.OrganizationID != nil {
-		sql = sql.Where(query.Invite.OrganizationID.Eq(*filter.OrganizationID))
-	}
-	if filter.Secrets != nil {
-		sql = sql.Where(query.Invite.Secrets.Eq(*filter.Secrets))
-	}
-	return sql
-}
-
-func (repo *InviteGormRepository) Create(ctx context.Context, i *domain.Invite) error {
-	model := dbschema.NewSchemaInvite(i)
-	query := repo.db.GetQuery(ctx)
-	err := query.Invite.WithContext(ctx).Create(model)
-	if err != nil {
-		return err
-	}
-	i.ID = model.ID
-	return nil
-}
-
-func (repo *InviteGormRepository) Update(ctx context.Context, i *domain.Invite) error {
-	invite := dbschema.NewSchemaInvite(i)
-	query := repo.db.GetQuery(ctx)
-	return query.Invite.WithContext(ctx).Save(invite)
-}
-
-func (repo *InviteGormRepository) DeleteByID(ctx context.Context, id uint) error {
-	return repo.db.GetTx(ctx).Delete(&dbschema.Invite{}, id).Error
-}
-
-func (repo *InviteGormRepository) FindByFilter(ctx context.Context, filter domain.InvitesFilter, p *query.Pagination) ([]*domain.Invite, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.Invite.WithContext(ctx)
-	sql = repo.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Invite.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Invite.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Invite.ID.Desc())
-		} else {
-			sql = sql.Order(query.Invite.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.Invite) *domain.Invite {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-func (repo *InviteGormRepository) Count(ctx context.Context, filter domain.InvitesFilter) (int64, error) {
-	query := repo.db.GetQuery(ctx)
-	q := query.Invite.WithContext(ctx)
-	q = repo.applyFilter(query, q, filter)
-	return q.Count()
-}
-
-func NewInviteGormRepository(db *transaction.Database) domain.InviteRepository {
-	return &InviteGormRepository{
-		db: db,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/itemrepo/item_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/itemrepo/item_repository.go
deleted file mode 100644
index 29a9c92b..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/itemrepo/item_repository.go
+++ /dev/null
@@ -1,201 +0,0 @@
-package itemrepo
-
-import (
-	"context"
-	"strings"
-
-	domain "menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type ItemGormRepository struct {
-	db *transaction.Database
-}
-
-func NewItemGormRepository(db *transaction.Database) domain.ItemRepository {
-	return &ItemGormRepository{
-		db: db,
-	}
-}
-
-func (r *ItemGormRepository) Create(ctx context.Context, item *domain.Item) error {
-	model := dbschema.NewSchemaItem(item)
-	if err := r.db.GetQuery(ctx).Item.WithContext(ctx).Create(model); err != nil {
-		return err
-	}
-	item.ID = model.ID
-	return nil
-}
-
-func (r *ItemGormRepository) FindByID(ctx context.Context, id uint) (*domain.Item, error) {
-	query := r.db.GetQuery(ctx)
-	model, err := query.Item.WithContext(ctx).Where(query.Item.ID.Eq(id)).First()
-	if err != nil {
-		return nil, err
-	}
-
-	return model.EtoD(), nil
-}
-
-func (r *ItemGormRepository) FindByConversationID(ctx context.Context, conversationID uint) ([]*domain.Item, error) {
-	query := r.db.GetQuery(ctx)
-	models, err := query.Item.WithContext(ctx).
-		Where(query.Item.ConversationID.Eq(conversationID)).
-		Order(query.Item.CreatedAt.Asc()).
-		Find()
-
-	if err != nil {
-		return nil, err
-	}
-
-	items := make([]*domain.Item, len(models))
-	for i, model := range models {
-		items[i] = model.EtoD()
-	}
-
-	return items, nil
-}
-
-func (r *ItemGormRepository) Search(ctx context.Context, conversationID uint, searchQuery string) ([]*domain.Item, error) {
-	searchTerm := "%" + strings.ToLower(searchQuery) + "%"
-
-	query := r.db.GetQuery(ctx)
-	models, err := query.Item.WithContext(ctx).
-		Where(query.Item.ConversationID.Eq(conversationID)).
-		Where(query.Item.Content.Like(searchTerm)).
-		Order(query.Item.CreatedAt.Asc()).
-		Find()
-
-	if err != nil {
-		return nil, err
-	}
-
-	items := make([]*domain.Item, len(models))
-	for i, model := range models {
-		items[i] = model.EtoD()
-	}
-
-	return items, nil
-}
-
-func (r *ItemGormRepository) FindByPublicID(ctx context.Context, publicID string) (*domain.Item, error) {
-	// Temporary implementation using raw GORM until generated code is updated
-	var model dbschema.Item
-	err := r.db.GetTx(ctx).WithContext(ctx).Where("public_id = ?", publicID).First(&model).Error
-	if err != nil {
-		return nil, err
-	}
-
-	return model.EtoD(), nil
-}
-
-func (r *ItemGormRepository) Delete(ctx context.Context, id uint) error {
-	query := r.db.GetQuery(ctx)
-	_, err := query.Item.WithContext(ctx).Where(query.Item.ID.Eq(id)).Delete()
-	return err
-}
-
-// BulkCreate creates multiple items in a single batch operation
-func (r *ItemGormRepository) BulkCreate(ctx context.Context, items []*domain.Item) error {
-	if len(items) == 0 {
-		return nil
-	}
-
-	models := make([]*dbschema.Item, len(items))
-	for i, item := range items {
-		models[i] = dbschema.NewSchemaItem(item)
-	}
-
-	query := r.db.GetQuery(ctx)
-	if err := query.Item.WithContext(ctx).CreateInBatches(models, 100); err != nil {
-		return err
-	}
-
-	// Update the items with their assigned IDs
-	for i, model := range models {
-		items[i].ID = model.ID
-	}
-
-	return nil
-}
-
-// CountByConversation counts items in a conversation
-func (r *ItemGormRepository) CountByConversation(ctx context.Context, conversationID uint) (int64, error) {
-	query := r.db.GetQuery(ctx)
-	return query.Item.WithContext(ctx).Where(query.Item.ConversationID.Eq(conversationID)).Count()
-}
-
-// ExistsByIDAndConversation efficiently checks if an item exists in a conversation
-func (r *ItemGormRepository) ExistsByIDAndConversation(ctx context.Context, itemID uint, conversationID uint) (bool, error) {
-	query := r.db.GetQuery(ctx)
-	count, err := query.Item.WithContext(ctx).
-		Where(query.Item.ID.Eq(itemID)).
-		Where(query.Item.ConversationID.Eq(conversationID)).
-		Count()
-
-	return count > 0, err
-}
-
-// Count implements conversation.ItemRepository.
-func (repo *ItemGormRepository) Count(ctx context.Context, filter domain.ItemFilter) (int64, error) {
-	query := repo.db.GetQuery(ctx)
-	q := query.Item.WithContext(ctx)
-	q = repo.applyFilter(query, q, filter)
-	return q.Count()
-}
-
-// FindByFilter implements conversation.ItemRepository.
-func (repo *ItemGormRepository) FindByFilter(ctx context.Context, filter domain.ItemFilter, p *query.Pagination) ([]*domain.Item, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.Item.WithContext(ctx)
-	sql = repo.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Item.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Item.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Item.ID.Desc())
-		} else {
-			sql = sql.Order(query.Item.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.Item) *domain.Item {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-func (repo *ItemGormRepository) applyFilter(
-	query *gormgen.Query,
-	sql gormgen.IItemDo,
-	filter domain.ItemFilter,
-) gormgen.IItemDo {
-	if filter.PublicID != nil {
-		sql = sql.Where(query.Item.PublicID.Eq(*filter.PublicID))
-	}
-	if filter.ConversationID != nil {
-		sql = sql.Where(query.Item.ConversationID.Eq(*filter.ConversationID))
-	}
-	if filter.Role != nil {
-		sql = sql.Where(query.Item.Role.Eq(string(*filter.Role)))
-	}
-	if filter.ResponseID != nil {
-		sql = sql.Where(query.Item.ResponseID.Eq(*filter.ResponseID))
-	}
-	return sql
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/organizationrepo/organization_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/organizationrepo/organization_repository.go
deleted file mode 100644
index 06a1a18e..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/organizationrepo/organization_repository.go
+++ /dev/null
@@ -1,186 +0,0 @@
-package organizationrepo
-
-import (
-	"context"
-
-	"gorm.io/gorm/clause"
-	domain "menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type OrganizationGormRepository struct {
-	db *transaction.Database
-}
-
-var _ domain.OrganizationRepository = (*OrganizationGormRepository)(nil)
-
-// applyFilter is a helper function to conditionally apply filter clauses to the GORM query.
-func (repo *OrganizationGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IOrganizationDo, filter domain.OrganizationFilter) gormgen.IOrganizationDo {
-	if filter.PublicID != nil {
-		sql = sql.Where(query.Organization.PublicID.Eq(*filter.PublicID))
-	}
-	// If the Enabled filter is not nil, add a WHERE clause.
-	if filter.Enabled != nil {
-		sql = sql.Where(query.Organization.Enabled.Is(*filter.Enabled))
-	}
-	return sql
-}
-
-// Create persists a new organization to the database.
-func (repo *OrganizationGormRepository) Create(ctx context.Context, o *domain.Organization) error {
-	model := dbschema.NewSchemaOrganization(o)
-	query := repo.db.GetQuery(ctx)
-	err := query.Organization.WithContext(ctx).Create(model)
-	if err != nil {
-		return err
-	}
-	o.ID = model.ID
-	return nil
-}
-
-// Update modifies an existing organization.
-func (repo *OrganizationGormRepository) Update(ctx context.Context, o *domain.Organization) error {
-	organization := dbschema.NewSchemaOrganization(o)
-	query := repo.db.GetQuery(ctx)
-	return query.Organization.WithContext(ctx).Save(organization)
-}
-
-// DeleteByID removes an organization by its ID.
-func (repo *OrganizationGormRepository) DeleteByID(ctx context.Context, id uint) error {
-	return repo.db.GetTx(ctx).Delete(&dbschema.Organization{}, id).Error
-}
-
-// FindByID retrieves an organization by its primary key ID.
-func (repo *OrganizationGormRepository) FindByID(ctx context.Context, id uint) (*domain.Organization, error) {
-	query := repo.db.GetQuery(ctx)
-	model, err := query.Organization.WithContext(ctx).Where(query.Organization.ID.Eq(id)).First()
-	if err != nil {
-		return nil, err
-	}
-	return model.EtoD(), nil
-}
-
-func (repo *OrganizationGormRepository) FindByPublicID(ctx context.Context, publicID string) (*domain.Organization, error) {
-	query := repo.db.GetQuery(ctx)
-	model, err := query.Organization.WithContext(ctx).Where(query.Organization.PublicID.Eq(publicID)).First()
-	if err != nil {
-		return nil, err
-	}
-	return model.EtoD(), nil
-}
-
-// FindByFilter retrieves a list of organizations based on a filter and pagination.
-func (repo *OrganizationGormRepository) FindByFilter(ctx context.Context, filter domain.OrganizationFilter, p *query.Pagination) ([]*domain.Organization, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.WithContext(ctx).Organization
-	sql = repo.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Organization.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Organization.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Organization.ID.Desc())
-		} else {
-			// Default to ascending order
-			sql = sql.Order(query.Organization.ID.Asc())
-		}
-	}
-
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(org *dbschema.Organization) *domain.Organization {
-		return org.EtoD()
-	})
-	return result, nil
-}
-
-// Count returns the total number of organizations matching a given filter.
-func (repo *OrganizationGormRepository) Count(ctx context.Context, filter domain.OrganizationFilter) (int64, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.WithContext(ctx).Organization
-	sql = repo.applyFilter(query, sql, filter)
-	return sql.Count()
-}
-
-// AddMember implements organization.OrganizationRepository.
-func (repo *OrganizationGormRepository) AddMember(ctx context.Context, m *domain.OrganizationMember) error {
-	model := dbschema.NewSchemaOrganizationMember(m)
-	query := repo.db.GetQuery(ctx)
-	return query.OrganizationMember.WithContext(ctx).Clauses(clause.OnConflict{
-		Columns: []clause.Column{
-			{Name: query.OrganizationMember.UserID.ColumnName().String()},
-			{Name: query.OrganizationMember.OrganizationID.ColumnName().String()},
-		},
-		DoNothing: false,
-		UpdateAll: true,
-	}).Create(model)
-}
-
-// FindMemberByFilter implements organization.OrganizationRepository.
-func (repo *OrganizationGormRepository) FindMemberByFilter(ctx context.Context, filter domain.OrganizationMemberFilter, p *query.Pagination) ([]*domain.OrganizationMember, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.WithContext(ctx).OrganizationMember
-	sql = repo.applyMemberFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.OrganizationMember.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.OrganizationMember.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.OrganizationMember.ID.Desc())
-		} else {
-			// Default to ascending order
-			sql = sql.Order(query.OrganizationMember.ID.Asc())
-		}
-	}
-
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(org *dbschema.OrganizationMember) *domain.OrganizationMember {
-		return org.EtoD()
-	})
-	return result, nil
-}
-
-// applyFilter is a helper function to conditionally apply filter clauses to the GORM query.
-func (repo *OrganizationGormRepository) applyMemberFilter(query *gormgen.Query, sql gormgen.IOrganizationMemberDo, filter domain.OrganizationMemberFilter) gormgen.IOrganizationMemberDo {
-	if filter.UserID != nil {
-		sql = sql.Where(query.OrganizationMember.UserID.Eq(*filter.UserID))
-	}
-	// If the Enabled filter is not nil, add a WHERE clause.
-	if filter.OrganizationID != nil {
-		sql = sql.Where(query.OrganizationMember.OrganizationID.Eq(*filter.OrganizationID))
-	}
-	if filter.Role != nil {
-		sql = sql.Where(query.OrganizationMember.Role.Eq(*filter.Role))
-	}
-	return sql
-}
-
-// NewOrganizationGormRepository creates a new repository instance.
-func NewOrganizationGormRepository(db *transaction.Database) domain.OrganizationRepository {
-	return &OrganizationGormRepository{
-		db: db,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/projectrepo/projectrepo.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/projectrepo/projectrepo.go
deleted file mode 100644
index 70357b22..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/projectrepo/projectrepo.go
+++ /dev/null
@@ -1,211 +0,0 @@
-package projectrepo
-
-import (
-	"context"
-
-	"gorm.io/gorm/clause"
-	domain "menlo.ai/jan-api-gateway/app/domain/project"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-type ProjectGormRepository struct {
-	db *transaction.Database
-}
-
-var _ domain.ProjectRepository = (*ProjectGormRepository)(nil)
-
-// AddMember implements project.ProjectRepository.
-func (repo *ProjectGormRepository) AddMember(ctx context.Context, m *domain.ProjectMember) error {
-	model := dbschema.NewSchemaProjectMember(m)
-	query := repo.db.GetQuery(ctx)
-	err := query.ProjectMember.Clauses(clause.OnConflict{
-		Columns: []clause.Column{
-			{Name: query.ProjectMember.UserID.ColumnName().String()},
-			{Name: query.ProjectMember.ProjectID.ColumnName().String()},
-		},
-		DoNothing: false,
-		UpdateAll: true,
-	}).Create(model)
-	if err != nil {
-		return err
-	}
-	m.ID = model.ID
-	return nil
-}
-
-// ListMembers implements project.ProjectRepository.
-func (repo *ProjectGormRepository) FindMembersByFilter(ctx context.Context, filter domain.ProjectMemberFilter, p *query.Pagination) ([]*domain.ProjectMember, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.ProjectMember.WithContext(ctx)
-	sql = repo.applyMemberFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Project.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Project.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Project.ID.Desc())
-		} else {
-			sql = sql.Order(query.Project.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.ProjectMember) *domain.ProjectMember {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-// RemoveMember implements project.ProjectRepository.
-func (repo *ProjectGormRepository) RemoveMember(ctx context.Context, projectID uint, userID uint) error {
-	panic("unimplemented")
-}
-
-// UpdateMemberRole implements project.ProjectRepository.
-func (repo *ProjectGormRepository) UpdateMemberRole(ctx context.Context, projectID uint, userID uint, role string) error {
-	panic("unimplemented")
-}
-
-// applyFilter applies conditions dynamically to the query.
-func (repo *ProjectGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IProjectDo, filter domain.ProjectFilter) gormgen.IProjectDo {
-	if filter.PublicID != nil {
-		sql = sql.Where(query.Project.PublicID.Eq(*filter.PublicID))
-	}
-	if filter.Status != nil {
-		sql = sql.Where(query.Project.Status.Eq(*filter.Status))
-	}
-	if filter.OrganizationID != nil {
-		sql = sql.Where(query.Project.OrganizationID.Eq(*filter.OrganizationID))
-	}
-	if filter.Archived == ptr.ToBool(true) {
-		sql = sql.Where(query.Project.ArchivedAt.IsNotNull())
-	}
-	if filter.PublicIDs != nil {
-		sql = sql.Where(query.Project.PublicID.In(*filter.PublicIDs...))
-	}
-	if filter.MemberID != nil {
-		sql = sql.
-			Join(query.ProjectMember, query.ProjectMember.ProjectID.EqCol(query.Project.ID)).
-			Where(query.ProjectMember.UserID.Eq(*filter.MemberID))
-	}
-	return sql
-}
-
-// applyMemberFilter applies conditions dynamically to the query.
-func (repo *ProjectGormRepository) applyMemberFilter(query *gormgen.Query, sql gormgen.IProjectMemberDo, filter domain.ProjectMemberFilter) gormgen.IProjectMemberDo {
-	if filter.ProjectID != nil {
-		sql = sql.Where(query.ProjectMember.ProjectID.Eq(*filter.ProjectID))
-	}
-	if filter.UserID != nil {
-		sql = sql.Where(query.ProjectMember.UserID.Eq(*filter.UserID))
-	}
-	if filter.Role != nil {
-		sql = sql.Where(query.ProjectMember.Role.Eq(*filter.Role))
-	}
-	return sql
-}
-
-// Create persists a new project to the database.
-func (repo *ProjectGormRepository) Create(ctx context.Context, p *domain.Project) error {
-	model := dbschema.NewSchemaProject(p)
-	query := repo.db.GetQuery(ctx)
-	err := query.Project.WithContext(ctx).Create(model)
-	if err != nil {
-		return err
-	}
-	p.ID = model.ID
-	return nil
-}
-
-// Update modifies an existing project.
-func (repo *ProjectGormRepository) Update(ctx context.Context, p *domain.Project) error {
-	project := dbschema.NewSchemaProject(p)
-	query := repo.db.GetQuery(ctx)
-	return query.Project.WithContext(ctx).Save(project)
-}
-
-// DeleteByID removes a project by its ID.
-func (repo *ProjectGormRepository) DeleteByID(ctx context.Context, id uint) error {
-	return repo.db.GetTx(ctx).Delete(&dbschema.Project{}, id).Error
-}
-
-// FindByID retrieves a project by its primary key.
-func (repo *ProjectGormRepository) FindByID(ctx context.Context, id uint) (*domain.Project, error) {
-	query := repo.db.GetQuery(ctx)
-	model, err := query.Project.WithContext(ctx).Where(query.Project.ID.Eq(id)).First()
-	if err != nil {
-		return nil, err
-	}
-	return model.EtoD(), nil
-}
-
-// FindByPublicID retrieves a project by its public ID.
-func (repo *ProjectGormRepository) FindByPublicID(ctx context.Context, publicID string) (*domain.Project, error) {
-	query := repo.db.GetQuery(ctx)
-	model, err := query.Project.WithContext(ctx).Where(query.Project.PublicID.Eq(publicID)).First()
-	if err != nil {
-		return nil, err
-	}
-	return model.EtoD(), nil
-}
-
-// FindByFilter retrieves a list of projects matching filter + pagination.
-func (repo *ProjectGormRepository) FindByFilter(ctx context.Context, filter domain.ProjectFilter, p *query.Pagination) ([]*domain.Project, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.Project.WithContext(ctx)
-	sql = repo.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Project.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Project.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Project.ID.Desc())
-		} else {
-			sql = sql.Order(query.Project.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.Project) *domain.Project {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-// Count returns number of projects that match filter.
-func (repo *ProjectGormRepository) Count(ctx context.Context, filter domain.ProjectFilter) (int64, error) {
-	query := repo.db.GetQuery(ctx)
-	q := query.Project.WithContext(ctx)
-	q = repo.applyFilter(query, q, filter)
-	return q.Count()
-}
-
-// NewProjectGormRepository creates a new Project repo instance.
-func NewProjectGormRepository(db *transaction.Database) domain.ProjectRepository {
-	return &ProjectGormRepository{
-		db: db,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/repository_provider.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/repository_provider.go
deleted file mode 100644
index 8de51130..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/repository_provider.go
+++ /dev/null
@@ -1,26 +0,0 @@
-package repository
-
-import (
-	"github.com/google/wire"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/apikeyrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/conversationrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/inviterepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/itemrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/organizationrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/projectrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/responserepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/userrepo"
-)
-
-var RepositoryProvider = wire.NewSet(
-	inviterepo.NewInviteGormRepository,
-	organizationrepo.NewOrganizationGormRepository,
-	projectrepo.NewProjectGormRepository,
-	apikeyrepo.NewApiKeyGormRepository,
-	userrepo.NewUserGormRepository,
-	conversationrepo.NewConversationGormRepository,
-	itemrepo.NewItemGormRepository,
-	responserepo.NewResponseGormRepository,
-	transaction.NewDatabase,
-)
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/responserepo/response_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/responserepo/response_repository.go
deleted file mode 100644
index 20ab28fb..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/responserepo/response_repository.go
+++ /dev/null
@@ -1,165 +0,0 @@
-package responserepo
-
-import (
-	"context"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/domain/response"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-)
-
-type ResponseGormRepository struct {
-	db *transaction.Database
-}
-
-var _ response.ResponseRepository = (*ResponseGormRepository)(nil)
-
-func NewResponseGormRepository(db *transaction.Database) response.ResponseRepository {
-	return &ResponseGormRepository{
-		db: db,
-	}
-}
-
-// Create creates a new response in the database
-func (r *ResponseGormRepository) Create(ctx context.Context, resp *response.Response) error {
-	// Generate public ID if not provided
-	if resp.PublicID == "" {
-		id, err := idgen.GenerateSecureID("resp", 42)
-		if err != nil {
-			return err
-		}
-		resp.PublicID = id
-	}
-
-	model := dbschema.NewSchemaResponse(resp)
-	if err := r.db.GetQuery(ctx).Response.WithContext(ctx).Create(model); err != nil {
-		return err
-	}
-	resp.ID = model.ID
-	return nil
-}
-
-// Update updates an existing response in the database
-func (r *ResponseGormRepository) Update(ctx context.Context, resp *response.Response) error {
-	model := dbschema.NewSchemaResponse(resp)
-	model.ID = resp.ID
-
-	query := r.db.GetQuery(ctx)
-	_, err := query.Response.WithContext(ctx).Where(query.Response.ID.Eq(resp.ID)).Updates(model)
-	return err
-}
-
-// DeleteByID deletes a response by ID
-func (r *ResponseGormRepository) DeleteByID(ctx context.Context, id uint) error {
-	query := r.db.GetQuery(ctx)
-	_, err := query.Response.WithContext(ctx).Where(query.Response.ID.Eq(id)).Delete()
-	return err
-}
-
-// FindByID finds a response by ID
-func (r *ResponseGormRepository) FindByID(ctx context.Context, id uint) (*response.Response, error) {
-	query := r.db.GetQuery(ctx)
-	model, err := query.Response.WithContext(ctx).Where(query.Response.ID.Eq(id)).First()
-	if err != nil {
-		return nil, err
-	}
-
-	return model.EtoD(), nil
-}
-
-// FindByPublicID finds a response by public ID
-func (r *ResponseGormRepository) FindByPublicID(ctx context.Context, publicID string) (*response.Response, error) {
-	query := r.db.GetQuery(ctx)
-	model, err := query.Response.WithContext(ctx).Where(query.Response.PublicID.Eq(publicID)).First()
-	if err != nil {
-		return nil, err
-	}
-
-	return model.EtoD(), nil
-}
-
-// FindByFilter finds responses by filter criteria
-func (r *ResponseGormRepository) FindByFilter(ctx context.Context, filter response.ResponseFilter, p *query.Pagination) ([]*response.Response, error) {
-	query := r.db.GetQuery(ctx)
-	sql := query.Response.WithContext(ctx)
-	sql = r.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Response.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Response.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Response.ID.Desc())
-		} else {
-			sql = sql.Order(query.Response.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.Response) *response.Response {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-// Count counts responses by filter criteria
-func (r *ResponseGormRepository) Count(ctx context.Context, filter response.ResponseFilter) (int64, error) {
-	query := r.db.GetQuery(ctx)
-	q := query.Response.WithContext(ctx)
-	q = r.applyFilter(query, q, filter)
-	return q.Count()
-}
-
-// FindByUserID finds responses by user ID
-func (r *ResponseGormRepository) FindByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*response.Response, error) {
-	filter := response.ResponseFilter{UserID: &userID}
-	return r.FindByFilter(ctx, filter, pagination)
-}
-
-// FindByConversationID finds responses by conversation ID
-func (r *ResponseGormRepository) FindByConversationID(ctx context.Context, conversationID uint, pagination *query.Pagination) ([]*response.Response, error) {
-	filter := response.ResponseFilter{ConversationID: &conversationID}
-	return r.FindByFilter(ctx, filter, pagination)
-}
-
-// applyFilter applies conditions dynamically to the query
-func (r *ResponseGormRepository) applyFilter(
-	query *gormgen.Query,
-	sql gormgen.IResponseDo,
-	filter response.ResponseFilter,
-) gormgen.IResponseDo {
-	if filter.PublicID != nil {
-		sql = sql.Where(query.Response.PublicID.Eq(*filter.PublicID))
-	}
-	if filter.UserID != nil {
-		sql = sql.Where(query.Response.UserID.Eq(*filter.UserID))
-	}
-	if filter.ConversationID != nil {
-		sql = sql.Where(query.Response.ConversationID.Eq(*filter.ConversationID))
-	}
-	if filter.Model != nil {
-		sql = sql.Where(query.Response.Model.Eq(*filter.Model))
-	}
-	if filter.Status != nil {
-		sql = sql.Where(query.Response.Status.Eq(string(*filter.Status)))
-	}
-	if filter.CreatedAfter != nil {
-		sql = sql.Where(query.Response.CreatedAt.Gte(*filter.CreatedAfter))
-	}
-	if filter.CreatedBefore != nil {
-		sql = sql.Where(query.Response.CreatedAt.Lte(*filter.CreatedBefore))
-	}
-	return sql
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/userrepo/user_repository.go b/apps/jan-api-gateway/application/app/infrastructure/database/repository/userrepo/user_repository.go
deleted file mode 100644
index 7d2f5e50..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/userrepo/user_repository.go
+++ /dev/null
@@ -1,110 +0,0 @@
-package userrepo
-
-import (
-	"context"
-
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	domain "menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type UserGormRepository struct {
-	db *transaction.Database
-}
-
-var _ domain.UserRepository = (*UserGormRepository)(nil)
-
-func NewUserGormRepository(db *transaction.Database) domain.UserRepository {
-	return &UserGormRepository{
-		db: db,
-	}
-}
-
-func (r *UserGormRepository) Create(ctx context.Context, u *domain.User) error {
-	model := dbschema.NewSchemaUser(u)
-	if err := r.db.GetQuery(ctx).User.WithContext(ctx).Create(model); err != nil {
-		return err
-	}
-	u.ID = model.ID
-	return nil
-}
-
-func (r *UserGormRepository) FindByID(ctx context.Context, id uint) (*domain.User, error) {
-	query := r.db.GetQuery(ctx)
-	model, err := query.User.WithContext(ctx).Where(query.User.ID.Eq(id)).First()
-	if err != nil {
-		return nil, err
-	}
-
-	return model.EtoD(), nil
-}
-
-func (repo *UserGormRepository) FindFirst(ctx context.Context, filter domain.UserFilter) (*domain.User, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.User.WithContext(ctx)
-	sql = repo.applyFilter(query, sql, filter)
-	item, err := sql.First()
-	if err != nil {
-		return nil, err
-	}
-	return item.EtoD(), nil
-}
-
-func (repo *UserGormRepository) FindByFilter(ctx context.Context, filter domain.UserFilter, p *query.Pagination) ([]*domain.User, error) {
-	query := repo.db.GetQuery(ctx)
-	sql := query.User.WithContext(ctx)
-	sql = repo.applyFilter(query, sql, filter)
-	if p != nil {
-		if p.Limit != nil && *p.Limit > 0 {
-			sql = sql.Limit(*p.Limit)
-		}
-		if p.After != nil {
-			if p.Order == "desc" {
-				sql = sql.Where(query.Project.ID.Lt(*p.After))
-			} else {
-				sql = sql.Where(query.Project.ID.Gt(*p.After))
-			}
-		}
-		if p.Order == "desc" {
-			sql = sql.Order(query.Project.ID.Desc())
-		} else {
-			sql = sql.Order(query.Project.ID.Asc())
-		}
-	}
-	rows, err := sql.Find()
-	if err != nil {
-		return nil, err
-	}
-	result := functional.Map(rows, func(item *dbschema.User) *domain.User {
-		return item.EtoD()
-	})
-	return result, nil
-}
-
-// applyFilter applies conditions dynamically to the query.
-func (repo *UserGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IUserDo, filter domain.UserFilter) gormgen.IUserDo {
-	if filter.PublicID != nil {
-		sql = sql.Where(query.User.PublicID.Eq(*filter.PublicID))
-	}
-	if filter.Email != nil {
-		sql = sql.Where(query.User.Email.Eq(*filter.Email))
-	}
-	if filter.Enabled != nil {
-		sql = sql.Where(query.User.Enabled.Is(*filter.Enabled))
-	}
-	if filter.OrganizationId != nil {
-		sql = sql.
-			Join(query.OrganizationMember, query.OrganizationMember.UserID.EqCol(query.User.ID)).
-			Where(query.OrganizationMember.OrganizationID.Eq(*filter.OrganizationId))
-	}
-	return sql
-}
-
-func (r *UserGormRepository) Update(ctx context.Context, u *domain.User) error {
-	user := dbschema.NewSchemaUser(u)
-	query := r.db.GetQuery(ctx)
-	return query.User.WithContext(ctx).Save(user)
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/inference/jan_inference_provider.go b/apps/jan-api-gateway/application/app/infrastructure/inference/jan_inference_provider.go
deleted file mode 100644
index e60f7001..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/inference/jan_inference_provider.go
+++ /dev/null
@@ -1,88 +0,0 @@
-package inference
-
-import (
-	"context"
-	"io"
-
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/inference"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-)
-
-// JanInferenceProvider implements InferenceProvider using Jan Inference service
-type JanInferenceProvider struct {
-	client *janinference.JanInferenceClient
-}
-
-// NewJanInferenceProvider creates a new JanInferenceProvider
-func NewJanInferenceProvider(client *janinference.JanInferenceClient) inference.InferenceProvider {
-	return &JanInferenceProvider{
-		client: client,
-	}
-}
-
-// CreateCompletion creates a non-streaming chat completion
-func (p *JanInferenceProvider) CreateCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, error) {
-	return p.client.CreateChatCompletion(ctx, apiKey, request)
-}
-
-// CreateCompletionStream creates a streaming chat completion
-func (p *JanInferenceProvider) CreateCompletionStream(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (io.ReadCloser, error) {
-	// Create a pipe for streaming
-	reader, writer := io.Pipe()
-
-	go func() {
-		defer writer.Close()
-
-		// Use the existing streaming logic but write to pipe instead of HTTP response
-		req := janinference.JanInferenceRestyClient.R().SetBody(request)
-		resp, err := req.
-			SetContext(ctx).
-			SetDoNotParseResponse(true).
-			Post("/v1/chat/completions")
-		if err != nil {
-			writer.CloseWithError(err)
-			return
-		}
-		defer resp.RawResponse.Body.Close()
-
-		// Stream data to pipe
-		_, err = io.Copy(writer, resp.RawResponse.Body)
-		if err != nil {
-			writer.CloseWithError(err)
-		}
-	}()
-
-	return reader, nil
-}
-
-func (p *JanInferenceProvider) GetModels(ctx context.Context) (*inference.ModelsResponse, error) {
-	clientResponse, err := p.client.GetModels(ctx)
-	if err != nil {
-		return nil, err
-	}
-
-	// Convert to domain models
-	models := make([]inference.Model, len(clientResponse.Data))
-	for i, model := range clientResponse.Data {
-		models[i] = inference.Model{
-			ID:      model.ID,
-			Object:  model.Object,
-			Created: model.Created,
-			OwnedBy: model.OwnedBy,
-		}
-	}
-
-	response := &inference.ModelsResponse{
-		Object: clientResponse.Object,
-		Data:   models,
-	}
-	return response, nil
-}
-
-// ValidateModel checks if a model is supported
-func (p *JanInferenceProvider) ValidateModel(model string) error {
-	// For now, assume all models are supported by Jan Inference
-	// In the future, this could check against a list of supported models
-	return nil
-}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/infrastructure_provider.go b/apps/jan-api-gateway/application/app/infrastructure/infrastructure_provider.go
deleted file mode 100644
index 2ec98b34..00000000
--- a/apps/jan-api-gateway/application/app/infrastructure/infrastructure_provider.go
+++ /dev/null
@@ -1,16 +0,0 @@
-package infrastructure
-
-import (
-	"github.com/google/wire"
-	inferencemodelregistry "menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
-	"menlo.ai/jan-api-gateway/app/infrastructure/cache"
-	"menlo.ai/jan-api-gateway/app/infrastructure/inference"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-)
-
-var InfrastructureProvider = wire.NewSet(
-	janinference.NewJanInferenceClient,
-	inference.NewJanInferenceProvider,
-	cache.NewRedisCacheService,
-	inferencemodelregistry.NewInferenceModelRegistry,
-)
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/http_server.go b/apps/jan-api-gateway/application/app/interfaces/http/http_server.go
deleted file mode 100644
index 91e862a3..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/http_server.go
+++ /dev/null
@@ -1,77 +0,0 @@
-package http
-
-import (
-	"fmt"
-	"net/http"
-	"os"
-
-	"github.com/gin-gonic/gin"
-
-	"menlo.ai/jan-api-gateway/app/interfaces/http/middleware"
-	v1 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/config"
-
-	swaggerFiles "github.com/swaggo/files"
-	ginSwagger "github.com/swaggo/gin-swagger"
-	_ "menlo.ai/jan-api-gateway/docs"
-)
-
-type HttpServer struct {
-	engine  *gin.Engine
-	v1Route *v1.V1Route
-}
-
-func (s *HttpServer) bindSwagger() {
-	g := s.engine.Group("/")
-
-	g.GET("/api/swagger/*any", ginSwagger.WrapHandler(swaggerFiles.Handler))
-}
-
-func (s *HttpServer) bindDev() {
-	g := s.engine.Group("/")
-
-	g.GET("/auth/googletest", func(c *gin.Context) {
-		code := c.Query("code")
-		state := c.Query("state")
-		curlCommand := fmt.Sprintf(`curl --request POST \
-  --url 'http://localhost:8080/v1/auth/google/callback' \
-  --header 'Content-Type: application/json' \
-  --cookie 'jan_oauth_state=%s' \
-  --data '{"code": "%s", "state": "%s"}'`, state, code, state)
-		c.String(http.StatusOK, curlCommand)
-	})
-
-}
-
-func NewHttpServer(v1Route *v1.V1Route) *HttpServer {
-	if os.Getenv("local_dev") == "" {
-		gin.SetMode(gin.ReleaseMode)
-	}
-	server := HttpServer{
-		gin.New(),
-		v1Route,
-	}
-	// TODO: we should enable cors later
-	server.engine.Use(middleware.CORS())
-	server.engine.Use(middleware.LoggerMiddleware(logger.Logger))
-	server.engine.Use(middleware.TransactionMiddleware())
-	server.engine.GET("/healthcheck", func(c *gin.Context) {
-		c.JSON(200, "ok")
-	})
-	server.bindSwagger()
-	if config.IsDev() {
-		server.bindDev()
-	}
-	return &server
-}
-
-func (httpServer *HttpServer) Run() error {
-	port := 8080
-	root := httpServer.engine.Group("/")
-	httpServer.v1Route.RegisterRouter(root)
-	if err := httpServer.engine.Run(fmt.Sprintf(":%d", port)); err != nil {
-		return err
-	}
-	return nil
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/middleware/cors.go b/apps/jan-api-gateway/application/app/interfaces/http/middleware/cors.go
deleted file mode 100644
index 0579e6dc..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/middleware/cors.go
+++ /dev/null
@@ -1,45 +0,0 @@
-package middleware
-
-import (
-	"net/http"
-	"strings"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/config"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-func CORS() gin.HandlerFunc {
-	return func(c *gin.Context) {
-		host := c.Request.Header.Get("Origin")
-		isValidHost := false
-		for _, allowedHost := range environment_variables.EnvironmentVariables.ALLOWED_CORS_HOSTS {
-			// wildcard
-			if strings.HasPrefix(allowedHost, "*") {
-				suffix := strings.TrimPrefix(allowedHost, "*")
-				if strings.HasSuffix(host, suffix) {
-					isValidHost = true
-					break
-				}
-			}
-			if allowedHost == host {
-				isValidHost = true
-				break
-			}
-		}
-		if isValidHost || config.IsDev() {
-			c.Writer.Header().Set("Access-Control-Allow-Origin", host)
-			c.Writer.Header().Set("Access-Control-Allow-Credentials", "true")
-			c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With, MCP-Protocol-Version, Mcp-Session-Id, X-User-ID, X-User-Email, X-User-Role, MCP-Client-Id, X-Request-Id")
-			c.Writer.Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS, GET, PUT, PATCH, DELETE")
-			c.Writer.Header().Set("Access-Control-Expose-Headers", "Mcp-Session-Id")
-			c.Writer.Header().Set("Vary", "Origin")
-		}
-
-		if c.Request.Method == "OPTIONS" {
-			c.AbortWithStatus(http.StatusNoContent)
-			return
-		}
-		c.Next()
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/middleware/jwt.go b/apps/jan-api-gateway/application/app/interfaces/http/middleware/jwt.go
deleted file mode 100644
index 1c3444e4..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/middleware/jwt.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package middleware
-
-import (
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	"github.com/golang-jwt/jwt/v5"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-// Todo: Deprecated
-func AuthMiddleware() gin.HandlerFunc {
-	return func(c *gin.Context) {
-		tokenString, ok := requests.GetTokenFromBearer(c)
-		if !ok {
-			c.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "c6d6bafd-b9f3-4ebb-9c90-a21b07308ebc",
-			})
-			return
-		}
-		token, err := jwt.ParseWithClaims(tokenString, &auth.UserClaim{}, func(token *jwt.Token) (interface{}, error) {
-			return environment_variables.EnvironmentVariables.JWT_SECRET, nil
-		})
-		if err != nil || !token.Valid {
-			c.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "9d7a21c4-d94c-4451-841b-4d9333f86942",
-			})
-			return
-		}
-
-		claims, ok := token.Claims.(*auth.UserClaim)
-		if !ok {
-			c.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "6cc0aa26-148d-4b8d-8f53-9d47b2a00ef1",
-			})
-			return
-		}
-
-		c.Set(auth.ContextUserClaim, claims)
-		c.Next()
-	}
-}
-
-func OptionalAuthMiddleware() gin.HandlerFunc {
-	return func(c *gin.Context) {
-		tokenString, ok := requests.GetTokenFromBearer(c)
-		if !ok {
-			c.Next()
-			return
-		}
-		token, err := jwt.ParseWithClaims(tokenString, &auth.UserClaim{}, func(token *jwt.Token) (interface{}, error) {
-			return environment_variables.EnvironmentVariables.JWT_SECRET, nil
-		})
-		if err != nil || !token.Valid {
-			c.Next()
-			return
-		}
-
-		claims, ok := token.Claims.(*auth.UserClaim)
-		if !ok {
-			c.Next()
-			return
-		}
-
-		c.Set(auth.ContextUserClaim, claims)
-		c.Next()
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/middleware/logger.go b/apps/jan-api-gateway/application/app/interfaces/http/middleware/logger.go
deleted file mode 100644
index f9f1fb4f..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/middleware/logger.go
+++ /dev/null
@@ -1,76 +0,0 @@
-package middleware
-
-import (
-	"bytes"
-	"context"
-	"io"
-	"strings"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"github.com/google/uuid"
-	"github.com/sirupsen/logrus"
-	"menlo.ai/jan-api-gateway/app/utils/contextkeys"
-)
-
-type BodyLogWriter struct {
-	gin.ResponseWriter
-	body *bytes.Buffer
-}
-
-func (w BodyLogWriter) Write(b []byte) (int, error) {
-	w.body.Write(b) // capture response
-	return w.ResponseWriter.Write(b)
-}
-
-func LoggerMiddleware(logger *logrus.Logger) gin.HandlerFunc {
-	return func(c *gin.Context) {
-		start := time.Now()
-
-		// Generate and set request ID
-		requestID := uuid.New().String()
-		ctx := c.Request.Context()
-		ctx = context.WithValue(ctx, contextkeys.RequestId{}, requestID)
-		c.Request = c.Request.WithContext(ctx)
-		c.Writer.Header().Set("X-Request-ID", requestID)
-
-		// Read request body
-		var reqBody []byte
-		if c.Request.Body != nil {
-			reqBody, _ = io.ReadAll(c.Request.Body)
-			// Restore body so Gin can read it again
-			c.Request.Body = io.NopCloser(bytes.NewBuffer(reqBody))
-		}
-
-		// Wrap writer only if not streaming
-		var blw = &BodyLogWriter{body: bytes.NewBufferString(""), ResponseWriter: c.Writer}
-		c.Writer = blw
-
-		// Process request
-		c.Next()
-		contentType := c.Writer.Header().Get("Content-Type")
-		isStream := strings.HasPrefix(contentType, "text/event-stream") ||
-			strings.HasPrefix(contentType, "application/octet-stream") ||
-			strings.HasPrefix(contentType, "application/x-ndjson")
-
-		// Log everything
-		duration := time.Since(start)
-		responseBody := ""
-		if !isStream {
-			responseBody = blw.body.String()
-		}
-		logger.WithFields(logrus.Fields{
-			"request_id": requestID,
-			"status":     c.Writer.Status(),
-			"method":     c.Request.Method,
-			"host":       c.Request.Host,
-			"path":       c.Request.URL.Path,
-			"query":      c.Request.URL.RawQuery,
-			"headers":    c.Request.Header,
-			"req_body":   string(reqBody),
-			"resp_body":  responseBody,
-			"latency":    duration.String(),
-			"client_ip":  c.ClientIP(),
-		}).Info("")
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/middleware/transaction.go b/apps/jan-api-gateway/application/app/interfaces/http/middleware/transaction.go
deleted file mode 100644
index 63376c81..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/middleware/transaction.go
+++ /dev/null
@@ -1,37 +0,0 @@
-package middleware
-
-import (
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-)
-
-func TransactionMiddleware() gin.HandlerFunc {
-	return func(c *gin.Context) {
-		tx := database.DB.Begin()
-		defer func() {
-			if r := recover(); r != nil {
-				tx.Rollback()
-			}
-		}()
-		ctxWithTx := transaction.WithTx(c.Request.Context(), tx)
-		c.Request = c.Request.WithContext(ctxWithTx)
-		c.Next()
-
-		if c.IsAborted() {
-			tx.Rollback()
-			return
-		}
-
-		if err := tx.Commit().Error; err != nil {
-			tx.Rollback()
-			c.JSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code: "a5a38af2-1605-4f58-a89c-fa3ff390d4db",
-			})
-			return
-		}
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/requests/request.go b/apps/jan-api-gateway/application/app/interfaces/http/requests/request.go
deleted file mode 100644
index 3ab4250d..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/requests/request.go
+++ /dev/null
@@ -1,32 +0,0 @@
-package requests
-
-import (
-	"fmt"
-	"strconv"
-	"strings"
-
-	"github.com/gin-gonic/gin"
-)
-
-func GetIntParam(reqCtx *gin.Context, paramName string) (int, error) {
-	param := reqCtx.Param(paramName)
-	if param == "" {
-		return 0, fmt.Errorf("invalid param")
-	}
-	value, err := strconv.Atoi(param)
-	return value, err
-}
-
-func GetTokenFromBearer(c *gin.Context) (string, bool) {
-	authHeader := c.GetHeader("Authorization")
-	if authHeader == "" {
-		return "", false
-	}
-
-	if !strings.HasPrefix(authHeader, "Bearer ") {
-		return "", false
-	}
-
-	token := strings.TrimPrefix(authHeader, "Bearer ")
-	return token, true
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/requests/response_requests.go b/apps/jan-api-gateway/application/app/interfaces/http/requests/response_requests.go
deleted file mode 100644
index 2aaf6df1..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/requests/response_requests.go
+++ /dev/null
@@ -1,244 +0,0 @@
-package requests
-
-// CreateResponseRequest represents the request body for creating a response
-// Reference: https://platform.openai.com/docs/api-reference/responses/create
-type CreateResponseRequest struct {
-	// The ID of the model to use for this response.
-	Model string `json:"model" binding:"required"`
-
-	// The input to the model. Can be a string or array of strings.
-	Input any `json:"input" binding:"required"`
-
-	// The system prompt to use for this response.
-	SystemPrompt *string `json:"system_prompt,omitempty"`
-
-	// The maximum number of tokens to generate.
-	MaxTokens *int `json:"max_tokens,omitempty"`
-
-	// The temperature to use for this response.
-	Temperature *float64 `json:"temperature,omitempty"`
-
-	// The top_p to use for this response.
-	TopP *float64 `json:"top_p,omitempty"`
-
-	// The top_k to use for this response.
-	TopK *int `json:"top_k,omitempty"`
-
-	// The repetition penalty to use for this response.
-	RepetitionPenalty *float64 `json:"repetition_penalty,omitempty"`
-
-	// The seed to use for this response.
-	Seed *int `json:"seed,omitempty"`
-
-	// The stop sequences to use for this response.
-	Stop []string `json:"stop,omitempty"`
-
-	// The presence penalty to use for this response.
-	PresencePenalty *float64 `json:"presence_penalty,omitempty"`
-
-	// The frequency penalty to use for this response.
-	FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
-
-	// The logit bias to use for this response.
-	LogitBias map[string]float64 `json:"logit_bias,omitempty"`
-
-	// The response format to use for this response.
-	ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
-
-	// The tools to use for this response.
-	Tools []Tool `json:"tools,omitempty"`
-
-	// The tool choice to use for this response.
-	ToolChoice *ToolChoice `json:"tool_choice,omitempty"`
-
-	// The metadata to use for this response.
-	Metadata map[string]any `json:"metadata,omitempty"`
-
-	// Whether to stream the response.
-	Stream *bool `json:"stream,omitempty"`
-
-	// Whether to run the response in the background.
-	Background *bool `json:"background,omitempty"`
-
-	// The timeout in seconds for this response.
-	Timeout *int `json:"timeout,omitempty"`
-
-	// The user to use for this response.
-	User *string `json:"user,omitempty"`
-
-	// The conversation ID to append items to. If not set or set to ClientCreatedRootConversationID, a new conversation will be created.
-	Conversation *string `json:"conversation,omitempty"`
-
-	// The ID of the previous response to continue from. If set, the conversation will be loaded from the previous response.
-	PreviousResponseID *string `json:"previous_response_id,omitempty"`
-
-	// Whether to store the conversation. If false, no conversation will be created or used.
-	Store *bool `json:"store,omitempty"`
-}
-
-// CreateResponseInput represents the input to the model
-type CreateResponseInput struct {
-	// The type of input.
-	Type InputType `json:"type" binding:"required"`
-
-	// The text input (required for text type).
-	Text *string `json:"text,omitempty"`
-
-	// The image input (required for image type).
-	Image *ImageInput `json:"image,omitempty"`
-
-	// The file input (required for file type).
-	File *FileInput `json:"file,omitempty"`
-
-	// The web search input (required for web_search type).
-	WebSearch *WebSearchInput `json:"web_search,omitempty"`
-
-	// The file search input (required for file_search type).
-	FileSearch *FileSearchInput `json:"file_search,omitempty"`
-
-	// The streaming input (required for streaming type).
-	Streaming *StreamingInput `json:"streaming,omitempty"`
-
-	// The function calls input (required for function_calls type).
-	FunctionCalls *FunctionCallsInput `json:"function_calls,omitempty"`
-
-	// The reasoning input (required for reasoning type).
-	Reasoning *ReasoningInput `json:"reasoning,omitempty"`
-}
-
-// InputType represents the type of input
-type InputType string
-
-const (
-	InputTypeText          InputType = "text"
-	InputTypeImage         InputType = "image"
-	InputTypeFile          InputType = "file"
-	InputTypeWebSearch     InputType = "web_search"
-	InputTypeFileSearch    InputType = "file_search"
-	InputTypeStreaming     InputType = "streaming"
-	InputTypeFunctionCalls InputType = "function_calls"
-	InputTypeReasoning     InputType = "reasoning"
-)
-
-// ImageInput represents an image input
-type ImageInput struct {
-	// The URL of the image.
-	URL *string `json:"url,omitempty"`
-
-	// The base64 encoded image data.
-	Data *string `json:"data,omitempty"`
-
-	// The detail level for the image.
-	Detail *string `json:"detail,omitempty"`
-}
-
-// FileInput represents a file input
-type FileInput struct {
-	// The ID of the file.
-	FileID string `json:"file_id" binding:"required"`
-}
-
-// WebSearchInput represents a web search input
-type WebSearchInput struct {
-	// The query to search for.
-	Query string `json:"query" binding:"required"`
-
-	// The number of results to return.
-	MaxResults *int `json:"max_results,omitempty"`
-
-	// The search engine to use.
-	SearchEngine *string `json:"search_engine,omitempty"`
-}
-
-// FileSearchInput represents a file search input
-type FileSearchInput struct {
-	// The query to search for.
-	Query string `json:"query" binding:"required"`
-
-	// The IDs of the files to search in.
-	FileIDs []string `json:"file_ids" binding:"required"`
-
-	// The number of results to return.
-	MaxResults *int `json:"max_results,omitempty"`
-}
-
-// StreamingInput represents a streaming input
-type StreamingInput struct {
-	// The URL to stream from.
-	URL string `json:"url" binding:"required"`
-
-	// The headers to send with the request.
-	Headers map[string]string `json:"headers,omitempty"`
-
-	// The method to use for the request.
-	Method *string `json:"method,omitempty"`
-
-	// The body to send with the request.
-	Body *string `json:"body,omitempty"`
-}
-
-// FunctionCallsInput represents function calls input
-type FunctionCallsInput struct {
-	// The function calls to make.
-	Calls []FunctionCall `json:"calls" binding:"required"`
-}
-
-// FunctionCall represents a function call
-type FunctionCall struct {
-	// The name of the function to call.
-	Name string `json:"name" binding:"required"`
-
-	// The arguments to pass to the function.
-	Arguments map[string]any `json:"arguments,omitempty"`
-}
-
-// ReasoningInput represents a reasoning input
-type ReasoningInput struct {
-	// The reasoning task to perform.
-	Task string `json:"task" binding:"required"`
-
-	// The context for the reasoning task.
-	Context *string `json:"context,omitempty"`
-}
-
-// ResponseFormat represents the format of the response
-type ResponseFormat struct {
-	// The type of response format.
-	Type string `json:"type" binding:"required"`
-}
-
-// Tool represents a tool that can be used by the model
-type Tool struct {
-	// The type of tool.
-	Type string `json:"type" binding:"required"`
-
-	// The function definition for function tools.
-	Function *FunctionDefinition `json:"function,omitempty"`
-}
-
-// FunctionDefinition represents a function definition
-type FunctionDefinition struct {
-	// The name of the function.
-	Name string `json:"name" binding:"required"`
-
-	// The description of the function.
-	Description *string `json:"description,omitempty"`
-
-	// The parameters of the function.
-	Parameters map[string]any `json:"parameters,omitempty"`
-}
-
-// ToolChoice represents the tool choice for the model
-type ToolChoice struct {
-	// The type of tool choice.
-	Type string `json:"type" binding:"required"`
-
-	// The function to use for function tool choice.
-	Function *FunctionChoice `json:"function,omitempty"`
-}
-
-// FunctionChoice represents a function choice
-type FunctionChoice struct {
-	// The name of the function.
-	Name string `json:"name" binding:"required"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/responses/openai/response.go b/apps/jan-api-gateway/application/app/interfaces/http/responses/openai/response.go
deleted file mode 100644
index 7e02c292..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/responses/openai/response.go
+++ /dev/null
@@ -1,46 +0,0 @@
-package openai
-
-type ObjectKey string
-
-const (
-	ObjectKeyAdminApiKey ObjectKey = "organization.admin_api_key"
-	ObjectKeyProject     ObjectKey = "organization.project"
-)
-
-type ApikeyType string
-
-const (
-	ApikeyTypeUser ApikeyType = "user"
-)
-
-type OwnerObject string
-
-const (
-	OwnerObjectOrganizationUser OwnerObject = "organization.user"
-)
-
-type OwnerRole string
-
-const (
-	OwnerRoleOwner OwnerObject = "owner"
-)
-
-// @Enum(list)
-type ObjectTypeList string
-
-const ObjectTypeListList ObjectTypeList = "list"
-
-type ListResponse[T any] struct {
-	Object  ObjectTypeList `json:"object"`
-	Data    []T            `json:"data"`
-	FirstID *string        `json:"first_id"`
-	LastID  *string        `json:"last_id"`
-	HasMore bool           `json:"has_more"`
-	Total   int64          `json:"total"`
-}
-
-type DeleteResponse struct {
-	Object  string `json:"object"`
-	ID      string `json:"id"`
-	Deleted bool   `json:"deleted"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/responses/response.go b/apps/jan-api-gateway/application/app/interfaces/http/responses/response.go
deleted file mode 100644
index b2dcecfc..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/responses/response.go
+++ /dev/null
@@ -1,105 +0,0 @@
-package responses
-
-import (
-	"net/http"
-	"time"
-
-	"menlo.ai/jan-api-gateway/config"
-)
-
-type ErrorResponse struct {
-	Code          string `json:"code"`
-	Error         string `json:"error"`
-	ErrorInstance error  `json:"-"`
-}
-
-type GeneralResponse[T any] struct {
-	Status string `json:"status"`
-	Result T      `json:"result"`
-}
-
-type ListResponse[T any] struct {
-	Status  string  `json:"status"`
-	Total   int64   `json:"total"`
-	Results []T     `json:"results"`
-	FirstID *string `json:"first_id"`
-	LastID  *string `json:"last_id"`
-	HasMore bool    `json:"has_more"`
-}
-
-// OpenAIListResponse includes common fields and inline embedding
-// All fields of T will be promoted to the top level of the JSON response
-// All fields except T are nullable (omitempty)
-type OpenAIListResponse[T any] struct {
-	JanStatus *string     `json:"jan_status,omitempty"`
-	Object    *ObjectType `json:"object"`
-	FirstID   *string     `json:"first_id,omitempty"`
-	LastID    *string     `json:"last_id,omitempty"`
-	HasMore   *bool       `json:"has_more,omitempty"`
-	T         []T         `json:"data,inline"` // Inline T - all fields of T will be at the top level
-}
-
-// ObjectType represents the type of object in responses
-type ObjectType string
-
-const (
-	ObjectTypeResponse ObjectType = "response"
-	ObjectTypeList     ObjectType = "list"
-)
-
-const ResponseCodeOk = "000000"
-
-type PageCursor struct {
-	FirstID *string
-	LastID  *string
-	HasMore bool
-	Total   int64
-}
-
-func BuildCursorPage[T any](
-	items []*T,
-	getID func(*T) *string,
-	hasMoreFunc func() ([]*T, error),
-	CountFunc func() (int64, error),
-) (*PageCursor, error) {
-	cursorPage := &PageCursor{}
-	if len(items) > 0 {
-		cursorPage.FirstID = getID(items[0])
-		cursorPage.LastID = getID(items[len(items)-1])
-		moreRecords, err := hasMoreFunc()
-		if len(moreRecords) > 0 {
-			cursorPage.HasMore = true
-		}
-		if err != nil {
-			return nil, err
-		}
-	}
-	count, err := CountFunc()
-	if err != nil {
-		return cursorPage, err
-	}
-	cursorPage.Total = count
-	return cursorPage, nil
-}
-
-func NewCookieWithSecurity(name string, value string, expires time.Time) *http.Cookie {
-	if config.IsDev() {
-		return &http.Cookie{
-			Name:     name,
-			Value:    value,
-			Expires:  expires,
-			HttpOnly: false,
-			Secure:   false,
-			Path:     "/",
-		}
-	}
-	return &http.Cookie{
-		Name:     name,
-		Value:    value,
-		Expires:  expires,
-		HttpOnly: true,
-		Secure:   true,
-		Path:     "/",
-		SameSite: http.SameSiteStrictMode,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/responses/response_nonstreaming.go b/apps/jan-api-gateway/application/app/interfaces/http/responses/response_nonstreaming.go
deleted file mode 100644
index 718fe23c..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/responses/response_nonstreaming.go
+++ /dev/null
@@ -1,477 +0,0 @@
-package responses
-
-import (
-	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-)
-
-// Response represents a model response
-// Reference: https://platform.openai.com/docs/api-reference/responses
-type Response struct {
-	// The unique identifier for the response.
-	ID string `json:"id"`
-
-	// The object type, which is always "response".
-	Object string `json:"object"`
-
-	// The Unix timestamp (in seconds) when the response was created.
-	Created int64 `json:"created"`
-
-	// The ID of the model used for this response.
-	Model string `json:"model"`
-
-	// The status of the response.
-	Status ResponseStatus `json:"status"`
-
-	// The input that was provided to the model. Can be a string or array of strings.
-	Input any `json:"input"`
-
-	// The output generated by the model.
-	Output any `json:"output,omitempty"`
-
-	// The system prompt that was used for this response.
-	SystemPrompt *string `json:"system_prompt,omitempty"`
-
-	// The maximum number of tokens that were generated.
-	MaxTokens *int `json:"max_tokens,omitempty"`
-
-	// The temperature that was used for this response.
-	Temperature *float64 `json:"temperature,omitempty"`
-
-	// The top_p that was used for this response.
-	TopP *float64 `json:"top_p,omitempty"`
-
-	// The top_k that was used for this response.
-	TopK *int `json:"top_k,omitempty"`
-
-	// The repetition penalty that was used for this response.
-	RepetitionPenalty *float64 `json:"repetition_penalty,omitempty"`
-
-	// The seed that was used for this response.
-	Seed *int `json:"seed,omitempty"`
-
-	// The stop sequences that were used for this response.
-	Stop []string `json:"stop,omitempty"`
-
-	// The presence penalty that was used for this response.
-	PresencePenalty *float64 `json:"presence_penalty,omitempty"`
-
-	// The frequency penalty that was used for this response.
-	FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
-
-	// The logit bias that was used for this response.
-	LogitBias map[string]float64 `json:"logit_bias,omitempty"`
-
-	// The response format that was used for this response.
-	ResponseFormat *requesttypes.ResponseFormat `json:"response_format,omitempty"`
-
-	// The tools that were used for this response.
-	Tools []requesttypes.Tool `json:"tools,omitempty"`
-
-	// The tool choice that was used for this response.
-	ToolChoice *requesttypes.ToolChoice `json:"tool_choice,omitempty"`
-
-	// The metadata that was provided for this response.
-	Metadata map[string]any `json:"metadata,omitempty"`
-
-	// Whether the response was streamed.
-	Stream *bool `json:"stream,omitempty"`
-
-	// Whether the response was run in the background.
-	Background *bool `json:"background,omitempty"`
-
-	// The timeout in seconds that was used for this response.
-	Timeout *int `json:"timeout,omitempty"`
-
-	// The user that was provided for this response.
-	User *string `json:"user,omitempty"`
-
-	// The conversation that this response belongs to.
-	Conversation *ConversationInfo `json:"conversation,omitempty"`
-
-	// The usage statistics for this response.
-	Usage *DetailedUsage `json:"usage,omitempty"`
-
-	// The error that occurred during processing, if any.
-	Error *ResponseError `json:"error,omitempty"`
-
-	// The Unix timestamp (in seconds) when the response was completed.
-	CompletedAt *int64 `json:"completed_at,omitempty"`
-
-	// The Unix timestamp (in seconds) when the response was cancelled.
-	CancelledAt *int64 `json:"cancelled_at,omitempty"`
-
-	// The Unix timestamp (in seconds) when the response was failed.
-	FailedAt *int64 `json:"failed_at,omitempty"`
-
-	// OpenAI API response fields
-	IncompleteDetails  any         `json:"incomplete_details,omitempty"`
-	Instructions       any         `json:"instructions,omitempty"`
-	MaxOutputTokens    *int        `json:"max_output_tokens,omitempty"`
-	ParallelToolCalls  bool        `json:"parallel_tool_calls,omitempty"`
-	PreviousResponseID *string     `json:"previous_response_id,omitempty"`
-	Reasoning          *Reasoning  `json:"reasoning,omitempty"`
-	Store              bool        `json:"store,omitempty"`
-	Text               *TextFormat `json:"text,omitempty"`
-	Truncation         string      `json:"truncation,omitempty"`
-}
-
-// ResponseStatus represents the status of a response
-type ResponseStatus string
-
-const (
-	ResponseStatusPending   ResponseStatus = "pending"
-	ResponseStatusRunning   ResponseStatus = "running"
-	ResponseStatusCompleted ResponseStatus = "completed"
-	ResponseStatusCancelled ResponseStatus = "cancelled"
-	ResponseStatusFailed    ResponseStatus = "failed"
-)
-
-// ResponseOutput represents the output generated by the model
-type ResponseOutput struct {
-	// The type of output.
-	Type OutputType `json:"type"`
-
-	// The text output (for text type).
-	Text *TextOutput `json:"text,omitempty"`
-
-	// The image output (for image type).
-	Image *ImageOutput `json:"image,omitempty"`
-
-	// The file output (for file type).
-	File *FileOutput `json:"file,omitempty"`
-
-	// The web search output (for web_search type).
-	WebSearch *WebSearchOutput `json:"web_search,omitempty"`
-
-	// The file search output (for file_search type).
-	FileSearch *FileSearchOutput `json:"file_search,omitempty"`
-
-	// The streaming output (for streaming type).
-	Streaming *StreamingOutput `json:"streaming,omitempty"`
-
-	// The function calls output (for function_calls type).
-	FunctionCalls *FunctionCallsOutput `json:"function_calls,omitempty"`
-
-	// The reasoning output (for reasoning type).
-	Reasoning *ReasoningOutput `json:"reasoning,omitempty"`
-}
-
-// OutputType represents the type of output
-type OutputType string
-
-const (
-	OutputTypeText          OutputType = "text"
-	OutputTypeImage         OutputType = "image"
-	OutputTypeFile          OutputType = "file"
-	OutputTypeWebSearch     OutputType = "web_search"
-	OutputTypeFileSearch    OutputType = "file_search"
-	OutputTypeStreaming     OutputType = "streaming"
-	OutputTypeFunctionCalls OutputType = "function_calls"
-	OutputTypeReasoning     OutputType = "reasoning"
-)
-
-// TextOutput represents a text output
-type TextOutput struct {
-	// The generated text.
-	Value string `json:"value"`
-
-	// The annotations for the text.
-	Annotations []Annotation `json:"annotations,omitempty"`
-}
-
-// ImageOutput represents an image output
-type ImageOutput struct {
-	// The URL of the generated image.
-	URL string `json:"url"`
-
-	// The format of the image.
-	Format string `json:"format"`
-
-	// The size of the image.
-	Size *ImageSize `json:"size,omitempty"`
-}
-
-// FileOutput represents a file output
-type FileOutput struct {
-	// The ID of the generated file.
-	FileID string `json:"file_id"`
-
-	// The name of the file.
-	Name string `json:"name"`
-
-	// The size of the file in bytes.
-	Size int64 `json:"size"`
-
-	// The MIME type of the file.
-	MimeType string `json:"mime_type"`
-}
-
-// WebSearchOutput represents a web search output
-type WebSearchOutput struct {
-	// The search results.
-	Results []WebSearchResult `json:"results"`
-
-	// The search query that was used.
-	Query string `json:"query"`
-}
-
-// WebSearchResult represents a web search result
-type WebSearchResult struct {
-	// The title of the result.
-	Title string `json:"title"`
-
-	// The URL of the result.
-	URL string `json:"url"`
-
-	// The snippet of the result.
-	Snippet string `json:"snippet"`
-
-	// The source of the result.
-	Source *string `json:"source,omitempty"`
-}
-
-// FileSearchOutput represents a file search output
-type FileSearchOutput struct {
-	// The search results.
-	Results []FileSearchResult `json:"results"`
-
-	// The search query that was used.
-	Query string `json:"query"`
-}
-
-// FileSearchResult represents a file search result
-type FileSearchResult struct {
-	// The ID of the file.
-	FileID string `json:"file_id"`
-
-	// The name of the file.
-	Name string `json:"name"`
-
-	// The snippet of the result.
-	Snippet string `json:"snippet"`
-
-	// The score of the result.
-	Score float64 `json:"score"`
-}
-
-// StreamingOutput represents a streaming output
-type StreamingOutput struct {
-	// The URL that was streamed from.
-	URL string `json:"url"`
-
-	// The data that was received.
-	Data string `json:"data"`
-
-	// The status code of the response.
-	StatusCode int `json:"status_code"`
-
-	// The headers of the response.
-	Headers map[string]string `json:"headers"`
-}
-
-// FunctionCallsOutput represents function calls output
-type FunctionCallsOutput struct {
-	// The function calls that were made.
-	Calls []FunctionCallResult `json:"calls"`
-}
-
-// FunctionCallResult represents a function call result
-type FunctionCallResult struct {
-	// The name of the function that was called.
-	Name string `json:"name"`
-
-	// The arguments that were passed to the function.
-	Arguments map[string]any `json:"arguments"`
-
-	// The result of the function call.
-	Result any `json:"result"`
-
-	// The error that occurred during the function call, if any.
-	Error *string `json:"error,omitempty"`
-}
-
-// ReasoningOutput represents a reasoning output
-type ReasoningOutput struct {
-	// The reasoning task that was performed.
-	Task string `json:"task"`
-
-	// The result of the reasoning task.
-	Result string `json:"result"`
-
-	// The steps taken during reasoning.
-	Steps []ReasoningStep `json:"steps,omitempty"`
-}
-
-// ReasoningStep represents a step in reasoning
-type ReasoningStep struct {
-	// The step number.
-	Step int `json:"step"`
-
-	// The description of the step.
-	Description string `json:"description"`
-
-	// The result of the step.
-	Result string `json:"result"`
-}
-
-// Annotation represents an annotation for text
-type Annotation struct {
-	// The type of annotation.
-	Type string `json:"type"`
-
-	// The start index of the annotation.
-	StartIndex int `json:"start_index"`
-
-	// The end index of the annotation.
-	EndIndex int `json:"end_index"`
-
-	// The text of the annotation.
-	Text string `json:"text"`
-
-	// The metadata for the annotation.
-	Metadata map[string]any `json:"metadata,omitempty"`
-}
-
-// ImageSize represents the size of an image
-type ImageSize struct {
-	// The width of the image in pixels.
-	Width int `json:"width"`
-
-	// The height of the image in pixels.
-	Height int `json:"height"`
-}
-
-// Usage represents usage statistics
-type Usage struct {
-	// The number of tokens in the prompt.
-	PromptTokens int `json:"prompt_tokens"`
-
-	// The number of tokens in the completion.
-	CompletionTokens int `json:"completion_tokens"`
-
-	// The total number of tokens used.
-	TotalTokens int `json:"total_tokens"`
-}
-
-// ResponseError represents an error that occurred during processing
-type ResponseError struct {
-	// The error code.
-	Code string `json:"code"`
-
-	// The error message.
-	Message string `json:"message"`
-
-	// The error details.
-	Details map[string]any `json:"details,omitempty"`
-}
-
-// InputItem represents an input item for a response
-type InputItem struct {
-	// The unique identifier for the input item.
-	ID string `json:"id"`
-
-	// The object type, which is always "input_item".
-	Object string `json:"object"`
-
-	// The Unix timestamp (in seconds) when the input item was created.
-	Created int64 `json:"created"`
-
-	// The type of input item.
-	Type requesttypes.InputType `json:"type"`
-
-	// The text content (for text type).
-	Text *string `json:"text,omitempty"`
-
-	// The image content (for image type).
-	Image *requesttypes.ImageInput `json:"image,omitempty"`
-
-	// The file content (for file type).
-	File *requesttypes.FileInput `json:"file,omitempty"`
-
-	// The web search content (for web_search type).
-	WebSearch *requesttypes.WebSearchInput `json:"web_search,omitempty"`
-
-	// The file search content (for file_search type).
-	FileSearch *requesttypes.FileSearchInput `json:"file_search,omitempty"`
-
-	// The streaming content (for streaming type).
-	Streaming *requesttypes.StreamingInput `json:"streaming,omitempty"`
-
-	// The function calls content (for function_calls type).
-	FunctionCalls *requesttypes.FunctionCallsInput `json:"function_calls,omitempty"`
-
-	// The reasoning content (for reasoning type).
-	Reasoning *requesttypes.ReasoningInput `json:"reasoning,omitempty"`
-}
-
-// ListInputItemsResponse represents the response for listing input items
-type ListInputItemsResponse struct {
-	// The object type, which is always "list".
-	Object string `json:"object"`
-
-	// The list of input items.
-	Data []InputItem `json:"data"`
-
-	// The first ID in the list.
-	FirstID *string `json:"first_id,omitempty"`
-
-	// The last ID in the list.
-	LastID *string `json:"last_id,omitempty"`
-
-	// Whether there are more items available.
-	HasMore bool `json:"has_more"`
-}
-
-// ConversationInfo represents the conversation that a response belongs to
-type ConversationInfo struct {
-	// The unique ID of the conversation.
-	ID string `json:"id"`
-}
-
-// DetailedUsage represents detailed usage statistics with token details
-type DetailedUsage struct {
-	// The number of tokens in the prompt.
-	InputTokens int `json:"input_tokens"`
-
-	// The number of tokens in the completion.
-	OutputTokens int `json:"output_tokens"`
-
-	// The total number of tokens used.
-	TotalTokens int `json:"total_tokens"`
-
-	// Details about input tokens.
-	InputTokensDetails *TokenDetails `json:"input_tokens_details,omitempty"`
-
-	// Details about output tokens.
-	OutputTokensDetails *TokenDetails `json:"output_tokens_details,omitempty"`
-}
-
-// TokenDetails represents detailed token information
-type TokenDetails struct {
-	// The number of cached tokens.
-	CachedTokens int `json:"cached_tokens,omitempty"`
-
-	// The number of reasoning tokens.
-	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
-}
-
-// Reasoning represents reasoning information
-type Reasoning struct {
-	// The effort level for reasoning.
-	Effort *string `json:"effort,omitempty"`
-
-	// The summary of reasoning.
-	Summary *string `json:"summary,omitempty"`
-}
-
-// TextFormat represents text format information
-type TextFormat struct {
-	// The format type.
-	Format *FormatType `json:"format,omitempty"`
-}
-
-// FormatType represents the type of format
-type FormatType struct {
-	// The type of format.
-	Type string `json:"type"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/responses/response_streaming.go b/apps/jan-api-gateway/application/app/interfaces/http/responses/response_streaming.go
deleted file mode 100644
index 897bfd3d..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/responses/response_streaming.go
+++ /dev/null
@@ -1,505 +0,0 @@
-package responses
-
-// BaseStreamingEvent represents the base structure for all streaming events
-type BaseStreamingEvent struct {
-	// The type of event.
-	Type string `json:"type"`
-
-	// The sequence number of the event.
-	SequenceNumber int `json:"sequence_number"`
-}
-
-// ResponseCreatedEvent represents a response.created event
-type ResponseCreatedEvent struct {
-	BaseStreamingEvent
-	Response Response `json:"response"`
-}
-
-// ResponseInProgressEvent represents a response.in_progress event
-type ResponseInProgressEvent struct {
-	BaseStreamingEvent
-	Response map[string]any `json:"response"`
-}
-
-// ResponseOutputItemAddedEvent represents a response.output_item.added event
-type ResponseOutputItemAddedEvent struct {
-	BaseStreamingEvent
-	OutputIndex int                `json:"output_index"`
-	Item        ResponseOutputItem `json:"item"`
-}
-
-// ResponseContentPartAddedEvent represents a response.content_part.added event
-type ResponseContentPartAddedEvent struct {
-	BaseStreamingEvent
-	ItemID       string              `json:"item_id"`
-	OutputIndex  int                 `json:"output_index"`
-	ContentIndex int                 `json:"content_index"`
-	Part         ResponseContentPart `json:"part"`
-}
-
-// ResponseOutputTextDeltaEvent represents a response.output_text.delta event
-type ResponseOutputTextDeltaEvent struct {
-	BaseStreamingEvent
-	ItemID       string    `json:"item_id"`
-	OutputIndex  int       `json:"output_index"`
-	ContentIndex int       `json:"content_index"`
-	Delta        string    `json:"delta"`
-	Logprobs     []Logprob `json:"logprobs"`
-	Obfuscation  string    `json:"obfuscation"`
-}
-
-// ResponseOutputItem represents an output item
-type ResponseOutputItem struct {
-	ID      string                `json:"id"`
-	Type    string                `json:"type"`
-	Status  string                `json:"status"`
-	Content []ResponseContentPart `json:"content"`
-	Role    string                `json:"role"`
-}
-
-// ResponseContentPart represents a content part
-type ResponseContentPart struct {
-	Type        string       `json:"type"`
-	Annotations []Annotation `json:"annotations"`
-	Logprobs    []Logprob    `json:"logprobs"`
-	Text        string       `json:"text"`
-}
-
-// Logprob represents log probability data
-type Logprob struct {
-	Token       string       `json:"token"`
-	Logprob     float64      `json:"logprob"`
-	Bytes       []int        `json:"bytes,omitempty"`
-	TopLogprobs []TopLogprob `json:"top_logprobs,omitempty"`
-}
-
-// TopLogprob represents top log probability data
-type TopLogprob struct {
-	Token   string  `json:"token"`
-	Logprob float64 `json:"logprob"`
-	Bytes   []int   `json:"bytes,omitempty"`
-}
-
-// TextDelta represents a delta for text output (legacy)
-type TextDelta struct {
-	// The delta text.
-	Delta string `json:"delta"`
-
-	// The annotations for the delta.
-	Annotations []Annotation `json:"annotations,omitempty"`
-}
-
-// ResponseOutputTextDoneEvent represents a response.output_text.done event
-type ResponseOutputTextDoneEvent struct {
-	BaseStreamingEvent
-	ItemID       string    `json:"item_id"`
-	OutputIndex  int       `json:"output_index"`
-	ContentIndex int       `json:"content_index"`
-	Text         string    `json:"text"`
-	Logprobs     []Logprob `json:"logprobs"`
-}
-
-// ResponseContentPartDoneEvent represents a response.content_part.done event
-type ResponseContentPartDoneEvent struct {
-	BaseStreamingEvent
-	ItemID       string              `json:"item_id"`
-	OutputIndex  int                 `json:"output_index"`
-	ContentIndex int                 `json:"content_index"`
-	Part         ResponseContentPart `json:"part"`
-}
-
-// ResponseOutputItemDoneEvent represents a response.output_item.done event
-type ResponseOutputItemDoneEvent struct {
-	BaseStreamingEvent
-	OutputIndex int                `json:"output_index"`
-	Item        ResponseOutputItem `json:"item"`
-}
-
-// ResponseCompletedEvent represents a response.completed event
-type ResponseCompletedEvent struct {
-	BaseStreamingEvent
-	Response Response `json:"response"`
-}
-
-// TextCompletion represents the completion of text output
-type TextCompletion struct {
-	// The final text.
-	Value string `json:"value"`
-
-	// The annotations for the text.
-	Annotations []Annotation `json:"annotations,omitempty"`
-}
-
-// ResponseOutputImageDeltaEvent represents a response.output_image.delta event
-type ResponseOutputImageDeltaEvent struct {
-	// The type of event, always "response.output_image.delta".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The delta data.
-	Data ImageDelta `json:"data"`
-}
-
-// ImageDelta represents a delta for image output
-type ImageDelta struct {
-	// The delta image data.
-	Delta ImageOutput `json:"delta"`
-}
-
-// ResponseOutputImageDoneEvent represents a response.output_image.done event
-type ResponseOutputImageDoneEvent struct {
-	// The type of event, always "response.output_image.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data ImageCompletion `json:"data"`
-}
-
-// ImageCompletion represents the completion of image output
-type ImageCompletion struct {
-	// The final image data.
-	Value ImageOutput `json:"value"`
-}
-
-// ResponseOutputFileDeltaEvent represents a response.output_file.delta event
-type ResponseOutputFileDeltaEvent struct {
-	// The type of event, always "response.output_file.delta".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The delta data.
-	Data FileDelta `json:"data"`
-}
-
-// FileDelta represents a delta for file output
-type FileDelta struct {
-	// The delta file data.
-	Delta FileOutput `json:"delta"`
-}
-
-// ResponseOutputFileDoneEvent represents a response.output_file.done event
-type ResponseOutputFileDoneEvent struct {
-	// The type of event, always "response.output_file.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data FileCompletion `json:"data"`
-}
-
-// FileCompletion represents the completion of file output
-type FileCompletion struct {
-	// The final file data.
-	Value FileOutput `json:"value"`
-}
-
-// ResponseOutputWebSearchDeltaEvent represents a response.output_web_search.delta event
-type ResponseOutputWebSearchDeltaEvent struct {
-	// The type of event, always "response.output_web_search.delta".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The delta data.
-	Data WebSearchDelta `json:"data"`
-}
-
-// WebSearchDelta represents a delta for web search output
-type WebSearchDelta struct {
-	// The delta web search data.
-	Delta WebSearchOutput `json:"delta"`
-}
-
-// ResponseOutputWebSearchDoneEvent represents a response.output_web_search.done event
-type ResponseOutputWebSearchDoneEvent struct {
-	// The type of event, always "response.output_web_search.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data WebSearchCompletion `json:"data"`
-}
-
-// WebSearchCompletion represents the completion of web search output
-type WebSearchCompletion struct {
-	// The final web search data.
-	Value WebSearchOutput `json:"value"`
-}
-
-// ResponseOutputFileSearchDeltaEvent represents a response.output_file_search.delta event
-type ResponseOutputFileSearchDeltaEvent struct {
-	// The type of event, always "response.output_file_search.delta".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The delta data.
-	Data FileSearchDelta `json:"data"`
-}
-
-// FileSearchDelta represents a delta for file search output
-type FileSearchDelta struct {
-	// The delta file search data.
-	Delta FileSearchOutput `json:"delta"`
-}
-
-// ResponseOutputFileSearchDoneEvent represents a response.output_file_search.done event
-type ResponseOutputFileSearchDoneEvent struct {
-	// The type of event, always "response.output_file_search.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data FileSearchCompletion `json:"data"`
-}
-
-// FileSearchCompletion represents the completion of file search output
-type FileSearchCompletion struct {
-	// The final file search data.
-	Value FileSearchOutput `json:"value"`
-}
-
-// ResponseOutputStreamingDeltaEvent represents a response.output_streaming.delta event
-type ResponseOutputStreamingDeltaEvent struct {
-	// The type of event, always "response.output_streaming.delta".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The delta data.
-	Data StreamingDelta `json:"data"`
-}
-
-// StreamingDelta represents a delta for streaming output
-type StreamingDelta struct {
-	// The delta streaming data.
-	Delta StreamingOutput `json:"delta"`
-}
-
-// ResponseOutputStreamingDoneEvent represents a response.output_streaming.done event
-type ResponseOutputStreamingDoneEvent struct {
-	// The type of event, always "response.output_streaming.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data StreamingCompletion `json:"data"`
-}
-
-// StreamingCompletion represents the completion of streaming output
-type StreamingCompletion struct {
-	// The final streaming data.
-	Value StreamingOutput `json:"value"`
-}
-
-// ResponseOutputFunctionCallsDeltaEvent represents a response.output_function_calls.delta event
-type ResponseOutputFunctionCallsDeltaEvent struct {
-	BaseStreamingEvent
-	ItemID       string            `json:"item_id"`
-	OutputIndex  int               `json:"output_index"`
-	ContentIndex int               `json:"content_index"`
-	Delta        FunctionCallDelta `json:"delta"`
-	Logprobs     []Logprob         `json:"logprobs"`
-}
-
-// FunctionCallDelta represents a delta for function call
-type FunctionCallDelta struct {
-	Name      string                 `json:"name"`
-	Arguments map[string]any `json:"arguments"`
-}
-
-// FunctionCallsDelta represents a delta for function calls output
-type FunctionCallsDelta struct {
-	// The delta function calls data.
-	Delta FunctionCallsOutput `json:"delta"`
-}
-
-// ResponseOutputFunctionCallsDoneEvent represents a response.output_function_calls.done event
-type ResponseOutputFunctionCallsDoneEvent struct {
-	// The type of event, always "response.output_function_calls.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data FunctionCallsCompletion `json:"data"`
-}
-
-// FunctionCallsCompletion represents the completion of function calls output
-type FunctionCallsCompletion struct {
-	// The final function calls data.
-	Value FunctionCallsOutput `json:"value"`
-}
-
-// ResponseOutputReasoningDeltaEvent represents a response.output_reasoning.delta event
-type ResponseOutputReasoningDeltaEvent struct {
-	// The type of event, always "response.output_reasoning.delta".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The delta data.
-	Data ReasoningDelta `json:"data"`
-}
-
-// ReasoningDelta represents a delta for reasoning output
-type ReasoningDelta struct {
-	// The delta reasoning data.
-	Delta ReasoningOutput `json:"delta"`
-}
-
-// ResponseOutputReasoningDoneEvent represents a response.output_reasoning.done event
-type ResponseOutputReasoningDoneEvent struct {
-	// The type of event, always "response.output_reasoning.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data ReasoningCompletion `json:"data"`
-}
-
-// ReasoningCompletion represents the completion of reasoning output
-type ReasoningCompletion struct {
-	// The final reasoning data.
-	Value ReasoningOutput `json:"value"`
-}
-
-// ResponseDoneEvent represents a response.done event
-type ResponseDoneEvent struct {
-	// The type of event, always "response.done".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The completion data.
-	Data ResponseCompletion `json:"data"`
-}
-
-// ResponseCompletion represents the completion of a response
-type ResponseCompletion struct {
-	// The final response data.
-	Value Response `json:"value"`
-}
-
-// ResponseErrorEvent represents a response.error event
-type ResponseErrorEvent struct {
-	// The type of event, always "response.error".
-	Event string `json:"event"`
-
-	// The Unix timestamp (in seconds) when the event was created.
-	Created int64 `json:"created"`
-
-	// The ID of the response this event belongs to.
-	ResponseID string `json:"response_id"`
-
-	// The error data.
-	Data ResponseError `json:"data"`
-}
-
-// ResponseReasoningSummaryPartAddedEvent represents a response.reasoning_summary_part.added event
-type ResponseReasoningSummaryPartAddedEvent struct {
-	BaseStreamingEvent
-	ItemID       string `json:"item_id"`
-	OutputIndex  int    `json:"output_index"`
-	SummaryIndex int    `json:"summary_index"`
-	Part         struct {
-		Type string `json:"type"`
-		Text string `json:"text"`
-	} `json:"part"`
-}
-
-// ResponseReasoningSummaryTextDeltaEvent represents a response.reasoning_summary_text.delta event
-type ResponseReasoningSummaryTextDeltaEvent struct {
-	BaseStreamingEvent
-	ItemID       string `json:"item_id"`
-	OutputIndex  int    `json:"output_index"`
-	SummaryIndex int    `json:"summary_index"`
-	Delta        string `json:"delta"`
-	Obfuscation  string `json:"obfuscation"`
-}
-
-// ResponseReasoningSummaryTextDoneEvent represents a response.reasoning_summary_text.done event
-type ResponseReasoningSummaryTextDoneEvent struct {
-	BaseStreamingEvent
-	ItemID       string `json:"item_id"`
-	OutputIndex  int    `json:"output_index"`
-	SummaryIndex int    `json:"summary_index"`
-	Text         string `json:"text"`
-}
-
-// ResponseReasoningSummaryPartDoneEvent represents a response.reasoning_summary_part.done event
-type ResponseReasoningSummaryPartDoneEvent struct {
-	BaseStreamingEvent
-	ItemID       string `json:"item_id"`
-	OutputIndex  int    `json:"output_index"`
-	SummaryIndex int    `json:"summary_index"`
-	Part         struct {
-		Type string `json:"type"`
-		Text string `json:"text"`
-	} `json:"part"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/routes_provider.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/routes_provider.go
deleted file mode 100644
index 9feffdad..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/routes_provider.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package routes
-
-import (
-	"github.com/google/wire"
-	v1 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth/google"
-	chat "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/chat"
-	conv_chat "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conv"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conversations"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp"
-	mcp_impl "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp/mcp_impl"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/invites"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects"
-	api_keys "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects/api_keys"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/responses"
-)
-
-var RouteProvider = wire.NewSet(
-	google.NewGoogleAuthAPI,
-	auth.NewAuthRoute,
-	projects.NewProjectsRoute,
-	organization.NewAdminApiKeyAPI,
-	organization.NewOrganizationRoute,
-	mcp_impl.NewSerperMCP,
-	chat.NewChatRoute,
-	chat.NewCompletionAPI,
-	conv_chat.NewConvChatRoute,
-	conv_chat.NewConvCompletionAPI,
-	conv_chat.NewConvMCPAPI,
-	conv_chat.NewCompletionNonStreamHandler,
-	conv_chat.NewCompletionStreamHandler,
-	mcp.NewMCPAPI,
-	v1.NewModelAPI,
-	responses.NewResponseRoute,
-	v1.NewV1Route,
-	conversations.NewConversationAPI,
-	invites.NewInvitesRoute,
-	api_keys.NewProjectApiKeyRoute,
-)
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/auth.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/auth.go
deleted file mode 100644
index a2a16fbd..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/auth.go
+++ /dev/null
@@ -1,272 +0,0 @@
-package auth
-
-import (
-	"fmt"
-	"net/http"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"github.com/golang-jwt/jwt/v5"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth/google"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-)
-
-type AuthRoute struct {
-	google      *google.GoogleAuthAPI
-	userService *user.UserService
-	authService *auth.AuthService
-}
-
-func NewAuthRoute(
-	google *google.GoogleAuthAPI,
-	userService *user.UserService,
-	authService *auth.AuthService) *AuthRoute {
-	return &AuthRoute{
-		google,
-		userService,
-		authService,
-	}
-}
-
-func (authRoute *AuthRoute) RegisterRouter(router gin.IRouter) {
-	authRouter := router.Group("/auth")
-	authRouter.GET("/logout", authRoute.Logout)
-	authRouter.GET("/refresh-token", authRoute.RefreshToken)
-	authRouter.GET("/me",
-		authRoute.authService.AppUserAuthMiddleware(),
-		authRoute.authService.RegisteredUserMiddleware(),
-		authRoute.GetMe,
-	)
-	authRouter.POST("/guest-login", authRoute.GuestLogin)
-	authRoute.google.RegisterRouter(authRouter)
-
-}
-
-// @Enum(access.token)
-type AccessTokenResponseObjectType string
-
-const AccessTokenResponseObjectTypeObject = "access.token"
-
-type AccessTokenResponse struct {
-	Object      AccessTokenResponseObjectType `json:"object"`
-	AccessToken string                        `json:"access_token"`
-	ExpiresIn   int                           `json:"expires_in"`
-}
-
-type GetMeResponse struct {
-	Object string `json:"object"`
-	ID     string `json:"id"`
-	Email  string `json:"email"`
-	Name   string `json:"name"`
-}
-
-// @Summary Get user profile
-// @Description Retrieves the profile of the authenticated user based on the provided JWT.
-// @Tags Authentication API
-// @Security BearerAuth
-// @Produce json
-// @Success 200 {object} GetMeResponse "Successfully retrieved user profile"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized (e.g., missing or invalid JWT)"
-// @Router /v1/auth/me [get]
-func (authRoute *AuthRoute) GetMe(reqCtx *gin.Context) {
-	user, _ := auth.GetUserFromContext(reqCtx)
-	reqCtx.JSON(http.StatusOK, GetMeResponse{
-		Object: "me",
-		ID:     user.PublicID,
-		Email:  user.Email,
-		Name:   user.Name,
-	})
-}
-
-// @Summary Refresh an access token
-// @Description Use a valid refresh token to obtain a new access token. The refresh token is typically sent in a cookie.
-// @Tags Authentication API
-// @Accept json
-// @Produce json
-// @Success 200 {object} nil "Successfully logout"
-// @Failure 400 {object} responses.ErrorResponse "Bad Request (e.g., invalid refresh token)"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized (e.g., expired or missing refresh token)"
-// @Router /v1/auth/logout [get]
-func (authRoute *AuthRoute) Logout(reqCtx *gin.Context) {
-	http.SetCookie(reqCtx.Writer, responses.NewCookieWithSecurity(
-		auth.RefreshTokenKey,
-		"",
-		time.Unix(0, 0),
-	))
-	reqCtx.Status(http.StatusOK)
-}
-
-// @Summary Refresh an access token
-// @Description Use a valid refresh token to obtain a new access token. The refresh token is typically sent in a cookie.
-// @Tags Authentication API
-// @Accept json
-// @Produce json
-// @Success 200 {object} AccessTokenResponse "Successfully refreshed the access token"
-// @Failure 400 {object} responses.ErrorResponse "Bad Request (e.g., invalid refresh token)"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized (e.g., expired or missing refresh token)"
-// @Router /v1/auth/refresh-token [get]
-func (authRoute *AuthRoute) RefreshToken(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	userClaim, ok := auth.GetUserClaimFromRefreshToken(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code: "c2019018-b71c-4f13-8ac6-854fbd61c9dd",
-		})
-		return
-	}
-	if userClaim.ID == "" {
-		user, err := authRoute.userService.FindByEmail(ctx, userClaim.Email)
-		if err != nil || user == nil {
-			reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-				Code: "58174ddb-ef9c-4a3c-a6ad-c880af070518",
-			})
-			return
-		}
-		userClaim.ID = user.PublicID
-	}
-
-	accessTokenExp := time.Now().Add(auth.AccessTokenExpirationDuration)
-	accessTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
-		Email: userClaim.Email,
-		Name:  userClaim.Name,
-		ID:    userClaim.ID,
-		RegisteredClaims: jwt.RegisteredClaims{
-			ExpiresAt: jwt.NewNumericDate(accessTokenExp),
-			Subject:   userClaim.Email,
-		},
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "79373f8e-d80e-489c-95ba-9e6099ef7539",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	refreshTokenExp := time.Now().Add(7 * 24 * time.Hour)
-	refreshTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
-		Email: userClaim.Email,
-		Name:  userClaim.Name,
-		ID:    userClaim.ID,
-		RegisteredClaims: jwt.RegisteredClaims{
-			ExpiresAt: jwt.NewNumericDate(refreshTokenExp),
-			Subject:   userClaim.Email,
-		},
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "0e596742-64bb-4904-8429-4c09ce8434b9",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	http.SetCookie(reqCtx.Writer,
-		responses.NewCookieWithSecurity(
-			auth.RefreshTokenKey,
-			refreshTokenString,
-			refreshTokenExp,
-		),
-	)
-
-	reqCtx.JSON(http.StatusOK, &AccessTokenResponse{
-		AccessTokenResponseObjectTypeObject,
-		accessTokenString,
-		int(time.Until(accessTokenExp).Seconds()),
-	})
-}
-
-// @Summary Guest Login
-// @Description JWT-base Guest Login.
-// @Tags Authentication API
-// @Produce json
-// @Success 200 {object} AccessTokenResponse "Successfully refreshed the access token"
-// @Failure 400 {object} responses.ErrorResponse "Bad Request (e.g., invalid refresh token)"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized (e.g., expired or missing refresh token)"
-// @Router /v1/auth/guest-login [post]
-func (authRoute *AuthRoute) GuestLogin(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	userClaim, ok := auth.GetUserClaimFromRefreshToken(reqCtx)
-	email := ""
-	name := ""
-	var id string = ""
-	if !ok {
-		tempId, err := idgen.GenerateSecureID("jan", 12)
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code: "3cb11e83-98ed-4c4f-8823-73c26f0c2d75",
-			})
-			return
-		}
-		user, err := authRoute.authService.RegisterUser(ctx, &user.User{
-			Name:    tempId,
-			Email:   fmt.Sprintf("%s@guest.jan.ai", tempId),
-			Enabled: true,
-			IsGuest: true,
-		})
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code: "9576b6ba-fcc6-4bd2-b13a-33d59d6a71f1",
-			})
-			return
-		}
-		email = user.Email
-		name = user.Name
-		id = user.PublicID
-	} else {
-		email = userClaim.Email
-		name = userClaim.Name
-		id = userClaim.ID
-	}
-
-	accessTokenExp := time.Now().Add(auth.AccessTokenExpirationDuration)
-	accessTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
-		Email: email,
-		Name:  name,
-		ID:    id,
-		RegisteredClaims: jwt.RegisteredClaims{
-			ExpiresAt: jwt.NewNumericDate(accessTokenExp),
-			Subject:   email,
-		},
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "79373f8e-d80e-489c-95ba-9e6099ef7539",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	refreshTokenExp := time.Now().Add(7 * 24 * time.Hour)
-	refreshTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
-		Email: email,
-		Name:  name,
-		ID:    id,
-		RegisteredClaims: jwt.RegisteredClaims{
-			ExpiresAt: jwt.NewNumericDate(refreshTokenExp),
-			Subject:   email,
-		},
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "0e596742-64bb-4904-8429-4c09ce8434b9",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	http.SetCookie(reqCtx.Writer, responses.NewCookieWithSecurity(
-		auth.RefreshTokenKey,
-		refreshTokenString,
-		refreshTokenExp,
-	))
-
-	reqCtx.JSON(http.StatusOK, &AccessTokenResponse{
-		AccessTokenResponseObjectTypeObject,
-		accessTokenString,
-		int(time.Until(accessTokenExp).Seconds()),
-	})
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/google/google.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/google/google.go
deleted file mode 100644
index edddf325..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/auth/google/google.go
+++ /dev/null
@@ -1,301 +0,0 @@
-package google
-
-import (
-	"context"
-	"crypto/rand"
-	"encoding/base64"
-	"net/http"
-	"time"
-
-	oidc "github.com/coreos/go-oidc/v3/oidc"
-	"github.com/gin-gonic/gin"
-	"github.com/golang-jwt/jwt/v5"
-	"golang.org/x/oauth2"
-	"golang.org/x/oauth2/google"
-
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-type GoogleAuthAPI struct {
-	oAuth2Config *oauth2.Config
-	oidcProvider *oidc.Provider
-	userService  *user.UserService
-	authService  *auth.AuthService
-}
-
-func NewGoogleAuthAPI(userService *user.UserService, authService *auth.AuthService) *GoogleAuthAPI {
-	oauth2Config := &oauth2.Config{
-		ClientID:     environment_variables.EnvironmentVariables.OAUTH2_GOOGLE_CLIENT_ID,
-		ClientSecret: environment_variables.EnvironmentVariables.OAUTH2_GOOGLE_CLIENT_SECRET,
-		RedirectURL:  environment_variables.EnvironmentVariables.OAUTH2_GOOGLE_REDIRECT_URL,
-		Scopes:       []string{oidc.ScopeOpenID, "profile", "email"},
-		Endpoint:     google.Endpoint,
-	}
-
-	provider, err := oidc.NewProvider(context.Background(), "https://accounts.google.com")
-	if err != nil {
-		panic(err)
-	}
-	return &GoogleAuthAPI{
-		oauth2Config,
-		provider,
-		userService,
-		authService,
-	}
-}
-
-func (googleAuthAPI *GoogleAuthAPI) RegisterRouter(router *gin.RouterGroup) {
-	googleRouter := router.Group("/google")
-	googleRouter.POST("/callback", googleAuthAPI.HandleGoogleCallback)
-	googleRouter.GET("/login", googleAuthAPI.GetGoogleLoginUrl)
-}
-
-type GoogleCallbackRequest struct {
-	Code  string `json:"code" binding:"required"`
-	State string `json:"state"`
-}
-
-// @Enum(access.token)
-type AccessTokenResponseObjectType string
-
-const AccessTokenResponseObjectTypeObject = "access.token"
-
-type AccessTokenResponse struct {
-	Object      AccessTokenResponseObjectType `json:"object"`
-	AccessToken string                        `json:"access_token"`
-	ExpiresIn   int                           `json:"expires_in"`
-}
-
-func generateState() (string, error) {
-	b := make([]byte, 16)
-	if _, err := rand.Read(b); err != nil {
-		return "", err
-	}
-	return base64.URLEncoding.EncodeToString(b), nil
-}
-
-// @Summary Google OAuth2 Callback
-// @Description Handles the callback from the Google OAuth2 provider to exchange the authorization code for a token, verify the user, and issue access and refresh tokens.
-// @Tags Authentication API
-// @Accept json
-// @Produce json
-// @Param request body GoogleCallbackRequest true "Request body containing the authorization code and state"
-// @Success 200 {object} AccessTokenResponse "Successfully authenticated and returned tokens"
-// @Failure 400 {object} responses.ErrorResponse "Bad request (e.g., invalid state, missing code, or invalid claims)"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized (e.g., a user claim is not found or is invalid in the context)"
-// @Failure 500 {object} responses.ErrorResponse "Internal Server Error"
-// @Router /v1/auth/google/callback [post]
-func (googleAuthAPI *GoogleAuthAPI) HandleGoogleCallback(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	var req GoogleCallbackRequest
-	if err := reqCtx.ShouldBindJSON(&req); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "f1ca221e-cc6e-4e31-92b0-7c59dd966536",
-		})
-		return
-	}
-
-	storedState, err := reqCtx.Cookie(auth.OAuthStateKey)
-	if storedState != req.State {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "f845d325-fe49-4487-978b-543090f2ec42",
-		})
-		return
-	}
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "2a17e34c-95bd-4d03-95ee-01fd6172348d",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	token, err := googleAuthAPI.oAuth2Config.Exchange(reqCtx, req.Code)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "f9e2d2b5-45b5-4697-bb04-548b4290fdde",
-		})
-		return
-	}
-
-	rawIDToken, ok := token.Extra("id_token").(string)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "69137efa-bf46-456f-ab4c-bda9fa38aff0",
-		})
-		return
-	}
-	verifier := googleAuthAPI.oidcProvider.Verifier(&oidc.Config{ClientID: googleAuthAPI.oAuth2Config.ClientID})
-	idToken, err := verifier.Verify(reqCtx, rawIDToken)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "8ea31139-211e-4282-82de-9664814e6f46",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	var claims struct {
-		Email string `json:"email"`
-		Name  string `json:"name"`
-		Sub   string `json:"sub"`
-	}
-	if err := idToken.Claims(&claims); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "f2ea83a6-36f6-4a87-ae50-e934f984f1c9",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	userService := googleAuthAPI.userService
-	exists, err := userService.FindByEmail(reqCtx.Request.Context(), claims.Email)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "ad6e260d-b5ad-447b-8ab0-7e161c932b6a",
-			Error: err.Error(),
-		})
-		return
-	}
-	if exists != nil {
-		exists.Name = claims.Name
-		_, err := googleAuthAPI.userService.UpdateUser(ctx, exists)
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:  "f5afc09d-32be-461a-a0af-7b0f2c1dc221",
-				Error: err.Error(),
-			})
-			return
-		}
-	}
-	if exists == nil {
-		exists, err = func() (*user.User, error) {
-			userClaim, ok := auth.GetUserClaimFromRefreshToken(reqCtx)
-			if !ok {
-				return nil, nil
-			}
-			user, err := googleAuthAPI.userService.FindByEmail(ctx, userClaim.Email)
-			if err != nil {
-				return nil, err
-			}
-			if user == nil {
-				return nil, nil
-			}
-			if user.IsGuest {
-				user.IsGuest = false
-				user.Name = claims.Name
-				user.Email = claims.Email
-				exists, err = googleAuthAPI.userService.UpdateUser(ctx, user)
-				if err != nil {
-					return nil, err
-				}
-				return exists, nil
-			}
-			return nil, nil
-		}()
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "f7c545df-bdcd-4e6a-843e-9699f0239552",
-				ErrorInstance: err,
-			})
-			return
-		}
-	}
-	if exists == nil {
-		exists, err = googleAuthAPI.authService.RegisterUser(ctx, &user.User{
-			Name:    claims.Name,
-			Email:   claims.Email,
-			Enabled: true,
-		})
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:  "45f08e6d-4b0c-4718-9bf3-5974a14d5f25",
-				Error: err.Error(),
-			})
-			return
-		}
-	}
-	accessTokenExp := time.Now().Add(auth.AccessTokenExpirationDuration)
-	accessTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
-		Email: exists.Email,
-		Name:  exists.Name,
-		ID:    exists.PublicID,
-		RegisteredClaims: jwt.RegisteredClaims{
-			ExpiresAt: jwt.NewNumericDate(accessTokenExp),
-			Subject:   exists.Email,
-		},
-	})
-
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "7b50f7ab-f3a1-4a3c-920a-41e387c2bc12",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	refreshTokenExp := time.Now().Add(auth.AccessTokenExpirationDuration)
-	refreshTokenString, err := auth.CreateJwtSignedString(auth.UserClaim{
-		Email: exists.Email,
-		Name:  exists.Name,
-		ID:    exists.PublicID,
-		RegisteredClaims: jwt.RegisteredClaims{
-			ExpiresAt: jwt.NewNumericDate(refreshTokenExp),
-			Subject:   exists.Email,
-		},
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "0e596742-64bb-4904-8429-4c09ce8434b9",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	http.SetCookie(reqCtx.Writer, responses.NewCookieWithSecurity(
-		auth.RefreshTokenKey,
-		refreshTokenString,
-		refreshTokenExp,
-	))
-
-	reqCtx.JSON(http.StatusOK, &AccessTokenResponse{
-		AccessTokenResponseObjectTypeObject,
-		accessTokenString,
-		int(time.Until(accessTokenExp).Seconds()),
-	})
-}
-
-type GoogleLoginUrl struct {
-	Object string `json:"object"`
-	Url    string `json:"url"`
-}
-
-// @Summary Google OAuth2 Login
-// @Description Redirects the user to the Google OAuth2 authorization page to initiate the login process.
-// @Tags Authentication API
-// @Success 200 {object} GoogleLoginUrl "redirect url"
-// @Failure 500 {object} responses.ErrorResponse "Internal Server Error"
-// @Router /v1/auth/google/login [get]
-func (googleAuthAPI *GoogleAuthAPI) GetGoogleLoginUrl(reqCtx *gin.Context) {
-	state, err := generateState()
-	if err != nil {
-		reqCtx.JSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "e30d6d79-8126-4e76-bcff-49bbfaee3b06",
-			Error: err.Error(),
-		})
-		return
-	}
-	http.SetCookie(reqCtx.Writer, responses.NewCookieWithSecurity(
-		auth.OAuthStateKey,
-		state,
-		time.Now().Add(300*time.Second),
-	))
-	authURL := googleAuthAPI.oAuth2Config.AuthCodeURL(state, oauth2.AccessTypeOffline)
-	reqCtx.JSON(http.StatusOK, GoogleLoginUrl{
-		Object: "google.login.url",
-		Url:    authURL,
-	})
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/chat_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/chat_route.go
deleted file mode 100644
index 97e5d437..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/chat_route.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package chat
-
-import (
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-)
-
-type ChatRoute struct {
-	authService   *auth.AuthService
-	completionAPI *CompletionAPI
-}
-
-func NewChatRoute(
-	authService *auth.AuthService,
-	completionAPI *CompletionAPI,
-) *ChatRoute {
-	return &ChatRoute{
-		authService:   authService,
-		completionAPI: completionAPI,
-	}
-}
-
-func (chatRoute *ChatRoute) RegisterRouter(router gin.IRouter) {
-	// Register /v1/chat routes
-	chatRouter := router.Group("/chat",
-		chatRoute.authService.AppUserAuthMiddleware(),
-		chatRoute.authService.RegisteredUserMiddleware(),
-	)
-	chatRoute.completionAPI.RegisterRouter(chatRouter)
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_route.go
deleted file mode 100644
index a154e43e..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/chat/completion_route.go
+++ /dev/null
@@ -1,319 +0,0 @@
-package chat
-
-import (
-	"bufio"
-	"context"
-	"net/http"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/inference"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-)
-
-// Constants for streaming configuration
-const (
-	RequestTimeout       = 120 * time.Second
-	ChannelBufferSize    = 100
-	ErrorBufferSize      = 10 // Retained for backward compatibility, but unified now
-	DataPrefix           = "data: "
-	DoneMarker           = "[DONE]"
-	NewlineChar          = "\n"
-	ScannerInitialBuffer = 12 * 1024   // 12KB
-	ScannerMaxBuffer     = 1024 * 1024 // 1MB
-)
-
-// StreamMessage unifies data and error payloads for simplified channel handling
-type StreamMessage struct {
-	Line string
-	Err  error
-}
-
-// CompletionAPI handles chat completion requests with streaming support
-type CompletionAPI struct {
-	inferenceProvider inference.InferenceProvider
-	authService       *auth.AuthService
-}
-
-func NewCompletionAPI(inferenceProvider inference.InferenceProvider, authService *auth.AuthService) *CompletionAPI {
-	return &CompletionAPI{
-		inferenceProvider: inferenceProvider,
-		authService:       authService,
-	}
-}
-
-func (completionAPI *CompletionAPI) RegisterRouter(router *gin.RouterGroup) {
-	router.POST("/completions", completionAPI.PostCompletion)
-}
-
-// PostCompletion
-// @Summary Create a chat completion
-// @Description Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes without conversation persistence.
-// @Description
-// @Description **Streaming Mode (stream=true):**
-// @Description - Returns Server-Sent Events (SSE) with real-time streaming
-// @Description - Streams completion chunks directly from the inference model
-// @Description - Final event contains "[DONE]" marker
-// @Description
-// @Description **Non-Streaming Mode (stream=false or omitted):**
-// @Description - Returns single JSON response with complete completion
-// @Description - Standard OpenAI ChatCompletionResponse format
-// @Description
-// @Description **Features:**
-// @Description - Supports all OpenAI ChatCompletionRequest parameters
-// @Description - User authentication required
-// @Description - Direct inference model integration
-// @Description - No conversation persistence (stateless)
-// @Tags Chat Completions API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Produce text/event-stream
-// @Param request body openai.ChatCompletionRequest true "Chat completion request with streaming options"
-// @Success 200 {object} openai.ChatCompletionResponse "Successful non-streaming response (when stream=false)"
-// @Success 200 {string} string "Successful streaming response (when stream=true) - SSE format with data: {json} events"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request payload, empty messages, or inference failure"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/chat/completions [post]
-func (cApi *CompletionAPI) PostCompletion(reqCtx *gin.Context) {
-	var request openai.ChatCompletionRequest
-	if err := reqCtx.ShouldBindJSON(&request); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "0199600b-86d3-7339-8402-8ef1c7840475",
-			ErrorInstance: err,
-		})
-		return
-	}
-
-	if len(request.Messages) == 0 {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "0199600f-2cbe-7518-be5c-9989cce59472",
-			Error: "messages cannot be empty",
-		})
-		return
-	}
-
-	// Get authenticated user (required for API access)
-	user, ok := auth.GetUserFromContext(reqCtx)
-	if !ok || user == nil {
-		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code:  "0199600b-961c-71ba-846b-9ca5b384e382",
-			Error: "user not authenticated",
-		})
-		return
-	}
-
-	// TODO: Implement admin API key check for enhanced security
-
-	var err *common.Error
-	var response *openai.ChatCompletionResponse
-
-	if request.Stream {
-		// Handle streaming completion - streams SSE events directly to client
-		err = cApi.StreamCompletionResponse(reqCtx, "", request)
-	} else {
-		// Handle non-streaming completion - returns complete response
-		response, err = cApi.CallCompletionAndGetRestResponse(reqCtx.Request.Context(), "", request)
-	}
-
-	if err != nil {
-		logger.GetLogger().Errorf("completion failed: %v", err)
-		reqCtx.AbortWithStatusJSON(
-			http.StatusBadRequest,
-			responses.ErrorResponse{
-				Code:          err.GetCode(),
-				ErrorInstance: err.GetError(),
-			})
-		return
-	}
-
-	// Send JSON response for non-streaming requests (streaming responses use SSE)
-	if !request.Stream {
-		reqCtx.JSON(http.StatusOK, response)
-	}
-}
-
-// CallCompletionAndGetRestResponse calls the inference model and returns a complete non-streaming response
-func (cApi *CompletionAPI) CallCompletionAndGetRestResponse(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, *common.Error) {
-	// Call inference provider to get complete response
-	response, err := cApi.inferenceProvider.CreateCompletion(ctx, apiKey, request)
-	if err != nil {
-		logger.GetLogger().Errorf("inference failed: %v", err)
-		return nil, common.NewError(err, "0199600c-3b65-7618-83ca-443a583d91c9")
-	}
-
-	return response, nil
-}
-
-// StreamCompletionResponse streams SSE events directly to the client
-func (cApi *CompletionAPI) StreamCompletionResponse(reqCtx *gin.Context, apiKey string, request openai.ChatCompletionRequest) *common.Error {
-	// Create timeout context wrapping the request context
-	ctx, cancel := context.WithTimeout(reqCtx.Request.Context(), RequestTimeout)
-	defer cancel()
-
-	// Set up SSE headers for streaming response
-	cApi.setupSSEHeaders(reqCtx)
-
-	// Create unified buffered channel for streaming messages (data or errors)
-	msgChan := make(chan StreamMessage, ChannelBufferSize)
-
-	var wg sync.WaitGroup
-	wg.Add(1)
-
-	// Start streaming from inference model in a goroutine
-	go cApi.streamResponseToChannel(ctx, apiKey, request, msgChan, &wg)
-
-	// Close the message channel once all producers complete
-	go func() {
-		wg.Wait()
-		close(msgChan)
-	}()
-
-	// Set up client disconnection notifier
-	clientGone := reqCtx.Writer.CloseNotify()
-
-	// Process streaming data from channel
-	streamingComplete := false
-	for !streamingComplete {
-		select {
-		case msg, ok := <-msgChan:
-			if !ok {
-				// Channel closed, streaming complete
-				streamingComplete = true
-				break
-			}
-
-			if msg.Err != nil {
-				// Handle error: cancel and wait
-				logger.GetLogger().Errorf("Stream error: %v", msg.Err)
-				cancel()
-				wg.Wait()
-				return common.NewError(msg.Err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
-			}
-
-			// Forward streaming line directly to client
-			if err := cApi.writeSSELine(reqCtx, msg.Line); err != nil {
-				logger.GetLogger().Warnf("Client disconnected during streaming: %v", err)
-				cancel()
-				wg.Wait()
-				return common.NewError(err, "8a3f6c2e-1d47-4f89-9a6b-02f3e4b1c7d2")
-			}
-
-			// Check for [DONE] marker
-			if data, found := strings.CutPrefix(msg.Line, DataPrefix); found {
-				if data == DoneMarker {
-					streamingComplete = true
-					cancel()
-					break
-				}
-			}
-
-		case <-clientGone:
-			// Proactive client disconnection
-			logger.GetLogger().Warnf("Client disconnected proactively")
-			cancel()
-			wg.Wait()
-			return common.NewError(context.Canceled, "client disconnected proactively")
-
-		case <-ctx.Done():
-			if ctx.Err() == context.DeadlineExceeded {
-				logger.GetLogger().Errorf("Streaming timeout: %v", ctx.Err())
-			}
-			wg.Wait()
-			return common.NewError(ctx.Err(), "d41f0b2c-3e5a-47c8-8f1a-9b2c6d7e4a1f")
-
-		case <-reqCtx.Request.Context().Done():
-			// Original request context cancellation (e.g., server shutdown)
-			logger.GetLogger().Warnf("Request context cancelled")
-			cancel()
-			wg.Wait()
-			return common.NewError(reqCtx.Request.Context().Err(), "request cancelled")
-		}
-	}
-
-	// Wait for streaming goroutine to complete
-	wg.Wait()
-
-	return nil
-}
-
-// streamResponseToChannel streams the response from inference provider to a unified channel
-func (cApi *CompletionAPI) streamResponseToChannel(ctx context.Context, apiKey string, request openai.ChatCompletionRequest, msgChan chan<- StreamMessage, wg *sync.WaitGroup) {
-	defer wg.Done()
-
-	// Get streaming reader from inference provider
-	reader, err := cApi.inferenceProvider.CreateCompletionStream(ctx, apiKey, request)
-	if err != nil {
-		select {
-		case msgChan <- StreamMessage{Err: err}:
-		default:
-			// Non-blocking send if channel full
-		}
-		return
-	}
-	defer func() {
-		if closeErr := reader.Close(); closeErr != nil {
-			logger.GetLogger().Errorf("Unable to close reader: %v", closeErr)
-		}
-	}()
-
-	scanner := bufio.NewScanner(reader)
-	// Increase scanner buffer size for better performance with large responses
-	scanner.Buffer(make([]byte, 0, ScannerInitialBuffer), ScannerMaxBuffer)
-
-	for scanner.Scan() {
-		select {
-		case <-ctx.Done():
-			// Context cancelled, send error and exit
-			select {
-			case msgChan <- StreamMessage{Err: ctx.Err()}:
-			default:
-			}
-			return
-		default:
-			line := scanner.Text()
-			select {
-			case msgChan <- StreamMessage{Line: line}:
-				// Successfully sent data
-			case <-ctx.Done():
-				// Context cancelled while trying to send
-				return
-			}
-		}
-	}
-
-	if err := scanner.Err(); err != nil {
-		select {
-		case msgChan <- StreamMessage{Err: err}:
-		default:
-		}
-	}
-}
-
-// setupSSEHeaders sets up the required headers for Server-Sent Events streaming
-func (cApi *CompletionAPI) setupSSEHeaders(reqCtx *gin.Context) {
-	reqCtx.Header("Content-Type", "text/event-stream")
-	reqCtx.Header("Cache-Control", "no-cache")
-	reqCtx.Header("Connection", "keep-alive")
-	reqCtx.Header("Access-Control-Allow-Origin", "*")
-	reqCtx.Header("Access-Control-Allow-Headers", "Cache-Control")
-	reqCtx.Header("Transfer-Encoding", "chunked") // Added for better SSE compliance
-	reqCtx.Writer.WriteHeaderNow()
-}
-
-// writeSSELine writes a line to the SSE stream
-func (cApi *CompletionAPI) writeSSELine(reqCtx *gin.Context, line string) error {
-	_, err := reqCtx.Writer.Write([]byte(line + NewlineChar))
-	if err != nil {
-		return err
-	}
-	reqCtx.Writer.Flush()
-	return nil
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_chat_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_chat_route.go
deleted file mode 100644
index af0101c0..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_chat_route.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package conv
-
-import (
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-)
-
-// ConvChatRoute groups the conversation-aware chat endpoints under a single
-// router group. It owns no request logic itself: it only wires the auth
-// middlewares and delegates route registration to its two sub-APIs.
-type ConvChatRoute struct {
-	// authService supplies the middlewares attached to the "/conv" group.
-	authService       *auth.AuthService
-	// convCompletionAPI registers the chat completion and model-listing handlers.
-	convCompletionAPI *ConvCompletionAPI
-	// convMCPAPI registers the MCP handlers (type declared outside this file).
-	convMCPAPI        *ConvMCPAPI
-}
-
-// NewConvChatRoute builds a ConvChatRoute from the auth service and the two
-// sub-APIs whose routes it will register.
-func NewConvChatRoute(
-	authService *auth.AuthService,
-	convCompletionAPI *ConvCompletionAPI,
-	convMCPAPI *ConvMCPAPI,
-) *ConvChatRoute {
-	route := new(ConvChatRoute)
-	route.authService = authService
-	route.convCompletionAPI = convCompletionAPI
-	route.convMCPAPI = convMCPAPI
-	return route
-}
-
-// RegisterRouter mounts the conversation-aware routes under a "/conv" group.
-// Both sub-APIs are registered on the same group, so every route beneath it
-// runs AppUserAuthMiddleware followed by RegisteredUserMiddleware.
-func (convChatRoute *ConvChatRoute) RegisterRouter(router gin.IRouter) {
-	requireAppUser := convChatRoute.authService.AppUserAuthMiddleware()
-	requireRegistered := convChatRoute.authService.RegisteredUserMiddleware()
-	convGroup := router.Group("/conv", requireAppUser, requireRegistered)
-
-	// Chat completion and model-listing endpoints.
-	convChatRoute.convCompletionAPI.RegisterRouter(convGroup)
-
-	// MCP endpoints share the group and therefore inherit both middlewares.
-	convChatRoute.convMCPAPI.RegisterRouter(convGroup)
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_nonstream_handler.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_nonstream_handler.go
deleted file mode 100644
index a6f0f362..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_nonstream_handler.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package conv
-
-import (
-	"context"
-
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/inference"
-)
-
-// CompletionNonStreamHandler performs non-streaming chat completions and
-// shapes the provider result into the extended response returned to clients.
-type CompletionNonStreamHandler struct {
-	// inferenceProvider executes the completion request.
-	inferenceProvider   inference.InferenceProvider
-	// conversationService is stored by the constructor; no method of this type
-	// visible in this file reads it.
-	conversationService *conversation.ConversationService
-}
-
-// NewCompletionNonStreamHandler returns a handler bound to the given inference
-// provider and conversation service.
-func NewCompletionNonStreamHandler(inferenceProvider inference.InferenceProvider, conversationService *conversation.ConversationService) *CompletionNonStreamHandler {
-	handler := new(CompletionNonStreamHandler)
-	handler.inferenceProvider = inferenceProvider
-	handler.conversationService = conversationService
-	return handler
-}
-
-// CallCompletionAndGetRestResponse forwards the request to the inference
-// provider and wraps the result in an ExtendedCompletionResponse. A provider
-// failure is returned as a *common.Error and the response is nil.
-func (uc *CompletionNonStreamHandler) CallCompletionAndGetRestResponse(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*ExtendedCompletionResponse, *common.Error) {
-	completion, callErr := uc.inferenceProvider.CreateCompletion(ctx, apiKey, request)
-	if callErr == nil {
-		return uc.ConvertResponse(completion), nil
-	}
-	return nil, common.NewError(callErr, "c7d8e9f0-g1h2-3456-cdef-789012345678")
-}
-
-// ConvertResponse copies the OpenAI response into a new
-// ExtendedCompletionResponse with no metadata attached.
-// response is dereferenced unconditionally, so it must not be nil.
-func (uc *CompletionNonStreamHandler) ConvertResponse(response *openai.ChatCompletionResponse) *ExtendedCompletionResponse {
-	extended := new(ExtendedCompletionResponse)
-	extended.ChatCompletionResponse = *response
-	return extended
-}
-
-// ModifyCompletionResponse modifies the completion response to include item ID and metadata
-func (uc *CompletionNonStreamHandler) ModifyCompletionResponse(response *ExtendedCompletionResponse, conv *conversation.Conversation, conversationCreated bool, assistantItem *conversation.Item, askItemID string, completionItemID string, store bool, storeReasoning bool) *ExtendedCompletionResponse {
-	// Replace ID with item ID if assistant item exists
-	if assistantItem != nil {
-		response.ID = assistantItem.PublicID
-	}
-
-	// Add metadata if conversation exists
-	if conv != nil {
-		title := ""
-		if conv.Title != nil {
-			title = *conv.Title
-		}
-		response.Metadata = &ResponseMetadata{
-			ConversationID:      conv.PublicID,
-			ConversationCreated: conversationCreated,
-			ConversationTitle:   title,
-			AskItemId:           askItemID,
-			CompletionItemId:    completionItemID,
-			Store:               store,
-			StoreReasoning:      storeReasoning,
-		}
-	}
-
-	return response
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_route.go
deleted file mode 100644
index f04ff8df..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_route.go
+++ /dev/null
@@ -1,609 +0,0 @@
-package conv
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"strings"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	inferencemodelregistry "menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
-	userdomain "menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/utils/idgen"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-)
-
-// Conversation title settings used by generateTitleFromMessages.
-const (
-	// DefaultConversationTitle is the fallback title when no usable user message exists.
-	DefaultConversationTitle = "New Conversation"
-	// MaxTitleLength is the length above which a generated title is cut and "..." appended.
-	MaxTitleLength           = 50
-)
-
-// ConvCompletionAPI serves the conversation-aware chat completion and
-// model-listing endpoints.
-type ConvCompletionAPI struct {
-	// completionNonStreamHandler runs non-streaming completions and finalises responses.
-	completionNonStreamHandler *CompletionNonStreamHandler
-	// completionStreamHandler runs streaming (SSE) completions.
-	completionStreamHandler    *CompletionStreamHandler
-	// conversationService loads and creates conversations and stores their items.
-	conversationService        *conversation.ConversationService
-	// authService is stored by the constructor; the handlers in this file use
-	// the package-level auth.GetUserFromContext instead.
-	authService                *auth.AuthService
-	// registry is the source of the model list returned by GetModels.
-	registry                   *inferencemodelregistry.InferenceModelRegistry
-}
-
-// NewConvCompletionAPI assembles a ConvCompletionAPI from its collaborators.
-func NewConvCompletionAPI(completionNonStreamHandler *CompletionNonStreamHandler, completionStreamHandler *CompletionStreamHandler, conversationService *conversation.ConversationService, authService *auth.AuthService, registry *inferencemodelregistry.InferenceModelRegistry) *ConvCompletionAPI {
-	api := new(ConvCompletionAPI)
-	api.completionNonStreamHandler = completionNonStreamHandler
-	api.completionStreamHandler = completionStreamHandler
-	api.conversationService = conversationService
-	api.authService = authService
-	api.registry = registry
-	return api
-}
-
-// RegisterRouter attaches this API's handlers to router:
-// POST <router>/chat/completions and GET <router>/models.
-func (completionAPI *ConvCompletionAPI) RegisterRouter(router *gin.RouterGroup) {
-	// Chat completions live under the /chat sub-group.
-	router.Group("/chat").POST("/completions", completionAPI.PostCompletion)
-
-	// Model listing is served directly from the group root.
-	router.GET("/models", completionAPI.GetModels)
-}
-
-// ExtendedChatCompletionRequest is the OpenAI chat completion request plus
-// three gateway-specific fields: conversation, store and store_reasoning.
-type ExtendedChatCompletionRequest struct {
-	openai.ChatCompletionRequest
-	// Conversation is the public ID of an existing conversation; when empty a
-	// new conversation is created for the request.
-	Conversation   string `json:"conversation,omitempty"`
-	Store          bool   `json:"store,omitempty"`           // If true, the response will be stored in the conversation, default is false
-	StoreReasoning bool   `json:"store_reasoning,omitempty"` // If true, the reasoning will be stored in the conversation, default is false
-}
-
-// ResponseMetadata describes the conversation a completion belongs to and
-// echoes the storage options of the request. It is attached to a response
-// only when a conversation is available.
-type ResponseMetadata struct {
-	// ConversationID is the conversation's public ID.
-	ConversationID      string `json:"conversation_id"`
-	// ConversationCreated is true when the conversation was created by this request.
-	ConversationCreated bool   `json:"conversation_created"`
-	// ConversationTitle is the conversation title, or "" when it has none.
-	ConversationTitle   string `json:"conversation_title"`
-	// AskItemId is the ID generated for the stored input message.
-	AskItemId           string `json:"ask_item_id"`
-	// CompletionItemId is the ID generated for the stored assistant response.
-	CompletionItemId    string `json:"completion_item_id"`
-	// Store and StoreReasoning echo the request's store / store_reasoning flags.
-	Store               bool   `json:"store"`
-	StoreReasoning      bool   `json:"store_reasoning"`
-}
-
-// ExtendedCompletionResponse is the OpenAI ChatCompletionResponse with an
-// optional metadata object; the "metadata" key is omitted from JSON when nil.
-type ExtendedCompletionResponse struct {
-	openai.ChatCompletionResponse
-	Metadata *ResponseMetadata `json:"metadata,omitempty"`
-}
-
-// Model is one entry of the model listing returned by GetModels. Each field
-// is copied from the same-named field of the registry's model entry.
-type Model struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	// Created is presumably a Unix timestamp, as in the OpenAI models API — TODO confirm.
-	Created int    `json:"created"`
-	OwnedBy string `json:"owned_by"`
-}
-
-// ModelsResponse is the JSON envelope returned by GetModels.
-type ModelsResponse struct {
-	// Object is set to "list" by GetModels.
-	Object string  `json:"object"`
-	// Data holds one Model per registry entry.
-	Data   []Model `json:"data"`
-}
-
-// PostCompletion
-// @Summary Create a conversation-aware chat completion
-// @Description Generates a model response for the given chat conversation with conversation persistence and management. This is the conversation-aware version of the chat completion API that supports both streaming and non-streaming modes with conversation management and storage options.
-// @Description
-// @Description **Streaming Mode (stream=true):**
-// @Description - Returns Server-Sent Events (SSE) with real-time streaming
-// @Description - First event contains conversation metadata
-// @Description - Subsequent events contain completion chunks
-// @Description - Final event contains "[DONE]" marker
-// @Description
-// @Description **Non-Streaming Mode (stream=false or omitted):**
-// @Description - Returns single JSON response with complete completion
-// @Description - Includes conversation metadata in response
-// @Description
-// @Description **Storage Options:**
-// @Description - `store=true`: Saves user message and assistant response to conversation
-// @Description - `store_reasoning=true`: Includes reasoning content in stored messages
-// @Description - `conversation`: ID of existing conversation or empty for new conversation
-// @Description
-// @Description **Features:**
-// @Description - Conversation persistence and history management
-// @Description - Extended request format with conversation and storage options
-// @Description - User authentication required
-// @Description - Automatic conversation creation and management
-// @Tags Conversation-aware Chat API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Produce text/event-stream
-// @Param request body ExtendedChatCompletionRequest true "Extended chat completion request with streaming, storage, and conversation options"
-// @Success 200 {object} ExtendedCompletionResponse "Successful non-streaming response (when stream=false)"
-// @Success 200 {string} string "Successful streaming response (when stream=true) - SSE format with data: {json} events"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request payload or conversation not found"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found or user not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conv/chat/completions [post]
-func (api *ConvCompletionAPI) PostCompletion(reqCtx *gin.Context) {
-	// Step 1: decode the JSON body; a malformed payload is rejected with 400.
-	var request ExtendedChatCompletionRequest
-	if err := reqCtx.ShouldBindJSON(&request); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "cf237451-8932-48d1-9cf6-42c4db2d4805",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	// Get user ID for saving messages
-	// A request without a user in the context is answered with 404.
-	user, ok := auth.GetUserFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-			Code:  "0199506b-314d-70e2-a8aa-d5fde1569d1d",
-			Error: "user not found",
-		})
-		return
-	}
-	// TODO: Implement admin API key check
-
-	// Handle conversation management
-	// Loads the conversation named in the request, or creates a new one when
-	// request.Conversation is empty.
-	conv, conversationCreated, convErr := api.handleConversationManagement(reqCtx, request.Conversation, request.Messages)
-	if convErr != nil {
-		// Conversation doesn't exist, return error
-		reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-			Code:          convErr.GetCode(),
-			ErrorInstance: convErr.GetError(),
-		})
-		return
-	}
-
-	// Generate item IDs for tracking
-	// NOTE(review): the second return value of GenerateSecureID is discarded for
-	// both IDs — confirm that a failed generation cannot reach storage.
-	askItemID, _ := idgen.GenerateSecureID("msg", 42)
-	completionItemID, _ := idgen.GenerateSecureID("msg", 42)
-
-	// Handle streaming vs non-streaming requests
-	// Both branches pass an empty string as the provider API key.
-	var response *ExtendedCompletionResponse
-	var err *common.Error
-
-	if request.Stream {
-		// Handle streaming completion - streams SSE events and accumulates response
-		response, err = api.completionStreamHandler.StreamCompletionAndAccumulateResponse(reqCtx, "", request.ChatCompletionRequest, conv, conversationCreated, askItemID, completionItemID)
-	} else {
-		// Handle non-streaming completion
-		response, err = api.completionNonStreamHandler.CallCompletionAndGetRestResponse(reqCtx.Request.Context(), "", request.ChatCompletionRequest)
-	}
-
-	// Any completion failure is reported as 400, whatever its cause.
-	// NOTE(review): in streaming mode the SSE response may already have been
-	// started when this runs — confirm a JSON abort is still meaningful then.
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(
-			http.StatusBadRequest,
-			responses.ErrorResponse{
-				Code:          err.GetCode(),
-				ErrorInstance: err.GetError(),
-			})
-		return
-	}
-
-	// Process response (common logic for both streaming and non-streaming)
-	// processCompletionResponse returns nil after it has aborted the request itself.
-	modifiedResponse := api.processCompletionResponse(reqCtx, response, request, conv, user, askItemID, completionItemID, conversationCreated)
-
-	// Only send JSON response for non-streaming requests (streaming uses SSE)
-	if !request.Stream && modifiedResponse != nil {
-		reqCtx.JSON(http.StatusOK, modifiedResponse)
-	}
-}
-
-// GetModels
-// @Summary List available models for conversation-aware chat
-// @Description Retrieves a list of available models that can be used for conversation-aware chat completions. This endpoint provides the same model list as the standard /v1/models endpoint but is specifically designed for conversation-aware chat functionality.
-// @Tags Conversation-aware Chat API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Success 200 {object} ModelsResponse "Successful response"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
-// @Router /v1/conv/models [get]
-func (api *ConvCompletionAPI) GetModels(reqCtx *gin.Context) {
-	available := api.registry.ListModels(reqCtx.Request.Context())
-
-	// Map each registry entry onto the wire-format Model.
-	listing := ModelsResponse{
-		Object: "list",
-		Data:   make([]Model, 0, len(available)),
-	}
-	for _, entry := range available {
-		listing.Data = append(listing.Data, Model{
-			ID:      entry.ID,
-			Object:  entry.Object,
-			Created: entry.Created,
-			OwnedBy: entry.OwnedBy,
-		})
-	}
-
-	reqCtx.JSON(http.StatusOK, listing)
-}
-
-// processCompletionResponse is the post-processing step shared by streaming
-// and non-streaming completions. When request.Store is set it stores the last
-// input message and then the assistant response; a storage failure aborts the
-// request with 400 and yields nil. Otherwise it runs the finish-reason handling
-// and returns the response with item ID and metadata applied.
-func (api *ConvCompletionAPI) processCompletionResponse(reqCtx *gin.Context, response *ExtendedCompletionResponse, request ExtendedChatCompletionRequest, conv *conversation.Conversation, user *userdomain.User, askItemID string, completionItemID string, conversationCreated bool) *ExtendedCompletionResponse {
-	ctx := reqCtx.Request.Context()
-
-	// Shared 400 reply for either storage failure.
-	abortBadRequest := func(failure *common.Error) {
-		reqCtx.AbortWithStatusJSON(
-			http.StatusBadRequest,
-			responses.ErrorResponse{
-				Code:          failure.GetCode(),
-				ErrorInstance: failure.GetError(),
-			})
-	}
-
-	var storedAssistantItem *conversation.Item
-	if request.Store {
-		// Input message (user or tool) first, assistant response second.
-		inputErr := api.StoreLastInputMessageIfRequested(ctx, request.ChatCompletionRequest, conv, user.ID, askItemID, completionItemID, request.Store, request.StoreReasoning)
-		if inputErr != nil {
-			abortBadRequest(inputErr)
-			return nil
-		}
-
-		item, assistantErr := api.StoreAssistantResponseIfRequested(ctx, response, conv, user.ID, completionItemID, request.Store, request.StoreReasoning)
-		if assistantErr != nil {
-			abortBadRequest(assistantErr)
-			return nil
-		}
-		storedAssistantItem = item
-	}
-
-	// Finish-reason handling always runs; request.Store is passed as its
-	// skipStorage argument so it does not store again what was stored above.
-	api.handleCompletionResponseAndUpdateConversation(ctx, response, conv, user.ID, request.Store)
-
-	// Attach the item ID and conversation metadata.
-	return api.completionNonStreamHandler.ModifyCompletionResponse(response, conv, conversationCreated, storedAssistantItem, askItemID, completionItemID, request.Store, request.StoreReasoning)
-}
-
-// handleConversationManagement resolves the conversation for a request.
-// An empty conversationID creates a new conversation; otherwise the existing
-// one is loaded and, if its title is nil, empty or the default, given a title
-// generated from messages (set on the returned value only).
-// Returns the conversation, whether it was created, and a load error if any.
-func (api *ConvCompletionAPI) handleConversationManagement(reqCtx *gin.Context, conversationID string, messages []openai.ChatCompletionMessage) (*conversation.Conversation, bool, *common.Error) {
-	// No ID supplied: start a fresh conversation.
-	if conversationID == "" {
-		created, wasCreated := api.createNewConversation(reqCtx, messages)
-		return created, wasCreated, nil
-	}
-
-	existing, loadErr := api.loadConversation(reqCtx, conversationID)
-	if loadErr != nil {
-		return nil, false, loadErr
-	}
-
-	needsTitle := existing.Title == nil || *existing.Title == "" || *existing.Title == DefaultConversationTitle
-	if needsTitle {
-		generated := api.generateTitleFromMessages(messages)
-		existing.Title = &generated
-	}
-	return existing, false, nil
-}
-
-// loadConversation fetches the conversation with the given public ID for the
-// authenticated user. It returns an error when no user is in the context, when
-// the lookup fails, or when the lookup yields no conversation.
-func (api *ConvCompletionAPI) loadConversation(reqCtx *gin.Context, conversationID string) (*conversation.Conversation, *common.Error) {
-	// Get user from context (set by AppUserAuthMiddleware)
-	user, authenticated := auth.GetUserFromContext(reqCtx)
-	if !authenticated {
-		return nil, common.NewErrorWithMessage("User not authenticated", "c1d2e3f4-g5h6-7890-cdef-123456789012")
-	}
-
-	// Both failure modes share the message and differ only in error code.
-	notFound := func(code string) *common.Error {
-		return common.NewErrorWithMessage(fmt.Sprintf("Conversation with ID '%s' not found", conversationID), code)
-	}
-
-	conv, lookupErr := api.conversationService.GetConversationByPublicIDAndUserID(reqCtx.Request.Context(), conversationID, user.ID)
-	switch {
-	case lookupErr != nil:
-		return nil, notFound("a1b2c3d4-e5f6-7890-abcd-ef1234567890")
-	case conv == nil:
-		return nil, notFound("b2c3d4e5-f6g7-8901-bcde-f23456789012")
-	}
-	return conv, nil
-}
-
-// createNewConversation creates a conversation for the authenticated user,
-// titled from the request messages.
-// It returns (conversation, true) on success and (nil, false) when there is
-// no user in the context or the conversation service reports an error.
-func (api *ConvCompletionAPI) createNewConversation(reqCtx *gin.Context, messages []openai.ChatCompletionMessage) (*conversation.Conversation, bool) {
-	ctx := reqCtx.Request.Context()
-
-	// Get user from context (set by AppUserAuthMiddleware)
-	user, ok := auth.GetUserFromContext(reqCtx)
-	if !ok {
-		// If no user context, return nil
-		return nil, false
-	}
-
-	title := api.generateTitleFromMessages(messages)
-	conv, convErr := api.conversationService.CreateConversation(ctx, user.ID, &title, true, map[string]string{
-		"model": "jan-v1-4b", // Default model
-	})
-	if convErr != nil {
-		// The signature cannot carry the error to the caller, so record it here
-		// instead of dropping it silently.
-		logger.GetLogger().Errorf("Unable to create conversation: %v", convErr)
-		return nil, false
-	}
-
-	return conv, true // Created new conversation
-}
-
-// TODO: titles should be generated by a model; for now the first user message is used.
-// generateTitleFromMessages derives a conversation title from the first user
-// message whose content is non-blank after trimming whitespace.
-// Titles longer than MaxTitleLength characters are cut to that length and
-// suffixed with "...". DefaultConversationTitle is returned when no such
-// message exists.
-func (api *ConvCompletionAPI) generateTitleFromMessages(messages []openai.ChatCompletionMessage) string {
-	for _, msg := range messages {
-		if msg.Role != "user" {
-			continue
-		}
-		title := strings.TrimSpace(msg.Content)
-		if title == "" {
-			// Empty or whitespace-only content cannot serve as a title.
-			continue
-		}
-		// Truncate by rune, not byte, so a multi-byte UTF-8 character is never
-		// split into an invalid sequence.
-		if runes := []rune(title); len(runes) > MaxTitleLength {
-			return string(runes[:MaxTitleLength]) + "..."
-		}
-		return title
-	}
-
-	return DefaultConversationTitle
-}
-
-// handleCompletionResponseAndUpdateConversation dispatches on each choice's
-// finish_reason: function calls, tool calls and "stop" messages are saved to
-// the conversation, every other finish reason is only logged. Nothing happens
-// when conv is nil, the response has no choices, or skipStorage is true.
-func (api *ConvCompletionAPI) handleCompletionResponseAndUpdateConversation(ctx context.Context, response *ExtendedCompletionResponse, conv *conversation.Conversation, userID uint, skipStorage bool) {
-	// skipStorage is set when the caller has already handled storage itself.
-	if conv == nil || len(response.Choices) == 0 || skipStorage {
-		return
-	}
-
-	for idx := range response.Choices {
-		msg := response.Choices[idx].Message
-
-		switch response.Choices[idx].FinishReason {
-		case "function_call":
-			if msg.FunctionCall != nil {
-				api.saveFunctionCallToConversation(ctx, conv, userID, msg.FunctionCall, msg.ReasoningContent)
-			}
-		case "tool_calls":
-			if len(msg.ToolCalls) > 0 {
-				api.saveToolCallsToConversation(ctx, conv, userID, msg.ToolCalls, msg.ReasoningContent)
-			}
-		case "stop":
-			// A normal completion is saved as an assistant message.
-			if msg.Content != "" {
-				api.saveAssistantMessageToConversation(ctx, conv, userID, msg.Content, msg.ReasoningContent)
-			}
-		case "length":
-			// Not stored; tracked through the log only.
-			logger.GetLogger().Error("length finish reason: " + msg.Content)
-		case "content_filter":
-			// Not stored; tracked through the log only.
-			logger.GetLogger().Error("content filter finish reason: " + msg.Content)
-		default:
-			// Unrecognised finish reasons are logged as well.
-			logger.GetLogger().Error("unknown finish reason: " + msg.Content)
-		}
-	}
-}
-
-// saveFunctionCallToConversation records a function call as a function-type
-// conversation item with the assistant role. The call is rendered as text
-// ("Function: ...\nArguments: ..."); reasoning content is attached when
-// non-empty. It does nothing when conv or functionCall is nil.
-func (api *ConvCompletionAPI) saveFunctionCallToConversation(ctx context.Context, conv *conversation.Conversation, userID uint, functionCall *openai.FunctionCall, reasoningContent string) {
-	if conv == nil || functionCall == nil {
-		return
-	}
-
-	entry := conversation.Content{
-		Type: "text",
-		Text: &conversation.Text{
-			Value: fmt.Sprintf("Function: %s\nArguments: %s", functionCall.Name, functionCall.Arguments),
-		},
-	}
-	if reasoningContent != "" {
-		entry.ReasoningContent = &reasoningContent
-	}
-
-	role := conversation.ItemRoleAssistant
-	api.conversationService.AddItem(ctx, conv, userID, conversation.ItemTypeFunction, &role, []conversation.Content{entry})
-}
-
-// saveToolCallsToConversation records every tool call as its own
-// function-type conversation item with the assistant role. Each call is
-// rendered as text (ID, type, function name, arguments); reasoning content is
-// attached to every item when non-empty. It does nothing when conv is nil or
-// toolCalls is empty.
-func (api *ConvCompletionAPI) saveToolCallsToConversation(ctx context.Context, conv *conversation.Conversation, userID uint, toolCalls []openai.ToolCall, reasoningContent string) {
-	if conv == nil || len(toolCalls) == 0 {
-		return
-	}
-
-	for _, call := range toolCalls {
-		entry := conversation.Content{
-			Type: "text",
-			Text: &conversation.Text{
-				Value: fmt.Sprintf("Tool Call ID: %s\nType: %s\nFunction: %s\nArguments: %s",
-					call.ID, call.Type, call.Function.Name, call.Function.Arguments),
-			},
-		}
-		if reasoningContent != "" {
-			entry.ReasoningContent = &reasoningContent
-		}
-
-		// A fresh role variable per item, so no two items share a pointer.
-		role := conversation.ItemRoleAssistant
-		api.conversationService.AddItem(ctx, conv, userID, conversation.ItemTypeFunction, &role, []conversation.Content{entry})
-	}
-}
-
-// saveAssistantMessageToConversation records content as a message-type
-// conversation item with the assistant role, attaching reasoning content when
-// non-empty. It does nothing when conv is nil or content is empty.
-func (api *ConvCompletionAPI) saveAssistantMessageToConversation(ctx context.Context, conv *conversation.Conversation, userID uint, content string, reasoningContent string) {
-	if conv == nil || content == "" {
-		return
-	}
-
-	entry := conversation.Content{
-		Type: "text",
-		Text: &conversation.Text{Value: content},
-	}
-	if reasoningContent != "" {
-		entry.ReasoningContent = &reasoningContent
-	}
-
-	role := conversation.ItemRoleAssistant
-	api.conversationService.AddItem(ctx, conv, userID, conversation.ItemTypeMessage, &role, []conversation.Content{entry})
-}
-
-// StoreLastInputMessageIfRequested stores the last message of the request
-// (user or tool) in the conversation under askItemID, but only when store is
-// true. The message's Content string is stored as a single text content entry
-// with the message's own role.
-//
-// Returns nil when store is false or the request has no messages; returns an
-// error when conv is nil or the conversation service rejects the item.
-// completionItemID and storeReasoning are accepted but not used here.
-func (api *ConvCompletionAPI) StoreLastInputMessageIfRequested(ctx context.Context, request openai.ChatCompletionRequest, conv *conversation.Conversation, userID uint, askItemID string, completionItemID string, store bool, storeReasoning bool) *common.Error {
-	if !store {
-		return nil // Don't store if store flag is false
-	}
-
-	// Validate required parameters
-	if conv == nil {
-		// Carry an explicit message rather than wrapping a nil error, matching
-		// the nil-conversation check in StoreAssistantResponseIfRequested.
-		return common.NewErrorWithMessage("Conversation is nil", "c1d2e3f4-g5h6-7890-abcd-ef1234567890")
-	}
-
-	// Store the latest input message (user or tool)
-	if len(request.Messages) == 0 {
-		return nil // No messages to store
-	}
-
-	latestMessage := request.Messages[len(request.Messages)-1]
-	role := conversation.ItemRole(latestMessage.Role)
-
-	content := []conversation.Content{
-		{
-			Type: "text",
-			Text: &conversation.Text{
-				Value: latestMessage.Content,
-			},
-		},
-	}
-
-	if _, err := api.conversationService.AddItemWithID(ctx, conv, userID, conversation.ItemTypeMessage, &role, content, askItemID); err != nil {
-		return err
-	}
-
-	return nil
-}
-
-// StoreAssistantResponseIfRequested stores the first choice of response in
-// the conversation under completionItemID, but only when store is true.
-//
-// The stored content is built by createContentArray: serialized tool calls or
-// function call for those finish reasons, the message text otherwise.
-// Reasoning content is attached only when storeReasoning is true.
-//
-// Returns (nil, nil) when store is false or the choice has nothing to store;
-// returns an error when response or conv is nil, when there are no choices,
-// or when building or saving the item fails.
-func (api *ConvCompletionAPI) StoreAssistantResponseIfRequested(ctx context.Context, response *ExtendedCompletionResponse, conv *conversation.Conversation, userID uint, completionItemID string, store bool, storeReasoning bool) (*conversation.Item, *common.Error) {
-	if !store {
-		return nil, nil // Don't store if store flag is false
-	}
-
-	// Validate required parameters
-	if response == nil {
-		return nil, common.NewErrorWithMessage("Response is nil", "d2e3f4g5-h6i7-8901-bcde-f23456789012")
-	}
-	if conv == nil {
-		return nil, common.NewErrorWithMessage("Conversation is nil", "e3f4g5h6-i7j8-9012-cdef-345678901234")
-	}
-
-	if len(response.Choices) == 0 {
-		return nil, common.NewErrorWithMessage("No choices to store", "01995b18-1638-719d-8ee2-01375bb2a19c")
-	}
-
-	choice := response.Choices[0]
-	content := choice.Message.Content
-	reasoningContent := choice.Message.ReasoningContent
-	finishReason := string(choice.FinishReason)
-
-	// Tool-call and function-call responses usually carry no text content, so
-	// they must get past the empty-content check below to be stored at all.
-	// The conditions mirror the branches createContentArray serializes.
-	hasCallPayload := (finishReason == "tool_calls" && len(choice.Message.ToolCalls) > 0) ||
-		(finishReason == "function_call" && choice.Message.FunctionCall != nil)
-
-	// Don't store if there is nothing to persist
-	if content == "" && reasoningContent == "" && !hasCallPayload {
-		return nil, nil
-	}
-
-	// Create content array based on finish reason
-	contentArray, err := api.createContentArray(choice, finishReason, content)
-	if err != nil {
-		return nil, err
-	}
-
-	// Add reasoning content if requested
-	if storeReasoning && reasoningContent != "" {
-		contentArray[0].ReasoningContent = &reasoningContent
-	}
-
-	role := conversation.ItemRoleAssistant
-	createdItem, err := api.conversationService.AddItemWithID(ctx, conv, userID, conversation.ItemTypeMessage, &role, contentArray, completionItemID)
-	if err != nil {
-		return nil, err
-	}
-
-	return createdItem, nil
-}
-
-// createContentArray builds the single-element content slice stored for an
-// assistant choice. The text value is the JSON-encoded tool calls for
-// "tool_calls" (when any exist), the JSON-encoded function call for
-// "function_call" (when present), and content in every other case. The finish
-// reason is attached to the entry; a JSON encoding failure is returned as an
-// error.
-func (api *ConvCompletionAPI) createContentArray(choice openai.ChatCompletionChoice, finishReason, content string) ([]conversation.Content, *common.Error) {
-	// All three outcomes share this shape and differ only in the text value.
-	asTextContent := func(value string) []conversation.Content {
-		return []conversation.Content{
-			{
-				Type:         "text",
-				FinishReason: &finishReason,
-				Text:         &conversation.Text{Value: value},
-			},
-		}
-	}
-
-	if finishReason == "tool_calls" && len(choice.Message.ToolCalls) > 0 {
-		encoded, marshalErr := json.Marshal(choice.Message.ToolCalls)
-		if marshalErr != nil {
-			return nil, common.NewError(marshalErr, "f4g5h6i7-j8k9-0123-defg-456789012345")
-		}
-		return asTextContent(string(encoded)), nil
-	}
-
-	if finishReason == "function_call" && choice.Message.FunctionCall != nil {
-		encoded, marshalErr := json.Marshal(choice.Message.FunctionCall)
-		if marshalErr != nil {
-			return nil, common.NewError(marshalErr, "g5h6i7j8-k9l0-1234-efgh-567890123456")
-		}
-		return asTextContent(string(encoded)), nil
-	}
-
-	// "stop", any other finish reason, and call reasons without a payload
-	// fall back to the plain message content.
-	return asTextContent(content), nil
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_streaming_handler.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_streaming_handler.go
deleted file mode 100644
index f6f055b8..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_completion_streaming_handler.go
+++ /dev/null
@@ -1,473 +0,0 @@
-package conv
-
-import (
-	"bufio"
-	"context"
-	"encoding/json"
-	"fmt"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/domain/common"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/inference"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-)
-
-// Constants for streaming configuration
-const (
-	RequestTimeout    = 120 * time.Second
-	ChannelBufferSize = 100
-	ErrorBufferSize   = 10
-	DataPrefix        = "data: "
-	DoneMarker        = "[DONE]"
-)
-
-// CompletionStreamHandler handles streaming chat completions
-type CompletionStreamHandler struct {
-	inferenceProvider   inference.InferenceProvider
-	conversationService *conversation.ConversationService
-}
-
-// NewCompletionStreamHandler creates a new CompletionStreamHandler
-func NewCompletionStreamHandler(inferenceProvider inference.InferenceProvider, conversationService *conversation.ConversationService) *CompletionStreamHandler {
-	return &CompletionStreamHandler{
-		inferenceProvider:   inferenceProvider,
-		conversationService: conversationService,
-	}
-}
-
-// FunctionCallAccumulator handles streaming function call accumulation
-type FunctionCallAccumulator struct {
-	Name      string
-	Arguments string
-	Complete  bool
-}
-
-// ToolCallAccumulator handles streaming tool call accumulation
-type ToolCallAccumulator struct {
-	ID       string
-	Type     string
-	Index    int
-	Function struct {
-		Name      string
-		Arguments string
-	}
-	Complete bool
-}
-
-// StreamCompletionAndAccumulateResponse streams SSE events to client and accumulates a complete response for internal processing
-func (s *CompletionStreamHandler) StreamCompletionAndAccumulateResponse(reqCtx *gin.Context, apiKey string, request openai.ChatCompletionRequest, conv *conversation.Conversation, conversationCreated bool, askItemID string, completionItemID string) (*ExtendedCompletionResponse, *common.Error) {
-	// Add timeout context
-	ctx, cancel := context.WithTimeout(reqCtx.Request.Context(), RequestTimeout)
-	defer cancel()
-
-	// Set up SSE headers
-	s.setupSSEHeaders(reqCtx)
-
-	// Send conversation metadata event first
-	if conv != nil {
-		if err := s.sendConversationMetadata(reqCtx, conv, conversationCreated, askItemID, completionItemID); err != nil {
-			return nil, common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
-		}
-	}
-
-	// Create buffered channels for data and errors
-	dataChan := make(chan string, ChannelBufferSize)
-	errChan := make(chan error, ErrorBufferSize)
-
-	var wg sync.WaitGroup
-	wg.Add(1)
-
-	// Start streaming in a goroutine
-	go s.streamResponseToChannel(ctx, apiKey, request, dataChan, errChan, &wg)
-
-	// Accumulators for different types of content
-	var fullContent string
-	var fullReasoning string
-	var functionCallAccumulator = make(map[int]*FunctionCallAccumulator)
-	var toolCallAccumulator = make(map[int]*ToolCallAccumulator)
-
-	// Process data from channels
-	streamingComplete := false
-	for !streamingComplete {
-		select {
-		case line, ok := <-dataChan:
-			if !ok {
-				// Channel closed, streaming complete
-				streamingComplete = true
-				break
-			}
-
-			// Forward the raw line to client
-			if err := s.writeSSELine(reqCtx, line); err != nil {
-				return nil, common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
-			}
-
-			if data, found := strings.CutPrefix(line, DataPrefix); found {
-				if data == DoneMarker {
-					streamingComplete = true
-					break
-				}
-
-				// Process stream chunk and accumulate content
-				contentChunk, reasoningChunk, functionCallChunk, toolCallChunk := s.processStreamChunkForChannel(data)
-
-				// Accumulate content
-				if contentChunk != "" {
-					fullContent += contentChunk
-				}
-
-				// Accumulate reasoning
-				if reasoningChunk != "" {
-					fullReasoning += reasoningChunk
-				}
-
-				// Handle function call accumulation
-				if functionCallChunk != nil {
-					s.handleStreamingFunctionCall(functionCallChunk, functionCallAccumulator)
-				}
-
-				// Handle tool call accumulation
-				if toolCallChunk != nil {
-					s.handleStreamingToolCall(toolCallChunk, toolCallAccumulator)
-				}
-			}
-
-		case err, ok := <-errChan:
-			if !ok {
-				// Channel closed, no more errors
-				continue
-			}
-			if err != nil {
-				return nil, common.NewError(err, "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
-			}
-
-		case <-ctx.Done():
-			return nil, common.NewError(ctx.Err(), "bc82d69c-685b-4556-9d1f-2a4a80ae8ca4")
-		}
-	}
-
-	// Wait for streaming goroutine to complete and close channels
-	wg.Wait()
-
-	close(dataChan)
-	close(errChan)
-
-	// Build the complete response
-	response := s.buildCompleteResponse(fullContent, fullReasoning, functionCallAccumulator, toolCallAccumulator, completionItemID, request.Model, request)
-
-	// Return as ExtendedCompletionResponse
-	return &ExtendedCompletionResponse{
-		ChatCompletionResponse: response,
-	}, nil
-}
-
-// streamResponseToChannel streams the response from inference provider to channels
-func (s *CompletionStreamHandler) streamResponseToChannel(ctx context.Context, apiKey string, request openai.ChatCompletionRequest, dataChan chan<- string, errChan chan<- error, wg *sync.WaitGroup) {
-	defer wg.Done()
-
-	// Get streaming reader from inference provider
-	reader, err := s.inferenceProvider.CreateCompletionStream(ctx, apiKey, request)
-	if err != nil {
-		errChan <- err
-		return
-	}
-	defer func() {
-		if closeErr := reader.Close(); closeErr != nil {
-			// Log the close error but don't send it to errChan to avoid overriding the original error
-			// In a production environment, you might want to use a proper logger here
-			logger.GetLogger().Errorf("unable to close reader: %v", closeErr)
-		}
-	}()
-
-	scanner := bufio.NewScanner(reader)
-	for scanner.Scan() {
-		select {
-		case <-ctx.Done():
-			errChan <- ctx.Err()
-			return
-		default:
-			line := scanner.Text()
-			dataChan <- line
-		}
-	}
-
-	if err := scanner.Err(); err != nil {
-		errChan <- err
-		return
-	}
-}
-
-// setupSSEHeaders sets up the required headers for Server-Sent Events
-func (s *CompletionStreamHandler) setupSSEHeaders(reqCtx *gin.Context) {
-	reqCtx.Header("Content-Type", "text/event-stream")
-	reqCtx.Header("Cache-Control", "no-cache")
-	reqCtx.Header("Connection", "keep-alive")
-	reqCtx.Header("Access-Control-Allow-Origin", "*")
-	reqCtx.Header("Access-Control-Allow-Headers", "Cache-Control")
-}
-
-// writeSSELine writes a line to the SSE stream
-func (s *CompletionStreamHandler) writeSSELine(reqCtx *gin.Context, line string) error {
-	_, err := reqCtx.Writer.Write([]byte(line + "\n"))
-	if err != nil {
-		return err
-	}
-	reqCtx.Writer.Flush()
-	return nil
-}
-
-// writeSSEEvent writes a properly formatted SSE event
-func (s *CompletionStreamHandler) writeSSEEvent(reqCtx *gin.Context, data string) error {
-	_, err := reqCtx.Writer.Write([]byte(fmt.Sprintf("data: %s\n\n", data)))
-	if err != nil {
-		return err
-	}
-	reqCtx.Writer.Flush()
-	return nil
-}
-
-// sendConversationMetadata sends conversation metadata as SSE event
-func (s *CompletionStreamHandler) sendConversationMetadata(reqCtx *gin.Context, conv *conversation.Conversation, conversationCreated bool, askItemID string, completionItemID string) error {
-	if conv == nil {
-		return nil
-	}
-
-	metadata := ResponseMetadata{
-		ConversationID:      conv.PublicID,
-		ConversationCreated: conversationCreated,
-		ConversationTitle:   *conv.Title,
-		AskItemId:           askItemID,
-		CompletionItemId:    completionItemID,
-	}
-
-	jsonData, err := json.Marshal(metadata)
-	if err != nil {
-		return err
-	}
-
-	// Send proper SSE formatted event with double newline
-	return s.writeSSEEvent(reqCtx, string(jsonData))
-}
-
-// processStreamChunkForChannel processes a single stream chunk and returns separate chunks
-func (s *CompletionStreamHandler) processStreamChunkForChannel(data string) (string, string, *openai.FunctionCall, *openai.ToolCall) {
-	// Parse the JSON data to extract content and calls
-	var streamData struct {
-		Choices []struct {
-			Delta struct {
-				Content          string               `json:"content"`
-				ReasoningContent string               `json:"reasoning_content"`
-				FunctionCall     *openai.FunctionCall `json:"function_call,omitempty"`
-				ToolCalls        []openai.ToolCall    `json:"tool_calls,omitempty"`
-			} `json:"delta"`
-		} `json:"choices"`
-	}
-
-	if err := json.Unmarshal([]byte(data), &streamData); err != nil {
-		// Log JSON parsing errors for debugging
-		logger.GetLogger().Errorf("failed to parse stream chunk JSON: %v, data: %s", err, data)
-		return "", "", nil, nil
-	}
-
-	// Extract content, reasoning content, function calls, and tool calls from all choices
-	var contentChunk string
-	var reasoningChunk string
-	var functionCall *openai.FunctionCall
-	var toolCall *openai.ToolCall
-
-	for _, choice := range streamData.Choices {
-		// Check for regular content
-		if choice.Delta.Content != "" {
-			contentChunk += choice.Delta.Content
-		}
-
-		// Check for reasoning content
-		if choice.Delta.ReasoningContent != "" {
-			reasoningChunk += choice.Delta.ReasoningContent
-		}
-
-		// Extract function calls (legacy format)
-		if choice.Delta.FunctionCall != nil {
-			functionCall = choice.Delta.FunctionCall
-		}
-
-		// Extract tool calls (new format)
-		if len(choice.Delta.ToolCalls) > 0 {
-			toolCall = &choice.Delta.ToolCalls[0]
-		}
-	}
-
-	// Return separate chunks
-	return contentChunk, reasoningChunk, functionCall, toolCall
-}
-
-// handleStreamingFunctionCall handles function call accumulation
-func (s *CompletionStreamHandler) handleStreamingFunctionCall(functionCall *openai.FunctionCall, accumulator map[int]*FunctionCallAccumulator) {
-	if functionCall == nil {
-		return
-	}
-
-	// Use index 0 for function calls (legacy format doesn't have index)
-	index := 0
-	if accumulator[index] == nil {
-		accumulator[index] = &FunctionCallAccumulator{}
-	}
-
-	// Add chunk to accumulator
-	if functionCall.Name != "" {
-		accumulator[index].Name = functionCall.Name
-	}
-	if functionCall.Arguments != "" {
-		accumulator[index].Arguments += functionCall.Arguments
-	}
-
-	// Check if complete (has name and arguments ending with })
-	if accumulator[index].Name != "" && accumulator[index].Arguments != "" && strings.HasSuffix(accumulator[index].Arguments, "}") {
-		accumulator[index].Complete = true
-	}
-}
-
-// handleStreamingToolCall handles tool call accumulation
-func (s *CompletionStreamHandler) handleStreamingToolCall(toolCall *openai.ToolCall, accumulator map[int]*ToolCallAccumulator) {
-	if toolCall == nil || toolCall.Index == nil {
-		return
-	}
-
-	index := *toolCall.Index
-	if accumulator[index] == nil {
-		accumulator[index] = &ToolCallAccumulator{
-			ID:    toolCall.ID,
-			Type:  string(toolCall.Type),
-			Index: index,
-		}
-	}
-
-	// Add chunk to accumulator
-	if toolCall.Function.Name != "" {
-		accumulator[index].Function.Name = toolCall.Function.Name
-	}
-	if toolCall.Function.Arguments != "" {
-		accumulator[index].Function.Arguments += toolCall.Function.Arguments
-	}
-
-	// Check if complete (has name and arguments ending with })
-	if accumulator[index].Function.Name != "" && accumulator[index].Function.Arguments != "" && strings.HasSuffix(accumulator[index].Function.Arguments, "}") {
-		accumulator[index].Complete = true
-	}
-}
-
-// buildCompleteResponse builds the complete ChatCompletionResponse from accumulated data
-func (s *CompletionStreamHandler) buildCompleteResponse(content string, reasoning string, functionCallAccumulator map[int]*FunctionCallAccumulator, toolCallAccumulator map[int]*ToolCallAccumulator, completionItemID string, model string, request openai.ChatCompletionRequest) openai.ChatCompletionResponse {
-	// Build a single choice that combines all content, reasoning, and calls
-	message := openai.ChatCompletionMessage{
-		Role:    openai.ChatMessageRoleAssistant,
-		Content: content,
-	}
-
-	// Add reasoning content if present
-	if reasoning != "" {
-		message.ReasoningContent = reasoning
-	}
-
-	var finishReason openai.FinishReason = openai.FinishReasonStop
-
-	// Check for function calls first (legacy format)
-	if len(functionCallAccumulator) > 0 {
-		for _, acc := range functionCallAccumulator {
-			if acc.Complete {
-				message.FunctionCall = &openai.FunctionCall{
-					Name:      acc.Name,
-					Arguments: acc.Arguments,
-				}
-				finishReason = openai.FinishReasonFunctionCall
-				break
-			}
-		}
-	}
-
-	// Check for tool calls (new format) - these take precedence over function calls
-	if len(toolCallAccumulator) > 0 {
-		var toolCalls []openai.ToolCall
-		for _, acc := range toolCallAccumulator {
-			if acc.Complete {
-				toolCalls = append(toolCalls, openai.ToolCall{
-					ID:   acc.ID,
-					Type: openai.ToolType(acc.Type),
-					Function: openai.FunctionCall{
-						Name:      acc.Function.Name,
-						Arguments: acc.Function.Arguments,
-					},
-				})
-			}
-		}
-
-		if len(toolCalls) > 0 {
-			message.ToolCalls = toolCalls
-			finishReason = openai.FinishReasonToolCalls
-		}
-	}
-
-	// Create the single choice with all combined content
-	choices := []openai.ChatCompletionChoice{
-		{
-			Index:        0,
-			Message:      message,
-			FinishReason: finishReason,
-		},
-	}
-
-	// Calculate token usage
-	promptTokens := s.estimateTokens(request.Messages)
-	completionTokens := s.estimateTokens([]openai.ChatCompletionMessage{message})
-	totalTokens := promptTokens + completionTokens
-
-	return openai.ChatCompletionResponse{
-		ID:      completionItemID,
-		Object:  "chat.completion",
-		Created: time.Now().Unix(),
-		Model:   model,
-		Choices: choices,
-		Usage: openai.Usage{
-			PromptTokens:     promptTokens,
-			CompletionTokens: completionTokens,
-			TotalTokens:      totalTokens,
-		},
-	}
-}
-
-// TODO it's raw solution, we need to use the official openai tokenizer like tiktoken
-// estimateTokens provides a rough estimation of token count for messages
-func (s *CompletionStreamHandler) estimateTokens(messages []openai.ChatCompletionMessage) int {
-	var allText strings.Builder
-
-	for _, msg := range messages {
-		allText.WriteString(msg.Content)
-		allText.WriteString(" ")
-
-		if msg.FunctionCall != nil {
-			allText.WriteString(msg.FunctionCall.Name)
-			allText.WriteString(" ")
-			allText.WriteString(msg.FunctionCall.Arguments)
-			allText.WriteString(" ")
-		}
-
-		for _, toolCall := range msg.ToolCalls {
-			allText.WriteString(toolCall.ID)
-			allText.WriteString(" ")
-			allText.WriteString(toolCall.Function.Name)
-			allText.WriteString(" ")
-			allText.WriteString(toolCall.Function.Arguments)
-			allText.WriteString(" ")
-		}
-	}
-
-	// Split by spaces and count words, but normalize whitespace
-	normalized := strings.Join(strings.Fields(allText.String()), " ") // Collapse multiple spaces
-	words := strings.Fields(normalized)
-	return len(words)
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_mcp_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_mcp_route.go
deleted file mode 100644
index d6327faa..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conv/conv_mcp_route.go
+++ /dev/null
@@ -1,105 +0,0 @@
-package conv
-
-import (
-	"bytes"
-	"encoding/json"
-	"io"
-
-	"github.com/gin-gonic/gin"
-	mcpserver "github.com/mark3labs/mcp-go/server"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	mcpimpl "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp/mcp_impl"
-)
-
-// ConvMCPAPI handles MCP (Model Context Protocol) endpoints for conversation-aware chat
-type ConvMCPAPI struct {
-	authService *auth.AuthService
-	serperMCP   *mcpimpl.SerperMCP
-	mcpServer   *mcpserver.MCPServer
-}
-
-// NewConvMCPAPI creates a new ConvMCPAPI instance
-func NewConvMCPAPI(authService *auth.AuthService, serperMCP *mcpimpl.SerperMCP) *ConvMCPAPI {
-	mcpSrv := mcpserver.NewMCPServer("conv-mcp-demo", "0.1.0",
-		mcpserver.WithToolCapabilities(true),
-		mcpserver.WithRecovery(),
-	)
-	return &ConvMCPAPI{
-		authService: authService,
-		serperMCP:   serperMCP,
-		mcpServer:   mcpSrv,
-	}
-}
-
-// RegisterRouter registers the MCP routes for conversation-aware chat
-// ConvMCP
-// @Summary MCP streamable endpoint for conversation-aware chat
-// @Description Handles Model Context Protocol (MCP) requests over an HTTP stream for conversation-aware chat functionality. The response is sent as a continuous stream of data with conversation context.
-// @Tags Conversation-aware Chat API
-// @Security BearerAuth
-// @Accept json
-// @Produce text/event-stream
-// @Param request body any true "MCP request payload"
-// @Success 200 {string} string "Streamed response (SSE or chunked transfer)"
-// @Router /v1/conv/mcp [post]
-func (api *ConvMCPAPI) RegisterRouter(router *gin.RouterGroup) {
-	// Register MCP endpoint (without RegisteredUserMiddleware to avoid content type conflicts)
-	api.serperMCP.RegisterTool(api.mcpServer)
-	mcpHttpHandler := mcpserver.NewStreamableHTTPServer(api.mcpServer)
-
-	// Create a separate router group for MCP that only uses AppUserAuthMiddleware
-	// This avoids the content type conflicts that RegisteredUserMiddleware can cause
-	mcpRouter := router.Group("")
-	mcpRouter.Any(
-		"/mcp",
-		api.authService.AppUserAuthMiddleware(),
-		MCPMethodGuard(map[string]bool{
-			// Initialization / handshake
-			"initialize":                true,
-			"notifications/initialized": true,
-			"ping":                      true,
-
-			// Tools
-			"tools/list": true,
-			"tools/call": true,
-
-			// Prompts
-			"prompts/list": true,
-			"prompts/call": true,
-
-			// Resources
-			"resources/list":           true,
-			"resources/templates/list": true,
-			"resources/read":           true,
-
-			// If you support subscription:
-			"resources/subscribe": true,
-		}),
-		gin.WrapH(mcpHttpHandler))
-}
-
-// MCPMethodGuard is a middleware that guards MCP methods
-func MCPMethodGuard(allowedMethods map[string]bool) gin.HandlerFunc {
-	return func(c *gin.Context) {
-		bodyBytes, err := io.ReadAll(c.Request.Body)
-		if err != nil {
-			c.Abort()
-			return
-		}
-		c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
-		var req struct {
-			Method string `json:"method"`
-		}
-
-		if err := json.Unmarshal(bodyBytes, &req); err != nil {
-			c.Abort()
-			return
-		}
-
-		if !allowedMethods[req.Method] {
-			c.Abort()
-			return
-		}
-		c.Next()
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conversations/conversations_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conversations/conversations_route.go
deleted file mode 100644
index 24e378e4..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/conversations/conversations_route.go
+++ /dev/null
@@ -1,844 +0,0 @@
-package conversations
-
-import (
-	"fmt"
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses/openai"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-// ConversationAPI handles route registration for V1 conversations
-type ConversationAPI struct {
-	conversationService *conversation.ConversationService
-	authService         *auth.AuthService
-}
-
-// Request structs
-type CreateConversationRequest struct {
-	Title    string                    `json:"title"`
-	Metadata map[string]string         `json:"metadata,omitempty"`
-	Items    []ConversationItemRequest `json:"items,omitempty"`
-}
-
-type UpdateConversationRequest struct {
-	Title    *string            `json:"title"`
-	Metadata *map[string]string `json:"metadata"`
-}
-
-type ConversationItemRequest struct {
-	Type    string                       `json:"type" binding:"required"`
-	Role    conversation.ItemRole        `json:"role,omitempty"`
-	Content []ConversationContentRequest `json:"content" binding:"required"`
-}
-
-type ConversationContentRequest struct {
-	Type string `json:"type" binding:"required"`
-	Text string `json:"text,omitempty"`
-}
-
-type CreateItemsRequest struct {
-	Items []ConversationItemRequest `json:"items" binding:"required"`
-}
-
-// Response structs
-type ExtendedConversationResponse struct {
-	ID        string            `json:"id"`
-	Title     string            `json:"title"`
-	Object    string            `json:"object"`
-	CreatedAt int64             `json:"created_at"`
-	Metadata  map[string]string `json:"metadata"`
-}
-
-type DeletedConversationResponse struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Deleted bool   `json:"deleted"`
-}
-
-type ConversationItemResponse struct {
-	ID        string            `json:"id"`
-	Object    string            `json:"object"`
-	Type      string            `json:"type"`
-	Role      *string           `json:"role,omitempty"`
-	Status    *string           `json:"status,omitempty"`
-	CreatedAt int64             `json:"created_at"`
-	Content   []ContentResponse `json:"content,omitempty"`
-}
-
-type ContentResponse struct {
-	Type             string                `json:"type"`
-	FinishReason     *string               `json:"finish_reason,omitempty"`
-	Text             *TextResponse         `json:"text,omitempty"`
-	InputText        *string               `json:"input_text,omitempty"`
-	OutputText       *OutputTextResponse   `json:"output_text,omitempty"`
-	ReasoningContent *string               `json:"reasoning_content,omitempty"`
-	Image            *ImageContentResponse `json:"image,omitempty"`
-	File             *FileContentResponse  `json:"file,omitempty"`
-}
-
-type TextResponse struct {
-	Value string `json:"value"`
-}
-
-type OutputTextResponse struct {
-	Text        string               `json:"text"`
-	Annotations []AnnotationResponse `json:"annotations"`
-}
-
-type ImageContentResponse struct {
-	URL    string `json:"url,omitempty"`
-	FileID string `json:"file_id,omitempty"`
-	Detail string `json:"detail,omitempty"`
-}
-
-type FileContentResponse struct {
-	FileID   string `json:"file_id"`
-	Name     string `json:"name,omitempty"`
-	MimeType string `json:"mime_type,omitempty"`
-	Size     int64  `json:"size,omitempty"`
-}
-
-type AnnotationResponse struct {
-	Type       string `json:"type"`
-	Text       string `json:"text,omitempty"`
-	FileID     string `json:"file_id,omitempty"`
-	URL        string `json:"url,omitempty"`
-	StartIndex int    `json:"start_index"`
-	EndIndex   int    `json:"end_index"`
-	Index      int    `json:"index,omitempty"`
-}
-
-// NewConversationAPI creates a new conversation API instance
-func NewConversationAPI(
-	conversationService *conversation.ConversationService,
-	authService *auth.AuthService) *ConversationAPI {
-	return &ConversationAPI{
-		conversationService,
-		authService,
-	}
-}
-
-// RegisterRouter registers OpenAI-compatible conversation routes
-func (api *ConversationAPI) RegisterRouter(router *gin.RouterGroup) {
-	conversationsRouter := router.Group("/conversations",
-		api.authService.AppUserAuthMiddleware(),
-		api.authService.RegisteredUserMiddleware(),
-	)
-
-	conversationsRouter.POST("", api.CreateConversationHandler)
-	conversationsRouter.GET("", api.ListConversationsHandler)
-
-	conversationMiddleWare := api.conversationService.GetConversationMiddleWare()
-	conversationsRouter.GET(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.GetConversationHandler)
-	conversationsRouter.PATCH(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.UpdateConversationHandler)
-	conversationsRouter.DELETE(fmt.Sprintf("/:%s", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.DeleteConversationHandler)
-	conversationsRouter.POST(fmt.Sprintf("/:%s/items", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.CreateItemsHandler)
-	conversationsRouter.GET(fmt.Sprintf("/:%s/items", conversation.ConversationContextKeyPublicID), conversationMiddleWare, api.ListItemsHandler)
-
-	conversationItemMiddleWare := api.conversationService.GetConversationItemMiddleWare()
-	conversationsRouter.GET(
-		fmt.Sprintf(
-			"/:%s/items/:%s",
-			conversation.ConversationContextKeyPublicID,
-			conversation.ConversationItemContextKeyPublicID,
-		),
-		conversationMiddleWare,
-		conversationItemMiddleWare,
-		api.GetItemHandler,
-	)
-	conversationsRouter.DELETE(
-		fmt.Sprintf(
-			"/:%s/items/:%s",
-			conversation.ConversationContextKeyPublicID,
-			conversation.ConversationItemContextKeyPublicID,
-		),
-		conversationMiddleWare,
-		conversationItemMiddleWare,
-		api.DeleteItemHandler,
-	)
-}
-
-// @Summary List Conversations
-// @Description Retrieves a paginated list of conversations for the authenticated user with OpenAI-compatible response format.
-// @Tags Conversations API
-// @Security BearerAuth
-// @Param limit query int false "The maximum number of items to return" default(20)
-// @Param after query string false "A cursor for use in pagination. The ID of the last object from the previous page"
-// @Param order query string false "Order of items (asc/desc)"
-// @Success 200 {object} openai.ListResponse[ExtendedConversationResponse] "Successfully retrieved the list of conversations"
-// @Failure 400 {object} responses.ErrorResponse "Bad Request - Invalid pagination parameters"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 500 {object} responses.ErrorResponse "Internal Server Error"
-// @Router /v1/conversations [get]
-func (api *ConversationAPI) ListConversationsHandler(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	user, _ := auth.GetUserFromContext(reqCtx)
-	userID := user.ID
-
-	pagination, err := query.GetCursorPaginationFromQuery(reqCtx, func(lastID string) (*uint, error) {
-		convs, convErr := api.conversationService.FindConversationsByFilter(ctx, conversation.ConversationFilter{
-			UserID:   &userID,
-			PublicID: &lastID,
-		}, nil)
-		if convErr != nil {
-			return nil, convErr
-		}
-		if len(convs) != 1 {
-			return nil, fmt.Errorf("invalid conversation")
-		}
-		return &convs[0].ID, nil
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "5f89e23d-d4a0-45ce-ba43-ae2a9be0ca64",
-			Error: "Invalid pagination parameters",
-		})
-		return
-	}
-
-	filter := conversation.ConversationFilter{
-		UserID: &userID,
-	}
-	conversations, convErr := api.conversationService.FindConversationsByFilter(ctx, filter, pagination)
-	if convErr != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "019952d5-6876-7323-96fa-89784b7d082e",
-			ErrorInstance: convErr.GetError(),
-		})
-		return
-	}
-	count, countErr := api.conversationService.CountConversationsByFilter(ctx, filter)
-	if countErr != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "019952d5-1afc-7229-bb57-6928f54d5171",
-			ErrorInstance: convErr.GetError(),
-		})
-		return
-	}
-	var firstId *string
-	var lastId *string
-	hasMore := false
-	if len(conversations) > 0 {
-		firstId = &conversations[0].PublicID
-		lastId = &conversations[len(conversations)-1].PublicID
-		moreRecords, moreErr := api.conversationService.FindConversationsByFilter(ctx, filter, &query.Pagination{
-			Order: pagination.Order,
-			Limit: ptr.ToInt(1),
-			After: &conversations[len(conversations)-1].ID,
-		})
-		if moreErr != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "019952d5-983a-73a8-8439-290ae4b4ee51",
-				ErrorInstance: convErr.GetError(),
-			})
-			return
-		}
-		if len(moreRecords) != 0 {
-			hasMore = true
-		}
-	}
-
-	result := functional.Map(conversations, domainToExtendedConversationResponse)
-
-	response := openai.ListResponse[*ExtendedConversationResponse]{
-		Object:  "list",
-		FirstID: firstId,
-		LastID:  lastId,
-		Total:   count,
-		HasMore: hasMore,
-		Data:    result,
-	}
-
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary Create a conversation
-// @Description Creates a new conversation for the authenticated user with optional items
-// @Tags Conversations API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param request body CreateConversationRequest true "Create conversation request"
-// @Success 200 {object} ExtendedConversationResponse "Created conversation"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request - Bad payload, too many items, or invalid item format"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations [post]
-func (api *ConversationAPI) CreateConversationHandler(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	user, _ := auth.GetUserFromContext(reqCtx)
-	userId := user.ID
-
-	var request CreateConversationRequest
-	if err := reqCtx.ShouldBindJSON(&request); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "e5c96a9e-7ff9-4408-9514-9d206ca85b33",
-			ErrorInstance: err,
-		})
-		return
-	}
-
-	if len(request.Items) > 20 {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "0e5b8426-b1d2-4114-ac81-d3982dc497cf",
-			Error: "Too many items",
-		})
-		return
-	}
-
-	itemsToCreate := make([]*conversation.Item, len(request.Items))
-
-	for i, itemReq := range request.Items {
-		item, ok := NewItemFromConversationItemRequest(itemReq)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:  "1fe8d03b-9e1e-4e52-b5b5-77a25954fc43",
-				Error: "Invalid item format",
-			})
-			return
-		}
-		itemsToCreate[i] = item
-	}
-
-	err := api.conversationService.ValidateItems(ctx, itemsToCreate)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "019952d0-1dc9-746e-82ff-dd42b1e7930f",
-			ErrorInstance: err.GetError(),
-		})
-		return
-	}
-
-	// Create conversation
-	conv, err := api.conversationService.CreateConversation(ctx, userId, &request.Title, true, request.Metadata)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "019952d0-3e32-76ba-a97f-711223df2c84",
-			ErrorInstance: err.GetError(),
-		})
-		return
-	}
-
-	// Add items if provided using batch operation
-	if len(request.Items) > 0 {
-		_, err := api.conversationService.AddMultipleItems(ctx, conv, userId, itemsToCreate)
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:          "019952d0-6d70-7419-81ae-828d8009ee56",
-				ErrorInstance: err.GetError(),
-			})
-			return
-		}
-	}
-
-	response := domainToExtendedConversationResponse(conv)
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary Get a conversation
-// @Description Retrieves a conversation by its ID with full metadata and title
-// @Tags Conversations API
-// @Security BearerAuth
-// @Produce json
-// @Param conversation_id path string true "Conversation ID"
-// @Success 200 {object} ExtendedConversationResponse "Conversation details"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations/{conversation_id} [get]
-func (api *ConversationAPI) GetConversationHandler(reqCtx *gin.Context) {
-	conv, ok := conversation.GetConversationFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	response := domainToExtendedConversationResponse(conv)
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary Update a conversation
-// @Description Updates conversation title and/or metadata
-// @Tags Conversations API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param conversation_id path string true "Conversation ID"
-// @Param request body UpdateConversationRequest true "Update conversation request"
-// @Success 200 {object} ExtendedConversationResponse "Updated conversation"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request payload or update failed"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations/{conversation_id} [patch]
-func (api *ConversationAPI) UpdateConversationHandler(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	conv, ok := conversation.GetConversationFromContext(reqCtx)
-	if !ok {
-		return
-	}
-
-	var request UpdateConversationRequest
-	if err := reqCtx.ShouldBindJSON(&request); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "4183e285-08ef-4a79-8a68-d53cddd0c0e2",
-			Error: "Invalid request payload",
-		})
-		return
-	}
-
-	if request.Title != nil {
-		conv.Title = request.Title
-	}
-	if request.Metadata != nil {
-		conv.Metadata = *request.Metadata
-	}
-
-	conv, err := api.conversationService.UpdateConversation(ctx, conv)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "019952d0-a754-73bc-adbc-781ac31e12d7",
-			ErrorInstance: err,
-		})
-		return
-	}
-
-	response := domainToExtendedConversationResponse(conv)
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary Delete a conversation
-// @Description Deletes a conversation and all its items permanently
-// @Tags Conversations API
-// @Security BearerAuth
-// @Produce json
-// @Param conversation_id path string true "Conversation ID"
-// @Success 200 {object} DeletedConversationResponse "Deleted conversation"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations/{conversation_id} [delete]
-func (api *ConversationAPI) DeleteConversationHandler(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	conv, ok := conversation.GetConversationFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "a4fb6e9b-00c8-423c-9836-a83080e34d28",
-			Error: "Conversation not found",
-		})
-		return
-	}
-
-	success, err := api.conversationService.DeleteConversation(ctx, conv)
-	if !success {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "019952c3-9836-75ea-9785-a8d035a7c136",
-			ErrorInstance: err.GetError(),
-		})
-	}
-	response := domainToDeletedConversationResponse(conv)
-
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary Create items in a conversation
-// @Description Adds multiple items to a conversation with OpenAI-compatible format
-// @Tags Conversations API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param conversation_id path string true "Conversation ID"
-// @Param request body CreateItemsRequest true "Create items request"
-// @Success 200 {object} openai.ListResponse[ConversationItemResponse] "Created items"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request payload or invalid item format"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations/{conversation_id}/items [post]
-func (api *ConversationAPI) CreateItemsHandler(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	conv, _ := conversation.GetConversationFromContext(reqCtx)
-
-	var request CreateItemsRequest
-	if err := reqCtx.ShouldBindJSON(&request); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "a4fb6e9b-00c8-423c-9836-a83080e34d28",
-			Error: "Invalid request payload",
-		})
-		return
-	}
-
-	itemsToCreate := make([]*conversation.Item, len(request.Items))
-	for i, itemReq := range request.Items {
-		item, ok := NewItemFromConversationItemRequest(itemReq)
-		if !ok {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code:  "a4fb6e9b-00c8-423c-9836-a83080e34d28",
-				Error: "Invalid item format",
-			})
-			return
-		}
-		itemsToCreate[i] = item
-	}
-
-	err := api.conversationService.ValidateItems(ctx, itemsToCreate)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "019952d1-265e-738b-bada-7918c32a61d2",
-			ErrorInstance: err.GetError(),
-		})
-		return
-	}
-
-	createdItems, err := api.conversationService.AddMultipleItems(ctx, conv, conv.UserID, itemsToCreate)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "019952d1-68dc-73f3-84e3-0f53d8fce318",
-			ErrorInstance: err.GetError(),
-		})
-		return
-	}
-
-	var firstId *string
-	var lastId *string
-	if len(createdItems) > 0 {
-		firstId = &createdItems[0].PublicID
-		lastId = &createdItems[len(createdItems)-1].PublicID
-	}
-
-	response := &openai.ListResponse[*ConversationItemResponse]{
-		Object:  "list",
-		Data:    functional.Map(createdItems, domainToConversationItemResponse),
-		FirstID: firstId,
-		LastID:  lastId,
-		HasMore: false,
-		Total:   int64(len(createdItems)),
-	}
-
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary List items in a conversation
-// @Description Lists all items in a conversation with OpenAI-compatible pagination
-// @Tags Conversations API
-// @Security BearerAuth
-// @Produce json
-// @Param conversation_id path string true "Conversation ID"
-// @Param limit query int false "Number of items to return (1-100)"
-// @Param after query string false "Cursor for pagination - ID of the last item from previous page"
-// @Param order query string false "Order of items (asc/desc)"
-// @Success 200 {object} openai.ListResponse[ConversationItemResponse] "List of items"
-// @Failure 400 {object} responses.ErrorResponse "Bad Request - Invalid pagination parameters"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations/{conversation_id}/items [get]
-func (api *ConversationAPI) ListItemsHandler(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	conv, _ := conversation.GetConversationFromContext(reqCtx)
-
-	pagination, err := query.GetCursorPaginationFromQuery(reqCtx, func(lastID string) (*uint, error) {
-		items, err := api.conversationService.FindItemsByFilter(ctx, conversation.ItemFilter{
-			PublicID:       &lastID,
-			ConversationID: &conv.ID,
-		}, nil)
-		if err != nil {
-			return nil, fmt.Errorf("%s: %s", err.GetCode(), err.Error())
-		}
-		if len(items) != 1 {
-			return nil, fmt.Errorf("invalid conversation")
-		}
-		return &items[0].ID, nil
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "e9144b73-6fc1-4b16-b9c7-460d8a4ecf6b",
-			Error: "Invalid pagination parameters",
-		})
-		return
-	}
-
-	filter := conversation.ItemFilter{
-		ConversationID: &conv.ID,
-	}
-	itemEntities, filterErr := api.conversationService.FindItemsByFilter(ctx, filter, pagination)
-	if filterErr != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "019952d1-a6d2-76ff-9c10-3e9264056f90",
-			ErrorInstance: filterErr.GetError(),
-		})
-		return
-	}
-
-	var firstId *string
-	var lastId *string
-	hasMore := false
-	if len(itemEntities) > 0 {
-		firstId = &itemEntities[0].PublicID
-		lastId = &itemEntities[len(itemEntities)-1].PublicID
-		moreRecords, moreErr := api.conversationService.FindItemsByFilter(ctx, filter, &query.Pagination{
-			Order: pagination.Order,
-			Limit: ptr.ToInt(1),
-			After: &itemEntities[len(itemEntities)-1].ID,
-		})
-		if moreErr != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "019952d1-e914-7466-b527-49e498129426",
-				ErrorInstance: moreErr.GetError(),
-			})
-			return
-		}
-		if len(moreRecords) != 0 {
-			hasMore = true
-		}
-	}
-
-	response := &openai.ListResponse[*ConversationItemResponse]{
-		Object:  "list",
-		Data:    functional.Map(itemEntities, domainToConversationItemResponse),
-		FirstID: firstId,
-		LastID:  lastId,
-		HasMore: hasMore,
-		Total:   int64(len(itemEntities)),
-	}
-
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary Get an item from a conversation
-// @Description Retrieves a specific item from a conversation with full content details
-// @Tags Conversations API
-// @Security BearerAuth
-// @Produce json
-// @Param conversation_id path string true "Conversation ID"
-// @Param item_id path string true "Item ID"
-// @Success 200 {object} ConversationItemResponse "Item details"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation or item not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations/{conversation_id}/items/{item_id} [get]
-func (api *ConversationAPI) GetItemHandler(reqCtx *gin.Context) {
-	item, ok := conversation.GetConversationItemFromContext(reqCtx)
-	if !ok {
-		return
-	}
-
-	response := domainToConversationItemResponse(item)
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// @Summary Delete an item from a conversation
-// @Description Deletes a specific item from a conversation and returns the deleted item details
-// @Tags Conversations API
-// @Security BearerAuth
-// @Produce json
-// @Param conversation_id path string true "Conversation ID"
-// @Param item_id path string true "Item ID"
-// @Success 200 {object} ConversationItemResponse "Deleted item details"
-// @Failure 400 {object} responses.ErrorResponse "Bad Request - Deletion failed"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Conversation or item not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/conversations/{conversation_id}/items/{item_id} [delete]
-func (api *ConversationAPI) DeleteItemHandler(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	conv, ok := conversation.GetConversationFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "8fcd7439-a81c-48d3-9208-33afaa7146ac",
-		})
-		return
-	}
-	item, ok := conversation.GetConversationItemFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "8a03dd04-0a8d-40b5-8664-01ddfb8bcb48",
-		})
-		return
-	}
-
-	// Use efficient deletion with item public ID instead of loading all items
-	_, err := api.conversationService.DeleteItemWithConversation(ctx, conv, item)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "019952d2-0f0f-730c-9abc-3fecc1db55c2",
-			ErrorInstance: err,
-		})
-		return
-	}
-
-	// OpenAI: Returns the updated Conversation object.
-	response := domainToExtendedConversationResponse(conv)
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-func NewItemFromConversationItemRequest(itemReq ConversationItemRequest) (*conversation.Item, bool) {
-	ok := conversation.ValidateItemType(string(itemReq.Type))
-	if !ok {
-		return nil, false
-	}
-	itemType := conversation.ItemType(itemReq.Type)
-
-	var role *conversation.ItemRole
-	if itemReq.Role != "" {
-		ok := conversation.ValidateItemRole(string(itemReq.Role))
-		if !ok {
-			return nil, false
-		}
-		r := conversation.ItemRole(itemReq.Role)
-		role = &r
-	}
-
-	content := make([]conversation.Content, len(itemReq.Content))
-	for j, c := range itemReq.Content {
-		content[j] = conversation.Content{
-			Type: c.Type,
-			Text: &conversation.Text{
-				Value: c.Text,
-			},
-		}
-	}
-
-	return &conversation.Item{
-		Type:    itemType,
-		Role:    role,
-		Content: content,
-	}, true
-}
-
-func domainToExtendedConversationResponse(entity *conversation.Conversation) *ExtendedConversationResponse {
-	metadata := entity.Metadata
-	if metadata == nil {
-		metadata = make(map[string]string)
-	}
-	return &ExtendedConversationResponse{
-		ID:        entity.PublicID,
-		Object:    "conversation",
-		Title:     ptr.FromString(entity.Title),
-		CreatedAt: entity.CreatedAt.Unix(),
-		Metadata:  metadata,
-	}
-}
-
-func domainToDeletedConversationResponse(entity *conversation.Conversation) *DeletedConversationResponse {
-	return &DeletedConversationResponse{
-		ID:      entity.PublicID,
-		Object:  "conversation.deleted",
-		Deleted: true,
-	}
-}
-
-func domainToConversationItemResponse(entity *conversation.Item) *ConversationItemResponse {
-	response := &ConversationItemResponse{
-		ID:        entity.PublicID,
-		Object:    "conversation.item",
-		Type:      string(entity.Type),
-		Status:    conversation.ItemStatusToStringPtr(entity.Status),
-		CreatedAt: entity.CreatedAt.Unix(),
-		Content:   domainToContentResponse(entity.Content),
-	}
-
-	if entity.Role != nil {
-		role := string(*entity.Role)
-		response.Role = &role
-	}
-
-	return response
-}
-
-func domainToContentResponse(content []conversation.Content) []ContentResponse {
-	if len(content) == 0 {
-		return nil
-	}
-
-	result := make([]ContentResponse, len(content))
-	for i, c := range content {
-		contentResp := ContentResponse{
-			Type: c.Type,
-		}
-
-		// Handle finish reason (available for all content types)
-		if c.FinishReason != nil {
-			contentResp.FinishReason = c.FinishReason
-		}
-
-		// Handle reasoning content (available for all content types)
-		if c.ReasoningContent != nil {
-			contentResp.ReasoningContent = c.ReasoningContent
-		}
-
-		// Handle different content types
-		switch c.Type {
-		case "text":
-			if c.Text != nil {
-				contentResp.Text = &TextResponse{
-					Value: c.Text.Value,
-				}
-			}
-		case "input_text":
-			if c.InputText != nil {
-				contentResp.InputText = c.InputText
-			}
-		case "output_text":
-			if c.OutputText != nil {
-				contentResp.OutputText = &OutputTextResponse{
-					Text:        c.OutputText.Text,
-					Annotations: domainToAnnotationResponse(c.OutputText.Annotations),
-				}
-			}
-		case "image":
-			if c.Image != nil {
-				contentResp.Image = &ImageContentResponse{
-					URL:    c.Image.URL,
-					FileID: c.Image.FileID,
-					Detail: c.Image.Detail,
-				}
-			}
-		case "file":
-			if c.File != nil {
-				contentResp.File = &FileContentResponse{
-					FileID:   c.File.FileID,
-					Name:     c.File.Name,
-					MimeType: c.File.MimeType,
-					Size:     c.File.Size,
-				}
-			}
-		}
-
-		result[i] = contentResp
-	}
-	return result
-}
-
-func domainToAnnotationResponse(annotations []conversation.Annotation) []AnnotationResponse {
-	if len(annotations) == 0 {
-		return nil
-	}
-
-	result := make([]AnnotationResponse, len(annotations))
-	for i, a := range annotations {
-		result[i] = AnnotationResponse{
-			Type:       a.Type,
-			Text:       a.Text,
-			FileID:     a.FileID,
-			URL:        a.URL,
-			StartIndex: a.StartIndex,
-			EndIndex:   a.EndIndex,
-			Index:      a.Index,
-		}
-	}
-	return result
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/mcp/mcp.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/mcp/mcp.go
deleted file mode 100644
index 48f136a2..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/mcp/mcp.go
+++ /dev/null
@@ -1,97 +0,0 @@
-package mcp
-
-import (
-	"bytes"
-	"encoding/json"
-	"io"
-
-	"github.com/gin-gonic/gin"
-	mcpserver "github.com/mark3labs/mcp-go/server"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	mcpimpl "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp/mcp_impl"
-)
-
-func MCPMethodGuard(allowedMethods map[string]bool) gin.HandlerFunc {
-	return func(c *gin.Context) {
-		bodyBytes, err := io.ReadAll(c.Request.Body)
-		if err != nil {
-			c.Abort()
-			return
-		}
-		c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
-		var req struct {
-			Method string `json:"method"`
-		}
-
-		if err := json.Unmarshal(bodyBytes, &req); err != nil {
-			c.Abort()
-			return
-		}
-
-		if !allowedMethods[req.Method] {
-			c.Abort()
-			return
-		}
-		c.Next()
-	}
-}
-
-type MCPAPI struct {
-	SerperMCP   *mcpimpl.SerperMCP
-	MCPServer   *mcpserver.MCPServer
-	authService *auth.AuthService
-}
-
-func NewMCPAPI(serperMCP *mcpimpl.SerperMCP, authService *auth.AuthService) *MCPAPI {
-	mcpSrv := mcpserver.NewMCPServer("demo", "0.1.0",
-		mcpserver.WithToolCapabilities(true),
-		mcpserver.WithRecovery(),
-	)
-	return &MCPAPI{
-		SerperMCP:   serperMCP,
-		MCPServer:   mcpSrv,
-		authService: authService,
-	}
-}
-
-// MCPStream
-// @Summary MCP streamable endpoint
-// @Description Handles Model Context Protocol (MCP) requests over an HTTP stream. The response is sent as a continuous stream of data.
-// @Tags Chat Completions API
-// @Security BearerAuth
-// @Accept json
-// @Produce text/event-stream
-// @Param request body any true "MCP request payload"
-// @Success 200 {string} string "Streamed response (SSE or chunked transfer)"
-// @Router /v1/mcp [post]
-func (mcpAPI *MCPAPI) RegisterRouter(router *gin.RouterGroup) {
-	mcpAPI.SerperMCP.RegisterTool(mcpAPI.MCPServer)
-
-	mcpHttpHandler := mcpserver.NewStreamableHTTPServer(mcpAPI.MCPServer)
-	router.Any(
-		"/mcp",
-		mcpAPI.authService.AppUserAuthMiddleware(),
-		MCPMethodGuard(map[string]bool{
-			// Initialization / handshake
-			"initialize":                true,
-			"notifications/initialized": true,
-			"ping":                      true,
-
-			// Tools
-			"tools/list": true,
-			"tools/call": true,
-
-			// Prompts
-			"prompts/list": true,
-			"prompts/call": true,
-
-			// Resources
-			"resources/list":           true,
-			"resources/templates/list": true,
-			"resources/read":           true,
-
-			// If you support subscription:
-			"resources/subscribe": true,
-		}),
-		gin.WrapH(mcpHttpHandler))
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/mcp/mcp_impl/serper.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/mcp/mcp_impl/serper.go
deleted file mode 100644
index f211d979..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/mcp/mcp_impl/serper.go
+++ /dev/null
@@ -1,108 +0,0 @@
-package mcpimpl
-
-import (
-	"context"
-	"encoding/json"
-
-	"github.com/mark3labs/mcp-go/mcp"
-	mcpserver "github.com/mark3labs/mcp-go/server"
-	mcpservice "menlo.ai/jan-api-gateway/app/domain/mcp"
-	"menlo.ai/jan-api-gateway/app/domain/mcp/serpermcp"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-type SerperMCP struct {
-	SerperService *serpermcp.SerperService
-}
-
-func NewSerperMCP(serperService *serpermcp.SerperService) *SerperMCP {
-	return &SerperMCP{
-		SerperService: serperService,
-	}
-}
-
-type SerperSearchArgs struct {
-	Q           string  `json:"q" jsonschema:"required,description=Search query string"`
-	GL          *string `json:"gl,omitempty" jsonschema:"description=Optional region code for search results in ISO 3166-1 alpha-2 format (e.g., 'us')"`
-	HL          *string `json:"hl,omitempty" jsonschema:"description=Optional language code for search results in ISO 639-1 format (e.g., 'en')"`
-	Location    *string `json:"location,omitempty" jsonschema:"description=Optional location for search results (e.g., 'SoHo, New York, United States', 'California, United States')"`
-	Num         *int    `json:"num,omitempty" jsonschema:"description=Number of results to return (default: 10)"`
-	Tbs         *string `json:"tbs,omitempty" jsonschema:"description=Time-based search filter ('qdr:h' for past hour, 'qdr:d' for past day, 'qdr:w' for past week, 'qdr:m' for past month, 'qdr:y' for past year)"`
-	Page        *int    `json:"page,omitempty" jsonschema:"description=Page number of results to return (default: 1)"`
-	Autocorrect *bool   `json:"autocorrect,omitempty" jsonschema:"description=Whether to autocorrect spelling in query"`
-}
-
-type SerperScrapeArgs struct {
-	Url             string `json:"url" jsonschema:"required,description=The URL of webpage to scrape"`
-	IncludeMarkdown *bool  `json:"includeMarkdown,omitempty" jsonschema:"description=Whether to include markdown content"`
-}
-
-func (s *SerperMCP) RegisterTool(handler *mcpserver.MCPServer) {
-	handler.AddTool(
-		mcp.NewTool("google_search",
-			mcpservice.ReflectToMCPOptions(
-				"Tool to perform web searches via Serper API and retrieve rich results. It is able to retrieve organic search results, people also ask, related searches, and knowledge graph.",
-				SerperSearchArgs{},
-			)...,
-		),
-		func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) {
-			q, err := req.RequireString("q")
-			if err != nil {
-				return nil, err
-			}
-			searchReq := serpermcp.SearchRequest{
-				Q:           q,
-				GL:          ptr.ToString(req.GetString("gl", "us")),
-				Num:         ptr.ToInt(req.GetInt("num", 10)),
-				Page:        ptr.ToInt(req.GetInt("page", 1)),
-				Autocorrect: ptr.ToBool(req.GetBool("autocorrect", true)),
-			}
-			hl := req.GetString("hl", "")
-			if hl != "" {
-				searchReq.HL = &hl
-			}
-			location := req.GetString("location", "")
-			if location != "" {
-				searchReq.Location = &location
-			}
-
-			searchResp, err := s.SerperService.Search(ctx, searchReq)
-			if err != nil {
-				return nil, err
-			}
-			jsonBytes, err := json.Marshal(searchResp)
-			if err != nil {
-				return nil, err
-			}
-
-			return mcp.NewToolResultText(string(jsonBytes)), nil
-		},
-	)
-	handler.AddTool(
-		mcp.NewTool("scrape",
-			mcpservice.ReflectToMCPOptions(
-				"This is a tool to scrape a webpage and retrieve the text, with an option to provide the output in Markdown format.",
-				SerperScrapeArgs{},
-			)...,
-		),
-		func(ctx context.Context, req mcp.CallToolRequest) (*mcp.CallToolResult, error) {
-			url, err := req.RequireString("url")
-			if err != nil {
-				return nil, err
-			}
-			scrapeReq := serpermcp.FetchWebpageRequest{
-				Url:             url,
-				IncludeMarkdown: ptr.ToBool(req.GetBool("includeMarkdown", false)),
-			}
-			searchResp, err := s.SerperService.FetchWebpage(ctx, scrapeReq)
-			if err != nil {
-				return nil, err
-			}
-			jsonBytes, err := json.Marshal(searchResp)
-			if err != nil {
-				return nil, err
-			}
-			return mcp.NewToolResultText(string(jsonBytes)), nil
-		},
-	)
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/models.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/models.go
deleted file mode 100644
index 37c4d624..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/models.go
+++ /dev/null
@@ -1,62 +0,0 @@
-package v1
-
-import (
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	inferencemodel "menlo.ai/jan-api-gateway/app/domain/inference_model"
-	inferencemodelregistry "menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type ModelAPI struct {
-	registry *inferencemodelregistry.InferenceModelRegistry
-}
-
-func NewModelAPI(registry *inferencemodelregistry.InferenceModelRegistry) *ModelAPI {
-	return &ModelAPI{
-		registry: registry,
-	}
-}
-
-func (modelAPI *ModelAPI) RegisterRouter(router *gin.RouterGroup) {
-	router.GET("models", modelAPI.GetModels)
-}
-
-// ListModels
-// @Summary List available models
-// @Description Retrieves a list of available models that can be used for chat completions or other tasks.
-// @Tags Chat Completions API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Success 200 {object} ModelsResponse "Successful response"
-// @Router /v1/models [get]
-func (modelAPI *ModelAPI) GetModels(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	models := modelAPI.registry.ListModels(ctx)
-
-	reqCtx.JSON(http.StatusOK, ModelsResponse{
-		Object: "list",
-		Data: functional.Map(models, func(model inferencemodel.Model) Model {
-			return Model{
-				ID:      model.ID,
-				Object:  model.Object,
-				Created: model.Created,
-				OwnedBy: model.OwnedBy,
-			}
-		}),
-	})
-}
-
-type Model struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Created int    `json:"created"`
-	OwnedBy string `json:"owned_by"`
-}
-
-type ModelsResponse struct {
-	Object string  `json:"object"`
-	Data   []Model `json:"data"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/admin_api_keys.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/admin_api_keys.go
deleted file mode 100644
index 64301c71..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/admin_api_keys.go
+++ /dev/null
@@ -1,341 +0,0 @@
-package organization
-
-import (
-	"fmt"
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses/openai"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-type AdminApiKeyAPI struct {
-	organizationService *organization.OrganizationService
-	authService         *auth.AuthService
-	apiKeyService       *apikey.ApiKeyService
-	userService         *user.UserService
-}
-
-func NewAdminApiKeyAPI(
-	organizationService *organization.OrganizationService,
-	authService *auth.AuthService,
-	apiKeyService *apikey.ApiKeyService,
-	userService *user.UserService) *AdminApiKeyAPI {
-	return &AdminApiKeyAPI{
-		organizationService,
-		authService,
-		apiKeyService,
-		userService,
-	}
-}
-
-func (adminApiKeyAPI *AdminApiKeyAPI) RegisterRouter(router *gin.RouterGroup) {
-	permissionAll := adminApiKeyAPI.authService.OrganizationMemberRoleMiddleware(auth.OrganizationMemberRuleAll)
-	permissionOwnerOnly := adminApiKeyAPI.authService.OrganizationMemberRoleMiddleware(auth.OrganizationMemberRuleOwnerOnly)
-	adminApiKeyRouter := router.Group("/admin_api_keys",
-		adminApiKeyAPI.authService.AdminUserAuthMiddleware(),
-		adminApiKeyAPI.authService.RegisteredUserMiddleware(),
-	)
-	adminApiKeyRouter.GET("",
-		permissionAll,
-		adminApiKeyAPI.GetAdminApiKeys,
-	)
-	adminApiKeyRouter.POST("",
-		permissionOwnerOnly,
-		adminApiKeyAPI.CreateAdminApiKey,
-	)
-
-	adminKeyPath := fmt.Sprintf("/:%s", auth.ApikeyContextKeyPublicID)
-	adminApiKeyIdRoute := adminApiKeyRouter.Group(adminKeyPath, adminApiKeyAPI.authService.GetAdminApiKeyFromQuery())
-	adminApiKeyIdRoute.GET("",
-		permissionAll,
-		adminApiKeyAPI.GetAdminApiKey,
-	)
-	adminApiKeyIdRoute.DELETE("",
-		permissionOwnerOnly,
-		adminApiKeyAPI.DeleteAdminApiKey,
-	)
-}
-
-// GetAdminApiKey godoc
-// @Summary Get Admin API Key
-// @Description Retrieves a specific admin API key by its ID.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param id path string true "ID of the admin API key"
-// @Success 200 {object} OrganizationAdminAPIKeyResponse "Successfully retrieved the admin API key"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 404 {object} responses.ErrorResponse "Not Found - API key with the given ID does not exist or does not belong to the organization"
-// @Router /v1/organization/admin_api_keys/{id} [get]
-func (api *AdminApiKeyAPI) GetAdminApiKey(reqCtx *gin.Context) {
-	entity, ok := auth.GetAdminKeyFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	reqCtx.JSON(http.StatusOK, domainToOrganizationAdminAPIKeyResponse(entity))
-}
-
-// GetAdminApiKeys godoc
-// @Summary List Admin API Keys
-// @Description Retrieves a paginated list of all admin API keys for the authenticated organization.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param limit query int false "The maximum number of items to return" default(20)
-// @Param after query string false "A cursor for use in pagination. The ID of the last object from the previous page"
-// @Success 200 {object} AdminApiKeyListResponse "Successfully retrieved the list of admin API keys"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 500 {object} responses.ErrorResponse "Internal Server Error"
-// @Router /v1/organization/admin_api_keys [get]
-func (api *AdminApiKeyAPI) GetAdminApiKeys(reqCtx *gin.Context) {
-	apikeyService := api.apiKeyService
-	ctx := reqCtx.Request.Context()
-	orgEntity, ok := auth.GetAdminOrganizationFromContext(reqCtx)
-	if !ok {
-		return
-	}
-
-	pagination, err := query.GetCursorPaginationFromQuery(reqCtx, func(lastID string) (*uint, error) {
-		apiKey, err := api.apiKeyService.FindOneByFilter(ctx, apikey.ApiKeyFilter{
-			PublicID: &lastID,
-		})
-		if err != nil {
-			return nil, err
-		}
-		return &apiKey.ID, nil
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "5f89e23d-d4a0-45ce-ba43-ae2a9be0ca64",
-			ErrorInstance: err,
-		})
-		return
-	}
-
-	// Fetch all API keys for the organization
-	filter := apikey.ApiKeyFilter{
-		OrganizationID: &orgEntity.ID,
-	}
-	apiKeys, err := apikeyService.Find(ctx, filter, pagination)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "32d59d1a-2eff-4b6f-a198-30a4fa9ff871",
-			Error: "failed to retrieve API keys",
-		})
-		return
-	}
-	total, err := apikeyService.Count(ctx, filter)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "6d067ca3-c891-4343-b2e3-eb430278dd28",
-		})
-		return
-	}
-
-	var firstId *string
-	var lastId *string
-	hasMore := false
-	if len(apiKeys) > 0 {
-		firstId = &apiKeys[0].PublicID
-		lastId = &apiKeys[len(apiKeys)-1].PublicID
-		moreRecords, err := apikeyService.Find(ctx, filter, &query.Pagination{
-			Order: pagination.Order,
-			Limit: ptr.ToInt(1),
-			After: &apiKeys[len(apiKeys)-1].ID,
-		})
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:  "814c5eb7-e2e3-4476-9ae4-d8222063654a",
-				Error: "failed to retrieve API keys",
-			})
-			return
-		}
-		if len(moreRecords) != 0 {
-			hasMore = true
-		}
-	}
-	// TODO; owner
-	result := functional.Map(apiKeys, func(apikey *apikey.ApiKey) *OrganizationAdminAPIKeyResponse {
-		return domainToOrganizationAdminAPIKeyResponse(apikey)
-	})
-
-	response := openai.ListResponse[*OrganizationAdminAPIKeyResponse]{
-		Object:  "list",
-		Data:    result,
-		FirstID: firstId,
-		LastID:  lastId,
-		HasMore: hasMore,
-		Total:   total,
-	}
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// DeleteAdminApiKey godoc
-// @Summary Delete Admin API Key
-// @Description Deletes an admin API key by its ID.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param id path string true "ID of the admin API key to delete"
-// @Success 200 {object} AdminAPIKeyDeletedResponse "Successfully deleted the admin API key"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 404 {object} responses.ErrorResponse "Not Found - API key with the given ID does not exist or does not belong to the organization"
-// @Router /v1/organization/admin_api_keys/{id} [delete]
-func (api *AdminApiKeyAPI) DeleteAdminApiKey(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	entity, ok := auth.GetAdminKeyFromContext(reqCtx)
-	if !ok {
-		return
-	}
-
-	err := api.apiKeyService.Delete(ctx, entity)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "c9a103b2-985c-44b7-9ccd-38e914a2c82b",
-			Error: "invalid or missing API key",
-		})
-		return
-	}
-	reqCtx.JSON(http.StatusOK, AdminAPIKeyDeletedResponse{
-		ID:      entity.PublicID,
-		Object:  "organization.admin_api_key.deleted",
-		Deleted: true,
-	})
-}
-
-// CreateAdminApiKey creates a new admin API key for an organization.
-// @Summary Create Admin API Key
-// @Description Creates a new admin API key for an organization. Requires a valid admin API key in the Authorization header.
-// @Tags Administration API
-// @Accept json
-// @Produce json
-// @Security BearerAuth
-// @Param body body CreateOrganizationAdminAPIKeyRequest true "API key creation request"
-// @Success 200 {object} OrganizationAdminAPIKeyResponse "Successfully created admin API key"
-// @Failure 400 {object} responses.ErrorResponse "Bad request - invalid payload"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Router /v1/organization/admin_api_keys [post]
-func (api *AdminApiKeyAPI) CreateAdminApiKey(reqCtx *gin.Context) {
-	apikeyService := api.apiKeyService
-	ctx := reqCtx.Request.Context()
-	user, ok := auth.GetUserFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	organizationEntity, ok := auth.GetAdminOrganizationFromContext(reqCtx)
-	if !ok {
-		return
-	}
-
-	var requestPayload CreateOrganizationAdminAPIKeyRequest
-	if err := reqCtx.ShouldBindJSON(&requestPayload); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "b6cb35be-8a53-478d-95d1-5e1f64f35c09",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	key, hash, err := apikeyService.GenerateKeyAndHash(ctx, apikey.ApikeyTypeAdmin)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code:  "e00e6ab3-1b43-490e-90df-aae030697f74",
-			Error: err.Error(),
-		})
-		return
-	}
-	apikeyEntity, err := apikeyService.CreateApiKey(ctx, &apikey.ApiKey{
-		KeyHash:        hash,
-		PlaintextHint:  fmt.Sprintf("sk-..%s", key[len(key)-3:]),
-		Description:    requestPayload.Name,
-		Enabled:        true,
-		ApikeyType:     string(apikey.ApikeyTypeAdmin),
-		OwnerPublicID:  user.PublicID,
-		OrganizationID: &organizationEntity.ID,
-		Permissions:    "{}",
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code:  "32d59d1a-2eff-4b6f-a198-30a4fa9ff871",
-			Error: err.Error(),
-		})
-		return
-	}
-	response := domainToOrganizationAdminAPIKeyResponse(apikeyEntity)
-	response.Owner = userToOwnerResponse(user)
-	response.Value = key
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-func domainToOrganizationAdminAPIKeyResponse(entity *apikey.ApiKey) *OrganizationAdminAPIKeyResponse {
-	var lastUsedAt *int64
-	if entity.LastUsedAt != nil {
-		lastUsedAt = ptr.ToInt64(entity.LastUsedAt.Unix())
-	}
-	return &OrganizationAdminAPIKeyResponse{
-		Object:        string(openai.ObjectKeyAdminApiKey),
-		ID:            entity.PublicID,
-		Name:          entity.Description,
-		RedactedValue: entity.PlaintextHint,
-		CreatedAt:     entity.CreatedAt.Unix(),
-		LastUsedAt:    lastUsedAt,
-	}
-}
-
-func userToOwnerResponse(user *user.User) Owner {
-	return Owner{
-		Type:      string(openai.ApikeyTypeUser),
-		Object:    string(openai.OwnerObjectOrganizationUser),
-		ID:        user.PublicID,
-		Name:      user.Name,
-		CreatedAt: user.CreatedAt.Unix(),
-		Role:      string(openai.OwnerRoleOwner),
-	}
-}
-
-// CreateOrganizationAdminAPIKeyRequest defines the request payload for creating an admin API key.
-type CreateOrganizationAdminAPIKeyRequest struct {
-	Name string `json:"name" binding:"required" example:"My Admin API Key" description:"The name of the API key to be created"`
-}
-
-// OrganizationAdminAPIKeyResponse defines the response structure for a created admin API key.
-type OrganizationAdminAPIKeyResponse struct {
-	Object        string `json:"object" example:"api_key" description:"The type of the object, typically 'api_key'"`
-	ID            string `json:"id" example:"key_1234567890" description:"Unique identifier for the API key"`
-	Name          string `json:"name" example:"My Admin API Key" description:"The name of the API key"`
-	RedactedValue string `json:"redacted_value" example:"sk-...abcd" description:"A redacted version of the API key for display purposes"`
-	CreatedAt     int64  `json:"created_at" example:"1698765432" description:"Unix timestamp when the API key was created"`
-	LastUsedAt    *int64 `json:"last_used_at,omitempty" example:"1698765432" description:"Unix timestamp when the API key was last used, if available"`
-	Owner         Owner  `json:"owner" description:"Details of the owner of the API key"`
-	Value         string `json:"value,omitempty" example:"sk-abcdef1234567890" description:"The full API key value, included only in the response upon creation"`
-}
-
-// Owner defines the structure for the owner of an API key.
-type Owner struct {
-	Type      string `json:"type" example:"user" description:"The type of the owner, e.g., 'user'"`
-	Object    string `json:"object" example:"user" description:"The type of the object, typically 'user'"`
-	ID        string `json:"id" example:"user_1234567890" description:"Unique identifier for the owner"`
-	Name      string `json:"name" example:"John Doe" description:"The name of the owner"`
-	CreatedAt int64  `json:"created_at" example:"1698765432" description:"Unix timestamp when the owner was created"`
-	Role      string `json:"role" example:"admin" description:"The role of the owner within the organization"`
-}
-
-type AdminApiKeyListResponse struct {
-	Object  string                            `json:"object" example:"list" description:"The type of the object, always 'list'"`
-	Data    []OrganizationAdminAPIKeyResponse `json:"data" description:"Array of admin API keys"`
-	FirstID *string                           `json:"first_id,omitempty"`
-	LastID  *string                           `json:"last_id,omitempty"`
-	HasMore bool                              `json:"has_more"`
-}
-
-type AdminAPIKeyDeletedResponse struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Deleted bool   `json:"deleted"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/invites/invites_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/invites/invites_route.go
deleted file mode 100644
index ff4ee926..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/invites/invites_route.go
+++ /dev/null
@@ -1,502 +0,0 @@
-package invites
-
-import (
-	"encoding/json"
-	"fmt"
-	"net/http"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"github.com/google/uuid"
-
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/invite"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/project"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses/openai"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-type InvitesRoute struct {
-	inviteService       *invite.InviteService
-	projectService      *project.ProjectService
-	organizationService *organization.OrganizationService
-	authService         *auth.AuthService
-}
-
-func NewInvitesRoute(
-	inviteService *invite.InviteService,
-	projectService *project.ProjectService,
-	organizationService *organization.OrganizationService,
-	authService *auth.AuthService,
-) *InvitesRoute {
-	return &InvitesRoute{
-		inviteService,
-		projectService,
-		organizationService,
-		authService,
-	}
-}
-
-type InviteResponse struct {
-	Object     string     `json:"object"`
-	ID         string     `json:"id"`
-	Email      string     `json:"email"`
-	Role       string     `json:"role"`
-	Status     string     `json:"status"`
-	InvitedAt  time.Time  `json:"invited_at"`
-	ExpiresAt  time.Time  `json:"expires_at"`
-	AcceptedAt *time.Time `json:"accepted_at,omitempty"`
-	Projects   []InviteProject
-}
-
-func (inviteRoute *InvitesRoute) RegisterRouter(router gin.IRouter) {
-	// public router
-	router.POST("/invites/verification", inviteRoute.VerifyInvites)
-
-	permissionAll := inviteRoute.authService.OrganizationMemberRoleMiddleware(auth.OrganizationMemberRuleAll)
-	permissionOwnerOnly := inviteRoute.authService.OrganizationMemberRoleMiddleware(auth.OrganizationMemberRuleOwnerOnly)
-	inviteRouter := router.Group(
-		"/invites",
-		inviteRoute.authService.AdminUserAuthMiddleware(),
-		inviteRoute.authService.RegisteredUserMiddleware(),
-	)
-	inviteRouter.POST("",
-		permissionOwnerOnly,
-		inviteRoute.CreateInvite,
-	)
-	inviteRouter.GET(
-		"",
-		permissionAll,
-		inviteRoute.ListInvites,
-	)
-	inviteIdRoute := inviteRouter.Group(fmt.Sprintf("/:%s", auth.InviteContextKeyPublicID), inviteRoute.authService.AdminInviteMiddleware())
-	inviteIdRoute.GET("",
-		permissionAll,
-		inviteRoute.RetrieveInvite)
-	inviteIdRoute.DELETE("",
-		permissionOwnerOnly,
-		inviteRoute.DeleteInvite,
-	)
-}
-
-// ListInvites godoc
-// @Summary List Organization Invites
-// @Description Retrieves a paginated list of invites for the current organization.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param after query string false "Cursor pointing to a record after which to fetch results"
-// @Param limit query int false "Maximum number of results to return"
-// @Success 200 {object} openai.ListResponse[InviteResponse] "Successfully retrieved list of invites"
-// @Failure 400 {object} responses.ErrorResponse "Invalid or missing query parameter"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/organization/invites [get]
-func (api *InvitesRoute) ListInvites(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	orgEntity, ok := auth.GetAdminOrganizationFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	pagination, err := query.GetCursorPaginationFromQuery(reqCtx, func(after string) (*uint, error) {
-		entity, err := api.inviteService.FindOne(ctx, invite.InvitesFilter{
-			PublicID: &after,
-		})
-		if err != nil {
-			return nil, err
-		}
-		if entity == nil {
-			return nil, fmt.Errorf("record not found")
-		}
-		return &entity.ID, nil
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "4434f5ed-89f4-4a62-9fef-8ca53336dcda",
-			Error: "invalid or missing query parameter",
-		})
-		return
-	}
-
-	filter := invite.InvitesFilter{
-		OrganizationID: &orgEntity.ID,
-	}
-	inviteEntities, err := api.inviteService.FindInvites(ctx, filter, pagination)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "1f79e6aa-a25b-44af-bf9e-b9fbb6e1ceab",
-		})
-		return
-	}
-	pageCursor, err := responses.BuildCursorPage(
-		inviteEntities,
-		func(t *invite.Invite) *string {
-			return &t.PublicID
-		},
-		func() ([]*invite.Invite, error) {
-			return api.inviteService.FindInvites(ctx, filter, &query.Pagination{
-				Order: pagination.Order,
-				Limit: ptr.ToInt(1),
-				After: &inviteEntities[len(inviteEntities)-1].ID,
-			})
-		},
-		func() (int64, error) {
-			return api.inviteService.CountInvites(ctx, filter)
-		},
-	)
-	if err != nil {
-		reqCtx.JSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "59c1efc5-d6a1-4da1-baf8-d7ed0497e088",
-			ErrorInstance: err,
-		})
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, openai.ListResponse[InviteResponse]{
-		Object:  "list",
-		LastID:  pageCursor.LastID,
-		FirstID: pageCursor.FirstID,
-		HasMore: pageCursor.HasMore,
-		Total:   pageCursor.Total,
-		Data:    functional.Map(inviteEntities, convertInviteEntityToResponse),
-	})
-}
-
-type InviteProject struct {
-	ID   string `json:"id"`
-	Role string `json:"role"`
-}
-
-type CreateInviteUserRequest struct {
-	Email    string          `json:"email"`
-	Role     string          `json:"role"`
-	Projects []InviteProject `json:"projects,omitempty"`
-}
-
-// CreateInvite godoc
-// @Summary Create Invite
-// @Description Creates a new invite for a user to join the organization.
-// @Tags Administration API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param invite body CreateInviteUserRequest true "Invite request payload"
-// @Success 200 {object} InviteResponse "Successfully created invite"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request payload or user already exists"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/organization/invites [post]
-func (api *InvitesRoute) CreateInvite(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	userEntity, ok := auth.GetUserFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "0c781396-68a9-4177-97a8-342af883f7c3",
-		})
-		return
-	}
-	orgEntity, ok := auth.GetAdminOrganizationFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "470ad74e-f9bc-4e8d-b42b-9d506ff11a0a",
-		})
-		return
-	}
-	var requestPayload CreateInviteUserRequest
-	if err := reqCtx.ShouldBindJSON(&requestPayload); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "470ad74e-f9bc-4e8d-b42b-9d506ff11a0a",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	exists, err := api.authService.HasOrganizationUser(ctx, requestPayload.Email, orgEntity.ID)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "398c1de0-1a9f-47e2-8f56-c06e4510f884",
-			Error: err.Error(),
-		})
-		return
-	}
-	if exists {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "ac130c69-e9fd-4dfc-b246-4c6abfa44bbe",
-		})
-		return
-	}
-	projectIDs := functional.Map(requestPayload.Projects, func(proj InviteProject) string {
-		return proj.ID
-	})
-
-	if len(projectIDs) > 0 {
-		projects, err := api.projectService.Find(ctx, project.ProjectFilter{
-			PublicIDs: &projectIDs,
-		}, nil)
-
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code:          "ea649ae7-d82c-48b2-9ef1-626c139f180d",
-				ErrorInstance: err,
-			})
-			return
-		}
-		if len(projects) != len(projectIDs) {
-			reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-				Code: "a08c5ee3-651e-4465-a7c9-5009fec9d5c2",
-			})
-			return
-		}
-	}
-
-	projectsStr, err := json.Marshal(requestPayload.Projects)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "f7957c66-77d6-494f-9ee9-8fa54408a604",
-		})
-		return
-	}
-
-	inviteEntity, err := api.inviteService.CreateInviteWithPublicID(ctx, &invite.Invite{
-		Email:          requestPayload.Email,
-		Role:           requestPayload.Role,
-		Status:         string(invite.InviteStatusPending),
-		OrganizationID: orgEntity.ID,
-		Projects:       string(projectsStr),
-		Secrets:        ptr.ToString(uuid.New().String()),
-	})
-
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "f7957c66-77d6-494f-9ee9-8fa54408a604",
-		})
-		return
-	}
-
-	err = api.inviteService.SendInviteEmail(ctx, invite.EmailMetadata{
-		InviterEmail: userEntity.Email,
-		OrgName:      orgEntity.Name,
-		OrgPublicID:  orgEntity.PublicID,
-		InviteLink: fmt.Sprintf(
-			"%s?code=%s",
-			environment_variables.EnvironmentVariables.INVITE_REDIRECT_URL,
-			*inviteEntity.Secrets,
-		),
-	}, requestPayload.Email)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "8432e05b-bc3e-4432-b3cb-ade6353edacc",
-			ErrorInstance: err,
-		})
-		return
-	}
-	reqCtx.JSON(http.StatusOK, convertInviteEntityToResponse(inviteEntity))
-}
-
-type VerifyInviteUserRequest struct {
-	Code string `json:"code"`
-}
-
-// VerifyInvites godoc
-// @Summary Verify Invite
-// @Description Verifies an invitation code, checks expiration, registers the user if necessary, and assigns project memberships.
-// @Tags Administration API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param verification body VerifyInviteUserRequest true "Verification request payload"
-// @Success 200 {object} InviteResponse "Successfully verified invite"
-// @Failure 400 {object} responses.ErrorResponse "Invalid or expired invite code"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/organization/invites/verification [post]
-func (api *InvitesRoute) VerifyInvites(reqCtx *gin.Context) {
-	var requestPayload VerifyInviteUserRequest
-	if err := reqCtx.ShouldBindJSON(&requestPayload); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "3eec938c-5b05-407a-ae4e-24ce874710fa",
-		})
-		return
-	}
-
-	ctx := reqCtx.Request.Context()
-	inviteEntity, err := api.inviteService.FindOne(ctx, invite.InvitesFilter{
-		Secrets: &requestPayload.Code,
-	})
-	if err != nil || inviteEntity == nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "dc9e8394-2c28-40c9-93e1-7878f67992c4",
-		})
-		return
-	}
-	if inviteEntity.Status != string(invite.InviteStatusPending) {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "54fc9401-a79f-4338-93d2-3d3547ce21a9",
-			Error: "Invalid Status",
-		})
-		return
-	}
-	if inviteEntity.IsExpired() {
-		inviteEntity.Status = string(invite.InviteStatusExpired)
-		api.inviteService.UpdateInvite(ctx, inviteEntity)
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "eb940d50-60bc-498e-9512-93f741a80d7b",
-			Error: "Code Expired.",
-		})
-		return
-	}
-
-	owner, err := api.authService.FindOrRegisterUser(ctx, &user.User{
-		Name:    "Admin",
-		Email:   inviteEntity.Email,
-		Enabled: true,
-		IsGuest: false,
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "049ad2f3-99ed-44f2-8439-f3848bc20639",
-		})
-		return
-	}
-
-	err = api.organizationService.AddMember(ctx, &organization.OrganizationMember{
-		OrganizationID: inviteEntity.OrganizationID,
-		UserID:         owner.ID,
-		Role:           organization.OrganizationMemberRole(inviteEntity.Role),
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "049ad2f3-99ed-44f2-8439-f3848bc20639",
-		})
-		return
-	}
-
-	inviteProjects, err := inviteEntity.GetProjects()
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "61aec0a5-cc63-4f6a-9e50-d4c0feb1984f",
-		})
-		return
-	}
-	if len(inviteProjects) > 0 {
-		projectLookup := functional.ConvertToMap(inviteProjects, func(i invite.InviteProject) string {
-			return i.ID
-		})
-		projectPublicIDs := functional.GetMapKeys(projectLookup)
-		projects, err := api.projectService.Find(ctx, project.ProjectFilter{
-			PublicIDs: &projectPublicIDs,
-		}, nil)
-		if err != nil {
-			reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-				Code: "efa97376-0f19-4c5f-a10a-1d21304c29f2",
-			})
-			return
-		}
-		for _, projectEntity := range projects {
-			inviteProject, ok := projectLookup[projectEntity.PublicID]
-			if !ok {
-				continue
-			}
-			err := api.projectService.AddMember(ctx, &project.ProjectMember{
-				UserID:    owner.ID,
-				ProjectID: projectEntity.ID,
-				Role:      inviteProject.Role,
-			})
-			if err != nil {
-				reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-					Code: "2b0849bd-1fbf-49ae-b74e-a7cad577cc71",
-				})
-				return
-			}
-		}
-	}
-	inviteEntity.Status = string(invite.InviteStatusAccepted)
-	_, err = api.inviteService.UpdateInvite(ctx, inviteEntity)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "dd55db1c-95c9-431c-b435-c00aeb3c4a74",
-		})
-		return
-	}
-	reqCtx.JSON(http.StatusOK, convertInviteEntityToResponse(inviteEntity))
-}
-
-// RetrieveInvite godoc
-// @Summary Retrieve Invite
-// @Description Retrieves a specific invite by its ID.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param invite_id path string true "Public ID of the invite"
-// @Success 200 {object} InviteResponse "Successfully retrieved invite"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 404 {object} responses.ErrorResponse "Invite not found"
-// @Router /v1/organization/invites/{invite_id} [get]
-func (api *InvitesRoute) RetrieveInvite(reqCtx *gin.Context) {
-	inviteEntity, ok := auth.GetAdminInviteFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	reqCtx.JSON(http.StatusOK, convertInviteEntityToResponse(inviteEntity))
-}
-
-// DeleteInvite godoc
-// @Summary Delete Invite
-// @Description Deletes a specific invite by its ID. Only organization owners can delete invites.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param invite_id path string true "Public ID of the invite"
-// @Success 200 {object} openai.DeleteResponse "Successfully deleted invite"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 403 {object} responses.ErrorResponse "Forbidden - only owners can delete invites"
-// @Failure 404 {object} responses.ErrorResponse "Invite not found"
-// @Router /v1/organization/invites/{invite_id} [delete]
-func (api *InvitesRoute) DeleteInvite(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	inviteEntity, ok := auth.GetAdminInviteFromContext(reqCtx)
-	if !ok {
-		return
-	}
-
-	err := api.inviteService.DeleteInviteByID(ctx, inviteEntity.ID)
-	if err != nil {
-		reqCtx.JSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "ea8900d2-cf26-461a-a985-64760e300be1",
-		})
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, openai.DeleteResponse{
-		Object:  "organization.invite.deleted",
-		ID:      inviteEntity.PublicID,
-		Deleted: true,
-	})
-}
-
-func convertInviteEntityToResponse(entity *invite.Invite) InviteResponse {
-	projectEntities, err := entity.GetProjects()
-	if err != nil {
-		projectEntities = make([]invite.InviteProject, 0)
-	}
-	return InviteResponse{
-		Object:     "organization.invite",
-		ID:         entity.PublicID,
-		Email:      entity.Email,
-		Role:       entity.Role,
-		Status:     entity.Status,
-		InvitedAt:  entity.InvitedAt,
-		AcceptedAt: entity.AcceptedAt,
-		ExpiresAt:  entity.ExpiresAt,
-		Projects: functional.Map(projectEntities, func(item invite.InviteProject) InviteProject {
-			return InviteProject{
-				Role: item.Role,
-				ID:   item.ID,
-			}
-		}),
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/organization_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/organization_route.go
deleted file mode 100644
index 5bc74111..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/organization_route.go
+++ /dev/null
@@ -1,31 +0,0 @@
-package organization
-
-import (
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/invites"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects"
-)
-
-type OrganizationRoute struct {
-	adminApiKeyAPI *AdminApiKeyAPI
-	projectsRoute  *projects.ProjectsRoute
-	inviteRoute    *invites.InvitesRoute
-	authService    *auth.AuthService
-}
-
-func NewOrganizationRoute(adminApiKeyAPI *AdminApiKeyAPI, projectsRoute *projects.ProjectsRoute, inviteRoute *invites.InvitesRoute, authService *auth.AuthService) *OrganizationRoute {
-	return &OrganizationRoute{
-		adminApiKeyAPI,
-		projectsRoute,
-		inviteRoute,
-		authService,
-	}
-}
-
-func (organizationRoute *OrganizationRoute) RegisterRouter(router gin.IRouter) {
-	organizationRouter := router.Group("/organization")
-	organizationRoute.adminApiKeyAPI.RegisterRouter(organizationRouter)
-	organizationRoute.projectsRoute.RegisterRouter(organizationRouter)
-	organizationRoute.inviteRoute.RegisterRouter(organizationRouter)
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/projects/api_keys/api_keys_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/projects/api_keys/api_keys_route.go
deleted file mode 100644
index ad2fb305..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/projects/api_keys/api_keys_route.go
+++ /dev/null
@@ -1,226 +0,0 @@
-package apikeys
-
-import (
-	"fmt"
-	"net/http"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/project"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-)
-
-type ProjectApiKeyRoute struct {
-	organizationService *organization.OrganizationService
-	projectService      *project.ProjectService
-	apikeyService       *apikey.ApiKeyService
-	userService         *user.UserService
-}
-
-func NewProjectApiKeyRoute(
-	organizationService *organization.OrganizationService,
-	projectService *project.ProjectService,
-	apikeyService *apikey.ApiKeyService,
-	userService *user.UserService,
-) *ProjectApiKeyRoute {
-	return &ProjectApiKeyRoute{
-		organizationService,
-		projectService,
-		apikeyService,
-		userService,
-	}
-}
-
-func (api *ProjectApiKeyRoute) RegisterRouter(router gin.IRouter) {
-	apiKeyRouter := router.Group("/api_keys")
-	apiKeyRouter.POST("", api.CreateProjectApiKey)
-	apiKeyRouter.GET("", api.ListProjectApiKey)
-}
-
-// @Summary List new project API key
-// @Description List API keys for a specific project.
-// @Tags Administration API
-// @Accept json
-// @Produce json
-// @Security BearerAuth
-// @Param project_public_id path string true "Project Public ID"
-// @Success 200 {object} responses.GeneralResponse[ApiKeyResponse] "API key created successfully"
-// @Failure 400 {object} responses.ErrorResponse "Bad request, e.g., invalid payload or missing IDs"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized, e.g., invalid or missing token"
-// @Failure 404 {object} responses.ErrorResponse "Not Found, e.g., project or organization not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/organization/projects/{project_public_id}/api_keys [get]
-func (api *ProjectApiKeyRoute) ListProjectApiKey(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	_, ok := auth.GetUserFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	organizationEntity := organization.DEFAULT_ORGANIZATION
-
-	project, ok := auth.GetProjectFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "e50b3d93-f508-401a-b55e-50ffec69e087",
-		})
-		return
-	}
-
-	pagination, err := query.GetPaginationFromQuery(reqCtx)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "1f11f211-7f74-43c9-b7c3-df31fcd2cf4d",
-		})
-		return
-	}
-	filter := apikey.ApiKeyFilter{
-		OrganizationID: &organizationEntity.ID,
-		ProjectID:      &project.ID,
-	}
-	apikeyEntities, err := api.apikeyService.Find(ctx, filter, pagination)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "d6a2ac93-49e9-4d42-8487-c384209adce0",
-		})
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, responses.ListResponse[ApiKeyResponse]{
-		Status: responses.ResponseCodeOk,
-		Results: functional.Map(apikeyEntities, func(apikeyEntity *apikey.ApiKey) ApiKeyResponse {
-			return ApiKeyResponse{
-				ID:            apikeyEntity.PublicID,
-				PlaintextHint: apikeyEntity.PlaintextHint,
-				Description:   apikeyEntity.Description,
-				Enabled:       apikeyEntity.Enabled,
-				ApikeyType:    apikeyEntity.ApikeyType,
-				Permissions:   apikeyEntity.Permissions,
-				ExpiresAt:     apikeyEntity.ExpiresAt,
-				LastUsedAt:    apikeyEntity.LastUsedAt,
-			}
-		}),
-	})
-}
-
-type CreateApiKeyRequest struct {
-	Description string     `json:"description,omitempty"`
-	ExpiresAt   *time.Time `json:"expiresAt,omitempty"`
-}
-
-// @Summary Create a new project API key
-// @Description Creates a new API key for a specific project.
-// @Tags Administration API
-// @Accept json
-// @Produce json
-// @Security BearerAuth
-// @Param project_public_id path string true "Project Public ID"
-// @Param requestBody body CreateApiKeyRequest true "Request body for creating an API key"
-// @Success 200 {object} responses.GeneralResponse[ApiKeyResponse] "API key created successfully"
-// @Failure 400 {object} responses.ErrorResponse "Bad request, e.g., invalid payload or missing IDs"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized, e.g., invalid or missing token"
-// @Failure 404 {object} responses.ErrorResponse "Not Found, e.g., project or organization not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/organization/projects/{project_public_id}/api_keys [post]
-func (api *ProjectApiKeyRoute) CreateProjectApiKey(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	var req CreateApiKeyRequest
-	// Bind the JSON payload to the struct
-	if err := reqCtx.BindJSON(&req); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "fa1d1cea-7229-446f-9de8-fa254fe6733c",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	user, ok := auth.GetUserFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "a3be84ac-132e-4af1-a4ca-9f70aa49fd70",
-		})
-		return
-	}
-
-	organizationEntity, ok := auth.GetAdminOrganizationFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "fc306f46-7125-4724-8fda-468402606ac7",
-		})
-		return
-	}
-
-	projectEntity, ok := auth.GetProjectFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "e50b3d93-f508-401a-b55e-50ffec69e087",
-		})
-		return
-	}
-
-	api.projectService.FindOneMemberByFilter(ctx, project.ProjectMemberFilter{
-		
-	})
-
-	key, hash, err := api.apikeyService.GenerateKeyAndHash(ctx, apikey.ApikeyTypeProject)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "6d2d10f9-3bab-4d2d-8076-d573d829e397",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	apikeyEntity, err := api.apikeyService.CreateApiKey(ctx, &apikey.ApiKey{
-		KeyHash:        hash,
-		PlaintextHint:  fmt.Sprintf("sk-..%s", key[len(key)-3:]),
-		Description:    req.Description,
-		Enabled:        true,
-		ApikeyType:     string(apikey.ApikeyTypeProject),
-		OwnerPublicID:  user.PublicID,
-		ProjectID:      &projectEntity.ID,
-		OrganizationID: &organizationEntity.ID,
-		Permissions:    "{}",
-		ExpiresAt:      req.ExpiresAt,
-	})
-
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "d7bb0e84-72ba-41bd-8e71-8aec92ec8abe",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, responses.GeneralResponse[ApiKeyResponse]{
-		Status: responses.ResponseCodeOk,
-		Result: ApiKeyResponse{
-			ID:            apikeyEntity.PublicID,
-			Key:           key,
-			PlaintextHint: apikeyEntity.PlaintextHint,
-			Description:   apikeyEntity.Description,
-			Enabled:       apikeyEntity.Enabled,
-			ApikeyType:    apikeyEntity.ApikeyType,
-			Permissions:   apikeyEntity.Permissions,
-			ExpiresAt:     apikeyEntity.ExpiresAt,
-			LastUsedAt:    apikeyEntity.LastUsedAt,
-		},
-	})
-}
-
-type ApiKeyResponse struct {
-	ID            string     `json:"id"`
-	Key           string     `json:"key,omitempty"`
-	PlaintextHint string     `json:"plaintextHint"`
-	Description   string     `json:"description"`
-	Enabled       bool       `json:"enabled"`
-	ApikeyType    string     `json:"apikeyType"`
-	Permissions   string     `json:"permissions"`
-	ExpiresAt     *time.Time `json:"expiresAt"`
-	LastUsedAt    *time.Time `json:"last_usedAt"`
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/projects/project_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/projects/project_route.go
deleted file mode 100644
index 661bedee..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/organization/projects/project_route.go
+++ /dev/null
@@ -1,403 +0,0 @@
-package projects
-
-import (
-	"fmt"
-	"net/http"
-	"strconv"
-	"time"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/project"
-	"menlo.ai/jan-api-gateway/app/domain/query"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses/openai"
-	projectApikeyRoute "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects/api_keys"
-	"menlo.ai/jan-api-gateway/app/utils/functional"
-	"menlo.ai/jan-api-gateway/app/utils/ptr"
-)
-
-type ProjectsRoute struct {
-	projectService     *project.ProjectService
-	apiKeyService      *apikey.ApiKeyService
-	authService        *auth.AuthService
-	projectApiKeyRoute *projectApikeyRoute.ProjectApiKeyRoute
-}
-
-func NewProjectsRoute(
-	projectService *project.ProjectService,
-	apiKeyService *apikey.ApiKeyService,
-	authService *auth.AuthService,
-	projectApiKeyRoute *projectApikeyRoute.ProjectApiKeyRoute,
-) *ProjectsRoute {
-	return &ProjectsRoute{
-		projectService,
-		apiKeyService,
-		authService,
-		projectApiKeyRoute,
-	}
-}
-
-func (projectsRoute *ProjectsRoute) RegisterRouter(router gin.IRouter) {
-	permissionOptional := projectsRoute.authService.DefaultOrganizationMemberOptionalMiddleware()
-	permissionOwnerOnly := projectsRoute.authService.OrganizationMemberRoleMiddleware(auth.OrganizationMemberRuleOwnerOnly)
-	projectsRouter := router.Group(
-		"/projects",
-		projectsRoute.authService.AdminUserAuthMiddleware(),
-		projectsRoute.authService.RegisteredUserMiddleware(),
-	)
-	projectsRouter.GET("",
-		permissionOptional,
-		projectsRoute.GetProjects,
-	)
-	projectsRouter.POST("",
-		permissionOwnerOnly,
-		projectsRoute.CreateProject,
-	)
-
-	projectIdRouter := projectsRouter.Group(
-		fmt.Sprintf("/:%s", auth.ProjectContextKeyPublicID),
-		permissionOptional,
-		projectsRoute.authService.AdminProjectMiddleware(),
-	)
-	projectIdRouter.GET("",
-		projectsRoute.GetProject)
-	projectIdRouter.POST("",
-		permissionOwnerOnly,
-		projectsRoute.UpdateProject,
-	)
-	projectIdRouter.POST("/archive",
-		permissionOwnerOnly,
-		projectsRoute.ArchiveProject,
-	)
-	projectsRoute.projectApiKeyRoute.RegisterRouter(projectIdRouter)
-}
-
-// GetProjects godoc
-// @Summary List Projects
-// @Description Retrieves a paginated list of all projects for the authenticated organization.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param limit query int false "The maximum number of items to return" default(20)
-// @Param after query string false "A cursor for use in pagination. The ID of the last object from the previous page"
-// @Param include_archived query string false "Whether to include archived projects."
-// @Success 200 {object} ProjectListResponse "Successfully retrieved the list of projects"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 500 {object} responses.ErrorResponse "Internal Server Error"
-// @Router /v1/organization/projects [get]
-func (api *ProjectsRoute) GetProjects(reqCtx *gin.Context) {
-	orgEntity, ok := auth.GetAdminOrganizationFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	user, ok := auth.GetUserFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	projectService := api.projectService
-	includeArchivedStr := reqCtx.DefaultQuery("include_archived", "false")
-	includeArchived, err := strconv.ParseBool(includeArchivedStr)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "65e69a2c-5ce0-4a9c-bb61-ee5cc494f948",
-			Error: "invalid or missing query parameter",
-		})
-		return
-	}
-	ctx := reqCtx.Request.Context()
-	pagination, err := query.GetCursorPaginationFromQuery(reqCtx, func(after string) (*uint, error) {
-		entity, err := projectService.FindOne(ctx, project.ProjectFilter{
-			PublicID: &after,
-		})
-		if err != nil {
-			return nil, err
-		}
-		return &entity.ID, nil
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "4434f5ed-89f4-4a62-9fef-8ca53336dcda",
-			Error: "invalid or missing query parameter",
-		})
-		return
-	}
-	projectFilter := project.ProjectFilter{
-		OrganizationID: &orgEntity.ID,
-	}
-	_, ok = auth.GetAdminOrganizationMemberFromContext(reqCtx)
-	if !ok {
-		projectFilter.MemberID = &user.ID
-	}
-	if !includeArchived {
-		projectFilter.Archived = ptr.ToBool(false)
-	}
-	projects, err := projectService.Find(ctx, projectFilter, pagination)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "29d3d0b0-e587-4f20-9adb-1ab9aa666b38",
-			Error: "failed to retrieve projects",
-		})
-		return
-	}
-
-	pageCursor, err := responses.BuildCursorPage(
-		projects,
-		func(t *project.Project) *string {
-			return &t.PublicID
-		},
-		func() ([]*project.Project, error) {
-			return projectService.Find(ctx, projectFilter, &query.Pagination{
-				Order: pagination.Order,
-				Limit: ptr.ToInt(1),
-				After: &projects[len(projects)-1].ID,
-			})
-		},
-		func() (int64, error) {
-			return projectService.CountProjects(ctx, projectFilter)
-		},
-	)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code: "6a0ee74e-d6fd-4be8-91b3-03a594b8cd2e",
-		})
-		return
-	}
-
-	result := functional.Map(projects, func(project *project.Project) ProjectResponse {
-		return domainToProjectResponse(project)
-	})
-
-	response := openai.ListResponse[ProjectResponse]{
-		Object:  "list",
-		Data:    result,
-		HasMore: pageCursor.HasMore,
-		FirstID: pageCursor.FirstID,
-		LastID:  pageCursor.LastID,
-		Total:   int64(pageCursor.Total),
-	}
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// CreateProject godoc
-// @Summary Create Project
-// @Description Creates a new project for an organization.
-// @Tags Administration API
-// @Accept json
-// @Produce json
-// @Security BearerAuth
-// @Param body body CreateProjectRequest true "Project creation request"
-// @Success 200 {object} ProjectResponse "Successfully created project"
-// @Failure 400 {object} responses.ErrorResponse "Bad request - invalid payload"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 500 {object} responses.ErrorResponse "Internal Server Error"
-// @Router /v1/organization/projects [post]
-func (api *ProjectsRoute) CreateProject(reqCtx *gin.Context) {
-	projectService := api.projectService
-	ctx := reqCtx.Request.Context()
-	orgEntity, ok := auth.GetAdminOrganizationFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	var requestPayload CreateProjectRequest
-	if err := reqCtx.ShouldBindJSON(&requestPayload); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  "db8142f8-dc78-4581-a238-6e32288a54ec",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	projectEntity, err := projectService.CreateProjectWithPublicID(ctx, &project.Project{
-		Name:           requestPayload.Name,
-		OrganizationID: orgEntity.ID,
-		Status:         string(project.ProjectStatusActive),
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "e00e6ab3-1b43-490e-90df-aae030697f74",
-			Error: err.Error(),
-		})
-		return
-	}
-
-	orgMember, _ := auth.GetAdminOrganizationMemberFromContext(reqCtx)
-	err = projectService.AddMember(ctx, &project.ProjectMember{
-		UserID:    orgMember.UserID,
-		ProjectID: projectEntity.ID,
-		Role:      string(project.ProjectMemberRoleOwner),
-	})
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:          "e29ddee3-77ea-4ac5-b474-00e2311b68ab",
-			ErrorInstance: err,
-		})
-		return
-	}
-	response := domainToProjectResponse(projectEntity)
-	reqCtx.JSON(http.StatusOK, response)
-}
-
-// GetProject godoc
-// @Summary Get Project
-// @Description Retrieves a specific project by its ID.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param project_id path string true "ID of the project"
-// @Success 200 {object} ProjectResponse "Successfully retrieved the project"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 404 {object} responses.ErrorResponse "Not Found - project with the given ID does not exist or does not belong to the organization"
-// @Router /v1/organization/projects/{project_id} [get]
-func (api *ProjectsRoute) GetProject(reqCtx *gin.Context) {
-	projectEntity, ok := auth.GetProjectFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-			Code:  "42ad3a04-6c17-40db-a10f-640be569c93f",
-			Error: "project not found",
-		})
-		return
-	}
-	reqCtx.JSON(http.StatusOK, domainToProjectResponse(projectEntity))
-}
-
-// UpdateProject godoc
-// @Summary Update Project
-// @Description Updates a specific project by its ID.
-// @Tags Administration API
-// @Accept json
-// @Produce json
-// @Security BearerAuth
-// @Param project_id path string true "ID of the project to update"
-// @Param body body UpdateProjectRequest true "Project update request"
-// @Success 200 {object} ProjectResponse "Successfully updated the project"
-// @Failure 400 {object} responses.ErrorResponse "Bad request - invalid payload"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 404 {object} responses.ErrorResponse "Not Found - project with the given ID does not exist"
-// @Router /v1/organization/projects/{project_id} [post]
-func (api *ProjectsRoute) UpdateProject(reqCtx *gin.Context) {
-	orgMember, ok := auth.GetAdminOrganizationMemberFromContext(reqCtx)
-	if !ok || orgMember.Role != organization.OrganizationMemberRoleOwner {
-		reqCtx.AbortWithStatusJSON(http.StatusUnauthorized, responses.ErrorResponse{
-			Code: "2e531704-2e55-4d55-9ca3-d60e245f75b4",
-		})
-		return
-	}
-	projectService := api.projectService
-	ctx := reqCtx.Request.Context()
-	var requestPayload UpdateProjectRequest
-	if err := reqCtx.ShouldBindJSON(&requestPayload); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:          "b6cb35be-8a53-478d-95d1-5e1f64f35c09",
-			ErrorInstance: err,
-		})
-		return
-	}
-
-	entity, ok := auth.GetProjectFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-			Code:  "42ad3a04-6c17-40db-a10f-640be569c93f",
-			Error: "project not found",
-		})
-		return
-	}
-
-	// Update the project name if provided
-	if requestPayload.Name != nil {
-		entity.Name = *requestPayload.Name
-	}
-
-	updatedEntity, err := projectService.UpdateProject(ctx, entity)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "c9a103b2-985c-44b7-9ccd-38e914a2c82b",
-			Error: "failed to update project",
-		})
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, domainToProjectResponse(updatedEntity))
-}
-
-// ArchiveProject godoc
-// @Summary Archive Project
-// @Description Archives a specific project by its ID, making it inactive.
-// @Tags Administration API
-// @Security BearerAuth
-// @Param project_id path string true "ID of the project to archive"
-// @Success 200 {object} ProjectResponse "Successfully archived the project"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or missing API key"
-// @Failure 404 {object} responses.ErrorResponse "Not Found - project with the given ID does not exist"
-// @Router /v1/organization/projects/{project_id}/archive [post]
-func (api *ProjectsRoute) ArchiveProject(reqCtx *gin.Context) {
-	projectService := api.projectService
-	ctx := reqCtx.Request.Context()
-
-	entity, ok := auth.GetProjectFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusNotFound, responses.ErrorResponse{
-			Code:  "42ad3a04-6c17-40db-a10f-640be569c93f",
-			Error: "project not found",
-		})
-		return
-	}
-
-	// Set archived status
-	entity.Status = string(project.ProjectStatusArchived)
-	entity.ArchivedAt = ptr.ToTime(time.Now())
-	updatedEntity, err := projectService.UpdateProject(ctx, entity)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  "c9a103b2-985c-44b7-9ccd-38e914a2c82b",
-			Error: "failed to archive project",
-		})
-		return
-	}
-
-	reqCtx.JSON(http.StatusOK, domainToProjectResponse(updatedEntity))
-}
-
-// ProjectResponse defines the response structure for a project.
-type ProjectResponse struct {
-	Object     string `json:"object" example:"project" description:"The type of the object, 'project'"`
-	ID         string `json:"id" example:"proj_1234567890" description:"Unique identifier for the project"`
-	Name       string `json:"name" example:"My First Project" description:"The name of the project"`
-	CreatedAt  int64  `json:"created_at" example:"1698765432" description:"Unix timestamp when the project was created"`
-	ArchivedAt *int64 `json:"archived_at,omitempty" example:"1698765432" description:"Unix timestamp when the project was archived, if applicable"`
-	Status     string `json:"status"`
-}
-
-// CreateProjectRequest defines the request payload for creating a project.
-type CreateProjectRequest struct {
-	Name string `json:"name" binding:"required" example:"New AI Project" description:"The name of the project to be created"`
-}
-
-// UpdateProjectRequest defines the request payload for updating a project.
-type UpdateProjectRequest struct {
-	Name *string `json:"name" example:"Updated AI Project" description:"The new name for the project"`
-}
-
-// ProjectListResponse defines the response structure for a list of projects.
-type ProjectListResponse struct {
-	Object  string            `json:"object" example:"list" description:"The type of the object, 'list'"`
-	Data    []ProjectResponse `json:"data" description:"Array of projects"`
-	FirstID *string           `json:"first_id,omitempty"`
-	LastID  *string           `json:"last_id,omitempty"`
-	HasMore bool              `json:"has_more"`
-}
-
-func domainToProjectResponse(p *project.Project) ProjectResponse {
-	var archivedAt *int64
-	if p.ArchivedAt != nil {
-		archivedAt = ptr.ToInt64(p.CreatedAt.Unix())
-	}
-	return ProjectResponse{
-		Object:     string(openai.ObjectKeyProject),
-		ID:         p.PublicID,
-		Name:       p.Name,
-		CreatedAt:  p.CreatedAt.Unix(),
-		ArchivedAt: archivedAt,
-		Status:     p.Status,
-	}
-}
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/responses/response_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/responses/response_route.go
deleted file mode 100644
index d9a3b642..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/responses/response_route.go
+++ /dev/null
@@ -1,466 +0,0 @@
-package responses
-
-import (
-	"fmt"
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/response"
-
-	requesttypes "menlo.ai/jan-api-gateway/app/interfaces/http/requests"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/responses"
-)
-
-// Use types from the response packages instead of defining internal types
-
-// ResponseRoute represents the response API routes
-type ResponseRoute struct {
-	responseModelService  *response.ResponseModelService
-	authService           *auth.AuthService
-	responseService       *response.ResponseService
-	streamModelService    *response.StreamModelService
-	nonStreamModelService *response.NonStreamModelService
-}
-
-// NewResponseRoute creates a new ResponseRoute instance
-func NewResponseRoute(responseModelService *response.ResponseModelService, authService *auth.AuthService, responseService *response.ResponseService, streamHandler *response.StreamModelService, nonStreamHandler *response.NonStreamModelService) *ResponseRoute {
-	return &ResponseRoute{
-		responseModelService:  responseModelService,
-		authService:           authService,
-		responseService:       responseService,
-		streamModelService:    streamHandler,
-		nonStreamModelService: nonStreamHandler,
-	}
-}
-
-// RegisterRouter registers the response routes
-func (responseRoute *ResponseRoute) RegisterRouter(router gin.IRouter) {
-	responseRouter := router.Group("/responses")
-	responseRoute.registerRoutes(responseRouter)
-}
-
-// registerRoutes registers all response routes
-func (responseRoute *ResponseRoute) registerRoutes(router *gin.RouterGroup) {
-	// Apply middleware to the entire group
-	responseGroup := router.Group("",
-		responseRoute.authService.AppUserAuthMiddleware(),
-		responseRoute.authService.RegisteredUserMiddleware(),
-	)
-
-	responseGroup.POST("", responseRoute.CreateResponse)
-
-	// Apply response middleware for routes that need response context
-	responseMiddleWare := responseRoute.responseService.GetResponseMiddleWare()
-	responseGroup.GET(fmt.Sprintf("/:%s", string(response.ResponseContextKeyPublicID)), responseMiddleWare, responseRoute.responseModelService.GetResponseHandler)
-	responseGroup.DELETE(fmt.Sprintf("/:%s", string(response.ResponseContextKeyPublicID)), responseMiddleWare, responseRoute.responseModelService.DeleteResponseHandler)
-	responseGroup.POST(fmt.Sprintf("/:%s/cancel", string(response.ResponseContextKeyPublicID)), responseMiddleWare, responseRoute.responseModelService.CancelResponseHandler)
-	responseGroup.GET(fmt.Sprintf("/:%s/input_items", string(response.ResponseContextKeyPublicID)), responseMiddleWare, responseRoute.responseModelService.ListInputItemsHandler)
-}
-
-// CreateResponse creates a new response from LLM
-// @Summary Create a response
-// @Description Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.
-// @Description
-// @Description **Supported Input Types:**
-// @Description - `text`: Plain text input
-// @Description - `image`: Image input (URL or base64)
-// @Description - `file`: File input by file ID
-// @Description - `web_search`: Web search input
-// @Description - `file_search`: File search input
-// @Description - `streaming`: Streaming input
-// @Description - `function_calls`: Function calls input
-// @Description - `reasoning`: Reasoning input
-// @Description
-// @Description **Example Request:**
-// @Description ```json
-// @Description {
-// @Description   "model": "gpt-4",
-// @Description   "input": {
-// @Description     "type": "text",
-// @Description     "text": "Hello, how are you?"
-// @Description   },
-// @Description   "max_tokens": 100,
-// @Description   "temperature": 0.7,
-// @Description   "stream": false,
-// @Description   "background": false
-// @Description }
-// @Description ```
-// @Description
-// @Description **Response Format:**
-// @Description The response uses embedded structure where all fields are at the top level:
-// @Description - `jan_status`: Jan API status code (optional)
-// @Description - `id`: Response identifier
-// @Description - `object`: Object type ("response")
-// @Description - `created`: Unix timestamp
-// @Description - `model`: Model used
-// @Description - `status`: Response status
-// @Description - `input`: Input data
-// @Description - `output`: Generated output
-// @Description
-// @Description **Example Response:**
-// @Description ```json
-// @Description {
-// @Description   "jan_status": "000000",
-// @Description   "id": "resp_1234567890",
-// @Description   "object": "response",
-// @Description   "created": 1234567890,
-// @Description   "model": "gpt-4",
-// @Description   "status": "completed",
-// @Description   "input": {
-// @Description     "type": "text",
-// @Description     "text": "Hello, how are you?"
-// @Description   },
-// @Description   "output": {
-// @Description     "type": "text",
-// @Description     "text": {
-// @Description       "value": "I'm doing well, thank you!"
-// @Description     }
-// @Description   }
-// @Description }
-// @Description ```
-// @Description
-// @Description **Response Status:**
-// @Description - `completed`: Response generation finished successfully
-// @Description - `processing`: Response is being generated
-// @Description - `failed`: Response generation failed
-// @Description - `cancelled`: Response was cancelled
-// @Tags Responses API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param request body requesttypes.CreateResponseRequest true "Request payload containing model, input, and generation parameters"
-// @Success 200 {object} responses.Response "Created response"
-// @Success 202 {object} responses.Response "Response accepted for background processing"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 422 {object} responses.ErrorResponse "Validation error"
-// @Failure 429 {object} responses.ErrorResponse "Rate limit exceeded"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/responses [post]
-func (responseRoute *ResponseRoute) CreateResponse(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	user, _ := auth.GetUserFromContext(reqCtx)
-	userID := user.ID
-
-	var request requesttypes.CreateResponseRequest
-	if err := reqCtx.ShouldBindJSON(&request); err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "g7h8i9j0-k1l2-3456-ghij-789012345678",
-		})
-		return
-	}
-
-	// Validate request parameters
-	if request.Model == "" {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "h8i9j0k1-l2m3-4567-hijk-890123456789",
-		})
-		return
-	}
-
-	if request.Input == nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "i9j0k1l2-m3n4-5678-ijkl-901234567890",
-		})
-		return
-	}
-
-	// Convert to domain request type
-	domainRequest := &requesttypes.CreateResponseRequest{
-		Model:              request.Model,
-		Input:              request.Input,
-		Stream:             request.Stream,
-		Temperature:        request.Temperature,
-		MaxTokens:          request.MaxTokens,
-		PreviousResponseID: request.PreviousResponseID,
-		SystemPrompt:       request.SystemPrompt,
-		TopP:               request.TopP,
-		TopK:               request.TopK,
-		RepetitionPenalty:  request.RepetitionPenalty,
-		Seed:               request.Seed,
-		Stop:               request.Stop,
-		PresencePenalty:    request.PresencePenalty,
-		FrequencyPenalty:   request.FrequencyPenalty,
-		LogitBias:          request.LogitBias,
-		ResponseFormat:     request.ResponseFormat,
-		Tools:              request.Tools,
-		ToolChoice:         request.ToolChoice,
-		Metadata:           request.Metadata,
-		Background:         request.Background,
-		Timeout:            request.Timeout,
-		User:               request.User,
-		Conversation:       request.Conversation,
-		Store:              request.Store,
-	}
-
-	// Call domain service (pure business logic)
-	result, err := responseRoute.responseModelService.CreateResponse(ctx, userID, domainRequest)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  err.GetCode(),
-			Error: err.Error(),
-		})
-		return
-	}
-
-	// Handle HTTP/SSE concerns directly
-	responseRoute.handleResponseCreation(reqCtx, result, domainRequest)
-}
-
-// handleResponseCreation handles both streaming and non-streaming response creation
-func (responseRoute *ResponseRoute) handleResponseCreation(reqCtx *gin.Context, result *response.ResponseCreationResult, request *requesttypes.CreateResponseRequest) {
-	// Set up streaming headers if needed
-	if result.IsStreaming {
-		reqCtx.Header("Content-Type", "text/event-stream")
-		reqCtx.Header("Cache-Control", "no-cache")
-		reqCtx.Header("Connection", "keep-alive")
-		reqCtx.Header("Access-Control-Allow-Origin", "*")
-		reqCtx.Header("Access-Control-Allow-Headers", "Cache-Control")
-	}
-
-	// Delegate to appropriate handler based on streaming preference
-	if result.IsStreaming {
-		responseRoute.streamModelService.CreateStreamResponse(reqCtx, request, result.APIKey, result.Conversation, result.Response, result.ChatCompletionRequest)
-	} else {
-		responseRoute.nonStreamModelService.CreateNonStreamResponseHandler(reqCtx, request, result.APIKey, result.Conversation, result.Response, result.ChatCompletionRequest)
-	}
-}
-
-// GetResponse retrieves a response by ID
-// @Summary Get a response
-// @Description Retrieves an LLM response by its ID. Returns the complete response object with embedded structure where all fields are at the top level.
-// @Description
-// @Description **Response Format:**
-// @Description The response uses embedded structure where all fields are at the top level:
-// @Description - `jan_status`: Jan API status code (optional)
-// @Description - `id`: Response identifier
-// @Description - `object`: Object type ("response")
-// @Description - `created`: Unix timestamp
-// @Description - `model`: Model used
-// @Description - `status`: Response status
-// @Description - `input`: Input data
-// @Description - `output`: Generated output
-// @Tags Responses API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param response_id path string true "Unique identifier of the response"
-// @Success 200 {object} responses.Response "Response details"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Response not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/responses/{response_id} [get]
-func (responseRoute *ResponseRoute) GetResponse(reqCtx *gin.Context) {
-	resp, ok := response.GetResponseFromContext(reqCtx)
-	if !ok {
-		return
-	}
-	// Convert domain response to API response using the service
-	apiResponse := responseRoute.responseService.ConvertDomainResponseToAPIResponse(resp)
-	reqCtx.JSON(http.StatusOK, apiResponse)
-}
-
-// DeleteResponse deletes a response by ID
-// @Summary Delete a response
-// @Description Deletes an LLM response by its ID. Returns the deleted response object with embedded structure where all fields are at the top level.
-// @Description
-// @Description **Response Format:**
-// @Description The response uses embedded structure where all fields are at the top level:
-// @Description - `jan_status`: Jan API status code (optional)
-// @Description - `id`: Response identifier
-// @Description - `object`: Object type ("response")
-// @Description - `created`: Unix timestamp
-// @Description - `model`: Model used
-// @Description - `status`: Response status (will be "cancelled")
-// @Description - `input`: Input data
-// @Description - `cancelled_at`: Cancellation timestamp
-// @Tags Responses API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param response_id path string true "Unique identifier of the response"
-// @Success 200 {object} responses.Response "Deleted response"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Response not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/responses/{response_id} [delete]
-func (responseRoute *ResponseRoute) DeleteResponse(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	resp, ok := response.GetResponseFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "k1l2m3n4-o5p6-7890-klmn-123456789012",
-		})
-		return
-	}
-
-	success, err := responseRoute.responseService.DeleteResponse(ctx, resp.ID)
-	if !success {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  err.GetCode(),
-			Error: err.Error(),
-		})
-		return
-	}
-	// Convert domain response to API response using the service
-	apiResponse := responseRoute.responseService.ConvertDomainResponseToAPIResponse(resp)
-	reqCtx.JSON(http.StatusOK, apiResponse)
-}
-
-// CancelResponse cancels a running response
-// @Summary Cancel a response
-// @Description Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.
-// @Description
-// @Description **Response Format:**
-// @Description The response uses embedded structure where all fields are at the top level:
-// @Description - `jan_status`: Jan API status code (optional)
-// @Description - `id`: Response identifier
-// @Description - `object`: Object type ("response")
-// @Description - `created`: Unix timestamp
-// @Description - `model`: Model used
-// @Description - `status`: Response status (will be "cancelled")
-// @Description - `input`: Input data
-// @Description - `cancelled_at`: Cancellation timestamp
-// @Tags Responses API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param response_id path string true "Unique identifier of the response to cancel"
-// @Success 200 {object} responses.Response "Response cancelled successfully"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request or response cannot be cancelled"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Response not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/responses/{response_id}/cancel [post]
-func (responseRoute *ResponseRoute) CancelResponse(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	resp, ok := response.GetResponseFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "m3n4o5p6-q7r8-9012-mnop-345678901234",
-		})
-		return
-	}
-
-	// TODO
-	// Cancel the stream if it is streaming in go routine and update response status in go routine
-	success, err := responseRoute.responseService.UpdateResponseStatus(ctx, resp.ID, response.ResponseStatusCancelled)
-	if !success {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code:  err.GetCode(),
-			Error: err.Error(),
-		})
-		return
-	}
-
-	// Reload the response to get updated status
-	updatedResp, err := responseRoute.responseService.GetResponseByPublicID(ctx, resp.PublicID)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  err.GetCode(),
-			Error: err.Error(),
-		})
-		return
-	}
-	// Convert domain response to API response using the service
-	apiResponse := responseRoute.responseService.ConvertDomainResponseToAPIResponse(updatedResp)
-	reqCtx.JSON(http.StatusOK, apiResponse)
-}
-
-// ListInputItems lists input items for a response
-// @Summary List input items
-// @Description Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.
-// @Description
-// @Description **Response Format:**
-// @Description The response uses embedded structure where all fields are at the top level:
-// @Description - `jan_status`: Jan API status code (optional)
-// @Description - `first_id`: First item ID for pagination (optional)
-// @Description - `last_id`: Last item ID for pagination (optional)
-// @Description - `has_more`: Whether more items are available (optional)
-// @Description - `id`: Input item identifier
-// @Description - `object`: Object type ("input_item")
-// @Description - `created`: Unix timestamp
-// @Description - `type`: Input type
-// @Description - `text`: Text content (for text type)
-// @Description - `image`: Image content (for image type)
-// @Description - `file`: File content (for file type)
-// @Description
-// @Description **Example Response:**
-// @Description ```json
-// @Description {
-// @Description   "jan_status": "000000",
-// @Description   "first_id": "input_123",
-// @Description   "last_id": "input_456",
-// @Description   "has_more": false,
-// @Description   "id": "input_1234567890",
-// @Description   "object": "input_item",
-// @Description   "created": 1234567890,
-// @Description   "type": "text",
-// @Description   "text": "Hello, world!"
-// @Description }
-// @Description ```
-// @Tags Responses API
-// @Security BearerAuth
-// @Accept json
-// @Produce json
-// @Param response_id path string true "Unique identifier of the response"
-// @Param limit query int false "Maximum number of items to return (default: 20, max: 100)"
-// @Param after query string false "Cursor for pagination - return items after this ID"
-// @Param before query string false "Cursor for pagination - return items before this ID"
-// @Success 200 {object} responses.ListInputItemsResponse "List of input items"
-// @Failure 400 {object} responses.ErrorResponse "Invalid request or pagination parameters"
-// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
-// @Failure 403 {object} responses.ErrorResponse "Access denied"
-// @Failure 404 {object} responses.ErrorResponse "Response not found"
-// @Failure 500 {object} responses.ErrorResponse "Internal server error"
-// @Router /v1/responses/{response_id}/input_items [get]
-func (responseRoute *ResponseRoute) ListInputItems(reqCtx *gin.Context) {
-	ctx := reqCtx.Request.Context()
-	resp, ok := response.GetResponseFromContext(reqCtx)
-	if !ok {
-		reqCtx.AbortWithStatusJSON(http.StatusBadRequest, responses.ErrorResponse{
-			Code: "p6q7r8s9-t0u1-2345-pqrs-678901234567",
-		})
-		return
-	}
-
-	// Get items for this response using the response service
-	items, err := responseRoute.responseService.GetItemsForResponse(ctx, resp.ID, nil)
-	if err != nil {
-		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, responses.ErrorResponse{
-			Code:  err.GetCode(),
-			Error: err.Error(),
-		})
-		return
-	}
-
-	var firstId *string
-	var lastId *string
-	if len(items) > 0 {
-		firstId = &items[0].PublicID
-		lastId = &items[len(items)-1].PublicID
-	}
-
-	// Convert conversation items to input items using the service
-	inputItems := make([]responses.InputItem, 0, len(items))
-	for _, item := range items {
-		inputItem := responseRoute.responseService.ConvertConversationItemToInputItem(item)
-		inputItems = append(inputItems, inputItem)
-	}
-
-	reqCtx.JSON(http.StatusOK, responses.ListInputItemsResponse{
-		Object:  "list",
-		Data:    inputItems,
-		FirstID: firstId,
-		LastID:  lastId,
-		HasMore: false, // For now, we'll return all items without pagination
-	})
-}
-
-// All transformation functions removed - now using service methods
diff --git a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/v1_route.go b/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/v1_route.go
deleted file mode 100644
index c7b6b6b0..00000000
--- a/apps/jan-api-gateway/application/app/interfaces/http/routes/v1/v1_route.go
+++ /dev/null
@@ -1,75 +0,0 @@
-package v1
-
-import (
-	"net/http"
-
-	"github.com/gin-gonic/gin"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/chat"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conv"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conversations"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/responses"
-	"menlo.ai/jan-api-gateway/config"
-)
-
-// V1Route holds the route handlers that RegisterRouter mounts under the /v1 group.
-type V1Route struct {
-	organizationRoute *organization.OrganizationRoute
-	chatRoute         *chat.ChatRoute
-	convChatRoute     *conv.ConvChatRoute
-	conversationAPI   *conversations.ConversationAPI
-	modelAPI          *ModelAPI
-	mcpAPI            *mcp.MCPAPI
-	authRoute         *auth.AuthRoute
-	responsesRoute    *responses.ResponseRoute
-}
-
-// NewV1Route builds a V1Route from the already-constructed sub-route handlers.
-func NewV1Route(
-	organizationRoute *organization.OrganizationRoute,
-	chatRoute *chat.ChatRoute,
-	convChatRoute *conv.ConvChatRoute,
-	conversationAPI *conversations.ConversationAPI,
-	modelAPI *ModelAPI,
-	mcpAPI *mcp.MCPAPI,
-	authRoute *auth.AuthRoute,
-	responsesRoute *responses.ResponseRoute,
-) *V1Route {
-	// Keyed fields make the wiring independent of the struct's field order.
-	return &V1Route{
-		organizationRoute: organizationRoute,
-		chatRoute:         chatRoute,
-		convChatRoute:     convChatRoute,
-		conversationAPI:   conversationAPI,
-		modelAPI:          modelAPI,
-		mcpAPI:            mcpAPI,
-		authRoute:         authRoute,
-		responsesRoute:    responsesRoute,
-	}
-}
-
-// RegisterRouter creates the "/v1" group on router, registers GET /v1/version,
-// and then lets every sub-route register its own endpoints on that group.
-func (v1Route *V1Route) RegisterRouter(router gin.IRouter) {
-	v1Router := router.Group("/v1")
-	v1Router.GET("/version", GetVersion)
-	v1Route.chatRoute.RegisterRouter(v1Router)
-	v1Route.convChatRoute.RegisterRouter(v1Router)
-	v1Route.conversationAPI.RegisterRouter(v1Router)
-	v1Route.modelAPI.RegisterRouter(v1Router)
-	v1Route.mcpAPI.RegisterRouter(v1Router)
-	v1Route.organizationRoute.RegisterRouter(v1Router)
-	v1Route.authRoute.RegisterRouter(v1Router)
-	v1Route.responsesRoute.RegisterRouter(v1Router)
-}
-
-// GetVersion godoc
-// @Summary     Get API build version
-// @Description Returns the current build version of the API server.
-// @Tags        Server API
-// @Produce     json
-// @Success     200 {object} map[string]string "version info"
-// @Router      /v1/version [get]
-func GetVersion(c *gin.Context) {
-	// Both values come from package-level variables in the config package.
-	versionInfo := gin.H{}
-	versionInfo["version"] = config.Version
-	versionInfo["env_reloaded_at"] = config.EnvReloadedAt
-	c.JSON(http.StatusOK, versionInfo)
-}
diff --git a/apps/jan-api-gateway/application/app/utils/contextkeys/keys.go b/apps/jan-api-gateway/application/app/utils/contextkeys/keys.go
deleted file mode 100644
index 6405579d..00000000
--- a/apps/jan-api-gateway/application/app/utils/contextkeys/keys.go
+++ /dev/null
@@ -1,8 +0,0 @@
-package contextkeys
-
-// Empty struct types intended for use as context.Context value keys; a distinct
-// type per key avoids collisions with keys defined by other packages.
-
-// RequestId is the key type for a request identifier.
-type RequestId struct{}
-
-// HttpClientStartsAt is the key type for the time an outgoing HTTP request started.
-type HttpClientStartsAt struct{}
-
-// HttpClientRequestBody is the key type for the body of an outgoing HTTP request.
-type HttpClientRequestBody struct{}
-
-// TransactionContextKey is presumably the key for a database transaction carried
-// in the context — TODO confirm against its users.
-type TransactionContextKey struct{}
-
-// SkipMiddleware is a string key; its consumers are not visible in this package.
-const SkipMiddleware = "SkipMiddleware"
diff --git a/apps/jan-api-gateway/application/app/utils/emailservice/smtp.go b/apps/jan-api-gateway/application/app/utils/emailservice/smtp.go
deleted file mode 100644
index 1a3710d6..00000000
--- a/apps/jan-api-gateway/application/app/utils/emailservice/smtp.go
+++ /dev/null
@@ -1,67 +0,0 @@
-package emailservice
-
-import (
-	"crypto/tls"
-	"fmt"
-	"net/smtp"
-	"strings"
-
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-// SendEmail sends an HTML email with the given subject and body to a single
-// recipient, using the SMTP settings from the environment variables. The
-// connection is opened with TLS from the start (tls.Dial), authenticated with
-// PLAIN auth, and closed with QUIT.
-//
-// It returns an error if to or subject contain a CR or LF character, or if any
-// step of the SMTP exchange fails.
-func SendEmail(to string, subject string, body string) error {
-	// Header values are concatenated into the message verbatim, so a line break
-	// in them would let the caller inject extra headers or body content.
-	if strings.ContainsAny(to, "\r\n") || strings.ContainsAny(subject, "\r\n") {
-		return fmt.Errorf("invalid email header: recipient and subject must not contain line breaks")
-	}
-
-	envs := environment_variables.EnvironmentVariables
-
-	addr := fmt.Sprintf("%s:%d", envs.SMTP_HOST, envs.SMTP_PORT)
-	tlsConfig := &tls.Config{
-		InsecureSkipVerify: false,
-		ServerName:         envs.SMTP_HOST,
-	}
-
-	conn, err := tls.Dial("tcp", addr, tlsConfig)
-	if err != nil {
-		return fmt.Errorf("TLS dial error: %w", err)
-	}
-
-	client, err := smtp.NewClient(conn, envs.SMTP_HOST)
-	if err != nil {
-		// No client took ownership of the connection, so close it here to avoid a leak.
-		conn.Close()
-		return fmt.Errorf("SMTP client error: %w", err)
-	}
-	defer client.Close()
-
-	auth := smtp.PlainAuth("", envs.SMTP_USERNAME, envs.SMTP_PASSWORD, envs.SMTP_HOST)
-	if err = client.Auth(auth); err != nil {
-		return fmt.Errorf("SMTP auth error: %w", err)
-	}
-
-	headers := ""
-	headers += "MIME-Version: 1.0\r\n"
-	headers += "Content-Type: text/html; charset=\"UTF-8\"\r\n"
-	headers += "From: " + envs.SMTP_SENDER_EMAIL + "\r\n"
-	headers += "To: " + to + "\r\n"
-	headers += "Subject: " + subject + "\r\n"
-
-	// A blank line separates the headers from the body.
-	msg := headers + "\r\n" + body
-
-	if err = client.Mail(envs.SMTP_SENDER_EMAIL); err != nil {
-		return err
-	}
-
-	if err = client.Rcpt(to); err != nil {
-		return err
-	}
-
-	writer, err := client.Data()
-	if err != nil {
-		return err
-	}
-	_, err = writer.Write([]byte(msg))
-	if err != nil {
-		return err
-	}
-	// Closing the data writer is what actually submits the message.
-	err = writer.Close()
-	if err != nil {
-		return err
-	}
-
-	return client.Quit()
-}
diff --git a/apps/jan-api-gateway/application/app/utils/functional/functional.go b/apps/jan-api-gateway/application/app/utils/functional/functional.go
deleted file mode 100644
index 14e00602..00000000
--- a/apps/jan-api-gateway/application/app/utils/functional/functional.go
+++ /dev/null
@@ -1,40 +0,0 @@
-package functional
-
-// Map returns a new slice holding f(x) for every element x of slice, in order.
-// The result is always non-nil, even for a nil or empty input.
-func Map[T, V any](slice []T, f func(T) V) []V {
-	mapped := make([]V, 0, len(slice))
-	for _, element := range slice {
-		mapped = append(mapped, f(element))
-	}
-	return mapped
-}
-
-// Distinct returns the elements of slice with duplicates removed, keeping the
-// first occurrence of each value and the original relative order. The result
-// is always non-nil.
-func Distinct[T comparable](slice []T) []T {
-	unique := []T{}
-	visited := make(map[T]struct{})
-
-	for _, candidate := range slice {
-		if _, duplicate := visited[candidate]; duplicate {
-			continue
-		}
-		visited[candidate] = struct{}{}
-		unique = append(unique, candidate)
-	}
-	return unique
-}
-
-// ConvertToMap indexes slice by the key f returns for each element. When several
-// elements produce the same key, the last one wins. Only the key type has to be
-// comparable; the element type may be anything (including structs that hold
-// slices or maps).
-func ConvertToMap[T any, V comparable](slice []T, f func(T) V) map[V]T {
-	result := make(map[V]T, len(slice))
-	for _, v := range slice {
-		result[f(v)] = v
-	}
-	return result
-}
-
-// GetMapKeys returns the keys of m as a non-nil slice. The order follows Go's
-// map iteration order and is therefore unspecified.
-func GetMapKeys[K comparable, V any](m map[K]V) []K {
-	collected := make([]K, len(m))
-	next := 0
-	for key := range m {
-		collected[next] = key
-		next++
-	}
-	return collected
-}
diff --git a/apps/jan-api-gateway/application/app/utils/httpclients/jan_inference/client.go b/apps/jan-api-gateway/application/app/utils/httpclients/jan_inference/client.go
deleted file mode 100644
index 5b1f73c2..00000000
--- a/apps/jan-api-gateway/application/app/utils/httpclients/jan_inference/client.go
+++ /dev/null
@@ -1,121 +0,0 @@
-package janinference
-
-import (
-	"bufio"
-	"context"
-	"fmt"
-
-	"github.com/gin-gonic/gin"
-	openai "github.com/sashabaranov/go-openai"
-	"menlo.ai/jan-api-gateway/app/utils/httpclients"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-	"resty.dev/v3"
-)
-
-// consider using "github.com/sashabaranov/go-openai"
-var JanInferenceRestyClient *resty.Client
-
-// Init creates the shared resty client for the inference backend and points it
-// at JAN_INFERENCE_MODEL_URL. JanInferenceRestyClient is nil until Init runs.
-func Init() {
-	inferenceClient := httpclients.NewClient("JanInferenceClient")
-	inferenceClient.SetBaseURL(environment_variables.EnvironmentVariables.JAN_INFERENCE_MODEL_URL)
-	JanInferenceRestyClient = inferenceClient
-}
-
-// JanInferenceClient exposes the inference backend's chat-completion and model
-// endpoints. Its methods send requests through the package-level
-// JanInferenceRestyClient; BaseURL records the configured backend URL.
-type JanInferenceClient struct {
-	BaseURL string
-}
-
-// NewJanInferenceClient returns a client whose BaseURL is taken from
-// JAN_INFERENCE_MODEL_URL. The ctx argument is currently unused.
-func NewJanInferenceClient(ctx context.Context) *JanInferenceClient {
-	inferenceClient := new(JanInferenceClient)
-	inferenceClient.BaseURL = environment_variables.EnvironmentVariables.JAN_INFERENCE_MODEL_URL
-	return inferenceClient
-}
-
-// CreateChatCompletionStream posts request to /v1/chat/completions and relays
-// the upstream response body, line by line, to the caller's HTTP response as a
-// server-sent event stream.
-//
-// ctx must be a *gin.Context; otherwise "invalid context" is returned. The
-// upstream call is bound to the incoming request's context so it is cancelled
-// together with that request. An error is returned when the upstream call
-// fails, when writing to the client fails, or when reading the upstream body
-// fails (including a line longer than bufio.Scanner's token limit).
-//
-// NOTE(review): apiKey is not forwarded to the backend here, unlike in
-// CreateChatCompletion — confirm whether that is intentional.
-func (client *JanInferenceClient) CreateChatCompletionStream(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) error {
-	reqCtx, ok := ctx.(*gin.Context)
-	if !ok {
-		return fmt.Errorf("invalid context")
-	}
-	reqCtx.Writer.Header().Set("Content-Type", "text/event-stream")
-	reqCtx.Writer.Header().Set("Cache-Control", "no-cache")
-	reqCtx.Writer.Header().Set("Connection", "keep-alive")
-	reqCtx.Writer.Header().Set("Transfer-Encoding", "chunked")
-
-	resp, err := JanInferenceRestyClient.R().
-		SetContext(reqCtx.Request.Context()).
-		SetBody(request).
-		SetDoNotParseResponse(true).
-		Post("/v1/chat/completions")
-	if err != nil {
-		return err
-	}
-	defer resp.RawResponse.Body.Close()
-
-	scanner := bufio.NewScanner(resp.RawResponse.Body)
-	for scanner.Scan() {
-		// Stop relaying as soon as the client can no longer be written to.
-		if _, writeErr := reqCtx.Writer.Write([]byte(scanner.Text() + "\n")); writeErr != nil {
-			return writeErr
-		}
-		reqCtx.Writer.Flush()
-	}
-	reqCtx.Writer.Flush()
-	// Scan returning false may mean EOF or a read failure; surface the latter.
-	return scanner.Err()
-}
-
-// CreateChatCompletionStreamChunks posts request to /v1/chat/completions in a
-// background goroutine and returns a channel that yields the upstream response
-// body one line at a time. The channel is closed when the stream ends.
-//
-// Failures are reported in-band as a chunk of the form "error: <message>"; the
-// returned error is always nil. When ctx is cancelled the upstream request is
-// aborted and the goroutine stops instead of blocking on a channel nobody reads.
-//
-// NOTE(review): apiKey is not forwarded to the backend here, unlike in
-// CreateChatCompletion — confirm whether that is intentional.
-func (client *JanInferenceClient) CreateChatCompletionStreamChunks(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (<-chan string, error) {
-	if ctx == nil {
-		// Earlier versions never touched ctx, so keep tolerating a nil value.
-		ctx = context.Background()
-	}
-	chunkChan := make(chan string, 100)
-
-	go func() {
-		defer close(chunkChan)
-
-		// send delivers one chunk, or reports false if ctx was cancelled first.
-		send := func(chunk string) bool {
-			select {
-			case chunkChan <- chunk:
-				return true
-			case <-ctx.Done():
-				return false
-			}
-		}
-
-		resp, err := JanInferenceRestyClient.R().
-			SetContext(ctx).
-			SetBody(request).
-			SetDoNotParseResponse(true).
-			Post("/v1/chat/completions")
-		if err != nil {
-			send(fmt.Sprintf("error: %v", err))
-			return
-		}
-		defer resp.RawResponse.Body.Close()
-
-		scanner := bufio.NewScanner(resp.RawResponse.Body)
-		for scanner.Scan() {
-			if !send(scanner.Text()) {
-				return
-			}
-		}
-		// Report read failures the same way request failures are reported.
-		if scanErr := scanner.Err(); scanErr != nil {
-			send(fmt.Sprintf("error: %v", scanErr))
-		}
-	}()
-
-	return chunkChan, nil
-}
-
-// TODO: add timeout
-
-// CreateChatCompletion sends a non-streaming chat completion request with the
-// given API key as bearer token and decodes the JSON reply. A transport failure
-// or an HTTP error status is returned as an error; the returned pointer is
-// never nil, but its content is only meaningful when the error is nil.
-func (client *JanInferenceClient) CreateChatCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, error) {
-	var chatCompletionResponse openai.ChatCompletionResponse
-	resp, err := JanInferenceRestyClient.R().
-		SetContext(ctx).
-		SetBody(request).
-		SetResult(&chatCompletionResponse).
-		SetHeader("Content-Type", "application/json").
-		SetAuthToken(apiKey).
-		Post("/v1/chat/completions")
-	// An HTTP error status leaves err nil, so it has to be checked separately.
-	if err == nil && resp.IsError() {
-		err = fmt.Errorf("jan inference API error: %s", resp.Status())
-	}
-	return &chatCompletionResponse, err
-}
-
-// GetModels fetches /v1/models from the inference backend and decodes the JSON
-// reply. A transport failure or an HTTP error status is returned as an error;
-// the returned pointer is never nil, but its content is only meaningful when
-// the error is nil.
-func (c *JanInferenceClient) GetModels(ctx context.Context) (*ModelsResponse, error) {
-	var result ModelsResponse
-	resp, err := JanInferenceRestyClient.R().
-		SetContext(ctx).
-		SetHeader("Content-Type", "application/json").
-		SetResult(&result).
-		Get("/v1/models")
-	// An HTTP error status leaves err nil, so it has to be checked separately.
-	if err == nil && resp.IsError() {
-		err = fmt.Errorf("jan inference API error: %s", resp.Status())
-	}
-	return &result, err
-}
-
-// Model is one entry of the "data" array decoded from the /v1/models reply.
-type Model struct {
-	ID      string `json:"id"`
-	Object  string `json:"object"`
-	Created int    `json:"created"`
-	OwnedBy string `json:"owned_by"`
-}
-
-// ModelsResponse is the JSON body GetModels decodes from /v1/models.
-type ModelsResponse struct {
-	Object string  `json:"object"`
-	Data   []Model `json:"data"`
-}
diff --git a/apps/jan-api-gateway/application/app/utils/httpclients/resty.go b/apps/jan-api-gateway/application/app/utils/httpclients/resty.go
deleted file mode 100644
index 26d2e5b9..00000000
--- a/apps/jan-api-gateway/application/app/utils/httpclients/resty.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package httpclients
-
-import (
-	"context"
-	"time"
-
-	"github.com/sirupsen/logrus"
-	"menlo.ai/jan-api-gateway/app/utils/contextkeys"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"resty.dev/v3"
-)
-
-// NewClient returns a resty client that writes one structured log entry for
-// every response it receives. clientName is included in each entry as "client".
-//
-// The request middleware stores the start time and the request body in the
-// request context; the response middleware reads them back to log latency and
-// payloads. Credential headers are masked before the headers are logged.
-func NewClient(clientName string) *resty.Client {
-	client := resty.New()
-	client.AddRequestMiddleware(func(c *resty.Client, r *resty.Request) error {
-		start := time.Now()
-		ctx := context.WithValue(r.Context(), contextkeys.HttpClientStartsAt{}, start)
-		ctx = context.WithValue(ctx, contextkeys.HttpClientRequestBody{}, r.Body)
-		r.SetContext(ctx)
-		return nil
-	})
-	client.AddResponseMiddleware(func(c *resty.Client, r *resty.Response) error {
-		log := logger.GetLogger()
-		reqCtx := r.Request.Context()
-		requestID := reqCtx.Value(contextkeys.RequestId{})
-		startTime, _ := reqCtx.Value(contextkeys.HttpClientStartsAt{}).(time.Time)
-		requestBody := reqCtx.Value(contextkeys.HttpClientRequestBody{})
-		latency := time.Since(startTime)
-
-		// A streamed (unparsed) response has no decoded result to log.
-		var responseBody any
-		if !r.Request.DoNotParseResponse {
-			responseBody = r.Result()
-		}
-
-		// Log a copy of the headers with secrets masked, so bearer tokens and
-		// API keys never reach the log output.
-		headers := r.Request.RawRequest.Header.Clone()
-		for _, name := range []string{"Authorization", "X-Api-Key"} {
-			if headers.Get(name) != "" {
-				headers.Set(name, "[REDACTED]")
-			}
-		}
-
-		log.WithFields(logrus.Fields{
-			"request_id": requestID,
-			"client":     clientName,
-			"status":     r.StatusCode(),
-			"method":     r.Request.RawRequest.Method,
-			"path":       r.Request.RawRequest.URL.Path,
-			"query":      r.Request.RawRequest.URL.RawQuery,
-			"headers":    headers,
-			"req_body":   requestBody,
-			"resp_body":  responseBody,
-			"latency":    latency.String(),
-			"client_ip":  nil,
-		}).Info("")
-		return nil
-	})
-	return client
-}
diff --git a/apps/jan-api-gateway/application/app/utils/httpclients/serper/client.go b/apps/jan-api-gateway/application/app/utils/httpclients/serper/client.go
deleted file mode 100644
index ea2a5d66..00000000
--- a/apps/jan-api-gateway/application/app/utils/httpclients/serper/client.go
+++ /dev/null
@@ -1,111 +0,0 @@
-package serper
-
-import (
-	"context"
-	"fmt"
-
-	"menlo.ai/jan-api-gateway/app/utils/httpclients"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-	"resty.dev/v3"
-)
-
-// SerperRestyClient is the shared HTTP client used by all SerperClient calls.
-// It is nil until Init has run.
-var SerperRestyClient *resty.Client
-
-// Init creates SerperRestyClient with the logging client named "SerperClient".
-func Init() {
-	SerperRestyClient = httpclients.NewClient("SerperClient")
-}
-
-// SerperClient calls the Serper search and scrape endpoints; apiKey is sent
-// with every request in the X-API-KEY header.
-type SerperClient struct {
-	apiKey string
-}
-
-// NewSerperClient returns a client that authenticates with SERPER_API_KEY from
-// the environment variables.
-func NewSerperClient() *SerperClient {
-	serperClient := new(SerperClient)
-	serperClient.apiKey = environment_variables.EnvironmentVariables.SERPER_API_KEY
-	return serperClient
-}
-
-// TBSTimeRange is the value sent as the "tbs" field of a SearchRequest.
-type TBSTimeRange string
-
-// Predefined TBSTimeRange values. Going by their names they restrict results
-// to a recent time window; TBSAny is the empty string (no restriction value).
-const (
-	TBSAny       TBSTimeRange = ""
-	TBSPastHour  TBSTimeRange = "qdr:h"
-	TBSPastDay   TBSTimeRange = "qdr:d"
-	TBSPastWeek  TBSTimeRange = "qdr:w"
-	TBSPastMonth TBSTimeRange = "qdr:m"
-	TBSPastYear  TBSTimeRange = "qdr:y"
-)
-
-// SearchRequest is the JSON body posted by Search. Only Q is always sent;
-// every pointer field is omitted from the JSON when nil.
-type SearchRequest struct {
-	Q           string        `json:"q"`
-	GL          *string       `json:"gl,omitempty"`
-	HL          *string       `json:"hl,omitempty"`
-	Location    *string       `json:"location,omitempty"`
-	Num         *int          `json:"num,omitempty"`
-	Page        *int          `json:"page,omitempty"`
-	Autocorrect *bool         `json:"autocorrect,omitempty"`
-	TBS         *TBSTimeRange `json:"tbs,omitempty"`
-}
-
-// SearchResponse is the JSON reply decoded by Search. The sections are kept as
-// untyped maps, so their inner structure is not validated here.
-type SearchResponse struct {
-	SearchParameters map[string]interface{}   `json:"searchParameters"`
-	Organic          []map[string]interface{} `json:"organic"`
-	KnowledgeGraph   map[string]interface{}   `json:"knowledgeGraph,omitempty"`
-	Images           []map[string]interface{} `json:"images,omitempty"`
-	News             []map[string]interface{} `json:"news,omitempty"`
-	AnswerBox        map[string]interface{}   `json:"answerBox,omitempty"`
-}
-
-// TODO: add timeout
-
-// Search posts query as JSON to https://google.serper.dev/search and returns
-// the decoded reply. It returns an error when the request fails or when the
-// API answers with an HTTP error status.
-func (c *SerperClient) Search(ctx context.Context, query SearchRequest) (*SearchResponse, error) {
-	searchResult := new(SearchResponse)
-
-	request := SerperRestyClient.R().
-		SetContext(ctx).
-		SetHeader("X-API-KEY", c.apiKey).
-		SetHeader("Content-Type", "application/json").
-		SetBody(query).
-		SetResult(searchResult)
-
-	httpResp, postErr := request.Post("https://google.serper.dev/search")
-	switch {
-	case postErr != nil:
-		return nil, postErr
-	case httpResp.IsError():
-		return nil, fmt.Errorf("serper API error: %s", httpResp.Status())
-	}
-
-	return searchResult, nil
-}
-
-// FetchWebpageRequest is the JSON body posted by FetchWebpage. IncludeMarkdown
-// has no omitempty tag, so a nil pointer is serialized as JSON null.
-type FetchWebpageRequest struct {
-	Url             string `json:"url"`
-	IncludeMarkdown *bool  `json:"includeMarkdown"`
-}
-
-// FetchWebpageResponse is the JSON reply decoded by FetchWebpage.
-type FetchWebpageResponse struct {
-	Text     string                 `json:"text"`
-	Metadata map[string]interface{} `json:"metadata"`
-}
-
-// TODO: add timeout
-
-// FetchWebpage posts query as JSON to https://scrape.serper.dev and returns the
-// decoded reply. It returns an error when the request fails or when the API
-// answers with an HTTP error status.
-func (c *SerperClient) FetchWebpage(ctx context.Context, query FetchWebpageRequest) (*FetchWebpageResponse, error) {
-	page := new(FetchWebpageResponse)
-
-	request := SerperRestyClient.R().
-		SetContext(ctx).
-		SetHeader("X-API-KEY", c.apiKey).
-		SetHeader("Content-Type", "application/json").
-		SetBody(query).
-		SetResult(page)
-
-	httpResp, postErr := request.Post("https://scrape.serper.dev")
-	switch {
-	case postErr != nil:
-		return nil, postErr
-	case httpResp.IsError():
-		return nil, fmt.Errorf("serper API error: %s", httpResp.Status())
-	}
-
-	return page, nil
-}
diff --git a/apps/jan-api-gateway/application/app/utils/logger/logger.go b/apps/jan-api-gateway/application/app/utils/logger/logger.go
deleted file mode 100644
index e2286df8..00000000
--- a/apps/jan-api-gateway/application/app/utils/logger/logger.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// logger/logger.go
-package logger
-
-import (
-	"os"
-	"sync"
-
-	"github.com/sirupsen/logrus"
-)
-
-var (
-	// Logger is the process-wide logrus instance; it is nil until GetLogger
-	// has been called at least once.
-	Logger *logrus.Logger
-	// once guards the one-time initialization performed in GetLogger.
-	once sync.Once
-)
-
-// GetLogger returns the singleton logger instance. The first call creates it:
-// JSON-formatted entries with "2006-01-02 15:04:05" timestamps, written to
-// stdout at Info level. Later calls return the same instance.
-func GetLogger() *logrus.Logger {
-	once.Do(func() {
-		Logger = logrus.New()
-		Logger.SetFormatter(&logrus.JSONFormatter{
-			TimestampFormat: "2006-01-02 15:04:05",
-		})
-		Logger.SetOutput(os.Stdout)
-		Logger.SetLevel(logrus.InfoLevel)
-	})
-	return Logger
-}
diff --git a/apps/jan-api-gateway/application/cmd/codegen/dbmigration/Migration.md b/apps/jan-api-gateway/application/cmd/codegen/dbmigration/Migration.md
deleted file mode 100644
index 68c325dd..00000000
--- a/apps/jan-api-gateway/application/cmd/codegen/dbmigration/Migration.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Database Migration Procedure
----
-We use [atlas](https://github.com/ariga/atlas) as our migrations tool.
-Before you begin, please ensure your local environment is set up correctly:
-
-Before running cmd/codegen/dbmigration, complete the following setup:
-1. Install Atlas: If you haven't already, install Atlas using Homebrew.
-    ```
-    brew install ariga/tap/atlas
-    ```
-2. Set up PostgreSQL: Ensure you have a local PostgreSQL instance running. Then, connect to it and set up the necessary user and database.
-    ```sql
-    CREATE ROLE migration WITH LOGIN PASSWORD 'migration';
-    ALTER ROLE migration WITH SUPERUSER;
-    CREATE DATABASE migration WITH OWNER = migration;
-    ```
-3. Configure Environment Variables: Set the following environment variables to point your application to the local database.
-    ```
-    export DB_POSTGRESQL_WRITE_DSN="host=localhost user=migration password=migration dbname=migration port=5432 sslmode=disable"
-    export DB_POSTGRESQL_READ1_DSN="host=localhost user=migration password=migration dbname=migration port=5432 sslmode=disable"
-    ```
----
-The migration process is as follows (go run cmd/codegen/dbmigration):
-1. Generate release.hcl: This file represents the current schema of your production database. It's your "from" schema.
-    ```
-    func main() {
-        environment_variables.EnvironmentVariables.LoadFromEnv()
-
-        // git checkout main
-        // generateHcl("main")
-
-        // git checkout release
-        generateHcl("release")
-
-        // generateDiffSql()
-    }
-    ```
-2. Generate main.hcl: This file represents the desired new schema from your main branch. This is your "to" schema.
-    ```
-    func main() {
-        environment_variables.EnvironmentVariables.LoadFromEnv()
-
-        // git checkout main
-        generateHcl("main")
-
-        // git checkout release
-        // generateHcl("release")
-
-        // generateDiffSql()
-    }
-    ```
-3. Create diff.sql: This command generates the SQL statements needed to migrate from the release schema to the main schema. The output is redirected to a file for review.
-    ```
-    func main() {
-        environment_variables.EnvironmentVariables.LoadFromEnv()
-
-        // git checkout main
-        // generateHcl("main")
-
-        // git checkout release
-        // generateHcl("release")
-
-        generateDiffSql()
-    }
-    ```
-4. Validate diff.sql: This is a critical step. Open diff.sql and manually inspect the generated SQL for potentially harmful operations, such as:
-    - Dropping columns: Look for DROP COLUMN statements. This is a destructive change and will result in permanent data loss.
-    - Adding NOT NULL constraints: Directly adding a NOT NULL constraint to an existing column will fail if it contains NULL values. If Atlas generates this, you need to manually split the change into two safer steps (add nullable column, then update rows, and finally add the constraint).
diff --git a/apps/jan-api-gateway/application/cmd/codegen/dbmigration/dbmigration.go b/apps/jan-api-gateway/application/cmd/codegen/dbmigration/dbmigration.go
deleted file mode 100644
index c4c7b4dc..00000000
--- a/apps/jan-api-gateway/application/cmd/codegen/dbmigration/dbmigration.go
+++ /dev/null
@@ -1,93 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"log"
-	"os"
-	"os/exec"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-	_ "menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-// brew install ariga/tap/atlas
-// postgres=# CREATE ROLE migration WITH LOGIN PASSWORD 'migration';
-// postgres=# ALTER ROLE migration WITH SUPERUSER;
-// postgres=# CREATE DATABASE migration WITH OWNER = migration;
-
-// generateHcl recreates the public schema of the configured database,
-// auto-migrates every registered model into it, and then dumps the resulting
-// schema with `atlas schema inspect` into tmp/<branchName>.hcl.
-//
-// WARNING: this drops the public schema (and all its data) of whatever
-// database database.NewDB connects to. Any failure terminates the program.
-func generateHcl(branchName string) {
-	db, err := database.NewDB()
-	if err != nil {
-		panic(err)
-	}
-	err = db.Exec("DROP SCHEMA IF EXISTS public CASCADE;").Error
-	if err != nil {
-		log.Fatalf("failed to drop schema: %v", err)
-	}
-	err = db.Exec("CREATE SCHEMA public;").Error
-	if err != nil {
-		log.Fatalf("failed to create schema: %v", err)
-	}
-	if err = db.AutoMigrate(database.DatabaseMigration{}); err != nil {
-		panic(err)
-	}
-	for _, model := range database.SchemaRegistry {
-		err = db.AutoMigrate(model)
-		if err != nil {
-			panic(err)
-		}
-	}
-	// branchName is spliced into a shell command line; main only passes the
-	// fixed literals "main" and "release", so never pass untrusted input here.
-	atlasCmdStr := `atlas schema inspect -u "postgres://migration:migration@localhost:5432/migration?sslmode=disable" > tmp/` + branchName + `.hcl`
-	atlasCmd := exec.Command("sh", "-c", atlasCmdStr)
-	// Without this check a failed inspect would silently leave an empty or stale .hcl file.
-	if err = atlasCmd.Run(); err != nil {
-		log.Fatalf("atlas schema inspect failed: %v", err)
-	}
-}
-
-// generateDiffSql recreates an empty public schema in the configured database
-// and runs `atlas schema diff` from tmp/release.hcl to tmp/main.hcl against it,
-// writing the resulting SQL to tmp/diff.sql.
-//
-// WARNING: this drops the public schema (and all its data) of whatever
-// database database.NewDB connects to. Any failure terminates the program.
-func generateDiffSql() {
-	db, err := database.NewDB()
-	if err != nil {
-		panic(err)
-	}
-
-	err = db.Exec("DROP SCHEMA IF EXISTS public CASCADE;").Error
-	if err != nil {
-		log.Fatalf("failed to drop schema: %v", err)
-	}
-	err = db.Exec("CREATE SCHEMA public;").Error
-	if err != nil {
-		log.Fatalf("failed to create schema: %v", err)
-	}
-
-	atlasCmdStr := `atlas schema diff --dev-url "postgres://migration:migration@localhost:5432/migration?sslmode=disable" --from file://tmp/release.hcl --to file://tmp/main.hcl > tmp/diff.sql`
-	atlasCmd := exec.Command("sh", "-c", atlasCmdStr)
-	// Without this check a failed diff would silently leave an empty or stale diff.sql.
-	if err = atlasCmd.Run(); err != nil {
-		log.Fatalf("atlas schema diff failed: %v", err)
-	}
-}
-
-// createTmpFolder makes sure a "tmp" directory (mode 0755) exists inside the
-// current working directory. On failure log.Fatal terminates the process, so
-// the error returns only exist to satisfy the signature.
-func createTmpFolder() error {
-	workingDir, wdErr := os.Getwd()
-	if wdErr != nil {
-		log.Fatal(wdErr)
-		return wdErr
-	}
-	if mkErr := os.MkdirAll(fmt.Sprintf("%s/%s", workingDir, "tmp"), 0755); mkErr != nil {
-		log.Fatal(mkErr)
-		return mkErr
-	}
-	return nil
-}
-
-// main loads the environment variables, ensures ./tmp exists, and runs exactly
-// one migration step. The step is selected by hand: comment/uncomment the calls
-// below (after checking out the matching git branch) before running the tool.
-func main() {
-	environment_variables.EnvironmentVariables.LoadFromEnv()
-	if err := createTmpFolder(); err != nil {
-		panic(err)
-	}
-	// git checkout main
-	generateHcl("main")
-
-	// git checkout release
-	// generateHcl("release")
-
-	// generateDiffSql()
-}
diff --git a/apps/jan-api-gateway/application/cmd/codegen/gorm/gorm.go b/apps/jan-api-gateway/application/cmd/codegen/gorm/gorm.go
deleted file mode 100644
index dd4926fa..00000000
--- a/apps/jan-api-gateway/application/cmd/codegen/gorm/gorm.go
+++ /dev/null
@@ -1,64 +0,0 @@
-package main
-
-import (
-	"log"
-
-	"gorm.io/driver/postgres"
-	"gorm.io/gen"
-	"gorm.io/gorm"
-
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-	_ "menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-// GormGenerator is the gorm/gen generator configured in init and executed in main.
-var GormGenerator *gen.Generator
-
-// init loads the environment variables, opens a PostgreSQL connection using
-// DB_POSTGRESQL_WRITE_DSN, and configures GormGenerator to write its output to
-// ./app/infrastructure/database/gormgen. It panics if the connection fails.
-func init() {
-	environment_variables.EnvironmentVariables.LoadFromEnv()
-	db, err := gorm.Open(postgres.Open(environment_variables.EnvironmentVariables.DB_POSTGRESQL_WRITE_DSN))
-	if err != nil {
-		panic(err)
-	}
-
-	GormGenerator = gen.NewGenerator(gen.Config{
-		OutPath:       "./app/infrastructure/database/gormgen",
-		Mode:          gen.WithDefaultQuery | gen.WithQueryInterface | gen.WithoutContext,
-		FieldNullable: true,
-	})
-	GormGenerator.UseDB(db)
-}
-
-// main generates the GORM query code for every model in database.SchemaRegistry
-// and afterwards recreates the public schema and auto-migrates all models into
-// it.
-//
-// WARNING: the second half drops the public schema (and all its data) of
-// whatever database database.NewDB connects to.
-func main() {
-	for _, model := range database.SchemaRegistry {
-		GormGenerator.ApplyBasic(model)
-		// Querier is intentionally empty: no custom query methods are generated.
-		type Querier interface {
-		}
-		GormGenerator.ApplyInterface(func(Querier) {}, model)
-	}
-	GormGenerator.Execute()
-
-	db, err := database.NewDB()
-	if err != nil {
-		// This failure happens before any migration runs, so say so in the log.
-		logger.GetLogger().
-			WithField("error_code", "db8499be-ae9d-46dc-ac59-1d2c42520e14").
-			Fatalf("failed to initialize database, error: %v", err)
-	}
-	err = db.Exec("DROP SCHEMA IF EXISTS public CASCADE;").Error
-	if err != nil {
-		log.Fatalf("failed to drop schema: %v", err)
-	}
-	err = db.Exec("CREATE SCHEMA public;").Error
-	if err != nil {
-		log.Fatalf("failed to create schema: %v", err)
-	}
-	for _, model := range database.SchemaRegistry {
-		err = db.AutoMigrate(model)
-		if err != nil {
-			logger.GetLogger().
-				WithField("error_code", "75333e43-8157-4f0a-8e34-aa34e6e7c285").
-				Fatalf("failed to auto migrate schema: %T, error: %v", model, err)
-		}
-	}
-}
diff --git a/apps/jan-api-gateway/application/cmd/server/dataInitializer.go b/apps/jan-api-gateway/application/cmd/server/dataInitializer.go
deleted file mode 100644
index dfe767e4..00000000
--- a/apps/jan-api-gateway/application/cmd/server/dataInitializer.go
+++ /dev/null
@@ -1,23 +0,0 @@
-package main
-
-import (
-	"context"
-
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-)
-
-// DataInitializer installs the data the server needs at startup, using the
-// auth service.
-type DataInitializer struct {
-	authService *auth.AuthService
-}
-
-// Install runs all initialization steps; at present that is only the default
-// organization. It returns the first error encountered, or nil.
-func (d *DataInitializer) Install(ctx context.Context) error {
-	if installErr := d.installDefaultOrganization(ctx); installErr != nil {
-		return installErr
-	}
-	return nil
-}
-
-// installDefaultOrganization delegates to AuthService.InitOrganization and
-// returns its error unchanged.
-func (d *DataInitializer) installDefaultOrganization(ctx context.Context) error {
-	return d.authService.InitOrganization(ctx)
-}
diff --git a/apps/jan-api-gateway/application/cmd/server/server.go b/apps/jan-api-gateway/application/cmd/server/server.go
deleted file mode 100644
index f68bfbe3..00000000
--- a/apps/jan-api-gateway/application/cmd/server/server.go
+++ /dev/null
@@ -1,83 +0,0 @@
-package main
-
-import (
-	"context"
-	nethttp "net/http"
-	_ "net/http/pprof"
-
-	_ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof"
-
-	"github.com/mileusna/crontab"
-	"menlo.ai/jan-api-gateway/app/domain/cron"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-	apphttp "menlo.ai/jan-api-gateway/app/interfaces/http"
-	janinference "menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-	"menlo.ai/jan-api-gateway/app/utils/httpclients/serper"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
-)
-
-// Application bundles the long-running components started by Start: the HTTP
-// server and the cron service.
-type Application struct {
-	HttpServer  *apphttp.HttpServer
-	CronService *cron.CronService
-}
-
-// Start launches the cron service on a fresh crontab with a background context
-// and then runs the HTTP server. It panics if the HTTP server returns an error.
-func (application *Application) Start() {
-	// Cron jobs are started first, before the HTTP server is run.
-	application.CronService.Start(context.Background(), crontab.New())
-
-	if runErr := application.HttpServer.Run(); runErr != nil {
-		panic(runErr)
-	}
-}
-
-// init prepares process-wide state before main runs: it creates the logger,
-// loads the environment variables, and initializes the shared HTTP clients for
-// the inference backend and Serper.
-func init() {
-	logger.GetLogger()
-	environment_variables.EnvironmentVariables.LoadFromEnv()
-	// TODO: refactoring: singleton.
-	janinference.Init()
-	serper.Init()
-}
-
-// @title Jan Server
-// @version 1.0
-// @description This is the API gateway for Jan Server.
-// @BasePath /
-
-// @securityDefinitions.apikey BearerAuth
-// @in header
-// @name Authorization
-// @description Type "Bearer" followed by a space and JWT token.
-func main() {
-	background := context.Background()
-
-	// Expose pprof endpoints for profiling (for Grafana Alloy/Pyroscope Go pull mode)
-	go func() {
-		// Default pprof mux is registered on DefaultServeMux by importing net/http/pprof
-		// Listens on 0.0.0.0:6060, i.e. on ALL network interfaces, not only localhost.
-		// NOTE(review): pprof is reachable by anything that can reach port 6060;
-		// confirm the port is not exposed outside the trusted network.
-		if err := nethttp.ListenAndServe("0.0.0.0:6060", nil); err != nil {
-			logger.GetLogger().Errorf("pprof server failed: %v", err)
-		}
-	}()
-
-	// Startup order: build the application, run migrations, install initial
-	// data, then start serving. Any failure aborts startup with a panic.
-	application, err := CreateApplication()
-	if err != nil {
-		panic(err)
-	}
-	err = database.Migration()
-	if err != nil {
-		panic(err)
-	}
-	dataInitializer, err := CreateDataInitializer()
-	if err != nil {
-		panic(err)
-	}
-	err = dataInitializer.Install(background)
-	if err != nil {
-		panic(err)
-	}
-	application.Start()
-}
diff --git a/apps/jan-api-gateway/application/cmd/server/wire.go b/apps/jan-api-gateway/application/cmd/server/wire.go
deleted file mode 100644
index 656a27a5..00000000
--- a/apps/jan-api-gateway/application/cmd/server/wire.go
+++ /dev/null
@@ -1,49 +0,0 @@
-//go:build wireinject
-
-package main
-
-import (
-	"context"
-
-	"github.com/google/wire"
-	"gorm.io/gorm"
-	"menlo.ai/jan-api-gateway/app/domain"
-	"menlo.ai/jan-api-gateway/app/infrastructure"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository"
-	"menlo.ai/jan-api-gateway/app/interfaces/http"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes"
-)
-
-// CreateApplication is a Wire injector template (built only with the
-// wireinject tag). The wire.Build call lists the providers Wire uses to
-// generate the real implementation; the `return nil, nil` is a placeholder
-// that is never executed in the final binary.
-func CreateApplication() (*Application, error) {
-	wire.Build(
-		database.NewDB,
-		repository.RepositoryProvider,
-		infrastructure.InfrastructureProvider,
-		domain.ServiceProvider,
-		routes.RouteProvider,
-		http.NewHttpServer,
-		wire.Struct(new(Application), "*"),
-		provideContext,
-	)
-	return nil, nil
-}
-
-// ProvideDatabase returns the package-level database.DB handle instead of
-// opening a new connection. It returns whatever that variable currently holds.
-func ProvideDatabase() *gorm.DB {
-	return database.DB
-}
-
-// CreateDataInitializer is a Wire injector template for DataInitializer. It
-// takes the database from ProvideDatabase rather than database.NewDB; the
-// `return nil, nil` is a placeholder replaced by the generated code.
-func CreateDataInitializer() (*DataInitializer, error) {
-	wire.Build(
-		ProvideDatabase,
-		repository.RepositoryProvider,
-		infrastructure.InfrastructureProvider,
-		domain.ServiceProvider,
-		wire.Struct(new(DataInitializer), "*"),
-	)
-	return nil, nil
-}
-
-// provideContext supplies context.Background() to providers that need a
-// context.Context.
-func provideContext() context.Context {
-	return context.Background()
-}
diff --git a/apps/jan-api-gateway/application/cmd/server/wire_gen.go b/apps/jan-api-gateway/application/cmd/server/wire_gen.go
deleted file mode 100644
index bcfec80e..00000000
--- a/apps/jan-api-gateway/application/cmd/server/wire_gen.go
+++ /dev/null
@@ -1,148 +0,0 @@
-// Code generated by Wire. DO NOT EDIT.
-
-//go:generate go run -mod=mod github.com/google/wire/cmd/wire
-//go:build !wireinject
-// +build !wireinject
-
-package main
-
-import (
-	"context"
-	"gorm.io/gorm"
-	"menlo.ai/jan-api-gateway/app/domain/apikey"
-	"menlo.ai/jan-api-gateway/app/domain/auth"
-	"menlo.ai/jan-api-gateway/app/domain/conversation"
-	"menlo.ai/jan-api-gateway/app/domain/cron"
-	"menlo.ai/jan-api-gateway/app/domain/inference_model_registry"
-	"menlo.ai/jan-api-gateway/app/domain/invite"
-	"menlo.ai/jan-api-gateway/app/domain/mcp/serpermcp"
-	"menlo.ai/jan-api-gateway/app/domain/organization"
-	"menlo.ai/jan-api-gateway/app/domain/project"
-	"menlo.ai/jan-api-gateway/app/domain/response"
-	"menlo.ai/jan-api-gateway/app/domain/user"
-	"menlo.ai/jan-api-gateway/app/infrastructure/cache"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/apikeyrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/conversationrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/inviterepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/itemrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/organizationrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/projectrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/responserepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/transaction"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/repository/userrepo"
-	"menlo.ai/jan-api-gateway/app/infrastructure/inference"
-	"menlo.ai/jan-api-gateway/app/interfaces/http"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1"
-	auth2 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/auth/google"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/chat"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conv"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/conversations"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/mcp/mcp_impl"
-	organization2 "menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/invites"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/organization/projects/api_keys"
-	"menlo.ai/jan-api-gateway/app/interfaces/http/routes/v1/responses"
-	"menlo.ai/jan-api-gateway/app/utils/httpclients/jan_inference"
-)
-
-import (
-	_ "github.com/grafana/pyroscope-go/godeltaprof/http/pprof"
-	_ "net/http/pprof"
-)
-
-// Injectors from wire.go:
-
-func CreateApplication() (*Application, error) {
-	db, err := database.NewDB()
-	if err != nil {
-		return nil, err
-	}
-	transactionDatabase := transaction.NewDatabase(db)
-	organizationRepository := organizationrepo.NewOrganizationGormRepository(transactionDatabase)
-	organizationService := organization.NewService(organizationRepository)
-	userRepository := userrepo.NewUserGormRepository(transactionDatabase)
-	redisCacheService := cache.NewRedisCacheService()
-	userService := user.NewService(userRepository, redisCacheService)
-	apiKeyRepository := apikeyrepo.NewApiKeyGormRepository(transactionDatabase)
-	apiKeyService := apikey.NewService(apiKeyRepository, organizationService)
-	projectRepository := projectrepo.NewProjectGormRepository(transactionDatabase)
-	projectService := project.NewService(projectRepository)
-	inviteRepository := inviterepo.NewInviteGormRepository(transactionDatabase)
-	inviteService := invite.NewInviteService(inviteRepository)
-	authService := auth.NewAuthService(userService, apiKeyService, organizationService, projectService, inviteService)
-	adminApiKeyAPI := organization2.NewAdminApiKeyAPI(organizationService, authService, apiKeyService, userService)
-	projectApiKeyRoute := apikeys.NewProjectApiKeyRoute(organizationService, projectService, apiKeyService, userService)
-	projectsRoute := projects.NewProjectsRoute(projectService, apiKeyService, authService, projectApiKeyRoute)
-	invitesRoute := invites.NewInvitesRoute(inviteService, projectService, organizationService, authService)
-	organizationRoute := organization2.NewOrganizationRoute(adminApiKeyAPI, projectsRoute, invitesRoute, authService)
-	context := provideContext()
-	janInferenceClient := janinference.NewJanInferenceClient(context)
-	inferenceProvider := inference.NewJanInferenceProvider(janInferenceClient)
-	completionAPI := chat.NewCompletionAPI(inferenceProvider, authService)
-	chatRoute := chat.NewChatRoute(authService, completionAPI)
-	conversationRepository := conversationrepo.NewConversationGormRepository(transactionDatabase)
-	itemRepository := itemrepo.NewItemGormRepository(transactionDatabase)
-	conversationService := conversation.NewService(conversationRepository, itemRepository)
-	completionNonStreamHandler := conv.NewCompletionNonStreamHandler(inferenceProvider, conversationService)
-	completionStreamHandler := conv.NewCompletionStreamHandler(inferenceProvider, conversationService)
-	inferenceModelRegistry := inferencemodelregistry.NewInferenceModelRegistry(redisCacheService, janInferenceClient)
-	convCompletionAPI := conv.NewConvCompletionAPI(completionNonStreamHandler, completionStreamHandler, conversationService, authService, inferenceModelRegistry)
-	serperService := serpermcp.NewSerperService()
-	serperMCP := mcpimpl.NewSerperMCP(serperService)
-	convMCPAPI := conv.NewConvMCPAPI(authService, serperMCP)
-	convChatRoute := conv.NewConvChatRoute(authService, convCompletionAPI, convMCPAPI)
-	conversationAPI := conversations.NewConversationAPI(conversationService, authService)
-	modelAPI := v1.NewModelAPI(inferenceModelRegistry)
-	mcpapi := mcp.NewMCPAPI(serperMCP, authService)
-	googleAuthAPI := google.NewGoogleAuthAPI(userService, authService)
-	authRoute := auth2.NewAuthRoute(googleAuthAPI, userService, authService)
-	responseRepository := responserepo.NewResponseGormRepository(transactionDatabase)
-	responseService := response.NewResponseService(responseRepository, itemRepository, conversationService)
-	responseModelService := response.NewResponseModelService(userService, authService, apiKeyService, conversationService, responseService, inferenceModelRegistry)
-	streamModelService := response.NewStreamModelService(responseModelService)
-	nonStreamModelService := response.NewNonStreamModelService(responseModelService)
-	responseRoute := responses.NewResponseRoute(responseModelService, authService, responseService, streamModelService, nonStreamModelService)
-	v1Route := v1.NewV1Route(organizationRoute, chatRoute, convChatRoute, conversationAPI, modelAPI, mcpapi, authRoute, responseRoute)
-	httpServer := http.NewHttpServer(v1Route)
-	cronService := cron.NewService(janInferenceClient, inferenceModelRegistry)
-	application := &Application{
-		HttpServer:  httpServer,
-		CronService: cronService,
-	}
-	return application, nil
-}
-
-func CreateDataInitializer() (*DataInitializer, error) {
-	db := ProvideDatabase()
-	transactionDatabase := transaction.NewDatabase(db)
-	userRepository := userrepo.NewUserGormRepository(transactionDatabase)
-	redisCacheService := cache.NewRedisCacheService()
-	userService := user.NewService(userRepository, redisCacheService)
-	apiKeyRepository := apikeyrepo.NewApiKeyGormRepository(transactionDatabase)
-	organizationRepository := organizationrepo.NewOrganizationGormRepository(transactionDatabase)
-	organizationService := organization.NewService(organizationRepository)
-	apiKeyService := apikey.NewService(apiKeyRepository, organizationService)
-	projectRepository := projectrepo.NewProjectGormRepository(transactionDatabase)
-	projectService := project.NewService(projectRepository)
-	inviteRepository := inviterepo.NewInviteGormRepository(transactionDatabase)
-	inviteService := invite.NewInviteService(inviteRepository)
-	authService := auth.NewAuthService(userService, apiKeyService, organizationService, projectService, inviteService)
-	dataInitializer := &DataInitializer{
-		authService: authService,
-	}
-	return dataInitializer, nil
-}
-
-// wire.go:
-
-func ProvideDatabase() *gorm.DB {
-	return database.DB
-}
-
-func provideContext() context.Context {
-	return context.Background()
-}
diff --git a/apps/jan-api-gateway/application/config/environment_variables/env.go b/apps/jan-api-gateway/application/config/environment_variables/env.go
deleted file mode 100644
index 6e15b1c0..00000000
--- a/apps/jan-api-gateway/application/config/environment_variables/env.go
+++ /dev/null
@@ -1,84 +0,0 @@
-package environment_variables
-
-import (
-	"os"
-	"reflect"
-	"strconv"
-	"strings"
-	"time"
-
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/config"
-)
-
-type EnvironmentVariable struct {
-	JAN_INFERENCE_MODEL_URL     string
-	SERPER_API_KEY              string
-	JWT_SECRET                  []byte
-	OAUTH2_GOOGLE_CLIENT_ID     string
-	OAUTH2_GOOGLE_CLIENT_SECRET string
-	OAUTH2_GOOGLE_REDIRECT_URL  string
-	DB_POSTGRESQL_WRITE_DSN     string
-	DB_POSTGRESQL_READ1_DSN     string
-	APIKEY_SECRET               string
-	ALLOWED_CORS_HOSTS          []string
-	SMTP_HOST                   string
-	SMTP_PORT                   int
-	SMTP_USERNAME               string
-	SMTP_PASSWORD               string
-	SMTP_SENDER_EMAIL           string
-	INVITE_REDIRECT_URL         string
-	ORGANIZATION_ADMIN_EMAILS   []string
-	// Redis configuration
-	REDIS_URL      string
-	REDIS_PASSWORD string
-	REDIS_DB       int
-}
-
-func (ev *EnvironmentVariable) LoadFromEnv() {
-	v := reflect.ValueOf(ev).Elem()
-	t := v.Type()
-	for i := 0; i < v.NumField(); i++ {
-		field := t.Field(i)
-		envKey := field.Name
-		envValue := os.Getenv(envKey)
-		if envValue == "" {
-			logger.GetLogger().Warnf("Missing SYSENV: %s", envKey)
-		}
-		if envValue != "" {
-			switch v.Field(i).Kind() {
-			case reflect.String:
-				v.Field(i).SetString(envValue)
-			case reflect.Int:
-				intV, err := strconv.Atoi(envValue)
-				if err != nil {
-					logger.GetLogger().Errorf("Invalid int value for %s: %s", envKey, envValue)
-				} else {
-					v.Field(i).SetInt(int64(intV))
-				}
-			case reflect.Bool:
-				boolVal, err := strconv.ParseBool(envValue)
-				if err != nil {
-					logger.GetLogger().Errorf("Invalid boolean value for %s: %s", envKey, envValue)
-				} else {
-					v.Field(i).SetBool(boolVal)
-				}
-			case reflect.Slice:
-				if v.Field(i).Type().Elem().Kind() == reflect.Uint8 {
-					v.Field(i).SetBytes([]byte(envValue))
-				} else if v.Field(i).Type().Elem().Kind() == reflect.String {
-					hosts := strings.Split(envValue, ",")
-					v.Field(i).Set(reflect.ValueOf(hosts))
-				} else {
-					logger.GetLogger().Errorf("Unsupported slice type for %s", field.Name)
-				}
-			default:
-				logger.GetLogger().Errorf("Unsupported field type: %s", field.Name)
-			}
-		}
-	}
-	config.EnvReloadedAt = time.Now()
-}
-
-// Singleton
-var EnvironmentVariables = EnvironmentVariable{}
diff --git a/apps/jan-api-gateway/application/config/version.go b/apps/jan-api-gateway/application/config/version.go
deleted file mode 100644
index 8c66836c..00000000
--- a/apps/jan-api-gateway/application/config/version.go
+++ /dev/null
@@ -1,13 +0,0 @@
-package config
-
-import (
-	"strings"
-	"time"
-)
-
-var Version = "dev"
-var EnvReloadedAt = time.Now()
-
-func IsDev() bool {
-	return strings.HasPrefix(Version, "dev")
-}
diff --git a/apps/jan-api-gateway/application/dev-deployment.txt b/apps/jan-api-gateway/application/dev-deployment.txt
deleted file mode 100644
index 95e94cdd..00000000
--- a/apps/jan-api-gateway/application/dev-deployment.txt
+++ /dev/null
@@ -1 +0,0 @@
-v0.0.1
\ No newline at end of file
diff --git a/apps/jan-api-gateway/application/docs/swagger.yaml b/apps/jan-api-gateway/application/docs/swagger.yaml
deleted file mode 100644
index eadf5799..00000000
--- a/apps/jan-api-gateway/application/docs/swagger.yaml
+++ /dev/null
@@ -1,3448 +0,0 @@
-basePath: /
-definitions:
-  app_interfaces_http_routes_v1.Model:
-    properties:
-      created:
-        type: integer
-      id:
-        type: string
-      object:
-        type: string
-      owned_by:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1.ModelsResponse:
-    properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1.Model'
-        type: array
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_auth.AccessTokenResponse:
-    properties:
-      access_token:
-        type: string
-      expires_in:
-        type: integer
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_auth.GetMeResponse:
-    properties:
-      email:
-        type: string
-      id:
-        type: string
-      name:
-        type: string
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_auth_google.AccessTokenResponse:
-    properties:
-      access_token:
-        type: string
-      expires_in:
-        type: integer
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest:
-    properties:
-      code:
-        type: string
-      state:
-        type: string
-    required:
-    - code
-    type: object
-  app_interfaces_http_routes_v1_auth_google.GoogleLoginUrl:
-    properties:
-      object:
-        type: string
-      url:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conv.ExtendedChatCompletionRequest:
-    properties:
-      chat_template_kwargs:
-        additionalProperties: {}
-        description: |-
-          ChatTemplateKwargs provides a way to add non-standard parameters to the request body.
-          Additional kwargs to pass to the template renderer. Will be accessible by the chat template.
-          Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
-          https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
-        type: object
-      conversation:
-        type: string
-      frequency_penalty:
-        type: number
-      function_call:
-        description: 'Deprecated: use ToolChoice instead.'
-      functions:
-        description: 'Deprecated: use Tools instead.'
-        items:
-          $ref: '#/definitions/openai.FunctionDefinition'
-        type: array
-      guided_choice:
-        description: |-
-          GuidedChoice is a vLLM-specific extension that restricts the model's output
-          to one of the predefined string choices provided in this field. This feature
-          is used to constrain the model's responses to a controlled set of options,
-          ensuring predictable and consistent outputs in scenarios where specific
-          choices are required.
-        items:
-          type: string
-        type: array
-      logit_bias:
-        additionalProperties:
-          type: integer
-        description: |-
-          LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.
-          incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
-          refs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias
-        type: object
-      logprobs:
-        description: |-
-          LogProbs indicates whether to return log probabilities of the output tokens or not.
-          If true, returns the log probabilities of each output token returned in the content of message.
-          This option is currently not available on the gpt-4-vision-preview model.
-        type: boolean
-      max_completion_tokens:
-        description: |-
-          MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
-          including visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning
-        type: integer
-      max_tokens:
-        description: |-
-          MaxTokens The maximum number of tokens that can be generated in the chat completion.
-          This value can be used to control costs for text generated via API.
-          Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
-          refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
-        type: integer
-      messages:
-        items:
-          $ref: '#/definitions/openai.ChatCompletionMessage'
-        type: array
-      metadata:
-        additionalProperties:
-          type: string
-        description: Metadata to store with the completion.
-        type: object
-      model:
-        type: string
-      "n":
-        type: integer
-      parallel_tool_calls:
-        description: 'Disable the default behavior of parallel tool calls by setting
-          it: false.'
-      prediction:
-        allOf:
-        - $ref: '#/definitions/openai.Prediction'
-        description: Configuration for a predicted output.
-      presence_penalty:
-        type: number
-      reasoning_effort:
-        description: Controls effort on reasoning for reasoning models. It can be
-          set to "low", "medium", or "high".
-        type: string
-      response_format:
-        $ref: '#/definitions/openai.ChatCompletionResponseFormat'
-      safety_identifier:
-        description: |-
-          A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.
-          The IDs should be a string that uniquely identifies each user.
-          We recommend hashing their username or email address, in order to avoid sending us any identifying information.
-          https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
-        type: string
-      seed:
-        type: integer
-      service_tier:
-        allOf:
-        - $ref: '#/definitions/openai.ServiceTier'
-        description: Specifies the latency tier to use for processing the request.
-      stop:
-        items:
-          type: string
-        type: array
-      store:
-        description: If true, the response will be stored in the conversation, default
-          is false
-        type: boolean
-      store_reasoning:
-        description: If true, the reasoning will be stored in the conversation, default
-          is false
-        type: boolean
-      stream:
-        type: boolean
-      stream_options:
-        allOf:
-        - $ref: '#/definitions/openai.StreamOptions'
-        description: 'Options for streaming response. Only set this when you set stream:
-          true.'
-      temperature:
-        type: number
-      tool_choice:
-        description: This can be either a string or an ToolChoice object.
-      tools:
-        items:
-          $ref: '#/definitions/openai.Tool'
-        type: array
-      top_logprobs:
-        description: |-
-          TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each
-          token position, each with an associated log probability.
-          logprobs must be set to true if this parameter is used.
-        type: integer
-      top_p:
-        type: number
-      user:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conv.ExtendedCompletionResponse:
-    properties:
-      choices:
-        items:
-          $ref: '#/definitions/openai.ChatCompletionChoice'
-        type: array
-      created:
-        type: integer
-      id:
-        type: string
-      metadata:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conv.ResponseMetadata'
-      model:
-        type: string
-      object:
-        type: string
-      prompt_filter_results:
-        items:
-          $ref: '#/definitions/openai.PromptFilterResult'
-        type: array
-      service_tier:
-        $ref: '#/definitions/openai.ServiceTier'
-      system_fingerprint:
-        type: string
-      usage:
-        $ref: '#/definitions/openai.Usage'
-    type: object
-  app_interfaces_http_routes_v1_conv.Model:
-    properties:
-      created:
-        type: integer
-      id:
-        type: string
-      object:
-        type: string
-      owned_by:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conv.ModelsResponse:
-    properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conv.Model'
-        type: array
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conv.ResponseMetadata:
-    properties:
-      ask_item_id:
-        type: string
-      completion_item_id:
-        type: string
-      conversation_created:
-        type: boolean
-      conversation_id:
-        type: string
-      conversation_title:
-        type: string
-      store:
-        type: boolean
-      store_reasoning:
-        type: boolean
-    type: object
-  app_interfaces_http_routes_v1_conversations.AnnotationResponse:
-    properties:
-      end_index:
-        type: integer
-      file_id:
-        type: string
-      index:
-        type: integer
-      start_index:
-        type: integer
-      text:
-        type: string
-      type:
-        type: string
-      url:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.ContentResponse:
-    properties:
-      file:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.FileContentResponse'
-      finish_reason:
-        type: string
-      image:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ImageContentResponse'
-      input_text:
-        type: string
-      output_text:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.OutputTextResponse'
-      reasoning_content:
-        type: string
-      text:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.TextResponse'
-      type:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.ConversationContentRequest:
-    properties:
-      text:
-        type: string
-      type:
-        type: string
-    required:
-    - type
-    type: object
-  app_interfaces_http_routes_v1_conversations.ConversationItemRequest:
-    properties:
-      content:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationContentRequest'
-        type: array
-      role:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole'
-      type:
-        type: string
-    required:
-    - content
-    - type
-    type: object
-  app_interfaces_http_routes_v1_conversations.ConversationItemResponse:
-    properties:
-      content:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ContentResponse'
-        type: array
-      created_at:
-        type: integer
-      id:
-        type: string
-      object:
-        type: string
-      role:
-        type: string
-      status:
-        type: string
-      type:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.CreateConversationRequest:
-    properties:
-      items:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest'
-        type: array
-      metadata:
-        additionalProperties:
-          type: string
-        type: object
-      title:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.CreateItemsRequest:
-    properties:
-      items:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest'
-        type: array
-    required:
-    - items
-    type: object
-  app_interfaces_http_routes_v1_conversations.DeletedConversationResponse:
-    properties:
-      deleted:
-        type: boolean
-      id:
-        type: string
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse:
-    properties:
-      created_at:
-        type: integer
-      id:
-        type: string
-      metadata:
-        additionalProperties:
-          type: string
-        type: object
-      object:
-        type: string
-      title:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.FileContentResponse:
-    properties:
-      file_id:
-        type: string
-      mime_type:
-        type: string
-      name:
-        type: string
-      size:
-        type: integer
-    type: object
-  app_interfaces_http_routes_v1_conversations.ImageContentResponse:
-    properties:
-      detail:
-        type: string
-      file_id:
-        type: string
-      url:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.OutputTextResponse:
-    properties:
-      annotations:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.AnnotationResponse'
-        type: array
-      text:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.TextResponse:
-    properties:
-      value:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_conversations.UpdateConversationRequest:
-    properties:
-      metadata:
-        additionalProperties:
-          type: string
-        type: object
-      title:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse:
-    properties:
-      deleted:
-        type: boolean
-      id:
-        type: string
-      object:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse:
-    properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse'
-        type: array
-      first_id:
-        type: string
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-      object:
-        example: list
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest:
-    properties:
-      name:
-        example: My Admin API Key
-        type: string
-    required:
-    - name
-    type: object
-  app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse:
-    properties:
-      created_at:
-        example: 1698765432
-        type: integer
-      id:
-        example: key_1234567890
-        type: string
-      last_used_at:
-        example: 1698765432
-        type: integer
-      name:
-        example: My Admin API Key
-        type: string
-      object:
-        example: api_key
-        type: string
-      owner:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_organization.Owner'
-      redacted_value:
-        example: sk-...abcd
-        type: string
-      value:
-        example: sk-abcdef1234567890
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization.Owner:
-    properties:
-      created_at:
-        example: 1698765432
-        type: integer
-      id:
-        example: user_1234567890
-        type: string
-      name:
-        example: John Doe
-        type: string
-      object:
-        example: user
-        type: string
-      role:
-        example: admin
-        type: string
-      type:
-        example: user
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_invites.CreateInviteUserRequest:
-    properties:
-      email:
-        type: string
-      projects:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteProject'
-        type: array
-      role:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_invites.InviteProject:
-    properties:
-      id:
-        type: string
-      role:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_invites.InviteResponse:
-    properties:
-      accepted_at:
-        type: string
-      email:
-        type: string
-      expires_at:
-        type: string
-      id:
-        type: string
-      invited_at:
-        type: string
-      object:
-        type: string
-      projects:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteProject'
-        type: array
-      role:
-        type: string
-      status:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_invites.VerifyInviteUserRequest:
-    properties:
-      code:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest:
-    properties:
-      name:
-        example: New AI Project
-        type: string
-    required:
-    - name
-    type: object
-  app_interfaces_http_routes_v1_organization_projects.ProjectListResponse:
-    properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
-        type: array
-      first_id:
-        type: string
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-      object:
-        example: list
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_projects.ProjectResponse:
-    properties:
-      archived_at:
-        example: 1698765432
-        type: integer
-      created_at:
-        example: 1698765432
-        type: integer
-      id:
-        example: proj_1234567890
-        type: string
-      name:
-        example: My First Project
-        type: string
-      object:
-        example: project
-        type: string
-      status:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest:
-    properties:
-      name:
-        example: Updated AI Project
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse:
-    properties:
-      apikeyType:
-        type: string
-      description:
-        type: string
-      enabled:
-        type: boolean
-      expiresAt:
-        type: string
-      id:
-        type: string
-      key:
-        type: string
-      last_usedAt:
-        type: string
-      permissions:
-        type: string
-      plaintextHint:
-        type: string
-    type: object
-  app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest:
-    properties:
-      description:
-        type: string
-      expiresAt:
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole:
-    enum:
-    - system
-    - user
-    - assistant
-    - tool
-    type: string
-    x-enum-varnames:
-    - ItemRoleSystem
-    - ItemRoleUser
-    - ItemRoleAssistant
-    - ItemRoleTool
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.CreateResponseRequest:
-    properties:
-      background:
-        description: Whether to run the response in the background.
-        type: boolean
-      conversation:
-        description: The conversation ID to append items to. If not set or set to
-          ClientCreatedRootConversationID, a new conversation will be created.
-        type: string
-      frequency_penalty:
-        description: The frequency penalty to use for this response.
-        type: number
-      input:
-        description: The input to the model. Can be a string or array of strings.
-      logit_bias:
-        additionalProperties:
-          format: float64
-          type: number
-        description: The logit bias to use for this response.
-        type: object
-      max_tokens:
-        description: The maximum number of tokens to generate.
-        type: integer
-      metadata:
-        additionalProperties: {}
-        description: The metadata to use for this response.
-        type: object
-      model:
-        description: The ID of the model to use for this response.
-        type: string
-      presence_penalty:
-        description: The presence penalty to use for this response.
-        type: number
-      previous_response_id:
-        description: The ID of the previous response to continue from. If set, the
-          conversation will be loaded from the previous response.
-        type: string
-      repetition_penalty:
-        description: The repetition penalty to use for this response.
-        type: number
-      response_format:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat'
-        description: The response format to use for this response.
-      seed:
-        description: The seed to use for this response.
-        type: integer
-      stop:
-        description: The stop sequences to use for this response.
-        items:
-          type: string
-        type: array
-      store:
-        description: Whether to store the conversation. If false, no conversation
-          will be created or used.
-        type: boolean
-      stream:
-        description: Whether to stream the response.
-        type: boolean
-      system_prompt:
-        description: The system prompt to use for this response.
-        type: string
-      temperature:
-        description: The temperature to use for this response.
-        type: number
-      timeout:
-        description: The timeout in seconds for this response.
-        type: integer
-      tool_choice:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice'
-        description: The tool choice to use for this response.
-      tools:
-        description: The tools to use for this response.
-        items:
-          $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool'
-        type: array
-      top_k:
-        description: The top_k to use for this response.
-        type: integer
-      top_p:
-        description: The top_p to use for this response.
-        type: number
-      user:
-        description: The user to use for this response.
-        type: string
-    required:
-    - input
-    - model
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileInput:
-    properties:
-      file_id:
-        description: The ID of the file.
-        type: string
-    required:
-    - file_id
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileSearchInput:
-    properties:
-      file_ids:
-        description: The IDs of the files to search in.
-        items:
-          type: string
-        type: array
-      max_results:
-        description: The number of results to return.
-        type: integer
-      query:
-        description: The query to search for.
-        type: string
-    required:
-    - file_ids
-    - query
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCall:
-    properties:
-      arguments:
-        additionalProperties: {}
-        description: The arguments to pass to the function.
-        type: object
-      name:
-        description: The name of the function to call.
-        type: string
-    required:
-    - name
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCallsInput:
-    properties:
-      calls:
-        description: The function calls to make.
-        items:
-          $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCall'
-        type: array
-    required:
-    - calls
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionChoice:
-    properties:
-      name:
-        description: The name of the function.
-        type: string
-    required:
-    - name
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionDefinition:
-    properties:
-      description:
-        description: The description of the function.
-        type: string
-      name:
-        description: The name of the function.
-        type: string
-      parameters:
-        additionalProperties: {}
-        description: The parameters of the function.
-        type: object
-    required:
-    - name
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.ImageInput:
-    properties:
-      data:
-        description: The base64 encoded image data.
-        type: string
-      detail:
-        description: The detail level for the image.
-        type: string
-      url:
-        description: The URL of the image.
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.InputType:
-    enum:
-    - text
-    - image
-    - file
-    - web_search
-    - file_search
-    - streaming
-    - function_calls
-    - reasoning
-    type: string
-    x-enum-varnames:
-    - InputTypeText
-    - InputTypeImage
-    - InputTypeFile
-    - InputTypeWebSearch
-    - InputTypeFileSearch
-    - InputTypeStreaming
-    - InputTypeFunctionCalls
-    - InputTypeReasoning
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.ReasoningInput:
-    properties:
-      context:
-        description: The context for the reasoning task.
-        type: string
-      task:
-        description: The reasoning task to perform.
-        type: string
-    required:
-    - task
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat:
-    properties:
-      type:
-        description: The type of response format.
-        type: string
-    required:
-    - type
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.StreamingInput:
-    properties:
-      body:
-        description: The body to send with the request.
-        type: string
-      headers:
-        additionalProperties:
-          type: string
-        description: The headers to send with the request.
-        type: object
-      method:
-        description: The method to use for the request.
-        type: string
-      url:
-        description: The URL to stream from.
-        type: string
-    required:
-    - url
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool:
-    properties:
-      function:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionDefinition'
-        description: The function definition for function tools.
-      type:
-        description: The type of tool.
-        type: string
-    required:
-    - type
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice:
-    properties:
-      function:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionChoice'
-        description: The function to use for function tool choice.
-      type:
-        description: The type of tool choice.
-        type: string
-    required:
-    - type
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_requests.WebSearchInput:
-    properties:
-      max_results:
-        description: The number of results to return.
-        type: integer
-      query:
-        description: The query to search for.
-        type: string
-      search_engine:
-        description: The search engine to use.
-        type: string
-    required:
-    - query
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.ConversationInfo:
-    properties:
-      id:
-        description: The unique ID of the conversation.
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.DetailedUsage:
-    properties:
-      input_tokens:
-        description: The number of tokens in the prompt.
-        type: integer
-      input_tokens_details:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails'
-        description: Details about input tokens.
-      output_tokens:
-        description: The number of tokens in the completion.
-        type: integer
-      output_tokens_details:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails'
-        description: Details about output tokens.
-      total_tokens:
-        description: The total number of tokens used.
-        type: integer
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse:
-    properties:
-      code:
-        type: string
-      error:
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.FormatType:
-    properties:
-      type:
-        description: The type of format.
-        type: string
-    type: object
-  ? menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse
-  : properties:
-      result:
-        $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse'
-      status:
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.InputItem:
-    properties:
-      created:
-        description: The Unix timestamp (in seconds) when the input item was created.
-        type: integer
-      file:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileInput'
-        description: The file content (for file type).
-      file_search:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileSearchInput'
-        description: The file search content (for file_search type).
-      function_calls:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCallsInput'
-        description: The function calls content (for function_calls type).
-      id:
-        description: The unique identifier for the input item.
-        type: string
-      image:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ImageInput'
-        description: The image content (for image type).
-      object:
-        description: The object type, which is always "input_item".
-        type: string
-      reasoning:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ReasoningInput'
-        description: The reasoning content (for reasoning type).
-      streaming:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.StreamingInput'
-        description: The streaming content (for streaming type).
-      text:
-        description: The text content (for text type).
-        type: string
-      type:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.InputType'
-        description: The type of input item.
-      web_search:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.WebSearchInput'
-        description: The web search content (for web_search type).
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.ListInputItemsResponse:
-    properties:
-      data:
-        description: The list of input items.
-        items:
-          $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.InputItem'
-        type: array
-      first_id:
-        description: The first ID in the list.
-        type: string
-      has_more:
-        description: Whether there are more items available.
-        type: boolean
-      last_id:
-        description: The last ID in the list.
-        type: string
-      object:
-        description: The object type, which is always "list".
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.Reasoning:
-    properties:
-      effort:
-        description: The effort level for reasoning.
-        type: string
-      summary:
-        description: The summary of reasoning.
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response:
-    properties:
-      background:
-        description: Whether the response was run in the background.
-        type: boolean
-      cancelled_at:
-        description: The Unix timestamp (in seconds) when the response was cancelled.
-        type: integer
-      completed_at:
-        description: The Unix timestamp (in seconds) when the response was completed.
-        type: integer
-      conversation:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ConversationInfo'
-        description: The conversation that this response belongs to.
-      created:
-        description: The Unix timestamp (in seconds) when the response was created.
-        type: integer
-      error:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseError'
-        description: The error that occurred during processing, if any.
-      failed_at:
-        description: The Unix timestamp (in seconds) when the response was failed.
-        type: integer
-      frequency_penalty:
-        description: The frequency penalty that was used for this response.
-        type: number
-      id:
-        description: The unique identifier for the response.
-        type: string
-      incomplete_details:
-        description: OpenAI API response fields
-      input:
-        description: The input that was provided to the model. Can be a string or
-          array of strings.
-      instructions: {}
-      logit_bias:
-        additionalProperties:
-          format: float64
-          type: number
-        description: The logit bias that was used for this response.
-        type: object
-      max_output_tokens:
-        type: integer
-      max_tokens:
-        description: The maximum number of tokens that were generated.
-        type: integer
-      metadata:
-        additionalProperties: {}
-        description: The metadata that was provided for this response.
-        type: object
-      model:
-        description: The ID of the model used for this response.
-        type: string
-      object:
-        description: The object type, which is always "response".
-        type: string
-      output:
-        description: The output generated by the model.
-      parallel_tool_calls:
-        type: boolean
-      presence_penalty:
-        description: The presence penalty that was used for this response.
-        type: number
-      previous_response_id:
-        type: string
-      reasoning:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Reasoning'
-      repetition_penalty:
-        description: The repetition penalty that was used for this response.
-        type: number
-      response_format:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat'
-        description: The response format that was used for this response.
-      seed:
-        description: The seed that was used for this response.
-        type: integer
-      status:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseStatus'
-        description: The status of the response.
-      stop:
-        description: The stop sequences that were used for this response.
-        items:
-          type: string
-        type: array
-      store:
-        type: boolean
-      stream:
-        description: Whether the response was streamed.
-        type: boolean
-      system_prompt:
-        description: The system prompt that was used for this response.
-        type: string
-      temperature:
-        description: The temperature that was used for this response.
-        type: number
-      text:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TextFormat'
-      timeout:
-        description: The timeout in seconds that was used for this response.
-        type: integer
-      tool_choice:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice'
-        description: The tool choice that was used for this response.
-      tools:
-        description: The tools that were used for this response.
-        items:
-          $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool'
-        type: array
-      top_k:
-        description: The top_k that was used for this response.
-        type: integer
-      top_p:
-        description: The top_p that was used for this response.
-        type: number
-      truncation:
-        type: string
-      usage:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.DetailedUsage'
-        description: The usage statistics for this response.
-      user:
-        description: The user that was provided for this response.
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseError:
-    properties:
-      code:
-        description: The error code.
-        type: string
-      details:
-        additionalProperties: {}
-        description: The error details.
-        type: object
-      message:
-        description: The error message.
-        type: string
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseStatus:
-    enum:
-    - pending
-    - running
-    - completed
-    - cancelled
-    - failed
-    type: string
-    x-enum-varnames:
-    - ResponseStatusPending
-    - ResponseStatusRunning
-    - ResponseStatusCompleted
-    - ResponseStatusCancelled
-    - ResponseStatusFailed
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.TextFormat:
-    properties:
-      format:
-        allOf:
-        - $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.FormatType'
-        description: The format type.
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails:
-    properties:
-      cached_tokens:
-        description: The number of cached tokens.
-        type: integer
-      reasoning_tokens:
-        description: The number of reasoning tokens.
-        type: integer
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.DeleteResponse:
-    properties:
-      deleted:
-        type: boolean
-      id:
-        type: string
-      object:
-        type: string
-    type: object
-  ? menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse
-  : properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse'
-        type: array
-      first_id:
-        type: string
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-      object:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList'
-      total:
-        type: integer
-    type: object
-  ? menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ExtendedConversationResponse
-  : properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse'
-        type: array
-      first_id:
-        type: string
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-      object:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList'
-      total:
-        type: integer
-    type: object
-  ? menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_organization_invites_InviteResponse
-  : properties:
-      data:
-        items:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse'
-        type: array
-      first_id:
-        type: string
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-      object:
-        $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList'
-      total:
-        type: integer
-    type: object
-  menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList:
-    enum:
-    - list
-    type: string
-    x-enum-varnames:
-    - ObjectTypeListList
-  openai.ChatCompletionChoice:
-    properties:
-      content_filter_results:
-        $ref: '#/definitions/openai.ContentFilterResults'
-      finish_reason:
-        allOf:
-        - $ref: '#/definitions/openai.FinishReason'
-        description: |-
-          FinishReason
-          stop: API returned complete message,
-          or a message terminated by one of the stop sequences provided via the stop parameter
-          length: Incomplete model output due to max_tokens parameter or token limit
-          function_call: The model decided to call a function
-          content_filter: Omitted content due to a flag from our content filters
-          null: API response still in progress or incomplete
-      index:
-        type: integer
-      logprobs:
-        $ref: '#/definitions/openai.LogProbs'
-      message:
-        $ref: '#/definitions/openai.ChatCompletionMessage'
-    type: object
-  openai.ChatCompletionMessage:
-    properties:
-      content:
-        type: string
-      function_call:
-        $ref: '#/definitions/openai.FunctionCall'
-      multiContent:
-        items:
-          $ref: '#/definitions/openai.ChatMessagePart'
-        type: array
-      name:
-        description: |-
-          This property isn't in the official documentation, but it's in
-          the documentation for the official library for python:
-          - https://github.com/openai/openai-python/blob/main/chatml.md
-          - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
-        type: string
-      reasoning_content:
-        description: |-
-          This property is used for the "reasoning" feature supported by deepseek-reasoner
-          which is not in the official documentation.
-          the doc from deepseek:
-          - https://api-docs.deepseek.com/api/create-chat-completion#responses
-        type: string
-      refusal:
-        type: string
-      role:
-        type: string
-      tool_call_id:
-        description: For Role=tool prompts this should be set to the ID given in the
-          assistant's prior request to call a tool.
-        type: string
-      tool_calls:
-        description: For Role=assistant prompts this may be set to the tool calls
-          generated by the model, such as function calls.
-        items:
-          $ref: '#/definitions/openai.ToolCall'
-        type: array
-    type: object
-  openai.ChatCompletionRequest:
-    properties:
-      chat_template_kwargs:
-        additionalProperties: {}
-        description: |-
-          ChatTemplateKwargs provides a way to add non-standard parameters to the request body.
-          Additional kwargs to pass to the template renderer. Will be accessible by the chat template.
-          Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
-          https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
-        type: object
-      frequency_penalty:
-        type: number
-      function_call:
-        description: 'Deprecated: use ToolChoice instead.'
-      functions:
-        description: 'Deprecated: use Tools instead.'
-        items:
-          $ref: '#/definitions/openai.FunctionDefinition'
-        type: array
-      guided_choice:
-        description: |-
-          GuidedChoice is a vLLM-specific extension that restricts the model's output
-          to one of the predefined string choices provided in this field. This feature
-          is used to constrain the model's responses to a controlled set of options,
-          ensuring predictable and consistent outputs in scenarios where specific
-          choices are required.
-        items:
-          type: string
-        type: array
-      logit_bias:
-        additionalProperties:
-          type: integer
-        description: |-
-          LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.
-          incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
-          refs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias
-        type: object
-      logprobs:
-        description: |-
-          LogProbs indicates whether to return log probabilities of the output tokens or not.
-          If true, returns the log probabilities of each output token returned in the content of message.
-          This option is currently not available on the gpt-4-vision-preview model.
-        type: boolean
-      max_completion_tokens:
-        description: |-
-          MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
-          including visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning
-        type: integer
-      max_tokens:
-        description: |-
-          MaxTokens The maximum number of tokens that can be generated in the chat completion.
-          This value can be used to control costs for text generated via API.
-          Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
-          refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
-        type: integer
-      messages:
-        items:
-          $ref: '#/definitions/openai.ChatCompletionMessage'
-        type: array
-      metadata:
-        additionalProperties:
-          type: string
-        description: Metadata to store with the completion.
-        type: object
-      model:
-        type: string
-      "n":
-        type: integer
-      parallel_tool_calls:
-        description: 'Disable the default behavior of parallel tool calls by setting
-          it: false.'
-      prediction:
-        allOf:
-        - $ref: '#/definitions/openai.Prediction'
-        description: Configuration for a predicted output.
-      presence_penalty:
-        type: number
-      reasoning_effort:
-        description: Controls effort on reasoning for reasoning models. It can be
-          set to "low", "medium", or "high".
-        type: string
-      response_format:
-        $ref: '#/definitions/openai.ChatCompletionResponseFormat'
-      safety_identifier:
-        description: |-
-          A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.
-          The IDs should be a string that uniquely identifies each user.
-          We recommend hashing their username or email address, in order to avoid sending us any identifying information.
-          https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
-        type: string
-      seed:
-        type: integer
-      service_tier:
-        allOf:
-        - $ref: '#/definitions/openai.ServiceTier'
-        description: Specifies the latency tier to use for processing the request.
-      stop:
-        items:
-          type: string
-        type: array
-      store:
-        description: |-
-          Store can be set to true to store the output of this completion request for use in distillations and evals.
-          https://platform.openai.com/docs/api-reference/chat/create#chat-create-store
-        type: boolean
-      stream:
-        type: boolean
-      stream_options:
-        allOf:
-        - $ref: '#/definitions/openai.StreamOptions'
-        description: 'Options for streaming response. Only set this when you set stream:
-          true.'
-      temperature:
-        type: number
-      tool_choice:
-        description: This can be either a string or an ToolChoice object.
-      tools:
-        items:
-          $ref: '#/definitions/openai.Tool'
-        type: array
-      top_logprobs:
-        description: |-
-          TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each
-          token position, each with an associated log probability.
-          logprobs must be set to true if this parameter is used.
-        type: integer
-      top_p:
-        type: number
-      user:
-        type: string
-    type: object
-  openai.ChatCompletionResponse:
-    properties:
-      choices:
-        items:
-          $ref: '#/definitions/openai.ChatCompletionChoice'
-        type: array
-      created:
-        type: integer
-      id:
-        type: string
-      model:
-        type: string
-      object:
-        type: string
-      prompt_filter_results:
-        items:
-          $ref: '#/definitions/openai.PromptFilterResult'
-        type: array
-      service_tier:
-        $ref: '#/definitions/openai.ServiceTier'
-      system_fingerprint:
-        type: string
-      usage:
-        $ref: '#/definitions/openai.Usage'
-    type: object
-  openai.ChatCompletionResponseFormat:
-    properties:
-      json_schema:
-        $ref: '#/definitions/openai.ChatCompletionResponseFormatJSONSchema'
-      type:
-        $ref: '#/definitions/openai.ChatCompletionResponseFormatType'
-    type: object
-  openai.ChatCompletionResponseFormatJSONSchema:
-    properties:
-      description:
-        type: string
-      name:
-        type: string
-      schema: {}
-      strict:
-        type: boolean
-    type: object
-  openai.ChatCompletionResponseFormatType:
-    enum:
-    - json_object
-    - json_schema
-    - text
-    type: string
-    x-enum-varnames:
-    - ChatCompletionResponseFormatTypeJSONObject
-    - ChatCompletionResponseFormatTypeJSONSchema
-    - ChatCompletionResponseFormatTypeText
-  openai.ChatMessageImageURL:
-    properties:
-      detail:
-        $ref: '#/definitions/openai.ImageURLDetail'
-      url:
-        type: string
-    type: object
-  openai.ChatMessagePart:
-    properties:
-      image_url:
-        $ref: '#/definitions/openai.ChatMessageImageURL'
-      text:
-        type: string
-      type:
-        $ref: '#/definitions/openai.ChatMessagePartType'
-    type: object
-  openai.ChatMessagePartType:
-    enum:
-    - text
-    - image_url
-    type: string
-    x-enum-varnames:
-    - ChatMessagePartTypeText
-    - ChatMessagePartTypeImageURL
-  openai.CompletionTokensDetails:
-    properties:
-      accepted_prediction_tokens:
-        type: integer
-      audio_tokens:
-        type: integer
-      reasoning_tokens:
-        type: integer
-      rejected_prediction_tokens:
-        type: integer
-    type: object
-  openai.ContentFilterResults:
-    properties:
-      hate:
-        $ref: '#/definitions/openai.Hate'
-      jailbreak:
-        $ref: '#/definitions/openai.JailBreak'
-      profanity:
-        $ref: '#/definitions/openai.Profanity'
-      self_harm:
-        $ref: '#/definitions/openai.SelfHarm'
-      sexual:
-        $ref: '#/definitions/openai.Sexual'
-      violence:
-        $ref: '#/definitions/openai.Violence'
-    type: object
-  openai.FinishReason:
-    enum:
-    - stop
-    - length
-    - function_call
-    - tool_calls
-    - content_filter
-    - "null"
-    type: string
-    x-enum-varnames:
-    - FinishReasonStop
-    - FinishReasonLength
-    - FinishReasonFunctionCall
-    - FinishReasonToolCalls
-    - FinishReasonContentFilter
-    - FinishReasonNull
-  openai.FunctionCall:
-    properties:
-      arguments:
-        description: call function with arguments in JSON format
-        type: string
-      name:
-        type: string
-    type: object
-  openai.FunctionDefinition:
-    properties:
-      description:
-        type: string
-      name:
-        type: string
-      parameters:
-        description: |-
-          Parameters is an object describing the function.
-          You can pass json.RawMessage to describe the schema,
-          or you can pass in a struct which serializes to the proper JSON schema.
-          The jsonschema package is provided for convenience, but you should
-          consider another specialized library if you require more complex schemas.
-      strict:
-        type: boolean
-    type: object
-  openai.Hate:
-    properties:
-      filtered:
-        type: boolean
-      severity:
-        type: string
-    type: object
-  openai.ImageURLDetail:
-    enum:
-    - high
-    - low
-    - auto
-    type: string
-    x-enum-varnames:
-    - ImageURLDetailHigh
-    - ImageURLDetailLow
-    - ImageURLDetailAuto
-  openai.JailBreak:
-    properties:
-      detected:
-        type: boolean
-      filtered:
-        type: boolean
-    type: object
-  openai.LogProb:
-    properties:
-      bytes:
-        description: Omitting the field if it is null
-        items:
-          type: integer
-        type: array
-      logprob:
-        type: number
-      token:
-        type: string
-      top_logprobs:
-        description: |-
-          TopLogProbs is a list of the most likely tokens and their log probability, at this token position.
-          In rare cases, there may be fewer than the number of requested top_logprobs returned.
-        items:
-          $ref: '#/definitions/openai.TopLogProbs'
-        type: array
-    type: object
-  openai.LogProbs:
-    properties:
-      content:
-        description: Content is a list of message content tokens with log probability
-          information.
-        items:
-          $ref: '#/definitions/openai.LogProb'
-        type: array
-    type: object
-  openai.Prediction:
-    properties:
-      content:
-        type: string
-      type:
-        type: string
-    type: object
-  openai.Profanity:
-    properties:
-      detected:
-        type: boolean
-      filtered:
-        type: boolean
-    type: object
-  openai.PromptFilterResult:
-    properties:
-      content_filter_results:
-        $ref: '#/definitions/openai.ContentFilterResults'
-      index:
-        type: integer
-    type: object
-  openai.PromptTokensDetails:
-    properties:
-      audio_tokens:
-        type: integer
-      cached_tokens:
-        type: integer
-    type: object
-  openai.SelfHarm:
-    properties:
-      filtered:
-        type: boolean
-      severity:
-        type: string
-    type: object
-  openai.ServiceTier:
-    enum:
-    - auto
-    - default
-    - flex
-    - priority
-    type: string
-    x-enum-varnames:
-    - ServiceTierAuto
-    - ServiceTierDefault
-    - ServiceTierFlex
-    - ServiceTierPriority
-  openai.Sexual:
-    properties:
-      filtered:
-        type: boolean
-      severity:
-        type: string
-    type: object
-  openai.StreamOptions:
-    properties:
-      include_usage:
-        description: |-
-          If set, an additional chunk will be streamed before the data: [DONE] message.
-          The usage field on this chunk shows the token usage statistics for the entire request,
-          and the choices field will always be an empty array.
-          All other chunks will also include a usage field, but with a null value.
-        type: boolean
-    type: object
-  openai.Tool:
-    properties:
-      function:
-        $ref: '#/definitions/openai.FunctionDefinition'
-      type:
-        $ref: '#/definitions/openai.ToolType'
-    type: object
-  openai.ToolCall:
-    properties:
-      function:
-        $ref: '#/definitions/openai.FunctionCall'
-      id:
-        type: string
-      index:
-        description: Index is not nil only in chat completion chunk object
-        type: integer
-      type:
-        $ref: '#/definitions/openai.ToolType'
-    type: object
-  openai.ToolType:
-    enum:
-    - function
-    type: string
-    x-enum-varnames:
-    - ToolTypeFunction
-  openai.TopLogProbs:
-    properties:
-      bytes:
-        items:
-          type: integer
-        type: array
-      logprob:
-        type: number
-      token:
-        type: string
-    type: object
-  openai.Usage:
-    properties:
-      completion_tokens:
-        type: integer
-      completion_tokens_details:
-        $ref: '#/definitions/openai.CompletionTokensDetails'
-      prompt_tokens:
-        type: integer
-      prompt_tokens_details:
-        $ref: '#/definitions/openai.PromptTokensDetails'
-      total_tokens:
-        type: integer
-    type: object
-  openai.Violence:
-    properties:
-      filtered:
-        type: boolean
-      severity:
-        type: string
-    type: object
-info:
-  contact: {}
-  description: This is the API gateway for Jan Server.
-  title: Jan Server
-  version: "1.0"
-paths:
-  /v1/auth/google/callback:
-    post:
-      consumes:
-      - application/json
-      description: Handles the callback from the Google OAuth2 provider to exchange
-        the authorization code for a token, verify the user, and issue access and
-        refresh tokens.
-      parameters:
-      - description: Request body containing the authorization code and state
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully authenticated and returned tokens
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth_google.AccessTokenResponse'
-        "400":
-          description: Bad request (e.g., invalid state, missing code, or invalid
-            claims)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized (e.g., a user claim is not found or is invalid
-            in the context)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal Server Error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      summary: Google OAuth2 Callback
-      tags:
-      - Authentication API
-  /v1/auth/google/login:
-    get:
-      description: Redirects the user to the Google OAuth2 authorization page to initiate
-        the login process.
-      responses:
-        "200":
-          description: redirect url
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleLoginUrl'
-        "500":
-          description: Internal Server Error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      summary: Google OAuth2 Login
-      tags:
-      - Authentication API
-  /v1/auth/guest-login:
-    post:
-      description: JWT-base Guest Login.
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully refreshed the access token
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse'
-        "400":
-          description: Bad Request (e.g., invalid refresh token)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized (e.g., expired or missing refresh token)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      summary: Guest Login
-      tags:
-      - Authentication API
-  /v1/auth/logout:
-    get:
-      consumes:
-      - application/json
-      description: Use a valid refresh token to obtain a new access token. The refresh
-        token is typically sent in a cookie.
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully logout
-        "400":
-          description: Bad Request (e.g., invalid refresh token)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized (e.g., expired or missing refresh token)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      summary: Refresh an access token
-      tags:
-      - Authentication API
-  /v1/auth/me:
-    get:
-      description: Retrieves the profile of the authenticated user based on the provided
-        JWT.
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully retrieved user profile
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth.GetMeResponse'
-        "401":
-          description: Unauthorized (e.g., missing or invalid JWT)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Get user profile
-      tags:
-      - Authentication API
-  /v1/auth/refresh-token:
-    get:
-      consumes:
-      - application/json
-      description: Use a valid refresh token to obtain a new access token. The refresh
-        token is typically sent in a cookie.
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully refreshed the access token
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse'
-        "400":
-          description: Bad Request (e.g., invalid refresh token)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized (e.g., expired or missing refresh token)
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      summary: Refresh an access token
-      tags:
-      - Authentication API
-  /v1/chat/completions:
-    post:
-      consumes:
-      - application/json
-      description: |-
-        Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes without conversation persistence.
-
-        **Streaming Mode (stream=true):**
-        - Returns Server-Sent Events (SSE) with real-time streaming
-        - Streams completion chunks directly from the inference model
-        - Final event contains "[DONE]" marker
-
-        **Non-Streaming Mode (stream=false or omitted):**
-        - Returns single JSON response with complete completion
-        - Standard OpenAI ChatCompletionResponse format
-
-        **Features:**
-        - Supports all OpenAI ChatCompletionRequest parameters
-        - User authentication required
-        - Direct inference model integration
-        - No conversation persistence (stateless)
-      parameters:
-      - description: Chat completion request with streaming options
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/openai.ChatCompletionRequest'
-      produces:
-      - application/json
-      - text/event-stream
-      responses:
-        "200":
-          description: 'Successful streaming response (when stream=true) - SSE format
-            with data: {json} events'
-          schema:
-            type: string
-        "400":
-          description: Invalid request payload, empty messages, or inference failure
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - missing or invalid authentication
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create a chat completion
-      tags:
-      - Chat Completions API
-  /v1/conv/chat/completions:
-    post:
-      consumes:
-      - application/json
-      description: |-
-        Generates a model response for the given chat conversation with conversation persistence and management. This is the conversation-aware version of the chat completion API that supports both streaming and non-streaming modes with conversation management and storage options.
-
-        **Streaming Mode (stream=true):**
-        - Returns Server-Sent Events (SSE) with real-time streaming
-        - First event contains conversation metadata
-        - Subsequent events contain completion chunks
-        - Final event contains "[DONE]" marker
-
-        **Non-Streaming Mode (stream=false or omitted):**
-        - Returns single JSON response with complete completion
-        - Includes conversation metadata in response
-
-        **Storage Options:**
-        - `store=true`: Saves user message and assistant response to conversation
-        - `store_reasoning=true`: Includes reasoning content in stored messages
-        - `conversation`: ID of existing conversation or empty for new conversation
-
-        **Features:**
-        - Conversation persistence and history management
-        - Extended request format with conversation and storage options
-        - User authentication required
-        - Automatic conversation creation and management
-      parameters:
-      - description: Extended chat completion request with streaming, storage, and
-          conversation options
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conv.ExtendedChatCompletionRequest'
-      produces:
-      - application/json
-      - text/event-stream
-      responses:
-        "200":
-          description: 'Successful streaming response (when stream=true) - SSE format
-            with data: {json} events'
-          schema:
-            type: string
-        "400":
-          description: Invalid request payload or conversation not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - missing or invalid authentication
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation not found or user not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create a conversation-aware chat completion
-      tags:
-      - Conversation-aware Chat API
-  /v1/conv/mcp:
-    post:
-      consumes:
-      - application/json
-      description: Handles Model Context Protocol (MCP) requests over an HTTP stream
-        for conversation-aware chat functionality. The response is sent as a continuous
-        stream of data with conversation context.
-      parameters:
-      - description: MCP request payload
-        in: body
-        name: request
-        required: true
-        schema: {}
-      produces:
-      - text/event-stream
-      responses:
-        "200":
-          description: Streamed response (SSE or chunked transfer)
-          schema:
-            type: string
-      security:
-      - BearerAuth: []
-      summary: MCP streamable endpoint for conversation-aware chat
-      tags:
-      - Conversation-aware Chat API
-  /v1/conv/models:
-    get:
-      consumes:
-      - application/json
-      description: Retrieves a list of available models that can be used for conversation-aware
-        chat completions. This endpoint provides the same model list as the standard
-        /v1/models endpoint but is specifically designed for conversation-aware chat
-        functionality.
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successful response
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conv.ModelsResponse'
-        "401":
-          description: Unauthorized - missing or invalid authentication
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List available models for conversation-aware chat
-      tags:
-      - Conversation-aware Chat API
-  /v1/conversations:
-    get:
-      description: Retrieves a paginated list of conversations for the authenticated
-        user with OpenAI-compatible response format.
-      parameters:
-      - default: 20
-        description: The maximum number of items to return
-        in: query
-        name: limit
-        type: integer
-      - description: A cursor for use in pagination. The ID of the last object from
-          the previous page
-        in: query
-        name: after
-        type: string
-      - description: Order of items (asc/desc)
-        in: query
-        name: order
-        type: string
-      responses:
-        "200":
-          description: Successfully retrieved the list of conversations
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ExtendedConversationResponse'
-        "400":
-          description: Bad Request - Invalid pagination parameters
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal Server Error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List Conversations
-      tags:
-      - Conversations API
-    post:
-      consumes:
-      - application/json
-      description: Creates a new conversation for the authenticated user with optional
-        items
-      parameters:
-      - description: Create conversation request
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.CreateConversationRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Created conversation
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse'
-        "400":
-          description: Invalid request - Bad payload, too many items, or invalid item
-            format
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create a conversation
-      tags:
-      - Conversations API
-  /v1/conversations/{conversation_id}:
-    delete:
-      description: Deletes a conversation and all its items permanently
-      parameters:
-      - description: Conversation ID
-        in: path
-        name: conversation_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Deleted conversation
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.DeletedConversationResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Delete a conversation
-      tags:
-      - Conversations API
-    get:
-      description: Retrieves a conversation by its ID with full metadata and title
-      parameters:
-      - description: Conversation ID
-        in: path
-        name: conversation_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Conversation details
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Get a conversation
-      tags:
-      - Conversations API
-    patch:
-      consumes:
-      - application/json
-      description: Updates conversation title and/or metadata
-      parameters:
-      - description: Conversation ID
-        in: path
-        name: conversation_id
-        required: true
-        type: string
-      - description: Update conversation request
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.UpdateConversationRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Updated conversation
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse'
-        "400":
-          description: Invalid request payload or update failed
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Update a conversation
-      tags:
-      - Conversations API
-  /v1/conversations/{conversation_id}/items:
-    get:
-      description: Lists all items in a conversation with OpenAI-compatible pagination
-      parameters:
-      - description: Conversation ID
-        in: path
-        name: conversation_id
-        required: true
-        type: string
-      - description: Number of items to return (1-100)
-        in: query
-        name: limit
-        type: integer
-      - description: Cursor for pagination - ID of the last item from previous page
-        in: query
-        name: after
-        type: string
-      - description: Order of items (asc/desc)
-        in: query
-        name: order
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: List of items
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse'
-        "400":
-          description: Bad Request - Invalid pagination parameters
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List items in a conversation
-      tags:
-      - Conversations API
-    post:
-      consumes:
-      - application/json
-      description: Adds multiple items to a conversation with OpenAI-compatible format
-      parameters:
-      - description: Conversation ID
-        in: path
-        name: conversation_id
-        required: true
-        type: string
-      - description: Create items request
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.CreateItemsRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Created items
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse'
-        "400":
-          description: Invalid request payload or invalid item format
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create items in a conversation
-      tags:
-      - Conversations API
-  /v1/conversations/{conversation_id}/items/{item_id}:
-    delete:
-      description: Deletes a specific item from a conversation and returns the deleted
-        item details
-      parameters:
-      - description: Conversation ID
-        in: path
-        name: conversation_id
-        required: true
-        type: string
-      - description: Item ID
-        in: path
-        name: item_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Deleted item details
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse'
-        "400":
-          description: Bad Request - Deletion failed
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation or item not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Delete an item from a conversation
-      tags:
-      - Conversations API
-    get:
-      description: Retrieves a specific item from a conversation with full content
-        details
-      parameters:
-      - description: Conversation ID
-        in: path
-        name: conversation_id
-        required: true
-        type: string
-      - description: Item ID
-        in: path
-        name: item_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Item details
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Conversation or item not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Get an item from a conversation
-      tags:
-      - Conversations API
-  /v1/mcp:
-    post:
-      consumes:
-      - application/json
-      description: Handles Model Context Protocol (MCP) requests over an HTTP stream.
-        The response is sent as a continuous stream of data.
-      parameters:
-      - description: MCP request payload
-        in: body
-        name: request
-        required: true
-        schema: {}
-      produces:
-      - text/event-stream
-      responses:
-        "200":
-          description: Streamed response (SSE or chunked transfer)
-          schema:
-            type: string
-      security:
-      - BearerAuth: []
-      summary: MCP streamable endpoint
-      tags:
-      - Chat Completions API
-  /v1/models:
-    get:
-      consumes:
-      - application/json
-      description: Retrieves a list of available models that can be used for chat
-        completions or other tasks.
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successful response
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1.ModelsResponse'
-      security:
-      - BearerAuth: []
-      summary: List available models
-      tags:
-      - Chat Completions API
-  /v1/organization/admin_api_keys:
-    get:
-      description: Retrieves a paginated list of all admin API keys for the authenticated
-        organization.
-      parameters:
-      - default: 20
-        description: The maximum number of items to return
-        in: query
-        name: limit
-        type: integer
-      - description: A cursor for use in pagination. The ID of the last object from
-          the previous page
-        in: query
-        name: after
-        type: string
-      responses:
-        "200":
-          description: Successfully retrieved the list of admin API keys
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal Server Error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List Admin API Keys
-      tags:
-      - Administration API
-    post:
-      consumes:
-      - application/json
-      description: Creates a new admin API key for an organization. Requires a valid
-        admin API key in the Authorization header.
-      parameters:
-      - description: API key creation request
-        in: body
-        name: body
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully created admin API key
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse'
-        "400":
-          description: Bad request - invalid payload
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create Admin API Key
-      tags:
-      - Administration API
-  /v1/organization/admin_api_keys/{id}:
-    delete:
-      description: Deletes an admin API key by its ID.
-      parameters:
-      - description: ID of the admin API key to delete
-        in: path
-        name: id
-        required: true
-        type: string
-      responses:
-        "200":
-          description: Successfully deleted the admin API key
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Not Found - API key with the given ID does not exist or does
-            not belong to the organization
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Delete Admin API Key
-      tags:
-      - Administration API
-    get:
-      description: Retrieves a specific admin API key by its ID.
-      parameters:
-      - description: ID of the admin API key
-        in: path
-        name: id
-        required: true
-        type: string
-      responses:
-        "200":
-          description: Successfully retrieved the admin API key
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Not Found - API key with the given ID does not exist or does
-            not belong to the organization
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Get Admin API Key
-      tags:
-      - Administration API
-  /v1/organization/invites:
-    get:
-      description: Retrieves a paginated list of invites for the current organization.
-      parameters:
-      - description: Cursor pointing to a record after which to fetch results
-        in: query
-        name: after
-        type: string
-      - description: Maximum number of results to return
-        in: query
-        name: limit
-        type: integer
-      responses:
-        "200":
-          description: Successfully retrieved list of invites
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_organization_invites_InviteResponse'
-        "400":
-          description: Invalid or missing query parameter
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List Organization Invites
-      tags:
-      - Administration API
-    post:
-      consumes:
-      - application/json
-      description: Creates a new invite for a user to join the organization.
-      parameters:
-      - description: Invite request payload
-        in: body
-        name: invite
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.CreateInviteUserRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully created invite
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse'
-        "400":
-          description: Invalid request payload or user already exists
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create Invite
-      tags:
-      - Administration API
-  /v1/organization/invites/{invite_id}:
-    delete:
-      description: Deletes a specific invite by its ID. Only organization owners can
-        delete invites.
-      parameters:
-      - description: Public ID of the invite
-        in: path
-        name: invite_id
-        required: true
-        type: string
-      responses:
-        "200":
-          description: Successfully deleted invite
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.DeleteResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Forbidden - only owners can delete invites
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Invite not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Delete Invite
-      tags:
-      - Administration API
-    get:
-      description: Retrieves a specific invite by its ID.
-      parameters:
-      - description: Public ID of the invite
-        in: path
-        name: invite_id
-        required: true
-        type: string
-      responses:
-        "200":
-          description: Successfully retrieved invite
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Invite not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Retrieve Invite
-      tags:
-      - Administration API
-  /v1/organization/invites/verification:
-    post:
-      consumes:
-      - application/json
-      description: Verifies an invitation code, checks expiration, registers the user
-        if necessary, and assigns project memberships.
-      parameters:
-      - description: Verification request payload
-        in: body
-        name: verification
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.VerifyInviteUserRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully verified invite
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse'
-        "400":
-          description: Invalid or expired invite code
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Verify Invite
-      tags:
-      - Administration API
-  /v1/organization/projects:
-    get:
-      description: Retrieves a paginated list of all projects for the authenticated
-        organization.
-      parameters:
-      - default: 20
-        description: The maximum number of items to return
-        in: query
-        name: limit
-        type: integer
-      - description: A cursor for use in pagination. The ID of the last object from
-          the previous page
-        in: query
-        name: after
-        type: string
-      - description: Whether to include archived projects.
-        in: query
-        name: include_archived
-        type: string
-      responses:
-        "200":
-          description: Successfully retrieved the list of projects
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectListResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal Server Error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List Projects
-      tags:
-      - Administration API
-    post:
-      consumes:
-      - application/json
-      description: Creates a new project for an organization.
-      parameters:
-      - description: Project creation request
-        in: body
-        name: body
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully created project
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
-        "400":
-          description: Bad request - invalid payload
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal Server Error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create Project
-      tags:
-      - Administration API
-  /v1/organization/projects/{project_id}:
-    get:
-      description: Retrieves a specific project by its ID.
-      parameters:
-      - description: ID of the project
-        in: path
-        name: project_id
-        required: true
-        type: string
-      responses:
-        "200":
-          description: Successfully retrieved the project
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Not Found - project with the given ID does not exist or does
-            not belong to the organization
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Get Project
-      tags:
-      - Administration API
-    post:
-      consumes:
-      - application/json
-      description: Updates a specific project by its ID.
-      parameters:
-      - description: ID of the project to update
-        in: path
-        name: project_id
-        required: true
-        type: string
-      - description: Project update request
-        in: body
-        name: body
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Successfully updated the project
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
-        "400":
-          description: Bad request - invalid payload
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Not Found - project with the given ID does not exist
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Update Project
-      tags:
-      - Administration API
-  /v1/organization/projects/{project_id}/archive:
-    post:
-      description: Archives a specific project by its ID, making it inactive.
-      parameters:
-      - description: ID of the project to archive
-        in: path
-        name: project_id
-        required: true
-        type: string
-      responses:
-        "200":
-          description: Successfully archived the project
-          schema:
-            $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse'
-        "401":
-          description: Unauthorized - invalid or missing API key
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Not Found - project with the given ID does not exist
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Archive Project
-      tags:
-      - Administration API
-  /v1/organization/projects/{project_public_id}/api_keys:
-    get:
-      consumes:
-      - application/json
-      description: List API keys for a specific project.
-      parameters:
-      - description: Project Public ID
-        in: path
-        name: project_public_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: API key created successfully
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse'
-        "400":
-          description: Bad request, e.g., invalid payload or missing IDs
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized, e.g., invalid or missing token
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Not Found, e.g., project or organization not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List new project API key
-      tags:
-      - Administration API
-    post:
-      consumes:
-      - application/json
-      description: Creates a new API key for a specific project.
-      parameters:
-      - description: Project Public ID
-        in: path
-        name: project_public_id
-        required: true
-        type: string
-      - description: Request body for creating an API key
-        in: body
-        name: requestBody
-        required: true
-        schema:
-          $ref: '#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: API key created successfully
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse'
-        "400":
-          description: Bad request, e.g., invalid payload or missing IDs
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized, e.g., invalid or missing token
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Not Found, e.g., project or organization not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create a new project API key
-      tags:
-      - Administration API
-  /v1/responses:
-    post:
-      consumes:
-      - application/json
-      description: |-
-        Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.
-
-        **Supported Input Types:**
-        - `text`: Plain text input
-        - `image`: Image input (URL or base64)
-        - `file`: File input by file ID
-        - `web_search`: Web search input
-        - `file_search`: File search input
-        - `streaming`: Streaming input
-        - `function_calls`: Function calls input
-        - `reasoning`: Reasoning input
-
-        **Example Request:**
-        ```json
-        {
-        "model": "gpt-4",
-        "input": {
-        "type": "text",
-        "text": "Hello, how are you?"
-        },
-        "max_tokens": 100,
-        "temperature": 0.7,
-        "stream": false,
-        "background": false
-        }
-        ```
-
-        **Response Format:**
-        The response uses embedded structure where all fields are at the top level:
-        - `jan_status`: Jan API status code (optional)
-        - `id`: Response identifier
-        - `object`: Object type ("response")
-        - `created`: Unix timestamp
-        - `model`: Model used
-        - `status`: Response status
-        - `input`: Input data
-        - `output`: Generated output
-
-        **Example Response:**
-        ```json
-        {
-        "jan_status": "000000",
-        "id": "resp_1234567890",
-        "object": "response",
-        "created": 1234567890,
-        "model": "gpt-4",
-        "status": "completed",
-        "input": {
-        "type": "text",
-        "text": "Hello, how are you?"
-        },
-        "output": {
-        "type": "text",
-        "text": {
-        "value": "I'm doing well, thank you!"
-        }
-        }
-        }
-        ```
-
-        **Response Status:**
-        - `completed`: Response generation finished successfully
-        - `processing`: Response is being generated
-        - `failed`: Response generation failed
-        - `cancelled`: Response was cancelled
-      parameters:
-      - description: Request payload containing model, input, and generation parameters
-        in: body
-        name: request
-        required: true
-        schema:
-          $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.CreateResponseRequest'
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Created response
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response'
-        "202":
-          description: Response accepted for background processing
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response'
-        "400":
-          description: Invalid request payload
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "422":
-          description: Validation error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "429":
-          description: Rate limit exceeded
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Create a response
-      tags:
-      - Responses API
-  /v1/responses/{response_id}:
-    delete:
-      consumes:
-      - application/json
-      description: |-
-        Deletes an LLM response by its ID. Returns the deleted response object with embedded structure where all fields are at the top level.
-
-        **Response Format:**
-        The response uses embedded structure where all fields are at the top level:
-        - `jan_status`: Jan API status code (optional)
-        - `id`: Response identifier
-        - `object`: Object type ("response")
-        - `created`: Unix timestamp
-        - `model`: Model used
-        - `status`: Response status (will be "cancelled")
-        - `input`: Input data
-        - `cancelled_at`: Cancellation timestamp
-      parameters:
-      - description: Unique identifier of the response
-        in: path
-        name: response_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Deleted response
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response'
-        "400":
-          description: Invalid request
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Response not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Delete a response
-      tags:
-      - Responses API
-    get:
-      consumes:
-      - application/json
-      description: |-
-        Retrieves an LLM response by its ID. Returns the complete response object with embedded structure where all fields are at the top level.
-
-        **Response Format:**
-        The response uses embedded structure where all fields are at the top level:
-        - `jan_status`: Jan API status code (optional)
-        - `id`: Response identifier
-        - `object`: Object type ("response")
-        - `created`: Unix timestamp
-        - `model`: Model used
-        - `status`: Response status
-        - `input`: Input data
-        - `output`: Generated output
-      parameters:
-      - description: Unique identifier of the response
-        in: path
-        name: response_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Response details
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response'
-        "400":
-          description: Invalid request
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Response not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Get a response
-      tags:
-      - Responses API
-  /v1/responses/{response_id}/cancel:
-    post:
-      consumes:
-      - application/json
-      description: |-
-        Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.
-
-        **Response Format:**
-        The response uses embedded structure where all fields are at the top level:
-        - `jan_status`: Jan API status code (optional)
-        - `id`: Response identifier
-        - `object`: Object type ("response")
-        - `created`: Unix timestamp
-        - `model`: Model used
-        - `status`: Response status (will be "cancelled")
-        - `input`: Input data
-        - `cancelled_at`: Cancellation timestamp
-      parameters:
-      - description: Unique identifier of the response to cancel
-        in: path
-        name: response_id
-        required: true
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: Response cancelled successfully
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response'
-        "400":
-          description: Invalid request or response cannot be cancelled
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Response not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: Cancel a response
-      tags:
-      - Responses API
-  /v1/responses/{response_id}/input_items:
-    get:
-      consumes:
-      - application/json
-      description: |-
-        Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.
-
-        **Response Format:**
-        The response uses embedded structure where all fields are at the top level:
-        - `jan_status`: Jan API status code (optional)
-        - `first_id`: First item ID for pagination (optional)
-        - `last_id`: Last item ID for pagination (optional)
-        - `has_more`: Whether more items are available (optional)
-        - `id`: Input item identifier
-        - `object`: Object type ("input_item")
-        - `created`: Unix timestamp
-        - `type`: Input type
-        - `text`: Text content (for text type)
-        - `image`: Image content (for image type)
-        - `file`: File content (for file type)
-
-        **Example Response:**
-        ```json
-        {
-        "jan_status": "000000",
-        "first_id": "input_123",
-        "last_id": "input_456",
-        "has_more": false,
-        "id": "input_1234567890",
-        "object": "input_item",
-        "created": 1234567890,
-        "type": "text",
-        "text": "Hello, world!"
-        }
-        ```
-      parameters:
-      - description: Unique identifier of the response
-        in: path
-        name: response_id
-        required: true
-        type: string
-      - description: 'Maximum number of items to return (default: 20, max: 100)'
-        in: query
-        name: limit
-        type: integer
-      - description: Cursor for pagination - return items after this ID
-        in: query
-        name: after
-        type: string
-      - description: Cursor for pagination - return items before this ID
-        in: query
-        name: before
-        type: string
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: List of input items
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ListInputItemsResponse'
-        "400":
-          description: Invalid request or pagination parameters
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "401":
-          description: Unauthorized
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "403":
-          description: Access denied
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "404":
-          description: Response not found
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-        "500":
-          description: Internal server error
-          schema:
-            $ref: '#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse'
-      security:
-      - BearerAuth: []
-      summary: List input items
-      tags:
-      - Responses API
-  /v1/version:
-    get:
-      description: Returns the current build version of the API server.
-      produces:
-      - application/json
-      responses:
-        "200":
-          description: version info
-          schema:
-            additionalProperties:
-              type: string
-            type: object
-      summary: Get API build version
-      tags:
-      - Server API
-securityDefinitions:
-  BearerAuth:
-    description: Type "Bearer" followed by a space and JWT token.
-    in: header
-    name: Authorization
-    type: apiKey
-swagger: "2.0"
diff --git a/apps/jan-api-gateway/application/go.mod b/apps/jan-api-gateway/application/go.mod
deleted file mode 100644
index 9b010e6b..00000000
--- a/apps/jan-api-gateway/application/go.mod
+++ /dev/null
@@ -1,92 +0,0 @@
-module menlo.ai/jan-api-gateway
-
-go 1.24.6
-
-require (
-	github.com/gin-gonic/gin v1.10.1
-	github.com/google/wire v0.6.0
-	github.com/grafana/pyroscope-go/godeltaprof v0.1.8
-	github.com/mileusna/crontab v1.2.0
-	github.com/redis/go-redis/v9 v9.14.0
-	github.com/swaggo/swag v1.16.6
-	gorm.io/gen v0.3.27
-	gorm.io/gorm v1.30.1
-	gorm.io/plugin/dbresolver v1.6.2
-	resty.dev/v3 v3.0.0-beta.3
-)
-
-require (
-	cloud.google.com/go/compute/metadata v0.3.0 // indirect
-	filippo.io/edwards25519 v1.1.0 // indirect
-	github.com/KyleBanks/depth v1.2.1 // indirect
-	github.com/cespare/xxhash/v2 v2.3.0 // indirect
-	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
-	github.com/go-jose/go-jose/v4 v4.0.5 // indirect
-	github.com/go-openapi/jsonpointer v0.21.2 // indirect
-	github.com/go-openapi/jsonreference v0.21.0 // indirect
-	github.com/go-openapi/spec v0.21.0 // indirect
-	github.com/go-openapi/swag v0.23.1 // indirect
-	github.com/go-redsync/redsync/v4 v4.13.0 // indirect
-	github.com/go-sql-driver/mysql v1.9.3 // indirect
-	github.com/hashicorp/errwrap v1.1.0 // indirect
-	github.com/hashicorp/go-multierror v1.1.1 // indirect
-	github.com/jackc/pgpassfile v1.0.0 // indirect
-	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
-	github.com/jackc/pgx/v5 v5.6.0 // indirect
-	github.com/jackc/puddle/v2 v2.2.2 // indirect
-	github.com/jinzhu/inflection v1.0.0 // indirect
-	github.com/jinzhu/now v1.1.5 // indirect
-	github.com/josharian/intern v1.0.0 // indirect
-	github.com/klauspost/compress v1.17.8 // indirect
-	github.com/spf13/cast v1.7.1 // indirect
-	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
-	golang.org/x/mod v0.27.0 // indirect
-	golang.org/x/sync v0.16.0 // indirect
-	golang.org/x/tools v0.36.0 // indirect
-	gorm.io/datatypes v1.2.6 // indirect
-	gorm.io/driver/mysql v1.6.0 // indirect
-	gorm.io/hints v1.1.2 // indirect
-)
-
-require (
-	github.com/bahlo/generic-list-go v0.2.0 // indirect
-	github.com/buger/jsonparser v1.1.1 // indirect
-	github.com/bytedance/sonic v1.14.0 // indirect
-	github.com/bytedance/sonic/loader v0.3.0 // indirect
-	github.com/cloudwego/base64x v0.1.6 // indirect
-	github.com/coreos/go-oidc/v3 v3.15.0
-	github.com/gabriel-vasile/mimetype v1.4.9 // indirect
-	github.com/gin-contrib/sse v1.1.0 // indirect
-	github.com/go-playground/locales v0.14.1 // indirect
-	github.com/go-playground/universal-translator v0.18.1 // indirect
-	github.com/go-playground/validator/v10 v10.27.0 // indirect
-	github.com/goccy/go-json v0.10.5 // indirect
-	github.com/golang-jwt/jwt/v5 v5.3.0
-	github.com/google/uuid v1.6.0
-	github.com/invopop/jsonschema v0.13.0 // indirect
-	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
-	github.com/leodido/go-urn v1.4.0 // indirect
-	github.com/mailru/easyjson v0.9.0 // indirect
-	github.com/mark3labs/mcp-go v0.37.0
-	github.com/mattn/go-isatty v0.0.20 // indirect
-	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
-	github.com/modern-go/reflect2 v1.0.2 // indirect
-	github.com/pelletier/go-toml/v2 v2.2.4 // indirect
-	github.com/sashabaranov/go-openai v1.41.1
-	github.com/sirupsen/logrus v1.9.3
-	github.com/swaggo/files v1.0.1
-	github.com/swaggo/gin-swagger v1.6.0
-	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
-	github.com/ugorji/go/codec v1.3.0 // indirect
-	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
-	golang.org/x/arch v0.19.0 // indirect
-	golang.org/x/crypto v0.41.0 // indirect
-	golang.org/x/net v0.43.0
-	golang.org/x/oauth2 v0.30.0
-	golang.org/x/sys v0.35.0 // indirect
-	golang.org/x/text v0.28.0 // indirect
-	google.golang.org/protobuf v1.36.6 // indirect
-	gopkg.in/yaml.v3 v3.0.1 // indirect
-	gorm.io/driver/postgres v1.6.0
-)
diff --git a/apps/jan-api-gateway/docker/docker-compose.yml b/apps/jan-api-gateway/docker/docker-compose.yml
deleted file mode 100644
index 5f780801..00000000
--- a/apps/jan-api-gateway/docker/docker-compose.yml
+++ /dev/null
@@ -1,41 +0,0 @@
-version: '3.8'
-
-services:
-  postgres:
-    image: postgres:15-alpine
-    container_name: jan-api-gateway-postgres
-    environment:
-      POSTGRES_DB: jan_api_gateway
-      POSTGRES_USER: jan_user
-      POSTGRES_PASSWORD: jan_password
-      POSTGRES_HOST_AUTH_METHOD: trust
-    ports:
-      - "5432:5432"
-    volumes:
-      - postgres_data:/var/lib/postgresql/data
-      - ./init.sql:/docker-entrypoint-initdb.d/init.sql
-    healthcheck:
-      test: ["CMD-SHELL", "pg_isready -U jan_user -d jan_api_gateway"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-    restart: unless-stopped
-
-  valkey:
-    image: valkey/valkey:7-alpine
-    container_name: jan-api-gateway-valkey
-    ports:
-      - "6379:6379"
-    volumes:
-      - valkey_data:/data
-    healthcheck:
-      test: ["CMD", "valkey-cli", "ping"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-    restart: unless-stopped
-    command: valkey-server --appendonly yes
-
-volumes:
-  postgres_data:
-  valkey_data:
diff --git a/apps/jan-api-gateway/docker/init.sql b/apps/jan-api-gateway/docker/init.sql
deleted file mode 100644
index 082ab76b..00000000
--- a/apps/jan-api-gateway/docker/init.sql
+++ /dev/null
@@ -1,21 +0,0 @@
--- Create the database if missing (PostgreSQL has no CREATE DATABASE IF NOT EXISTS; \gexec needs psql)
-SELECT 'CREATE DATABASE jan_api_gateway' WHERE NOT EXISTS (SELECT FROM pg_database WHERE datname = 'jan_api_gateway')\gexec
-
--- Create the application role only when it is absent (CREATE ROLE has no IF NOT EXISTS either)
-DO $$
-BEGIN
-    IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'jan_user') THEN
-        CREATE ROLE jan_user WITH LOGIN PASSWORD 'jan_password';
-    END IF;
-END
-$$;
-
--- Grant privileges on the database and on existing objects in the public schema
-GRANT ALL PRIVILEGES ON DATABASE jan_api_gateway TO jan_user;
-GRANT ALL PRIVILEGES ON SCHEMA public TO jan_user;
-GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO jan_user;
-GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO jan_user;
-
--- Set default privileges so tables and sequences created later are also granted to jan_user
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO jan_user;
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO jan_user;
diff --git a/apps/jan-api-gateway/docs/Conversation-Flows.md b/apps/jan-api-gateway/docs/Conversation-Flows.md
deleted file mode 100644
index 28b48858..00000000
--- a/apps/jan-api-gateway/docs/Conversation-Flows.md
+++ /dev/null
@@ -1,240 +0,0 @@
-# Conversation Flow Guide for Frontend Integration
-
-This guide explains how to integrate with the Jan API Gateway's conversation system, including the completion API and response API flows.
-
-## Two Ways to Use Conversations
-
-### Method 1: Completion API with Conversation Management
-
-The completion API automatically handles conversation creation and message appending.
-
-#### Storage Options
-
-The completion API supports two storage flags to control how messages are persisted:
-
-- **`store`** (boolean, optional, default: `false`): When set to `true`, saves both the user message and assistant response to the conversation. When `false`, messages are not stored in the conversation history.
-
-- **`store_reasoning`** (boolean, optional, default: `false`): When set to `true`, includes reasoning content in stored messages. This only takes effect when `store` is also `true`. Useful for models that provide reasoning explanations.
-
-#### Flow:
-1. **First Request** (No conversation ID):
-   ```json
-   POST /v1/chat/completions
-   {
-     "model": "jan-v1-4b",
-     "messages": [
-       {"role": "user", "content": "Hello, how are you?"}
-     ],
-     "stream": false,
-     "store": true,
-     "store_reasoning": false
-   }
-   ```
-
-2. **Completion Response** (New conversation created):
-   ```json
-   {
-     "id": "msg_oc07tomng5fqqi8w6bbxzmbuco19v3f9bq7xriuvpq",
-     "object": "chat.completion",
-     "created": 1234567890,
-     "model": "jan-v1-4b",
-     "choices": [
-       {
-         "message": {
-           "role": "assistant",
-           "content": "I'm doing well, thank you!"
-         },
-         "finish_reason": "stop"
-       }
-     ],
-     "metadata": {
-       "conversation_id": "conv_8zrnfsrj9d8424ngl0n2jbien0af3845gfhvpqc5un",
-       "conversation_created": true,
-       "conversation_title": "Hello, how are you?",
-       "ask_item_id": "msg_049gu35s5kwj65tegn398fnut9o1o7p194xu6a61u3",
-       "completion_item_id": "msg_oc07tomng5fqqi8w6bbxzmbuco19v3f9bq7xriuvpq"
-     }
-   }
-   ```
-
-3. **Continue Conversation** (Use conversation ID):
-   ```json
-   POST /v1/chat/completions
-   {
-     "model": "jan-v1-4b",
-     "messages": [
-       {"role": "user", "content": "What's the weather like?"}
-     ],
-     "conversation": "conv_8zrnfsrj9d8424ngl0n2jbien0af3845gfhvpqc5un",
-     "stream": false,
-     "store": true,
-     "store_reasoning": false
-   }
-   ```
-
-#### Streaming Support:
-For streaming responses, set `"stream": true` and handle Server-Sent Events (SSE):
-
-```json
-POST /v1/chat/completions
-{
-  "model": "jan-v1-4b",
-  "messages": [
-    {"role": "user", "content": "Hello, how are you?"}
-  ],
-  "stream": true,
-  "store": true,
-  "store_reasoning": false
-}
-```
-
-**Streaming Response Format:**
-
-The server sends multiple SSE events. The first event contains conversation metadata, followed by content chunks:
-
-1. **Metadata Event** (sent first):
-```
-data: {"completion_item_id":"msg_oc07tomng5fqqi8w6bbxzmbuco19v3f9bq7xriuvpq","conversation_created":true,"conversation_id":"conv_8zrnfsrj9d8424ngl0n2jbien0af3845gfhvpqc5un","conversation_title":"333 Tell me name of largest ocean","object":"chat.completion.metadata","ask_item_id":"msg_049gu35s5kwj65tegn398fnut9o1o7p194xu6a61u3"}
-
-```
-
-**Metadata Event Attribute Meanings:**
-
-- `conversation_created`: (boolean) Indicates if a new conversation was created as a result of this request.
-- `conversation_id`: (string) The unique string identifier for the conversation. Use this for subsequent messages in the same conversation.
-- `conversation_title`: (string) The title or summary of the conversation, often generated from the initial user message.
-- `ask_item_id`: (string) The unique string identifier for the user's message that was just sent. This ID can be used to reference the specific ask message in the database.
-- `completion_item_id`: (string) The unique string identifier for the assistant's response message. This ID can be used to reference the specific completion message in the database.
-- `object`: (string) The type of object returned. For this event, it is always `"chat.completion.metadata"` to indicate metadata about the chat completion.
-
-**Finish Reason Values:**
-
-The `finish_reason` field indicates why the completion ended:
-
-- `stop`: The model completed its response naturally
-- `function_call`: The model is requesting to call a function (legacy format)
-- `tool_calls`: The model is requesting to call one or more tools (new format)
-
-2. **Content Chunk Events** (sent continuously):
-```
-data: {"id":"chatcmpl-b61389e4-eddf-935d-9ef4-7c9ab6a6d689","object":"chat.completion.chunk","created":1758067863,"model":"jan-v1-4b","choices":[{"index":0,"delta":{"role":"assistant","content":"I'm"},"logprobs":null,"finish_reason":null}]}
-
-data: {"id":"chatcmpl-b61389e4-eddf-935d-9ef4-7c9ab6a6d689","object":"chat.completion.chunk","created":1758067863,"model":"jan-v1-4b","choices":[{"index":0,"delta":{"content":" doing"},"logprobs":null,"finish_reason":null}]}
-
-data: {"id":"chatcmpl-b61389e4-eddf-935d-9ef4-7c9ab6a6d689","object":"chat.completion.chunk","created":1758067863,"model":"jan-v1-4b","choices":[{"index":0,"delta":{"content":" well, thank you!"},"logprobs":null,"finish_reason":"stop"}]}
-```
-
-**Example with Tool Calls:**
-```
-data: {"id":"chatcmpl-b61389e4-eddf-935d-9ef4-7c9ab6a6d689","object":"chat.completion.chunk","created":1758067863,"model":"jan-v1-4b","choices":[{"index":0,"delta":{"tool_calls":[{"id":"call_123","type":"function","function":{"name":"get_weather","arguments":"{\"location\":\"New York\"}"}}]},"logprobs":null,"finish_reason":"tool_calls"}]}
-```
-
-**Example with Function Call (Legacy):**
-```
-data: {"id":"chatcmpl-b61389e4-eddf-935d-9ef4-7c9ab6a6d689","object":"chat.completion.chunk","created":1758067863,"model":"jan-v1-4b","choices":[{"index":0,"delta":{"function_call":{"name":"get_weather","arguments":"{\"location\":\"New York\"}"}},"logprobs":null,"finish_reason":"function_call"}]}
-```
-
-
-2. **Continue Conversation** (Use conversation ID):
-   ```json
-   POST /v1/chat/completions
-   {
-     "model": "jan-v1-4b",
-     "messages": [
-       {"role": "user", "content": "What's the weather like?"}
-     ],
-     "conversation": "conv_uzaxr1z1mq38k23r99kl1qq9eelobeam0gw21n8q9z",
-     "stream": false,
-     "store": true,
-     "store_reasoning": false
-   }
-   ```
-
-### Method 2: Direct Conversation Management
-
-Use the conversation API to explicitly manage conversations and their messages.
-
-#### 1. Create Conversation:
-```json
-POST /v1/conversations
-{
-  "title": "My Chat Session",
-  "metadata": {
-    "model": "jan-v1-4b",
-    "session_type": "chat"
-  }
-}
-```
-
-**Response:**
-```json
-{
-  "id": "conv_abc123...",
-  "object": "conversation",
-  "title": "My Chat Session",
-  "created_at": 1234567890,
-  "metadata": {
-    "model": "jan-v1-4b",
-    "session_type": "chat"
-  }
-}
-```
-
-#### 2. Add Messages to Conversation:
-```json
-POST /v1/conversations/{conversation_id}/items
-{
-  "items": [
-    {
-      "type": "message",
-      "role": "user",
-      "content": [
-        {
-          "type": "text",
-          "text": "Hello, how are you?"
-        }
-      ]
-    }
-  ]
-}
-```
-
-#### 3. List Conversation Items:
-```http
-GET /v1/conversations/{conversation_id}/items
-```
-
-#### 4. Delete Specific Conversation Items:
-```http
-DELETE /v1/conversations/{conversation_id}/items/{item_id}
-```
-
-#### 5. Update Conversation:
-```json
-PATCH /v1/conversations/{conversation_id}
-{
-  "title": "Updated Chat Title"
-}
-```
-
-#### 6. List All Conversations:
-```http
-GET /v1/conversations?limit=20&after=cursor_id
-```
-
-### Common Error Codes
-
-- `0199506b-314d-70e2-a8aa-d5fde1569d1d` - User not found
-- `a1b2c3d4-e5f6-7890-abcd-ef1234567890` - Conversation not found
-- `cf237451-8932-48d1-9cf6-42c4db2d4805` - Invalid request payload
-- `c6d6bafd-b9f3-4ebb-9c90-a21b07308ebc` - Unauthorized access
-
-### HTTP Status Codes
-
-- `200` - Success
-- `400` - Bad Request (invalid payload)
-- `401` - Unauthorized (invalid API key)
-- `404` - Not Found (conversation/resource not found)
-- `422` - Validation Error
-- `429` - Rate Limit Exceeded
-- `500` - Internal Server Error
\ No newline at end of file
diff --git a/apps/jan-api-gateway/docs/System_Design.png b/apps/jan-api-gateway/docs/System_Design.png
deleted file mode 100644
index 2f05b8c8..00000000
Binary files a/apps/jan-api-gateway/docs/System_Design.png and /dev/null differ
diff --git a/apps/jan-api-gateway/docs/system-design-mermaid.txt b/apps/jan-api-gateway/docs/system-design-mermaid.txt
deleted file mode 100644
index 18c7d12e..00000000
--- a/apps/jan-api-gateway/docs/system-design-mermaid.txt
+++ /dev/null
@@ -1,134 +0,0 @@
-graph TB
-    subgraph "Client Layer"
-        WEB[Web Client]
-        API_CLIENT[API Client]
-        MOBILE[Mobile App]
-        PROFILER[Profiling Tools<br/>pprof/Pyroscope]
-    end
-    
-    subgraph "Jan API Gateway"
-        subgraph "HTTP Layer"
-            GIN[Gin Router<br/>Port 8080]
-            MIDDLEWARE[Middleware<br/>- CORS<br/>- JWT Auth<br/>- Logging<br/>- Transaction<br/>- Request ID]
-        end
-        
-        subgraph "Route Handlers"
-            AUTH_ROUTE[Authentication API<br/>- Google OAuth2<br/>- JWT Management<br/>- Guest Login]
-            CHAT_ROUTE[Chat Completions API<br/>- OpenAI Compatible<br/>- Streaming Support<br/>- Models & MCP]
-            CONV_ROUTE[Conversation-aware Chat API<br/>- Context-aware Completions<br/>- MCP Integration]
-            CONV_MGMT_ROUTE[Conversations API<br/>- CRUD Operations<br/>- Item Management]
-            ADMIN_ROUTE[Administration API<br/>- Organization Management<br/>- Project Management<br/>- API Key Management]
-            RESPONSE_ROUTE[Responses API<br/>- Response Tracking<br/>- Status Management]
-            SERVER_ROUTE[Server API<br/>- Version Info<br/>- Health Checks]
-        end
-        
-        subgraph "Business Logic Layer"
-            USER_SVC[User Service<br/>- Authentication<br/>- Profile Management<br/>- Guest Users]
-            CONV_SVC[Conversation Service<br/>- Message Storage<br/>- Thread Management<br/>- Context Handling]
-            ORG_SVC[Organization Service<br/>- Multi-tenancy<br/>- Access Control<br/>- Project Management]
-            API_KEY_SVC[API Key Service<br/>- Key Generation<br/>- Validation<br/>- Scoped Access]
-            MCP_SVC[MCP Service<br/>- Tool Integration<br/>- External APIs<br/>- JSON-RPC 2.0]
-            RESPONSE_SVC[Response Service<br/>- Status Tracking<br/>- Usage Statistics<br/>- Error Handling]
-            HEALTH_SVC[Health Service<br/>- Model Monitoring<br/>- Cron Jobs<br/>- Registry Management]
-        end
-        
-        subgraph "Data Layer"
-            USER_REPO[User Repository<br/>- Guest Users<br/>- Profile Data]
-            CONV_REPO[Conversation Repository<br/>- Thread Storage<br/>- Metadata]
-            ITEM_REPO[Item Repository<br/>- Messages<br/>- Content Types]
-            ORG_REPO[Organization Repository<br/>- Multi-tenant Data<br/>- Access Control]
-            PROJECT_REPO[Project Repository<br/>- Resource Isolation<br/>- Member Management]
-            API_KEY_REPO[API Key Repository<br/>- Scoped Keys<br/>- Usage Tracking]
-            RESPONSE_REPO[Response Repository<br/>- Status Tracking<br/>- Usage Data]
-            TX_MGR[Transaction Manager<br/>- Auto Rollback<br/>- Context Handling<br/>- Read/Write Split]
-        end
-        
-        subgraph "Infrastructure"
-            DB_WRITE[(PostgreSQL<br/>Write Replica)]
-            DB_READ[(PostgreSQL<br/>Read Replicas)]
-            PPROF[pprof Server<br/>Port 6060]
-            CRON[Cron Scheduler<br/>Health Checks]
-        end
-        
-        subgraph "External Services"
-            JAN_INFERENCE[Jan Inference<br/>Service]
-            SERPER_API[Serper API<br/>Web Search]
-            GOOGLE_OAUTH[Google OAuth2<br/>Authentication]
-            PYROSCOPE[Grafana Pyroscope<br/>Continuous Profiling]
-        end
-    end
-    
-    %% Client connections
-    WEB --> GIN
-    API_CLIENT --> GIN
-    MOBILE --> GIN
-    PROFILER --> PPROF
-    
-    %% HTTP layer connections
-    GIN --> MIDDLEWARE
-    MIDDLEWARE --> AUTH_ROUTE
-    MIDDLEWARE --> CHAT_ROUTE
-    MIDDLEWARE --> CONV_ROUTE
-    MIDDLEWARE --> CONV_MGMT_ROUTE
-    MIDDLEWARE --> ADMIN_ROUTE
-    MIDDLEWARE --> RESPONSE_ROUTE
-    MIDDLEWARE --> SERVER_ROUTE
-    
-    %% Route to service connections
-    AUTH_ROUTE --> USER_SVC
-    CHAT_ROUTE --> CONV_SVC
-    CHAT_ROUTE --> MCP_SVC
-    CHAT_ROUTE --> HEALTH_SVC
-    CONV_ROUTE --> CONV_SVC
-    CONV_ROUTE --> MCP_SVC
-    CONV_MGMT_ROUTE --> CONV_SVC
-    ADMIN_ROUTE --> ORG_SVC
-    ADMIN_ROUTE --> API_KEY_SVC
-    RESPONSE_ROUTE --> RESPONSE_SVC
-    SERVER_ROUTE --> HEALTH_SVC
-    
-    %% Service to repository connections
-    USER_SVC --> USER_REPO
-    CONV_SVC --> CONV_REPO
-    CONV_SVC --> ITEM_REPO
-    ORG_SVC --> ORG_REPO
-    ORG_SVC --> PROJECT_REPO
-    API_KEY_SVC --> API_KEY_REPO
-    RESPONSE_SVC --> RESPONSE_REPO
-    
-    %% Repository to database connections
-    USER_REPO --> TX_MGR
-    CONV_REPO --> TX_MGR
-    ITEM_REPO --> TX_MGR
-    ORG_REPO --> TX_MGR
-    PROJECT_REPO --> TX_MGR
-    API_KEY_REPO --> TX_MGR
-    RESPONSE_REPO --> TX_MGR
-    TX_MGR --> DB_WRITE
-    TX_MGR --> DB_READ
-    
-    %% Infrastructure connections
-    HEALTH_SVC --> CRON
-    HEALTH_SVC --> JAN_INFERENCE
-    
-    %% External service connections
-    CHAT_ROUTE --> JAN_INFERENCE
-    CONV_ROUTE --> JAN_INFERENCE
-    MCP_SVC --> SERPER_API
-    AUTH_ROUTE --> GOOGLE_OAUTH
-    PPROF --> PYROSCOPE
-    
-    %% Styling
-    classDef clientClass fill:#e1f5fe
-    classDef gatewayClass fill:#f3e5f5
-    classDef serviceClass fill:#e8f5e8
-    classDef dataClass fill:#fff3e0
-    classDef externalClass fill:#fce4ec
-    classDef infraClass fill:#f1f8e9
-    
-    class WEB,API_CLIENT,MOBILE,PROFILER clientClass
-    class GIN,MIDDLEWARE,AUTH_ROUTE,CHAT_ROUTE,CONV_ROUTE,CONV_MGMT_ROUTE,ADMIN_ROUTE,RESPONSE_ROUTE,SERVER_ROUTE gatewayClass
-    class USER_SVC,CONV_SVC,ORG_SVC,API_KEY_SVC,MCP_SVC,RESPONSE_SVC,HEALTH_SVC serviceClass
-    class USER_REPO,CONV_REPO,ITEM_REPO,ORG_REPO,PROJECT_REPO,API_KEY_REPO,RESPONSE_REPO,TX_MGR,DB_WRITE,DB_READ dataClass
-    class JAN_INFERENCE,SERPER_API,GOOGLE_OAUTH,PYROSCOPE externalClass
-    class PPROF,CRON infraClass
\ No newline at end of file
diff --git a/charts/jan-server/Chart.lock b/charts/jan-server/Chart.lock
deleted file mode 100644
index 714ec05f..00000000
--- a/charts/jan-server/Chart.lock
+++ /dev/null
@@ -1,9 +0,0 @@
-dependencies:
-- name: postgresql
-  repository: https://charts.bitnami.com/bitnami
-  version: 16.7.27
-- name: valkey-cluster
-  repository: oci://registry-1.docker.io/bitnamicharts
-  version: 3.0.24
-digest: sha256:b2b995e3a0d8f36c673cadb85e06908f641a9c00faee0a94cc948edf199916e5
-generated: "2025-09-26T09:36:22.7343617+07:00"
diff --git a/charts/jan-server/Chart.yaml b/charts/jan-server/Chart.yaml
deleted file mode 100644
index b323ea83..00000000
--- a/charts/jan-server/Chart.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-apiVersion: v2
-name: jan-server
-description: A Helm chart for deploying the API Gateway service
-type: application
-version: 0.1.0
-appVersion: "v0.0.11"
-
-dependencies:
-  - name: postgresql
-    version: "16.7.27"
-    repository: https://charts.bitnami.com/bitnami
-    condition: postgresql.enabled
-  - name: valkey-cluster
-    version: "3.0.24"
-    repository: oci://registry-1.docker.io/bitnamicharts
-    condition: valkey.enabled
-    alias: valkey
diff --git a/charts/jan-server/README.md b/charts/jan-server/README.md
deleted file mode 100644
index 2df6a344..00000000
--- a/charts/jan-server/README.md
+++ /dev/null
@@ -1,125 +0,0 @@
-# Jan Gateway Helm Chart
-
-![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v0.0.11](https://img.shields.io/badge/AppVersion-v0.0.11-informational?style=flat-square)
-
-Helm chart for deploying the Jan Gateway with optional AI/ML inference support.
-
-## Prerequisites
-
-- Kubernetes 1.24+
-- Helm 3.8+
-- Storage class with `ReadWriteMany` (required when inference is enabled)
-- GPU nodes (optional, only required for GPU-backed inference)
-
-## Quick Start
-
-> **Default behavior:** `inference.enabled` is `true`. Installing the chart with default values also installs the GPU Operator, KubeRay Operator, Envoy Gateway, and Aibrix components. If any of them is already present in the cluster, skip that dependency by setting its toggle to `false`.
-
-### Install without inference
-
-```bash
-helm install jan-server ./jan-server \
-  --namespace jan-system \
-  --create-namespace
-```
-
-### Install with inference enabled
-
-```bash
-helm install jan-server ./jan-server \
-  --namespace jan-system \
-  --create-namespace \
-  --set inference.enabled=true \
-  --set inference.dependencies.gpuOperator.enabled=true
-```
-
-## Values
-
-| Key | Type | Default | Description |
-| --- | ---- | ------- | ----------- |
-| **Core** | | | |
-| gateway.replicaCount | int | `1` | Number of application replicas |
-| gateway.image.repository | string | `"janai/jan-server"` | Container image repository |
-| gateway.image.tag | string | `""` | Image tag (defaults to chart appVersion) |
-| **Dependencies** | | | |
-| postgresql.enabled | bool | `true` | Deploy bundled PostgreSQL chart |
-| valkey.enabled | bool | `true` | Deploy bundled Valkey (Redis) chart |
-| **Inference** | | | |
-| inference.enabled | bool | `false` | Toggle AI/ML inference components |
-| inference.dependencies.gpuOperator.enabled | bool | `true` | Install NVIDIA GPU Operator when inference enabled |
-| inference.dependencies.kuberayOperator.enabled | bool | `true` | Install KubeRay operator when inference enabled |
-| inference.cleanup.autoCleanupDependencies | bool | `true` | Remove operator releases on uninstall |
-
-> See `values.yaml` for the full list of configuration options.
->
-> **Inference dependencies:** end users may install GPU Operator, KubeRay, Envoy Gateway, and Aibrix independently. When those operators are not yet present, setting `inference.enabled` and the corresponding dependency toggles to `true` triggers the chart hooks to install them automatically.
-
-## Configuration Examples
-
-### Minimal install
-
-```yaml
-inference:
-  enabled: false
-
-gateway:
-  replicaCount: 1
-  resources:
-    requests:
-      cpu: 250m
-      memory: 256Mi
-    limits:
-      cpu: 500m
-      memory: 512Mi
-  env:
-    - name: JAN_INFERENCE_MODEL_URL
-      value: "https://your-external-inference-endpoint"
-```
-
-> When inference is disabled, update the `gateway.env` entry for `JAN_INFERENCE_MODEL_URL` to point at your own external model endpoint. The default value targets the in-cluster Envoy endpoint that is created when inference with Aibrix is enabled.
-
-### Inference-enabled install
-
-```yaml
-inference:
-  enabled: true
-  storage:
-    enabled: true
-    storageClassName: "nfs-client"
-    size: "200Gi"
-  dependencies:
-    gpuOperator:
-      enabled: true
-    kuberayOperator:
-      enabled: true
-  cleanup:
-    autoCleanupDependencies: false
-    cleanupGpuOperator: false
-```
-
-## Upgrade
-
-```bash
-helm upgrade jan-server ./jan-server \
-  --namespace jan-system \
-  --reuse-values
-```
-
-## Uninstall
-
-```bash
-helm uninstall jan-server --namespace jan-system
-
-# Alternatively, if inference dependencies were installed, enable cleanup first and then uninstall
-helm upgrade jan-server ./jan-server \
-  --namespace jan-system \
-  --set inference.cleanup.autoCleanupDependencies=true
-helm uninstall jan-server --namespace jan-system
-```
-
-## Maintainers
-
-| Name | Email | Url |
-| --- | --- | --- |
-| Jan.ai Team | <support@jan.ai> | <https://jan.ai> |
-
diff --git a/charts/jan-server/templates/_crd-helpers.tpl b/charts/jan-server/templates/_crd-helpers.tpl
deleted file mode 100644
index a5cd4919..00000000
--- a/charts/jan-server/templates/_crd-helpers.tpl
+++ /dev/null
@@ -1,81 +0,0 @@
-{{/*
-jan-server.crdsExist renders "true" when `lookup` finds the marker CRD of every enabled inference dependency, else "false".
-Inner comments carry a left trim marker so the result has no leading blank lines. NOTE: per the Helm docs, `lookup` returns nothing during `helm template` or a client-side dry run, so any enabled dependency yields "false" there.
-*/}}
-{{- define "jan-server.crdsExist" -}}
-{{- $crdsReady := true }}
-
-{{- /* Check GPU Operator CRDs if enabled */}}
-{{- if and .Values.inference.enabled .Values.inference.dependencies.gpuOperator.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "clusterpolicies.nvidia.com") }}
-{{- $crdsReady = false }}
-{{- end }}
-{{- end }}
-
-{{- /* Check KubeRay CRDs if enabled */}}
-{{- if and .Values.inference.enabled .Values.inference.dependencies.kuberayOperator.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "rayclusters.ray.io") }}
-{{- $crdsReady = false }}
-{{- end }}
-{{- end }}
-
-{{- /* Check Envoy Gateway CRDs if enabled */}}
-{{- if and .Values.inference.enabled .Values.inference.dependencies.envoyGateway.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "gatewayclasses.gateway.networking.k8s.io") }}
-{{- $crdsReady = false }}
-{{- end }}
-{{- end }}
-
-{{- /* Check Aibrix CRDs if enabled */}}
-{{- if and .Values.inference.enabled .Values.inference.dependencies.aibrix.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "podautoscalers.autoscaling.aibrix.ai") }}
-{{- $crdsReady = false }}
-{{- end }}
-{{- end }}
-
-{{- $crdsReady }}
-{{- end }}
-
-{{/*
-jan-server.crdValidationWarning renders a warning listing each enabled inference dependency whose marker CRD `lookup` cannot find, followed by manual install commands; it renders nothing when no CRD is missing. The KubeRay command uses the configured operator namespace (falling back to kuberay-system) so it matches the namespace the chart's own jobs use.
-*/}}
-{{- define "jan-server.crdValidationWarning" -}}
-{{- $missingCrds := list }}
-
-{{- if and .Values.inference.enabled .Values.inference.dependencies.gpuOperator.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "clusterpolicies.nvidia.com") }}
-{{- $missingCrds = append $missingCrds "clusterpolicies.nvidia.com (GPU Operator)" }}
-{{- end }}
-{{- end }}
-
-{{- if and .Values.inference.enabled .Values.inference.dependencies.kuberayOperator.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "rayclusters.ray.io") }}
-{{- $missingCrds = append $missingCrds "rayclusters.ray.io (KubeRay)" }}
-{{- end }}
-{{- end }}
-
-{{- if and .Values.inference.enabled .Values.inference.dependencies.envoyGateway.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "gatewayclasses.gateway.networking.k8s.io") }}
-{{- $missingCrds = append $missingCrds "gatewayclasses.gateway.networking.k8s.io (Envoy Gateway)" }}
-{{- end }}
-{{- end }}
-
-{{- if and .Values.inference.enabled .Values.inference.dependencies.aibrix.enabled }}
-{{- if not (lookup "apiextensions.k8s.io/v1" "CustomResourceDefinition" "" "podautoscalers.autoscaling.aibrix.ai") }}
-{{- $missingCrds = append $missingCrds "podautoscalers.autoscaling.aibrix.ai (Aibrix)" }}
-{{- end }}
-{{- end }}
-
-{{- if $missingCrds }}
-WARNING: The following CRDs are missing and will be installed by pre-install hook:
-{{- range $missingCrds }}
-  - {{ . }}
-{{- end }}
-
-If the hook fails, you can install dependencies manually:
-  helm upgrade --install gpu-operator nvidia/gpu-operator --namespace gpu-operator-resources --create-namespace
-  helm upgrade --install kuberay-operator kuberay/kuberay-operator --namespace {{ .Values.inference.dependencies.kuberayOperator.namespace | default "kuberay-system" }} --create-namespace
-  helm upgrade --install envoy-gateway oci://docker.io/envoyproxy/gateway-helm --namespace envoy-gateway-system --create-namespace
-  helm upgrade --install aibrix aibrix/aibrix --namespace aibrix-system --create-namespace
-{{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/_helpers.tpl b/charts/jan-server/templates/_helpers.tpl
deleted file mode 100644
index da3aab08..00000000
--- a/charts/jan-server/templates/_helpers.tpl
+++ /dev/null
@@ -1,47 +0,0 @@
-{{/*
-Chart name: nameOverride when set, otherwise the chart's own name; capped at 63 characters with a trailing "-" removed.
-*/}}
-{{- define "jan-server.name" -}}
-{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}}
-{{- end }}
-
-{{/*
-Fully qualified app name: fullnameOverride when set, otherwise "<release name>-<chart name>"; capped at 63 characters with a trailing "-" removed.
-*/}}
-{{- define "jan-server.fullname" -}}
-{{- $qualified := .Values.fullnameOverride }}
-{{- if not $qualified }}
-{{- $qualified = printf "%s-%s" .Release.Name (include "jan-server.name" .) }}
-{{- end }}
-{{- $qualified | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Common labels for chart resources. The version value is quoted so a numeric-looking appVersion (e.g. 1.0) stays a YAML string.
-*/}}
-{{- define "jan-server.labels" -}}
-helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
-app.kubernetes.io/name: {{ include "jan-server.name" . }}
-app.kubernetes.io/instance: {{ .Release.Name }}
-app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
-app.kubernetes.io/managed-by: {{ .Release.Service }}
-{{- end }}
-
-{{/*
-Selector labels: the app name and release instance label pair.
-*/}}
-{{- define "jan-server.selectorLabels" -}}
-{{ printf "app.kubernetes.io/name: %s" (include "jan-server.name" .) }}
-{{ printf "app.kubernetes.io/instance: %s" .Release.Name }}
-{{- end }}
-
-{{/*
-Name of the service account to use: gateway.serviceAccount.name when set; otherwise the chart fullname if gateway.serviceAccount.create is set, else "default".
-*/}}
-{{- define "jan-server.serviceAccountName" -}}
-{{- $fallback := "default" }}
-{{- if .Values.gateway.serviceAccount.create }}
-{{- $fallback = include "jan-server.fullname" . }}
-{{- end }}
-{{- .Values.gateway.serviceAccount.name | default $fallback }}
-{{- end }}
diff --git a/charts/jan-server/templates/cleanup-job.yaml b/charts/jan-server/templates/cleanup-job.yaml
deleted file mode 100644
index 0d4a9966..00000000
--- a/charts/jan-server/templates/cleanup-job.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-{{- if .Values.inference.enabled }}{{/* Post-delete cleanup hook resources; rendered only when inference is enabled. */}}
-apiVersion: v1
-kind: ServiceAccount  # identity the cleanup Job pod runs as
-metadata:
-  name: {{ include "jan-server.fullname" . }}-cleanup
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": post-delete
-    "helm.sh/hook-weight": "5"  # lower weight than the cleanup Job ("10"), so Helm creates it before the Job
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole  # permissions granted to the cleanup ServiceAccount
-metadata:
-  name: {{ include "jan-server.fullname" . }}-cleanup
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": post-delete
-    "helm.sh/hook-weight": "5"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
-rules:
-- apiGroups: ["*"]  # every API group ...
-  resources: ["*"]  # ... and every resource type, cluster-wide
-  verbs: ["get", "list", "delete"]  # read and delete only; no create/update
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding  # binds the ClusterRole above to the cleanup ServiceAccount
-metadata:
-  name: {{ include "jan-server.fullname" . }}-cleanup
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": post-delete
-    "helm.sh/hook-weight": "5"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: {{ include "jan-server.fullname" . }}-cleanup
-subjects:
-- kind: ServiceAccount
-  name: {{ include "jan-server.fullname" . }}-cleanup
-  namespace: {{ .Release.Namespace }}  # the ServiceAccount lives in the release namespace
----
-apiVersion: batch/v1
-kind: Job  # post-delete hook: optionally uninstalls the operator releases this chart depends on
-metadata:
-  name: {{ include "jan-server.fullname" . }}-cleanup-deps
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": post-delete
-    "helm.sh/hook-weight": "10"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
-spec:
-  template:
-    metadata:
-      name: {{ include "jan-server.fullname" . }}-cleanup-deps
-    spec:
-      restartPolicy: Never
-      serviceAccountName: {{ include "jan-server.fullname" . }}-cleanup
-      containers:
-      - name: cleanup-deps
-        image: dtzar/helm-kubectl:latest  # NOTE(review): unpinned tag; consider pinning a version
-        command:
-        - /bin/sh
-        - -c
-        - |
-          set -e
-          
-          echo "🧹 Cleaning up Jan Gateway dependencies..."
-          
-          # Check if we should cleanup dependencies (only if no other charts depend on them)
-          {{- if .Values.inference.cleanup.autoCleanupDependencies }}
-          
-          # Count other releases whose `helm list -A` line matches jan-/inference/ai- (name heuristic, not a real dependency check)
-          DEPENDENT_RELEASES=$(helm list -A | grep -E "(jan-|inference|ai-)" | grep -v "{{ .Release.Name }}" | wc -l || echo "0")
-          
-          if [ "$DEPENDENT_RELEASES" -eq "0" ]; then
-            echo "🚀 No other dependent releases found, cleaning up operators..."
-            
-            {{- if and .Values.inference.dependencies.aibrix.enabled .Values.inference.cleanup.cleanupAibrix }}
-            echo "Removing Aibrix..."
-            if helm list -n aibrix-system | grep -q "aibrix"; then
-              helm uninstall aibrix -n aibrix-system || true
-              sleep 10  # Wait for resources to be cleaned up
-            fi
-            kubectl delete namespace aibrix-system --ignore-not-found=true || true
-            {{- else if .Values.inference.dependencies.aibrix.enabled }}
-            echo "⏩ Skipping Aibrix cleanup (cleanupAibrix=false)"
-            {{- end }}
-            
-            {{- if and .Values.inference.dependencies.envoyGateway.enabled .Values.inference.cleanup.cleanupEnvoyGateway }}
-            echo "Removing Envoy Gateway..."
-            if helm list -n envoy-gateway-system | grep -q "envoy-gateway"; then
-              helm uninstall envoy-gateway -n envoy-gateway-system || true
-              sleep 10  # Wait for resources to be cleaned up
-            fi
-            kubectl delete namespace envoy-gateway-system --ignore-not-found=true || true
-            {{- else if .Values.inference.dependencies.envoyGateway.enabled }}
-            echo "⏩ Skipping Envoy Gateway cleanup (cleanupEnvoyGateway=false)"
-            {{- end }}
-            
-            {{- if and .Values.inference.dependencies.kuberayOperator.enabled .Values.inference.cleanup.cleanupKuberayOperator }}
-            echo "Removing KubeRay Operator..."
-            if helm list -n {{ .Values.inference.dependencies.kuberayOperator.namespace }} | grep -q "kuberay-operator"; then
-              helm uninstall kuberay-operator -n {{ .Values.inference.dependencies.kuberayOperator.namespace }} || true
-              sleep 10  # Wait for resources to be cleaned up
-            fi
-            kubectl delete namespace {{ .Values.inference.dependencies.kuberayOperator.namespace }} --ignore-not-found=true || true
-            {{- else if .Values.inference.dependencies.kuberayOperator.enabled }}
-            echo "⏩ Skipping KubeRay Operator cleanup (cleanupKuberayOperator=false)"
-            {{- end }}
-            
-            {{- if and .Values.inference.dependencies.gpuOperator.enabled .Values.inference.cleanup.cleanupGpuOperator }}
-            echo "Removing GPU Operator..."
-            if helm list -n gpu-operator-resources | grep -q "gpu-operator"; then
-              helm uninstall gpu-operator -n gpu-operator-resources || true
-              sleep 10  # Wait for resources to be cleaned up
-            fi
-            kubectl delete namespace gpu-operator-resources --ignore-not-found=true || true
-            {{- else if .Values.inference.dependencies.gpuOperator.enabled }}
-            echo "⏩ Skipping GPU Operator cleanup (cleanupGpuOperator=false)"
-            {{- end }}
-            
-            echo "✅ Dependencies cleanup completed"
-          else
-            echo "⚠️  Found $DEPENDENT_RELEASES other releases that might depend on operators, skipping cleanup"
-            echo "To force cleanup, run:"
-            echo "  helm uninstall gpu-operator -n gpu-operator-resources"
-            echo "  helm uninstall kuberay-operator -n {{ .Values.inference.dependencies.kuberayOperator.namespace | default "kuberay-system" }}"
-            echo "  helm uninstall envoy-gateway -n envoy-gateway-system"
-            echo "  helm uninstall aibrix -n aibrix-system"
-          fi
-          
-          {{- else }}
-          
-          echo "⚠️  Auto-cleanup disabled. Dependencies were NOT removed:"
-          echo "  - GPU Operator (gpu-operator-resources namespace)"
-          echo "  - KubeRay Operator ({{ .Values.inference.dependencies.kuberayOperator.namespace | default "kuberay-system" }} namespace)"
-          echo "  - Envoy Gateway (envoy-gateway-system namespace)"
-          echo "  - Aibrix (aibrix-system namespace)"
-          echo ""
-          echo "To remove manually:"
-          echo "  helm uninstall gpu-operator -n gpu-operator-resources"
-          echo "  helm uninstall kuberay-operator -n {{ .Values.inference.dependencies.kuberayOperator.namespace | default "kuberay-system" }}"
-          echo "  helm uninstall envoy-gateway -n envoy-gateway-system"
-          echo "  helm uninstall aibrix -n aibrix-system"
-          echo ""
-          echo "To enable auto-cleanup: --set inference.cleanup.autoCleanupDependencies=true"
-          
-          {{- end }}
-          
-          echo "🎉 Jan Gateway cleanup completed"
----
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/crd-installer-job.yaml b/charts/jan-server/templates/crd-installer-job.yaml
deleted file mode 100644
index 4df13e9d..00000000
--- a/charts/jan-server/templates/crd-installer-job.yaml
+++ /dev/null
@@ -1,115 +0,0 @@
-{{- if .Values.inference.enabled }}{{/* Pre-install/pre-upgrade hook Job that installs the enabled inference operators; rendered only when inference is enabled. */}}
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: {{ include "jan-server.fullname" . }}-install-crds
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": pre-install,pre-upgrade
-    "helm.sh/hook-weight": "-10"  # higher than the crd-installer RBAC hooks ("-15"), so it runs after them
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
-spec:
-  template:
-    metadata:
-      name: {{ include "jan-server.fullname" . }}-install-crds
-    spec:
-      restartPolicy: Never
-      serviceAccountName: {{ include "jan-server.fullname" . }}-crd-installer
-      containers:
-      - name: install-crds
-        image: alpine/helm:3.13.0  # NOTE(review): the script below calls kubectl as well as helm — confirm this image provides kubectl (the cleanup Job uses dtzar/helm-kubectl)
-        command:
-        - /bin/sh
-        - -c
-        - |
-          set -e
-          
-          echo "Adding Helm repositories..."
-          helm repo add nvidia https://helm.ngc.nvidia.com/nvidia || true
-          helm repo add kuberay https://ray-project.github.io/kuberay-helm/ || true
-          helm repo add aibrix https://danchev.github.io/helm-charts || true
-          helm repo update
-          
-          # Install GPU Operator if enabled (skipped when its marker CRD already exists)
-          {{- if .Values.inference.dependencies.gpuOperator.enabled }}
-          echo "Installing GPU Operator CRDs..."
-          if ! kubectl get crd clusterpolicies.nvidia.com >/dev/null 2>&1; then
-            echo "Installing GPU Operator..."
-            helm upgrade --install gpu-operator nvidia/gpu-operator \
-              --namespace gpu-operator-resources \
-              --create-namespace \
-              --version {{ .Values.inference.dependencies.gpuOperator.version }} \
-              --set operator.cleanupCRD=false \
-              --wait --timeout=10m
-          else
-            echo "✅ GPU Operator CRDs already exist"
-          fi
-          {{- end }}
-          
-          # Install KubeRay Operator if enabled (skipped when its marker CRD already exists)
-          {{- if .Values.inference.dependencies.kuberayOperator.enabled }}
-          echo "Installing KubeRay Operator CRDs..."
-          if ! kubectl get crd rayclusters.ray.io >/dev/null 2>&1; then
-            echo "Installing KubeRay Operator..."
-            helm upgrade --install kuberay-operator kuberay/kuberay-operator \
-              --namespace {{ .Values.inference.dependencies.kuberayOperator.namespace }} \
-              --create-namespace \
-              --version {{ .Values.inference.dependencies.kuberayOperator.version }} \
-              --wait --timeout=10m
-          else
-            echo "✅ KubeRay Operator CRDs already exist"
-          fi
-          {{- end }}
-          
-          # Install Envoy Gateway if enabled (OCI chart, so no `helm repo add` is needed; skipped when its marker CRD already exists)
-          {{- if .Values.inference.dependencies.envoyGateway.enabled }}
-          echo "Installing Envoy Gateway CRDs..."
-          if ! kubectl get crd gatewayclasses.gateway.networking.k8s.io >/dev/null 2>&1; then
-            echo "Installing Envoy Gateway..."
-            helm upgrade --install envoy-gateway oci://registry.menlo.ai/dockerhub/envoyproxy/gateway-helm \
-              --namespace envoy-gateway-system \
-              --create-namespace \
-              --version {{ .Values.inference.dependencies.envoyGateway.version }} \
-              --wait --timeout=10m
-          else
-            echo "✅ Envoy Gateway CRDs already exist"
-          fi
-          {{- end }}
-          
-          # Install Aibrix if enabled (skipped when its marker CRD already exists)
-          {{- if .Values.inference.dependencies.aibrix.enabled }}
-          echo "Installing Aibrix CRDs..."
-          if ! kubectl get crd podautoscalers.autoscaling.aibrix.ai >/dev/null 2>&1; then
-            echo "Installing Aibrix..."
-            helm upgrade --install aibrix aibrix/aibrix \
-              --namespace aibrix-system \
-              --create-namespace \
-              --version {{ .Values.inference.dependencies.aibrix.version }} \
-              --wait --timeout=10m
-          else
-            echo "✅ Aibrix CRDs already exist"
-          fi
-          {{- end }}
-          
-          echo "✅ All required CRDs are installed!"
-          
-          # Wait for CRDs to be established (best effort: "|| true" ignores a failed or timed-out wait, so the final message prints either way)
-          echo "Waiting for CRDs to be established..."
-          {{- if .Values.inference.dependencies.gpuOperator.enabled }}
-          kubectl wait --for condition=established --timeout=60s crd/clusterpolicies.nvidia.com || true
-          {{- end }}
-          {{- if .Values.inference.dependencies.kuberayOperator.enabled }}
-          kubectl wait --for condition=established --timeout=60s crd/rayclusters.ray.io || true
-          {{- end }}
-          {{- if .Values.inference.dependencies.envoyGateway.enabled }}
-          kubectl wait --for condition=established --timeout=60s crd/gatewayclasses.gateway.networking.k8s.io || true
-          kubectl wait --for condition=established --timeout=60s crd/httproutes.gateway.networking.k8s.io || true
-          {{- end }}
-          {{- if .Values.inference.dependencies.aibrix.enabled }}
-          kubectl wait --for condition=established --timeout=60s crd/podautoscalers.autoscaling.aibrix.ai || true
-          {{- end }}
-          
-          echo "✅ CRDs are ready!"
----
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/crd-installer-rbac.yaml b/charts/jan-server/templates/crd-installer-rbac.yaml
deleted file mode 100644
index b4aa6830..00000000
--- a/charts/jan-server/templates/crd-installer-rbac.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-{{- if .Values.inference.enabled }}{{/* Same gate as the install-crds Job, which always runs as this ServiceAccount; no longer tied to gateway.serviceAccount.create. */}}
-apiVersion: v1
-kind: ServiceAccount  # identity the install-crds hook Job runs as
-metadata:
-  name: {{ include "jan-server.fullname" . }}-crd-installer
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": pre-install,pre-upgrade
-    "helm.sh/hook-weight": "-15"  # lower than the install-crds Job ("-10"), so RBAC exists before the Job starts
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: {{ include "jan-server.fullname" . }}-crd-installer
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": pre-install,pre-upgrade
-    "helm.sh/hook-weight": "-15"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
-rules:
-# CRD management
-- apiGroups: ["apiextensions.k8s.io"]
-  resources: ["customresourcedefinitions"]
-  verbs: ["get", "list", "watch"]
-# Namespace management
-- apiGroups: [""]
-  resources: ["namespaces"]
-  verbs: ["get", "list", "create", "patch", "update"]
-# For operator installations (broad permissions needed for operators); this wildcard rule already covers the two rules above
-- apiGroups: ["*"]
-  resources: ["*"]
-  verbs: ["*"]
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding  # binds the ClusterRole above to the crd-installer ServiceAccount
-metadata:
-  name: {{ include "jan-server.fullname" . }}-crd-installer
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": pre-install,pre-upgrade
-    "helm.sh/hook-weight": "-15"
-    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: {{ include "jan-server.fullname" . }}-crd-installer
-subjects:
-- kind: ServiceAccount
-  name: {{ include "jan-server.fullname" . }}-crd-installer
-  namespace: {{ .Release.Namespace }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/deployment.yaml b/charts/jan-server/templates/deployment.yaml
deleted file mode 100644
index 84b53ccb..00000000
--- a/charts/jan-server/templates/deployment.yaml
+++ /dev/null
@@ -1,289 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment  # the gateway workload
-metadata:
-  name: {{ include "jan-server.fullname" . }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-spec:
-  replicas: {{ .Values.gateway.replicaCount }}  # NOTE(review): rendered even when gateway.autoscaling.enabled creates an HPA for this Deployment — confirm the two do not fight over the replica count
-  selector:
-    matchLabels:  # must stay identical to the pod template labels below
-      app.kubernetes.io/name: {{ include "jan-server.name" . }}
-      app.kubernetes.io/instance: {{ .Release.Name }}
-  template:
-    metadata:
-      labels:
-        app.kubernetes.io/name: {{ include "jan-server.name" . }}
-        app.kubernetes.io/instance: {{ .Release.Name }}
-    spec:
-      {{- with .Values.gateway.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      serviceAccountName: {{ include "jan-server.serviceAccountName" . }}
-      securityContext:  # pod-level security context, copied verbatim from gateway.podSecurityContext
-        {{- toYaml .Values.gateway.podSecurityContext | nindent 8 }}
-      initContainers:  # optional dependency waits, then generation of the database DSN file
-        {{- if .Values.postgresql.enabled }}
-        - name: wait-for-postgres  # loops until <release>-postgresql accepts TCP connections on 5432
-          image: busybox:1.35
-          command: ['sh', '-c']
-          args:
-            - |
-              echo "Waiting for PostgreSQL to be ready..."
-              until nc -z {{ .Release.Name }}-postgresql 5432; do
-                echo "PostgreSQL is not ready yet..."
-                sleep 2
-              done
-              echo "PostgreSQL is ready!"
-        {{- end }}
-        {{- if .Values.valkey.enabled }}
-        - name: wait-for-valkey  # loops until <release>-valkey accepts TCP connections on 6379
-          image: busybox:1.35
-          command: ['sh', '-c']
-          args:
-            - |
-              echo "Waiting for Valkey to be ready..."
-              until nc -z {{ .Release.Name }}-valkey 6379; do
-                echo "Valkey is not ready yet..."
-                sleep 2
-              done
-              echo "Valkey is ready!"
-        {{- end }}
-        - name: db-config-builder  # always present: writes the PostgreSQL DSNs to /shared/db-config.env
-          image: busybox:1.35
-          command: ["/bin/sh"]
-          args:
-            - -c
-            - |
-              echo "Building database connection strings..."
-              {{- if .Values.postgresql.enabled }}
-              # Bitnami PostgreSQL (read and write DSNs point at the same host; password comes from the DB_PASSWORD env var)
-              WRITE_DSN="host={{ .Release.Name }}-postgresql user={{ .Values.postgresql.global.postgresql.auth.username }} password=${DB_PASSWORD} dbname={{ .Values.postgresql.global.postgresql.auth.database }} port=5432 sslmode=disable"
-              READ_DSN="host={{ .Release.Name }}-postgresql user={{ .Values.postgresql.global.postgresql.auth.username }} password=${DB_PASSWORD} dbname={{ .Values.postgresql.global.postgresql.auth.database }} port=5432 sslmode=disable"
-              {{- else if .Values.externalPostgresql.host }}
-              {{- if .Values.externalPostgresql.existingSecret }}
-              # External PostgreSQL with existing secret (username and password come from the DB_USERNAME / DB_PASSWORD env vars)
-              WRITE_DSN="host={{ .Values.externalPostgresql.host }} user=${DB_USERNAME} password=${DB_PASSWORD} dbname={{ .Values.externalPostgresql.database }} port={{ .Values.externalPostgresql.port }} sslmode=disable"
-              READ_DSN="host={{ .Values.externalPostgresql.host }} user=${DB_USERNAME} password=${DB_PASSWORD} dbname={{ .Values.externalPostgresql.database }} port={{ .Values.externalPostgresql.port }} sslmode=disable"
-              {{- else }}
-              # External PostgreSQL with plain values (NOTE: the credentials are rendered into the pod spec in clear text)
-              WRITE_DSN="host={{ .Values.externalPostgresql.host }} user={{ .Values.externalPostgresql.username }} password={{ .Values.externalPostgresql.password }} dbname={{ .Values.externalPostgresql.database }} port={{ .Values.externalPostgresql.port }} sslmode=disable"
-              READ_DSN="host={{ .Values.externalPostgresql.host }} user={{ .Values.externalPostgresql.username }} password={{ .Values.externalPostgresql.password }} dbname={{ .Values.externalPostgresql.database }} port={{ .Values.externalPostgresql.port }} sslmode=disable"
-              {{- end }}
-              {{- end }}
-              
-              # Write connection strings to shared volume as `export` lines. NOTE(review): a single quote inside a DSN would break the quoting below — confirm passwords cannot contain one
-              echo "export DB_POSTGRESQL_WRITE_DSN='$WRITE_DSN'" > /shared/db-config.env
-              echo "export DB_POSTGRESQL_READ1_DSN='$READ_DSN'" >> /shared/db-config.env
-              echo "Database configuration written to /shared/db-config.env"
-          env:
-            {{- if .Values.postgresql.enabled }}
-            - name: DB_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Release.Name }}-postgresql"
-                  key: "password"
-            {{- else if .Values.externalPostgresql.existingSecret }}
-            - name: DB_USERNAME
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .Values.externalPostgresql.existingSecret }}
-                  key: {{ .Values.externalPostgresql.secretKeys.usernameKey }}
-            - name: DB_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .Values.externalPostgresql.existingSecret }}
-                  key: {{ .Values.externalPostgresql.secretKeys.passwordKey }}
-            {{- end }}
-          volumeMounts:
-            - name: shared-config  # volume the generated db-config.env is written to
-              mountPath: /shared
-      containers:
-        - name: jan-server  # main application container
-          securityContext:
-            {{- toYaml .Values.gateway.securityContext | nindent 12 }}
-          image: "{{ .Values.gateway.image.repository }}:{{ .Values.gateway.image.tag | default .Chart.AppVersion }}"
-          imagePullPolicy: {{ .Values.gateway.image.pullPolicy }}
-          ports:
-            - containerPort: {{ .Values.gateway.service.port }}
-          command: ["/bin/sh"]
-          args:
-            - -c
-            - |
-              # Source the database configuration (the export lines written by the db-config-builder init container)
-              if [ -f /shared/db-config.env ]; then
-                echo "Loading database configuration from /shared/db-config.env"
-                . /shared/db-config.env
-              fi
-              # Start the main application (exec replaces the shell with the server process)
-              exec /root/jan-server
-          env:
-            {{- range .Values.gateway.env }}
-            - name: {{ .name }}
-              value: {{ .value | quote }}
-            {{- end }}
-            
-            {{/* Valkey/Redis Configuration - Cluster mode. NOTE(review): the host here is <fullname>-valkey-cluster, but the wait-for-valkey init container probes <release>-valkey — confirm which service name is correct. */}}
-            {{- if .Values.valkey.enabled }}
-            - name: REDIS_URL
-              value: "redis://{{ include "jan-server.fullname" . }}-valkey-cluster:6379"
-            {{- if .Values.valkey.usePassword }}
-            - name: REDIS_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-valkey-cluster"
-                  key: "valkey-password"
-            {{- else }}
-            - name: REDIS_PASSWORD
-              value: ""
-            {{- end }}
-            {{- else if .Values.externalValkey.host }}
-            - name: REDIS_URL
-              value: "redis://{{ .Values.externalValkey.host }}:{{ .Values.externalValkey.port }}"
-            {{- if .Values.externalValkey.existingSecret }}
-            - name: REDIS_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: {{ .Values.externalValkey.existingSecret }}
-                  key: {{ .Values.externalValkey.secretKeys.passwordKey }}
-            {{- else if .Values.externalValkey.password }}
-            - name: REDIS_PASSWORD
-              value: {{ .Values.externalValkey.password | quote }}
-            {{- else }}
-            - name: REDIS_PASSWORD
-              value: ""
-            {{- end }}
-            {{- end }}
-            
-            {{/* OAuth2 Configuration: Google client id/secret come from gateway.oauth2.existingSecret when set, else from the <fullname>-oauth2 Secret */}}
-            {{- if .Values.gateway.oauth2.enabled }}
-            {{- if .Values.gateway.oauth2.existingSecret }}
-            - name: OAUTH2_GOOGLE_CLIENT_ID
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Values.gateway.oauth2.existingSecret }}"
-                  key: google-client-id
-            - name: OAUTH2_GOOGLE_CLIENT_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Values.gateway.oauth2.existingSecret }}"
-                  key: google-client-secret
-            {{- else }}
-            - name: OAUTH2_GOOGLE_CLIENT_ID
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-oauth2"
-                  key: google-client-id
-            - name: OAUTH2_GOOGLE_CLIENT_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-oauth2"
-                  key: google-client-secret
-            {{- end }}
-            - name: OAUTH2_GOOGLE_REDIRECT_URL
-              value: "{{ .Values.gateway.oauth2.google.redirectUrl }}"
-            {{- end }}
-            
-            {{/* SMTP Configuration: credentials come from gateway.smtp.existingSecret when set, else from the <fullname>-smtp Secret */}}
-            {{- if .Values.gateway.smtp.enabled }}
-            - name: SMTP_HOST
-              value: "{{ .Values.gateway.smtp.host }}"
-            - name: SMTP_PORT
-              value: "{{ .Values.gateway.smtp.port }}"
-            {{- if .Values.gateway.smtp.existingSecret }}
-            - name: SMTP_USERNAME
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Values.gateway.smtp.existingSecret }}"
-                  key: smtp-username
-            - name: SMTP_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Values.gateway.smtp.existingSecret }}"
-                  key: smtp-password
-            {{- else }}
-            - name: SMTP_USERNAME
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-smtp"
-                  key: smtp-username
-            - name: SMTP_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-smtp"
-                  key: smtp-password
-            {{- end }}
-            - name: SMTP_SENDER_EMAIL
-              value: "{{ .Values.gateway.smtp.fromEmail }}"
-            {{- end }}
-            
-            {{/* Application Secrets: read from gateway.secrets.existingSecret when set, else from the <fullname>-secrets Secret */}}
-            {{- if .Values.gateway.secrets.existingSecret }}
-            - name: JWT_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Values.gateway.secrets.existingSecret }}"
-                  key: jwt-secret
-            - name: APIKEY_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Values.gateway.secrets.existingSecret }}"
-                  key: apikey-secret
-            - name: SERPER_API_KEY
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ .Values.gateway.secrets.existingSecret }}"
-                  key: serper-api-key
-            {{- else }}
-            - name: JWT_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-secrets"
-                  key: jwt-secret
-            - name: APIKEY_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-secrets"
-                  key: apikey-secret
-            - name: SERPER_API_KEY
-              valueFrom:
-                secretKeyRef:
-                  name: "{{ include "jan-server.fullname" . }}-secrets"
-                  key: serper-api-key
-            {{- end }}
-            
-            {{/* Other Application Configuration */}}
-            - name: ORGANIZATION_ADMIN_EMAIL
-              value: "{{ .Values.gateway.secrets.adminEmail }}"
-            
-            {{/* Additional environment variables: each gateway.extraEnv entry supplies either a non-empty value or a valueFrom block */}}
-            {{- range .Values.gateway.extraEnv }}
-            - name: {{ .name }}
-              {{- if .value }}
-              value: {{ .value | quote }}
-              {{- else if .valueFrom }}
-              valueFrom:
-                {{- toYaml .valueFrom | nindent 16 }}
-              {{- end }}
-            {{- end }}
-          resources:
-            {{- toYaml .Values.gateway.resources | nindent 12 }}
-          volumeMounts:
-            - name: shared-config  # same volume the db-config-builder init container writes to
-              mountPath: /shared
-      volumes:
-        - name: shared-config
-          emptyDir: {}  # scratch volume shared by the init container and the main container
-      {{- with .Values.gateway.nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.gateway.affinity }}
-      affinity:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.gateway.tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
diff --git a/charts/jan-server/templates/hpa.yaml b/charts/jan-server/templates/hpa.yaml
deleted file mode 100644
index 70cb5723..00000000
--- a/charts/jan-server/templates/hpa.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-{{- if .Values.gateway.autoscaling.enabled }}
-apiVersion: autoscaling/v2
-kind: HorizontalPodAutoscaler
-metadata:
-  name: {{ include "jan-server.fullname" . }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-spec:
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ include "jan-server.fullname" . }}
-  minReplicas: {{ .Values.gateway.autoscaling.minReplicas }}
-  maxReplicas: {{ .Values.gateway.autoscaling.maxReplicas }}
-  metrics:
-    {{- if .Values.gateway.autoscaling.targetCPUUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: cpu
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.gateway.autoscaling.targetCPUUtilizationPercentage }}
-    {{- end }}
-    {{- if .Values.gateway.autoscaling.targetMemoryUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: memory
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.gateway.autoscaling.targetMemoryUtilizationPercentage }}
-    {{- end }}
-    {{- with .Values.gateway.autoscaling.customMetrics }}
-    {{- toYaml . | nindent 4 }}
-    {{- end }}
-  {{- with .Values.gateway.autoscaling.behavior }}
-  behavior:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/inference-models.yaml b/charts/jan-server/templates/inference-models.yaml
deleted file mode 100644
index 88afb300..00000000
--- a/charts/jan-server/templates/inference-models.yaml
+++ /dev/null
@@ -1,231 +0,0 @@
-{{- if .Values.inference.enabled }}
-{{- range .Values.inference.models }}
-{{- if .enabled }}
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ .name }}
-  labels:
-    {{- include "jan-server.labels" $ | nindent 4 }}
-    model.aibrix.ai/name: {{ .name }}
-    model.aibrix.ai/port: {{ .port | quote }}
-  {{- with .labels }}
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-spec:
-  {{- if not .autoscaler.enabled }}
-  replicas: {{ .replicaCount | default 1 }}
-  {{- end }}
-  selector:
-    matchLabels:
-      model.aibrix.ai/name: {{ .name }}
-  strategy:
-    {{- toYaml .strategy | nindent 4 }}
-  template:
-    metadata:
-      annotations:
-        {{- with .podAnnotations }}
-        {{- toYaml . | nindent 8 }}
-        {{- end }}
-      labels:
-        model.aibrix.ai/name: {{ .name }}
-        {{- with .podLabels }}
-        {{- toYaml . | nindent 8 }}
-        {{- end }}
-    spec:
-      containers:
-        - name: vllm-openai
-          image: {{ .image }}
-          imagePullPolicy: {{ .imagePullPolicy }}
-          {{- if .command }}
-          command:
-            {{- toYaml .command | nindent 12 }}
-          {{- end }}
-          {{- if .args }}
-          args:
-            {{- range .args }}
-            - {{ . | quote }}
-            {{- end }}
-          {{- end }}
-          ports:
-            - containerPort: {{ .port | default 8000 }}
-              protocol: TCP
-          env:
-            {{- with .extraEnv }}
-            {{- toYaml . | nindent 12 }}
-            {{- end }}
-          {{- with .livenessProbe }}
-          livenessProbe:
-            {{- toYaml . | nindent 12 }}
-          {{- end }}
-          {{- with .readinessProbe }}
-          readinessProbe:
-            {{- toYaml . | nindent 12 }}
-          {{- end }}
-          {{- with .resources }}
-          resources:
-            {{- toYaml . | nindent 12 }}
-          {{- end }}
-          {{- if or (and $.Values.inference.storage.enabled .useSharedStorage) .storage.enabled }}
-          volumeMounts:
-            {{- if and $.Values.inference.storage.enabled .useSharedStorage }}
-            - name: shared-hf-cache
-              mountPath: {{ $.Values.inference.storage.hfCachePath }}
-              subPath: {{ $.Values.inference.storage.hfCacheSubPath }}
-            - name: shared-hf-cache
-              mountPath: {{ $.Values.inference.storage.vllmCompilePath }}
-              subPath: {{ $.Values.inference.storage.vllmCompileSubPath }}
-            {{- else if .storage.enabled }}
-            - name: hf-cache
-              mountPath: {{ .storage.hfCachePath }}
-              subPath: {{ .storage.hfCacheSubPath }}
-            - name: hf-cache
-              mountPath: {{ .storage.vllmCompilePath }}
-              subPath: {{ .storage.vllmCompileSubPath }}
-            {{- end }}
-          {{- end }}
-      {{- if or (and $.Values.inference.storage.enabled .useSharedStorage) .storage.enabled }}
-      volumes:
-        {{- if and $.Values.inference.storage.enabled .useSharedStorage }}
-        - name: shared-hf-cache
-          persistentVolumeClaim:
-            claimName: {{ $.Values.inference.storage.pvcName }}
-        {{- else if .storage.enabled }}
-        - name: hf-cache
-          persistentVolumeClaim:
-            claimName: {{ .storage.pvcName }}
-        {{- end }}
-      {{- end }}
-      {{- with .nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .affinity }}
-      affinity:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ .name }}
-  labels:
-    {{- include "jan-server.labels" $ | nindent 4 }}
-    model.aibrix.ai/name: {{ .name }}
-    prometheus-discovery: "true"
-  annotations:
-    {{- with .service.annotations }}
-    {{- toYaml . | nindent 4 }}
-    {{- end }}
-spec:
-  type: {{ .service.type }}
-  ports:
-    - name: serve
-      port: {{ .service.port }}
-      protocol: TCP
-      targetPort: {{ .service.targetPort }}
-  selector:
-    model.aibrix.ai/name: {{ .name }}
-
-{{- if .serviceMonitor.enabled }}
----
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  name: {{ .name }}
-  labels:
-    {{- include "jan-server.labels" $ | nindent 4 }}
-    model.aibrix.ai/name: {{ .name }}
-spec:
-  endpoints:
-    - port: serve
-      path: {{ .serviceMonitor.path }}
-      interval: {{ .serviceMonitor.interval }}
-  selector:
-    matchLabels:
-      model.aibrix.ai/name: {{ .name }}
-{{- end }}
-
-{{- if and .autoscaler.enabled (ne .autoscaler.type "HPA") }}
----
-apiVersion: autoscaling.aibrix.ai/v1alpha1
-kind: PodAutoscaler
-metadata:
-  name: {{ .name }}-autoscaler
-  labels:
-    {{- include "jan-server.labels" $ | nindent 4 }}
-    app.kubernetes.io/name: aibrix
-    app.kubernetes.io/managed-by: kustomize
-  annotations:
-    {{- with .autoscaler.annotations }}
-    {{- toYaml . | nindent 4 }}
-    {{- end }}
-spec:
-  scalingStrategy: {{ .autoscaler.type }}
-  minReplicas: {{ .autoscaler.minReplicas }}
-  maxReplicas: {{ .autoscaler.maxReplicas }}
-  metricsSources:
-    {{- if eq .autoscaler.metricsSource.metricSourceType "pod" }}
-    - metricSourceType: {{ .autoscaler.metricsSource.metricSourceType }}
-      protocolType: {{ .autoscaler.metricsSource.protocolType }}
-      port: {{ .autoscaler.metricsSource.port | quote }}
-      path: {{ .autoscaler.metricsSource.path }}
-      targetMetric: {{ .autoscaler.metricsSource.targetMetric }}
-      targetValue: {{ .autoscaler.metricsSource.targetValue | quote }}
-    {{- else if eq .autoscaler.metricsSource.metricSourceType "domain" }}
-    - endpoint: {{ .autoscaler.metricsSource.endpoint }}
-      metricSourceType: {{ .autoscaler.metricsSource.metricSourceType }}
-      path: {{ .autoscaler.metricsSource.path }}
-      protocolType: {{ .autoscaler.metricsSource.protocolType }}
-      targetMetric: {{ .autoscaler.metricsSource.targetMetric }}
-      targetValue: {{ .autoscaler.metricsSource.targetValue | quote }}
-    {{- end }}
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ .name }}
-{{- end }}
-
-{{- if and .autoscaler.enabled (eq .autoscaler.type "HPA") }}
----
-apiVersion: autoscaling/v2
-kind: HorizontalPodAutoscaler
-metadata:
-  name: {{ .name }}-hpa
-  labels:
-    {{- include "jan-server.labels" $ | nindent 4 }}
-spec:
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ .name }}
-  minReplicas: {{ .autoscaler.minReplicas }}
-  maxReplicas: {{ .autoscaler.maxReplicas }}
-  metrics:
-    {{- if .autoscaler.hpa.targetCPUUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: cpu
-        target:
-          type: Utilization
-          averageUtilization: {{ .autoscaler.hpa.targetCPUUtilizationPercentage }}
-    {{- end }}
-    {{- if .autoscaler.hpa.targetMemoryUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: memory
-        target:
-          type: Utilization
-          averageUtilization: {{ .autoscaler.hpa.targetMemoryUtilizationPercentage }}
-    {{- end }}
-{{- end }}
-
-{{- end }}
-{{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/inference-resources.yaml b/charts/jan-server/templates/inference-resources.yaml
deleted file mode 100644
index 9f8586aa..00000000
--- a/charts/jan-server/templates/inference-resources.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-{{- if .Values.inference.enabled }}
-{{- range .Values.inference.models }}
-{{- if .enabled }}
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ .name }}-service
-  labels:
-    {{- include "jan-server.labels" $ | nindent 4 }}
-spec:
-  selector:
-    {{- include "jan-server.selectorLabels" $ | nindent 4 }}
-    model: {{ .name }}
-  ports:
-    - protocol: TCP
-      port: {{ .service.port | default 8000 }}
-      targetPort: {{ .service.targetPort | default 8000 }}
-  type: {{ .service.type | default "ClusterIP" }}
-{{- end }}
-{{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/inference-storage.yaml b/charts/jan-server/templates/inference-storage.yaml
deleted file mode 100644
index 9c5a2357..00000000
--- a/charts/jan-server/templates/inference-storage.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-{{- if and .Values.inference.enabled .Values.inference.storage.enabled }}
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: {{ .Values.inference.storage.pvcName }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-    component: inference-storage
-spec:
-  accessModes:
-    - ReadWriteMany
-  {{- if .Values.inference.storage.storageClassName }}
-  storageClassName: {{ .Values.inference.storage.storageClassName }}
-  {{- end }}
-  resources:
-    requests:
-      storage: {{ .Values.inference.storage.size }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/ingress.yaml b/charts/jan-server/templates/ingress.yaml
deleted file mode 100644
index 0965c217..00000000
--- a/charts/jan-server/templates/ingress.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-{{- if .Values.gateway.ingress.enabled -}}
-{{- $fullName := include "jan-server.fullname" . -}}
-{{- $svcPort := .Values.gateway.service.port -}}
-{{- if and .Values.gateway.ingress.className (not (hasKey .Values.gateway.ingress.annotations "kubernetes.io/ingress.class")) }}
-  {{- $_ := set .Values.gateway.ingress.annotations "kubernetes.io/ingress.class" .Values.gateway.ingress.className}}
-{{- end }}
-{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
-apiVersion: networking.k8s.io/v1
-{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
-apiVersion: networking.k8s.io/v1beta1
-{{- else -}}
-apiVersion: extensions/v1beta1
-{{- end }}
-kind: Ingress
-metadata:
-  name: {{ $fullName }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-  {{- with .Values.gateway.ingress.annotations }}
-  annotations:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-spec:
-  {{- if and .Values.gateway.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
-  ingressClassName: {{ .Values.gateway.ingress.className }}
-  {{- end }}
-  {{- if .Values.gateway.ingress.tls }}
-  tls:
-    {{- range .Values.gateway.ingress.tls }}
-    - hosts:
-        {{- range .hosts }}
-        - {{ . | quote }}
-        {{- end }}
-      secretName: {{ .secretName }}
-    {{- end }}
-  {{- end }}
-  rules:
-    {{- range .Values.gateway.ingress.hosts }}
-    - host: {{ .host | quote }}
-      http:
-        paths:
-          {{- range .paths }}
-          - path: {{ .path }}
-            {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
-            pathType: {{ .pathType }}
-            {{- end }}
-            backend:
-              {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
-              service:
-                name: {{ $fullName }}
-                port:
-                  number: {{ $svcPort }}
-              {{- else }}
-              serviceName: {{ $fullName }}
-              servicePort: {{ $svcPort }}
-              {{- end }}
-          {{- end }}
-    {{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/keda-scaledobject.yaml b/charts/jan-server/templates/keda-scaledobject.yaml
deleted file mode 100644
index 52ac1ccf..00000000
--- a/charts/jan-server/templates/keda-scaledobject.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-{{- if .Values.gateway.keda.enabled }}
-apiVersion: keda.sh/v1alpha1
-kind: ScaledObject
-metadata:
-  name: {{ include "jan-server.fullname" . }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-spec:
-  scaleTargetRef:
-    name: {{ include "jan-server.fullname" . }}
-  minReplicaCount: {{ .Values.gateway.keda.minReplicas }}
-  maxReplicaCount: {{ .Values.gateway.keda.maxReplicas }}
-  pollingInterval: {{ .Values.gateway.keda.pollingInterval }}
-  cooldownPeriod: {{ .Values.gateway.keda.cooldownPeriod }}
-  {{- with .Values.gateway.keda.triggers }}
-  triggers:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/poddisruptionbudget.yaml b/charts/jan-server/templates/poddisruptionbudget.yaml
deleted file mode 100644
index a9ca0d5d..00000000
--- a/charts/jan-server/templates/poddisruptionbudget.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-{{- if .Values.gateway.podDisruptionBudget.enabled }}
-apiVersion: policy/v1
-kind: PodDisruptionBudget
-metadata:
-  name: {{ include "jan-server.fullname" . }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-spec:
-  {{- if .Values.gateway.podDisruptionBudget.minAvailable }}
-  minAvailable: {{ .Values.gateway.podDisruptionBudget.minAvailable }}
-  {{- end }}
-  {{- if .Values.gateway.podDisruptionBudget.maxUnavailable }}
-  maxUnavailable: {{ .Values.gateway.podDisruptionBudget.maxUnavailable }}
-  {{- end }}
-  selector:
-    matchLabels:
-      app.kubernetes.io/name: {{ include "jan-server.name" . }}
-      app.kubernetes.io/instance: {{ .Release.Name }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/secrets.yaml b/charts/jan-server/templates/secrets.yaml
deleted file mode 100644
index 336af1e8..00000000
--- a/charts/jan-server/templates/secrets.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-{{/* OAuth2 Secret */}}
-{{- if and .Values.gateway.oauth2.enabled (not .Values.gateway.oauth2.existingSecret) }}
-apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ include "jan-server.fullname" . }}-oauth2
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-type: Opaque
-data:
-  {{- if .Values.gateway.oauth2.google.clientId }}
-  google-client-id: {{ .Values.gateway.oauth2.google.clientId | b64enc }}
-  {{- end }}
-  {{- if .Values.gateway.oauth2.google.clientSecret }}
-  google-client-secret: {{ .Values.gateway.oauth2.google.clientSecret | b64enc }}
-  {{- end }}
----
-{{- end }}
-
-{{/* SMTP Secret */}}
-{{- if and .Values.gateway.smtp.enabled (not .Values.gateway.smtp.existingSecret) }}
-apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ include "jan-server.fullname" . }}-smtp
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-type: Opaque
-data:
-  {{- if .Values.gateway.smtp.username }}
-  smtp-username: {{ .Values.gateway.smtp.username | b64enc }}
-  {{- end }}
-  {{- if .Values.gateway.smtp.password }}
-  smtp-password: {{ .Values.gateway.smtp.password | b64enc }}
-  {{- end }}
----
-{{- end }}
-
-{{/* Application Secrets */}}
-{{- if not .Values.gateway.secrets.existingSecret }}
-apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ include "jan-server.fullname" . }}-secrets
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-type: Opaque
-data:
-  {{- if .Values.gateway.secrets.jwtSecret }}
-  jwt-secret: {{ .Values.gateway.secrets.jwtSecret | b64enc }}
-  {{- end }}
-  {{- if .Values.gateway.secrets.apiKeySecret }}
-  apikey-secret: {{ .Values.gateway.secrets.apiKeySecret | b64enc }}
-  {{- end }}
-  {{- if .Values.gateway.secrets.serperApiKey }}
-  serper-api-key: {{ .Values.gateway.secrets.serperApiKey | b64enc }}
-  {{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/templates/service.yaml b/charts/jan-server/templates/service.yaml
deleted file mode 100644
index 492017dc..00000000
--- a/charts/jan-server/templates/service.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "jan-server.fullname" . }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-spec:
-  type: {{ .Values.gateway.service.type }}
-  ports:
-    - port: {{ .Values.gateway.service.port }}
-      targetPort: {{ .Values.gateway.service.port }}
-      {{- if eq .Values.gateway.service.type "NodePort" }}
-      nodePort: {{ .Values.gateway.service.nodePort }}
-      {{- end }}
-  selector:
-    app.kubernetes.io/name: {{ include "jan-server.name" . }}
-    app.kubernetes.io/instance: {{ .Release.Name }}
diff --git a/charts/jan-server/templates/vpa.yaml b/charts/jan-server/templates/vpa.yaml
deleted file mode 100644
index 81e207ae..00000000
--- a/charts/jan-server/templates/vpa.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-{{- if .Values.gateway.vpa.enabled }}
-apiVersion: autoscaling.k8s.io/v1
-kind: VerticalPodAutoscaler
-metadata:
-  name: {{ include "jan-server.fullname" . }}
-  labels:
-    {{- include "jan-server.labels" . | nindent 4 }}
-spec:
-  targetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ include "jan-server.fullname" . }}
-  updatePolicy:
-    updateMode: {{ .Values.gateway.vpa.updateMode }}
-  resourcePolicy:
-    containerPolicies:
-    - containerName: jan-server
-      {{- with .Values.gateway.vpa.minAllowed }}
-      minAllowed:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.gateway.vpa.maxAllowed }}
-      maxAllowed:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/charts/jan-server/values.yaml b/charts/jan-server/values.yaml
deleted file mode 100644
index df7649c2..00000000
--- a/charts/jan-server/values.yaml
+++ /dev/null
@@ -1,498 +0,0 @@
-﻿# Jan Gateway Helm Chart Configuration
-# This file contains all configurable values for the Jan Gateway deployment
-# Uncomment and modify values as needed for your environment
-
-# =============================================================================
-# Jan Gateway Application Configuration
-# =============================================================================
-gateway:
-  # Number of replicas to deploy
-  replicaCount: 1
-
-  # Container image configuration
-  image:
-    repository: menloltd/jan-server
-    pullPolicy: IfNotPresent
-    # Overrides the image tag whose default is the chart appVersion (currently "1.0.0")
-    # Leave empty to use Chart.yaml appVersion, or specify version like "v1.2.3"
-    tag: ""
-
-  # Pull secrets for private registries
-  imagePullSecrets: []
-
-  # Override the name of the chart
-  nameOverride: ""
-
-  # Override the full name of the chart
-  fullnameOverride: ""
-
-  # =============================================================================
-  # Environment Variables Configuration
-  # =============================================================================
-
-  # Basic application environment variables
-  env:
-    # Application specific configs
-    - name: JAN_INFERENCE_MODEL_URL
-      value: "http://envoy-aibrix-system-aibrix-eg-903790dc.envoy-gateway-system" # This is default values if you enable Inference with AIbrix, modify if you use and external endpoint
-    - name: ALLOWED_CORS_HOSTS
-      value: "*.jan.ai,*.menlo.ai,http://localhost:3001"
-    - name: ENABLE_ADMIN_API
-      value: "true"
-    # Note: REDIS_URL, REDIS_PASSWORD are automatically configured by the chart based on valkey settings
-    - name: REDIS_DB
-      value: "0"
-
-  # Additional environment variables for advanced configuration
-  extraEnv: []
-  # Example:
-  # - name: CUSTOM_VAR
-  #   value: "custom_value"
-  # - name: SECRET_VAR
-  #   valueFrom:
-  #     secretKeyRef:
-  #       name: my-secret
-  #       key: secret-key
-
-  # =============================================================================
-  # OAuth2 Configuration (Google Authentication)
-  # =============================================================================
-  oauth2:
-    enabled: false
-    google:
-      # Google OAuth Client ID - Get from Google Cloud Console
-      clientId: "" # Replace with your Google OAuth Client ID
-      # Google OAuth Client Secret - Get from Google Cloud Console
-      clientSecret: "" # Replace with your Google OAuth Client Secret
-      # OAuth redirect URL - Must match Google Cloud Console configuration
-      redirectUrl: "https://api.example.com/api/v1/auth/google/callback"
-
-    # Use existing Kubernetes secret instead of values above (recommended for production)
-    existingSecret: ""
-    # Example: "oauth2-credentials"
-    # The secret should contain keys: google-client-id, google-client-secret
-
-  # =============================================================================
-  # SMTP Configuration (Email Services)
-  # =============================================================================
-  smtp:
-    enabled: false
-    # SMTP server configuration (SendGrid example)
-    host: "smtp.sendgrid.net"
-    port: "587" # Use 587 for TLS, 465 for SSL
-    # SendGrid uses 'apikey' as username, other providers use actual username
-    username: "apikey"
-    # For SendGrid: use your SendGrid API key, for others: use password
-    password: "" # Replace with your SMTP password/API key
-    # From email address for outgoing emails
-    fromEmail: "noreply@example.com"
-
-    # Use existing Kubernetes secret instead of values above (recommended for production)
-    existingSecret: ""
-    # Example: "smtp-credentials"
-    # The secret should contain keys: smtp-username, smtp-password
-
-  # =============================================================================
-  # Application Secrets Configuration
-  # =============================================================================
-  secrets:
-    # JWT secret for token signing (generate a strong random string)
-    jwtSecret: "asdfasdfasdf" # Example: "your-32-char-jwt-secret-key-here"
-
-    # API key secret for API authentication
-    apiKeySecret: "asdfasdf" # Example: "your-api-key-secret"
-
-    # Serper.dev API key for search functionality
-    serperApiKey: "asdfasdfasdf" # Get from https://serper.dev
-
-    # Admin email address
-    adminEmail: "admin@example.com"
-
-    # Use existing Kubernetes secret instead of values above (recommended for production)
-    existingSecret: ""
-    # Example: "app-secrets"
-    # The secret should contain keys: jwt-secret, apikey-secret, serper-api-key
-
-  # =============================================================================
-  # Kubernetes Service Configuration
-  # =============================================================================
-  service:
-    type: ClusterIP
-    port: 8080
-
-  # =============================================================================
-  # Ingress Configuration
-  # =============================================================================
-  ingress:
-    enabled: false # Set to true to enable ingress
-    className: "nginx" # Use your ingress controller class
-    annotations: {}
-    hosts:
-      - host: api.example.com
-        paths:
-          - path: /
-            pathType: Prefix
-    tls: []
-    #   - hosts:
-    #       - api.example.com
-    #     secretName: api-tls-secret
-
-  # =============================================================================
-  # Resource Limits and Requests
-  # =============================================================================
-  resources:
-    limits:
-      cpu: 500m
-      memory: 512Mi
-    requests:
-      cpu: 250m
-      memory: 256Mi
-
-  # =============================================================================
-  # Security Contexts
-  # =============================================================================
-  podSecurityContext:
-    fsGroup: 65534
-
-  securityContext:
-    allowPrivilegeEscalation: false
-    capabilities:
-      drop:
-        - ALL
-    readOnlyRootFilesystem: false
-    runAsNonRoot: false
-    # runAsUser: 65534
-
-  # =============================================================================
-  # Node Selection and Scheduling
-  # =============================================================================
-  nodeSelector: {}
-
-  tolerations: []
-
-  affinity: {}
-
-  # =============================================================================
-  # Service Account
-  # =============================================================================
-  serviceAccount:
-    # Specifies whether a service account should be created
-    create: true
-    # Automatically mount a ServiceAccount's API credentials?
-    automount: true
-    # Annotations to add to the service account
-    annotations: {}
-    # The name of the service account to use.
-    # If not set and create is true, a name is generated using the fullname template
-    name: ""
-
-  # =============================================================================
-  # Pod Disruption Budget
-  # =============================================================================
-  podDisruptionBudget:
-    enabled: true
-    minAvailable: 1
-
-  # =============================================================================
-  # Horizontal Pod Autoscaler (HPA)
-  # =============================================================================
-  autoscaling:
-    enabled: false # Set to true to enable HPA
-    minReplicas: 1
-    maxReplicas: 10
-    targetCPUUtilizationPercentage: 80
-    targetMemoryUtilizationPercentage: 80
-    customMetrics: []
-
-  # =============================================================================
-  # Vertical Pod Autoscaler (VPA) - Alternative to HPA
-  # =============================================================================
-  vpa:
-    enabled: false # Set to true to enable VPA (disable HPA when using VPA)
-    updateMode: "Auto" # Off, Initial, Recreation, Auto
-    minAllowed:
-      cpu: 100m
-      memory: 128Mi
-    maxAllowed:
-      cpu: 1000m
-      memory: 1Gi
-
-  # =============================================================================
-  # KEDA Autoscaling - Event-driven autoscaling (Alternative to HPA)
-  # =============================================================================
-  keda:
-    enabled: false # Set to true to enable KEDA (disable HPA when using KEDA)
-    minReplicas: 1
-    maxReplicas: 10
-    pollingInterval: 30
-    cooldownPeriod: 300
-    triggers: []
-
-# =============================================================================
-# PostgreSQL Database Configuration (Bitnami PostgreSQL)
-# =============================================================================
-
-# PostgreSQL Configuration (Bitnami PostgreSQL chart)
-postgresql:
-  enabled: true # Set to false to use external PostgreSQL
-  global:
-    postgresql:
-      auth:
-        postgresPassword: "" # Will be auto-generated if empty
-        username: "jan_user"
-        password: "jan_password" # Will be auto-generated if empty
-        database: "jan_api_gateway"
-
-  primary:
-    service:
-      ports:
-        postgresql: 5432
-  persistence:
-    enabled: true
-    size: 8Gi
-
-# External PostgreSQL Configuration (when cloudnative-pg.enabled = false)
-externalPostgresql:
-  host: "" # External PostgreSQL host
-  port: 5432
-  database: "jan_api_gateway"
-  username: "jan_user"
-  password: ""
-  existingSecret: "" # Name of existing secret
-  secretKeys:
-    usernameKey: "username" # Key in secret for username
-    passwordKey: "password" # Key in secret for password
-
-# =============================================================================
-# Valkey/Redis Cache Configuration
-# =============================================================================
-
-# Valkey Configuration (Redis-compatible, recommended)
-valkey:
-  enabled: true # Set to false to use external Redis/Valkey
-  usePassword: false
-  password: ""
-  existingSecret: ""
-  existingSecretPasswordKey: ""
-  cluster:
-    nodes: 3
-
-# External Valkey/Redis Configuration (when valkey.enabled = false)
-externalValkey:
-  host: "" # External Valkey/Redis host
-  port: 6379
-  password: ""
-  existingSecret: "" # Name of existing secret
-  secretKeys:
-    passwordKey: "password" # Key in secret for password
-
-# =============================================================================
-# Inference Configuration
-# =============================================================================
-inference:
-  enabled: true # Set to true to enable inference capabilities
-
-  # =============================================================================
-  # Shared Storage Configuration
-  # =============================================================================
-  storage:
-    enabled: true # Set to true to enable shared storage for models
-    pvcName: "hf-hub-cache"
-    storageClassName: "multiattach" # ReadWriteMany storage class
-    size: "100Gi"
-
-    # Mount paths for shared storage
-    hfCachePath: "/root/.cache/huggingface/hub"
-    hfCacheSubPath: "hf-hub"
-    vllmCompilePath: "/root/.cache/vllm/torch_compile_cache"
-    vllmCompileSubPath: "vllm-compile"
-
-  # =============================================================================
-  # Dependencies Configuration
-  # =============================================================================
-  dependencies:
-    # GPU Operator
-    gpuOperator:
-      enabled: true
-      version: "v25.3.2"
-
-    # KubeRay Operator
-    kuberayOperator:
-      enabled: true
-      version: "1.4.1"
-      namespace: "kuberay-system"
-      includeCrds: true
-      fullnameOverride: "kuberay-operator"
-      env:
-        - name: "ENABLE_PROBES_INJECTION"
-          value: "false"
-      featureGates:
-        - name: "RayClusterStatusConditions"
-          enabled: true
-
-    # Envoy Gateway
-    envoyGateway:
-      enabled: true
-      version: "1.5.1"
-
-    # Aibrix
-    aibrix:
-      enabled: true
-      version: "0.4.1"
-      repository: "https://artifacthub.io/packages/helm/danchev/aibrix"
-
-  # =============================================================================
-  # Cleanup Configuration
-  # =============================================================================
-  cleanup:
-    # Auto-cleanup dependencies when uninstalling the chart
-    # WARNING: This will remove operators that other charts might be using!
-    # Only enable if you're sure no other applications depend on these operators
-    autoCleanupDependencies: true # Set to true to auto-remove operators on uninstall
-
-    # Individual cleanup control (only used if autoCleanupDependencies=true)
-    cleanupGpuOperator: false
-    cleanupKuberayOperator: true
-    cleanupEnvoyGateway: true
-    cleanupAibrix: true
-
-  # =============================================================================
-  # Models Configuration
-  # =============================================================================
-  models:
-    # Example model configuration - users can add multiple models
-    - name: "jan-v1-4b"
-      enabled: true
-
-      # Container configuration
-      image: "registry.menlo.ai/dockerhub/vllm/vllm-openai:v0.10.2"
-      imagePullPolicy: "IfNotPresent"
-      port: 8000
-
-      # Custom command and args - users MUST define this for their specific model
-      command: ["sh"]
-      args:
-        - -c
-        - |
-          python3 -m vllm.entrypoints.openai.api_server \
-            --host 0.0.0.0 \
-            --port 8000 \
-            --uvicorn-log-level warning \
-            --model janhq/Jan-v1-2509 \
-            --served-model-name jan-v1-4b \
-            --max-num-batched-tokens 1024 \
-            --enable-auto-tool-choice \
-            --tool-call-parser hermes \
-            --reasoning-parser qwen3 \
-            --max-model-len 131072 \
-            --compilation-config '{"cudagraph_mode":"FULL_AND_PIECEWISE","compile_sizes":[1,2,4]}' \
-            --async-scheduling \
-            --api-server-count 4
-      resources:
-        limits:
-          nvidia.com/gpu: "1"
-        requests:
-          nvidia.com/gpu: "1"
-
-      # Probes configuration
-      livenessProbe:
-        failureThreshold: 15
-        httpGet:
-          path: /health
-          port: 8000
-          scheme: HTTP
-        initialDelaySeconds: 300
-        periodSeconds: 30
-        successThreshold: 1
-        timeoutSeconds: 1
-
-      readinessProbe:
-        failureThreshold: 15
-        httpGet:
-          path: /health
-          port: 8000
-          scheme: HTTP
-        initialDelaySeconds: 300
-        periodSeconds: 30
-        successThreshold: 1
-        timeoutSeconds: 1
-
-      # Storage configuration (uses shared storage when inference.storage.enabled=true)
-      useSharedStorage: true # Set to false to use model-specific storage
-
-      # Service configuration
-      service:
-        type: "ClusterIP"
-        port: 8000
-        targetPort: 8000
-        annotations:
-          prometheus.io/scrape: "true"
-          prometheus.io/port: "8000"
-
-      # Prometheus monitoring
-      serviceMonitor:
-        enabled: false
-        interval: "30s"
-        path: "/metrics"
-
-      # Pod annotations for Prometheus
-      podAnnotations:
-        prometheus.io/scrape: "true"
-        prometheus.io/port: "8000"
-        prometheus.io/path: "/metrics"
-
-      # Autoscaling configuration
-      autoscaler:
-        enabled: true
-        type: "KPA" # HPA, KPA, or APA
-
-        # Common autoscaler settings
-        minReplicas: 1
-        maxReplicas: 8
-
-        # Annotations for different autoscaler types
-        annotations:
-          # KPA specific
-          "kpa.autoscaling.aibrix.ai/scale-down-delay": "0s"
-          # APA specific
-          # "autoscaling.aibrix.ai/up-fluctuation-tolerance": "0.1"
-          # "autoscaling.aibrix.ai/down-fluctuation-tolerance": "0.2"
-          # "apa.autoscaling.aibrix.ai/window": "30s"
-
-        # Metrics configuration - supports both metric-based and optimizer-based
-        metricsSource:
-          # For metric-based autoscaling (HPA/KPA/APA with pod metrics)
-          metricSourceType: "pod" # Options: pod, domain
-          protocolType: "http"
-          port: "8000"
-          path: "/metrics"
-          targetMetric: "gpu_cache_usage_perc"
-          targetValue: "50" # For metric-based: "50", for optimizer-based: "100"
-
-          # For optimizer-based autoscaling (uncomment when using optimizer)
-          # metricSourceType: "domain"
-          # endpoint: "aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080"
-          # path: "/metrics/default/jan-v1-4b"
-          # targetMetric: "vllm:deployment_replicas"
-          # targetValue: "100"
-
-        # Standard K8s HPA configuration (when type: HPA)
-        hpa:
-          targetCPUUtilizationPercentage: 80
-          targetMemoryUtilizationPercentage: 80 # Deployment strategy
-      strategy:
-        type: "Recreate"
-
-      # Number of replicas (when autoscaling is disabled)
-      replicaCount: 1
-
-      # Node selector, tolerations, and affinity
-      nodeSelector: {}
-      tolerations: []
-      affinity: {}
-
-      # Additional environment variables
-      extraEnv: []
-      # Example:
-      # - name: "CUSTOM_VAR"
-      #   value: "custom_value"
diff --git a/cmd/jan-cli/README.md b/cmd/jan-cli/README.md
new file mode 100644
index 00000000..65b5baba
--- /dev/null
+++ b/cmd/jan-cli/README.md
@@ -0,0 +1,750 @@
+# jan-cli - Jan Server Command-Line Interface
+
+The official CLI tool for Jan Server, providing unified access to configuration management, service operations, and development tools.
+
+## Quick Start
+
+### Using Wrapper Scripts (Recommended)
+
+The easiest way to use jan-cli from the project root:
+
+```bash
+# Linux/macOS
+./jan-cli.sh --help
+./jan-cli.sh config validate
+./jan-cli.sh service list
+
+# Windows PowerShell
+.\jan-cli.ps1 --help
+.\jan-cli.ps1 config validate
+.\jan-cli.ps1 service list
+```
+
+The wrapper scripts automatically build jan-cli if needed and run it with your arguments.
+
+## Installation
+
+### Using Wrapper Scripts
+
+No installation needed! Just use the wrapper scripts from the project root:
+
+- **`jan-cli.sh`** - For Linux/macOS/WSL
+- **`jan-cli.ps1`** - For Windows PowerShell
+
+The scripts will automatically:
+1. Check if jan-cli binary exists
+2. Build it if missing or outdated
+3. Run your command
+
+### From Source
+
+```bash
+cd cmd/jan-cli
+go build -o jan-cli
+# Move to PATH (optional)
+sudo mv jan-cli /usr/local/bin/  # Linux/macOS
+# or for Windows, add to PATH
+```
+
+### Using Go Install
+
+```bash
+go install github.com/janhq/jan-server/cmd/jan-cli@latest
+```
+
+### Installation Options
+
+**Option 1: Install Globally (Recommended)**
+
+Use the Makefile target to build and install `jan-cli` to your local bin directory:
+
+```bash
+# From project root
+make cli-install
+```
+
+This will:
+- Build the `jan-cli` binary
+- Install it to `~/bin` (Linux/macOS) or `%USERPROFILE%\bin` (Windows)
+- Display instructions for adding to PATH if needed
+
+After installation and adding to PATH, you can run `jan-cli` from anywhere:
+
+```bash
+jan-cli --version
+jan-cli config validate
+jan-cli service list
+```
+
+**Option 2: Use Wrapper Scripts**
+
+Run from the project root using wrapper scripts (no installation needed):
+
+```bash
+# Linux/macOS
+./jan-cli.sh --help
+./jan-cli.sh config validate
+
+# Windows PowerShell
+.\jan-cli.ps1 --help
+.\jan-cli.ps1 config validate
+```
+
+The wrapper scripts automatically build the CLI if needed.
+
+**Option 3: Build and Run Manually**
+
+```bash
+# Build
+cd cmd/jan-cli
+go build
+
+# Run
+./jan-cli --help  # Linux/macOS
+.\jan-cli.exe --help  # Windows
+```
+
+### Adding to PATH (Optional)
+
+For easier access, you can add the built binary to your PATH or create an alias:
+
+```bash
+# Linux/macOS - Add to ~/.bashrc or ~/.zshrc
+alias jan-cli='/path/to/jan-server/cmd/jan-cli/jan-cli'
+
+# Or use the wrapper from anywhere
+alias jan-cli='/path/to/jan-server/jan-cli.sh'
+
+# Windows PowerShell - Add to $PROFILE
+function jan-cli { & 'C:\path\to\jan-server\jan-cli.ps1' $args }
+```
+
+## Commands Overview
+
+### Configuration Management (`config`)
+
+Manage Jan Server configuration files.
+
+```bash
+# Validate configuration
+jan-cli config validate
+jan-cli config validate --env production
+
+# Export configuration
+jan-cli config export --format env > .env
+jan-cli config export --format json
+jan-cli config export --format docker-env --output docker.env
+
+# Show configuration
+jan-cli config show llm-api
+jan-cli config show --path services.llm-api.database
+jan-cli config show --format json
+
+# Generate Kubernetes values
+jan-cli config k8s-values --env production > k8s/values-prod.yaml
+jan-cli config k8s-values --env development --output k8s/values-dev.yaml
+```
+
+### Service Operations (`service`)
+
+Manage and inspect Jan Server services.
+
+```bash
+# List all services
+jan-cli service list
+
+# Show service logs
+jan-cli service logs llm-api
+jan-cli service logs llm-api --tail 50 --follow
+
+# Check service status
+jan-cli service status
+jan-cli service status llm-api
+```
+
+### Development Tools (`dev`)
+
+Development utilities for Jan Server.
+
+```bash
+# Setup development environment
+jan-cli dev setup
+
+# Scaffold new service
+jan-cli dev scaffold my-service
+jan-cli dev scaffold worker-service --template worker --port 8999
+```
+
+### Monitoring Stack (`monitor`)
+
+Manage observability stack (Prometheus, Grafana, Jaeger, OTEL Collector).
+
+```bash
+# Install monitoring dependencies
+jan-cli monitor setup
+
+# Start monitoring stack
+jan-cli monitor up               # Basic start
+jan-cli monitor dev              # Development mode (full sampling)
+
+# Check health and status
+jan-cli monitor test             # Validate all services
+jan-cli monitor status           # Show status and resource usage
+
+# Query monitoring data
+jan-cli monitor query            # Interactive queries
+
+# Maintenance operations
+jan-cli monitor down             # Stop monitoring stack
+jan-cli monitor reset            # Clear all monitoring data
+jan-cli monitor export           # Export configuration files
+```
+
+## Configuration Commands
+
+### `config validate`
+
+Validate configuration files for syntax errors and required fields.
+
+**Usage:**
+```bash
+jan-cli config validate [flags]
+```
+
+**Flags:**
+- `-f, --file string` - Config file to validate (default: `config/defaults.yaml`)
+- `--schema string` - Schema file to validate against
+- `-e, --env string` - Environment to validate (development, production, etc.)
+
+**Examples:**
+```bash
+# Validate default configuration
+jan-cli config validate
+
+# Validate production configuration
+jan-cli config validate --env production
+
+# Validate specific file with schema
+jan-cli config validate --file custom-config.yaml --schema config-schema.json
+```
+
+### `config export`
+
+Export configuration in various formats.
+
+**Usage:**
+```bash
+jan-cli config export [flags]
+```
+
+**Flags:**
+- `-f, --file string` - Config file to export (default: `config/defaults.yaml`)
+- `--format string` - Output format: `env`, `docker-env`, `json`, `yaml` (default: `env`)
+- `--prefix string` - Add prefix to exported variables
+- `-o, --output string` - Output file (default: stdout)
+
+**Examples:**
+```bash
+# Export as shell environment variables
+eval "$(jan-cli config export)"
+
+# Export as docker-compose .env file
+jan-cli config export --format docker-env --output .env
+
+# Export as JSON
+jan-cli config export --format json > config.json
+
+# Export with prefix
+jan-cli config export --prefix MYAPP --format env
+```
+
+### `config show`
+
+Display configuration values.
+
+**Usage:**
+```bash
+jan-cli config show [service] [flags]
+```
+
+**Flags:**
+- `-f, --file string` - Config file to read (default: `config/defaults.yaml`)
+- `--path string` - Config path to show (e.g., `services.llm-api`)
+- `--format string` - Output format: `yaml`, `json`, `value` (default: `yaml`)
+
+**Examples:**
+```bash
+# Show entire configuration
+jan-cli config show
+
+# Show specific service config
+jan-cli config show llm-api
+
+# Show specific path
+jan-cli config show --path services.llm-api.database
+
+# Show as JSON
+jan-cli config show llm-api --format json
+
+# Show single value
+jan-cli config show --path services.llm-api.http.port --format value
+```
+
+### `config k8s-values`
+
+Generate Kubernetes Helm values file from configuration.
+
+**Usage:**
+```bash
+jan-cli config k8s-values [flags]
+```
+
+**Flags:**
+- `-e, --env string` - Environment (development, production, etc.) (default: `development`)
+- `-o, --output string` - Output file (default: stdout)
+- `--set stringSlice` - Override values (key=value)
+
+**Examples:**
+```bash
+# Generate development values
+jan-cli config k8s-values --env development > k8s/values-dev.yaml
+
+# Generate production values
+jan-cli config k8s-values --env production > k8s/values-prod.yaml
+
+# Generate with overrides
+jan-cli config k8s-values --env production \
+  --set services.llm-api.replicas=3 \
+  --set services.llm-api.resources.limits.memory=2Gi \
+  --output k8s/values-prod-scaled.yaml
+```
+
+## Service Commands
+
+### `service list`
+
+List all available Jan Server services.
+
+**Usage:**
+```bash
+jan-cli service list
+```
+
+**Example Output:**
+```
+Available services:
+  llm-api         :8080  LLM API - OpenAI-compatible chat completions
+  media-api       :8285  Media API - File upload and management
+  response-api    :8082  Response API - Multi-step orchestration
+  mcp-tools       :8091  MCP Tools - Model Context Protocol tools
+```
+
+### `service logs`
+
+Show logs for a specific service.
+
+**Usage:**
+```bash
+jan-cli service logs [service] [flags]
+```
+
+**Flags:**
+- `-n, --tail int` - Number of lines to show (default: 100)
+- `-f, --follow` - Follow log output
+
+**Examples:**
+```bash
+# Show last 100 lines
+jan-cli service logs llm-api
+
+# Show last 50 lines
+jan-cli service logs llm-api --tail 50
+
+# Follow logs in real-time
+jan-cli service logs llm-api --follow
+```
+
+### `service status`
+
+Show service status and health information.
+
+**Usage:**
+```bash
+jan-cli service status [service]
+```
+
+**Examples:**
+```bash
+# Show status for all services
+jan-cli service status
+
+# Show status for specific service
+jan-cli service status llm-api
+```
+
+## Development Commands
+
+### `dev setup`
+
+Initialize development environment.
+
+**Usage:**
+```bash
+jan-cli dev setup
+```
+
+This command will:
+- Check for required dependencies (Docker, Go)
+- Create `.env` file from template
+- Pull required Docker images
+- Set up development directories
+
+### `dev scaffold`
+
+Generate a new service from template.
+
+**Usage:**
+```bash
+jan-cli dev scaffold [service-name] [flags]
+```
+
+**Flags:**
+- `-t, --template string` - Service template: `api`, `worker` (default: `api`)
+- `-p, --port string` - Service port
+
+**Examples:**
+```bash
+# Scaffold API service
+jan-cli dev scaffold my-service
+
+# Scaffold with specific port
+jan-cli dev scaffold my-service --port 8999
+
+# Scaffold worker service
+jan-cli dev scaffold my-worker --template worker
+```
+
+## Monitoring Commands
+
+### `monitor setup`
+
+Install monitoring dependencies (OpenTelemetry, etc.). This is a cross-platform command that works on Windows, Linux, and macOS.
+
+**Usage:**
+```bash
+jan-cli monitor setup
+```
+
+**What it does:**
+- Installs OpenTelemetry Go dependencies
+- Runs sanitizer tests
+- Verifies all monitoring files are present
+- Checks Docker and Docker Compose installation
+
+**Example:**
+```bash
+jan-cli monitor setup
+```
+
+### `monitor up` / `monitor dev`
+
+Start the monitoring stack.
+
+**Usage:**
+```bash
+jan-cli monitor up      # Standard start
+jan-cli monitor dev     # Development mode with full sampling
+```
+
+**Features:**
+- `monitor up`: Basic monitoring with normal sampling
+- `monitor dev`: Full sampling (AlwaysSample) for development/debugging
+
+**Example:**
+```bash
+jan-cli monitor dev
+# Monitoring stack ready:
+#   - Prometheus: http://localhost:9090
+#   - Grafana: http://localhost:3331 (admin/admin)
+#   - Jaeger: http://localhost:16686
+#   - OTEL Collector: http://localhost:13133
+```
+
+### `monitor test`
+
+Validate that all monitoring services are healthy.
+
+**Usage:**
+```bash
+jan-cli monitor test
+```
+
+**Checks:**
+- Prometheus health endpoint
+- Grafana health endpoint
+- OTEL Collector health endpoint
+- Jaeger UI availability
+
+**Example:**
+```bash
+jan-cli monitor test
+# Testing Prometheus...
+# [OK]   Prometheus healthy
+# Testing Grafana...
+# [OK]   Grafana healthy
+# ...
+```
+
+### `monitor status`
+
+Show monitoring stack status and resource usage.
+
+**Usage:**
+```bash
+jan-cli monitor status
+```
+
+**Shows:**
+- Container status (running/stopped)
+- CPU usage per container
+- Memory usage per container
+
+**Example:**
+```bash
+jan-cli monitor status
+```
+
+### `monitor query`
+
+Interactive queries for traces, metrics, and alert rules.
+
+**Usage:**
+```bash
+jan-cli monitor query
+```
+
+**Query Types:**
+1. Recent traces for a service (Jaeger)
+2. Current metric value (Prometheus)
+3. Alert rules status (Prometheus)
+
+**Example:**
+```bash
+jan-cli monitor query
+# Select query:
+# 1) Recent traces for service
+# 2) Metric current value
+# 3) Alert rules status
+# Choice [1-3]: 1
+# Service name: llm-api
+```
+
+### `monitor down`
+
+Stop the monitoring stack.
+
+**Usage:**
+```bash
+jan-cli monitor down
+```
+
+**Example:**
+```bash
+jan-cli monitor down
+# [OK] Monitoring stack stopped
+# 
+# To fully disable tracing, set ENABLE_TRACING=false in .env
+```
+
+### `monitor reset`
+
+Delete all monitoring data (destructive operation).
+
+**Usage:**
+```bash
+jan-cli monitor reset
+```
+
+**Warning:** This permanently deletes all Prometheus metrics and Jaeger traces.
+
+**Example:**
+```bash
+jan-cli monitor reset
+# ⚠️  Delete all Prometheus/Jaeger data? [y/N]: y
+# [OK] Monitoring data cleared
+```
+
+### `monitor export`
+
+Export monitoring configuration files.
+
+**Usage:**
+```bash
+jan-cli monitor export
+```
+
+**Exports:**
+- Docker Compose configuration
+- Prometheus configuration
+- OTEL Collector configuration
+- Prometheus alert rules
+
+**Output:** `exports/monitoring/`
+
+**Example:**
+```bash
+jan-cli monitor export
+# [OK] Configs exported to exports/monitoring/
+```
+
+## Global Flags
+
+Available for all commands:
+
+- `-v, --verbose` - Enable verbose output
+- `--config-dir string` - Configuration directory (default: `config`)
+- `-h, --help` - Show help for any command
+- `--version` - Show version information
+
+## Shell Completion
+
+Generate shell completion scripts for better command-line experience.
+
+### Bash
+
+```bash
+jan-cli completion bash | sudo tee /etc/bash_completion.d/jan-cli > /dev/null
+```
+
+### Zsh
+
+```bash
+jan-cli completion zsh > "${fpath[1]}/_jan-cli"
+```
+
+### Fish
+
+```bash
+jan-cli completion fish > ~/.config/fish/completions/jan-cli.fish
+```
+
+### PowerShell
+
+```powershell
+jan-cli completion powershell | Out-String | Invoke-Expression
+```
+
+## Examples
+
+### Typical Development Workflow
+
+```bash
+# 1. Setup development environment
+jan-cli dev setup
+
+# 2. Validate configuration
+jan-cli config validate
+
+# 3. Export configuration for Docker Compose
+jan-cli config export --format docker-env --output .env
+
+# 4. Start services (using make or docker compose)
+make up-full
+
+# 5. Check service status
+jan-cli service status
+
+# 6. View logs
+jan-cli service logs llm-api --follow
+```
+
+### Configuration Management
+
+```bash
+# Validate all environments
+jan-cli config validate
+jan-cli config validate --env production
+jan-cli config validate --env staging
+
+# Export for different targets
+jan-cli config export --format env > .env
+jan-cli config export --format json > config.json
+jan-cli config k8s-values --env production > k8s/values-prod.yaml
+
+# Inspect configuration
+jan-cli config show llm-api
+jan-cli config show --path services.llm-api.database --format json
+```
+
+### Service Operations
+
+```bash
+# Quick service overview
+jan-cli service list
+jan-cli service status
+
+# Debug specific service
+jan-cli service logs llm-api --tail 100
+jan-cli service logs llm-api --follow
+jan-cli service status llm-api
+```
+
+## Integration with Make
+
+You can integrate jan-cli with your Makefile:
+
+```makefile
+.PHONY: config-validate
+config-validate:
+	jan-cli config validate
+
+.PHONY: config-export
+config-export:
+	jan-cli config export --format docker-env --output .env
+
+.PHONY: k8s-values
+k8s-values:
+	jan-cli config k8s-values --env production > k8s/values-prod.yaml
+```
+
+## Troubleshooting
+
+### Command Not Found
+
+Ensure jan-cli is in your PATH:
+
+```bash
+# Check if jan-cli is installed and on your PATH
+command -v jan-cli
+
+# If not, add the directory that contains the binary to PATH (or use the full path)
+export PATH="$PATH:$HOME/bin"
+```
+
+### Configuration Validation Errors
+
+```bash
+# Verbose output for debugging
+jan-cli -v config validate
+
+# Check specific file
+jan-cli config validate --file config/defaults.yaml
+```
+
+### Permission Denied
+
+```bash
+# Make executable (Linux/macOS)
+chmod +x jan-cli
+
+# Or run with sudo if accessing protected files
+sudo jan-cli config export --output /etc/jan/config.env
+```
+
+## Contributing
+
+See [../../CONTRIBUTING.md](../../CONTRIBUTING.md) for guidelines on contributing to jan-cli.
+
+## License
+
+See [../../LICENSE](../../LICENSE) for license information.
diff --git a/cmd/jan-cli/cmd_apitest.go b/cmd/jan-cli/cmd_apitest.go
new file mode 100644
index 00000000..3863c03f
--- /dev/null
+++ b/cmd/jan-cli/cmd_apitest.go
@@ -0,0 +1,516 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"time"
+
+	"github.com/spf13/cobra"
+)
+
+// apiTestCmd is the parent "api-test" command. It defines no Run function of
+// its own; the "run" subcommand is attached to it in init.
+var apiTestCmd = &cobra.Command{
+	Use:   "api-test",
+	Short: "Run API tests from Postman collections",
+	Long: `Run API integration tests using Postman collection JSON files.
+
+This is a lightweight cli api test that supports the essential
+features needed for Jan Server testing: running collections, setting 
+environment variables, and reporting results.
+
+Examples:
+  jan-cli api-test run tests/automation/auth-postman-scripts.json
+  jan-cli api-test run tests/automation/auth-postman-scripts.json \
+    --env-var "kong_url=http://localhost:8000" \
+    --env-var "keycloak_admin=admin" \
+    --verbose`,
+}
+
+// runApiTestCmd is the "api-test run" subcommand. It requires exactly one
+// positional argument (the collection file path) and delegates to runApiTest.
+var runApiTestCmd = &cobra.Command{
+	Use:   "run [collection-file]",
+	Short: "Run a Postman collection",
+	Long:  `Execute all requests in a Postman collection file and report results.`,
+	Args:  cobra.ExactArgs(1),
+	RunE:  runApiTest,
+}
+
+// Flag values for "api-test run"; each is bound to its flag in init.
+var (
+	envVars   []string // raw --env-var entries; runApiTest splits each on the first "="
+	verbose   bool     // --verbose: print folders, requests and response status lines
+	reporters []string // --reporters values; NOTE(review): no reader is visible in this part of the file
+	timeout   int      // --timeout-request: per-request timeout in milliseconds
+)
+
+// init binds the "api-test run" flags to the package-level flag variables and
+// attaches the run subcommand to apiTestCmd.
+func init() {
+	runFlags := runApiTestCmd.Flags()
+	runFlags.StringArrayVar(&envVars, "env-var", []string{}, "Environment variable (key=value)")
+	runFlags.BoolVar(&verbose, "verbose", false, "Verbose output")
+	runFlags.StringArrayVar(&reporters, "reporters", []string{"cli"}, "Reporters to use")
+	runFlags.IntVar(&timeout, "timeout-request", 30000, "Request timeout in milliseconds")
+
+	apiTestCmd.AddCommand(runApiTestCmd)
+}
+
+// PostmanCollection is the top-level JSON document that runApiTest decodes
+// from the collection file.
+type PostmanCollection struct {
+	Info struct {
+		Name   string `json:"name"` // printed as the run title by runApiTest
+		Schema string `json:"schema"`
+	} `json:"info"`
+	Item  []PostmanItem  `json:"item"` // top-level folders and requests, run in slice order
+	Event []PostmanEvent `json:"event,omitempty"` // collection-level scripts, handed to processCollectionEvents
+}
+
+// PostmanItem is one node of the collection tree. runItem treats a node with
+// a non-empty Item list as a folder and recurses into it; otherwise the node
+// is a request and is skipped when Request is nil.
+type PostmanItem struct {
+	Name    string          `json:"name"`
+	Request *PostmanRequest `json:"request,omitempty"`
+	Item    []PostmanItem   `json:"item,omitempty"`
+	Event   []PostmanEvent  `json:"event,omitempty"` // item-level scripts; "test" scripts are scanned after a successful request
+}
+
+// PostmanRequest describes the HTTP request of a collection item.
+type PostmanRequest struct {
+	Method string          `json:"method"`
+	Header []PostmanHeader `json:"header"`
+	Body   *PostmanBody    `json:"body,omitempty"`
+	URL    interface{}     `json:"url"` // untyped: buildURL accepts a plain string or an object with a "raw" or "url" field
+}
+
+// PostmanHeader is one request header entry.
+type PostmanHeader struct {
+	Key   string `json:"key"`
+	Value string `json:"value"` // variable placeholders are expanded by runItem before sending
+}
+
+// PostmanBody is the request body definition. runItem sends a body only for
+// the "raw" and "urlencoded" modes; any other mode results in no body.
+type PostmanBody struct {
+	Mode       string            `json:"mode"`
+	Raw        string            `json:"raw,omitempty"` // used when Mode is "raw"
+	Urlencoded []PostmanFormData `json:"urlencoded,omitempty"` // used when Mode is "urlencoded"
+}
+
+// PostmanFormData is one key/value pair of a urlencoded body. runItem expands
+// variable placeholders in both the key and the value.
+type PostmanFormData struct {
+	Key   string `json:"key"`
+	Value string `json:"value"`
+}
+
+// PostmanEvent is a script attached to a collection or an item.
+type PostmanEvent struct {
+	Listen string        `json:"listen"` // "prerequest" is read by processCollectionEvents, "test" by extractVariablesFromScripts
+	Script PostmanScript `json:"script"`
+}
+
+// PostmanScript holds the source of an event script. The script is never
+// executed by the visible code; it is only scanned for known text patterns.
+type PostmanScript struct {
+	Type string   `json:"type"`
+	Exec []string `json:"exec"` // source fragments; consumers join them with "\n"
+}
+
+// TestResult is the outcome of one executed request.
+type TestResult struct {
+	Name     string        // name of the collection item
+	Passed   bool          // false when the request could not be built or sent, or the status was >= 400
+	Duration time.Duration // time measured by runItem from before URL building until the outcome was known
+	Error    string        // failure description; left empty for passing requests
+}
+
+// runApiTest is the RunE handler of "api-test run".
+//
+// args[0] is the path of a Postman collection JSON file (cobra enforces
+// exactly one argument). The function seeds the variable map from the
+// --env-var flags, lets processCollectionEvents add generated defaults, runs
+// every top-level item in order and prints a report.
+//
+// It returns an error when the file cannot be read or parsed, or when at
+// least one request failed.
+func runApiTest(cmd *cobra.Command, args []string) error {
+	collectionFile := args[0]
+
+	// Parse --env-var entries. Only the first "=" separates key from value, so
+	// values may themselves contain "=" (URLs with query strings, tokens).
+	envMap := make(map[string]string, len(envVars))
+	for _, ev := range envVars {
+		parts := strings.SplitN(ev, "=", 2)
+		if len(parts) != 2 {
+			// Warn instead of silently dropping the entry: an unset variable
+			// otherwise only shows up later as a literal "{{name}}" in a URL.
+			fmt.Fprintf(os.Stderr, "warning: ignoring malformed --env-var %q (expected key=value)\n", ev)
+			continue
+		}
+		envMap[parts[0]] = parts[1]
+	}
+
+	// Load collection
+	data, err := os.ReadFile(collectionFile)
+	if err != nil {
+		return fmt.Errorf("failed to read collection file: %w", err)
+	}
+
+	var collection PostmanCollection
+	if err := json.Unmarshal(data, &collection); err != nil {
+		return fmt.Errorf("failed to parse collection: %w", err)
+	}
+
+	fmt.Printf("\n┌─────────────────────────────────────────────────────────────────────┐\n")
+	fmt.Printf("│ Jan API Test Runner                                                 │\n")
+	fmt.Printf("└─────────────────────────────────────────────────────────────────────┘\n\n")
+	fmt.Printf("→ %s\n\n", collection.Info.Name)
+
+	// Collection-level prerequest scripts may ask for generated defaults
+	// (test user name, password, ...); values given via --env-var win.
+	processCollectionEvents(collection.Event, envMap)
+
+	// Run tests. Items share envMap, so variables extracted from one response
+	// are visible to all later requests.
+	results := []TestResult{}
+	totalStart := time.Now()
+
+	for _, item := range collection.Item {
+		results = append(results, runItem(item, envMap, "")...)
+	}
+
+	totalDuration := time.Since(totalStart)
+
+	// Report results
+	printResults(results, totalDuration)
+
+	// A single failed request fails the whole run.
+	for _, result := range results {
+		if !result.Passed {
+			return fmt.Errorf("tests failed")
+		}
+	}
+
+	return nil
+}
+
+// runItem runs one node of the collection tree and returns a TestResult for
+// every request executed beneath it.
+//
+// A node with child items is a folder: its children are run in order with the
+// verbose-output indentation (prefix) extended by two spaces. A node without
+// children and without a request yields no results. Otherwise the request is
+// built with variable placeholders expanded from envMap, sent with the
+// --timeout-request timeout, and counted as passed when the status code is
+// below 400. After a passing request the item's test scripts are scanned so
+// that extracted variables are written back into envMap.
+func runItem(item PostmanItem, envMap map[string]string, prefix string) []TestResult {
+	results := []TestResult{}
+
+	// If this item has nested items (folder), run them
+	if len(item.Item) > 0 {
+		if verbose {
+			fmt.Printf("\n📁 %s\n", item.Name)
+		}
+		for _, subItem := range item.Item {
+			results = append(results, runItem(subItem, envMap, prefix+"  ")...)
+		}
+		return results
+	}
+
+	// This is a request item
+	if item.Request == nil {
+		return results
+	}
+
+	result := TestResult{
+		Name:   item.Name,
+		Passed: true,
+	}
+
+	start := time.Now()
+
+	// fail records the request as failed with the given message and returns
+	// the result list for an early exit.
+	fail := func(format string, a ...interface{}) []TestResult {
+		result.Passed = false
+		result.Error = fmt.Sprintf(format, a...)
+		result.Duration = time.Since(start)
+		return append(results, result)
+	}
+
+	// Build URL
+	urlStr := buildURL(item.Request.URL, envMap)
+
+	if verbose {
+		fmt.Printf("%s→ %s %s\n", prefix, item.Request.Method, urlStr)
+	}
+
+	// Create request body. Modes other than "raw" and "urlencoded" are sent
+	// without a body.
+	var bodyReader io.Reader
+	defaultContentType := ""
+	if body := item.Request.Body; body != nil {
+		switch body.Mode {
+		case "raw":
+			bodyReader = strings.NewReader(replaceVariables(body.Raw, envMap))
+		case "urlencoded":
+			formData := url.Values{}
+			for _, param := range body.Urlencoded {
+				// Add rather than Set: a form may legitimately repeat a key,
+				// and Set would keep only the last occurrence.
+				formData.Add(replaceVariables(param.Key, envMap), replaceVariables(param.Value, envMap))
+			}
+			bodyReader = strings.NewReader(formData.Encode())
+			// Collections often omit this header for form bodies; without it
+			// servers typically do not parse the form.
+			defaultContentType = "application/x-www-form-urlencoded"
+		}
+	}
+
+	req, err := http.NewRequest(item.Request.Method, urlStr, bodyReader)
+	if err != nil {
+		return fail("Failed to create request: %v", err)
+	}
+
+	// Set headers
+	for _, header := range item.Request.Header {
+		req.Header.Set(header.Key, replaceVariables(header.Value, envMap))
+	}
+	// Headers listed in the collection always win over the default.
+	if defaultContentType != "" && req.Header.Get("Content-Type") == "" {
+		req.Header.Set("Content-Type", defaultContentType)
+	}
+
+	// Execute request
+	client := &http.Client{
+		Timeout: time.Duration(timeout) * time.Millisecond,
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return fail("Request failed: %v", err)
+	}
+	defer resp.Body.Close()
+
+	// Read response
+	respBody, readErr := io.ReadAll(resp.Body)
+
+	result.Duration = time.Since(start)
+
+	if verbose {
+		fmt.Printf("%s  ← %d %s (%dms)\n", prefix, resp.StatusCode, http.StatusText(resp.StatusCode), result.Duration.Milliseconds())
+	}
+
+	switch {
+	case resp.StatusCode >= 400:
+		// Simple test: anything outside the 2xx/3xx range is a failure. The
+		// (possibly partial) body is included to aid debugging.
+		result.Passed = false
+		result.Error = fmt.Sprintf("HTTP %d: %s", resp.StatusCode, string(respBody))
+	case readErr != nil:
+		// A truncated body must not be fed to variable extraction.
+		result.Passed = false
+		result.Error = fmt.Sprintf("Failed to read response body: %v", readErr)
+	default:
+		// Extract variables from test scripts if the request succeeded
+		extractVariablesFromScripts(item, respBody, resp, envMap)
+	}
+
+	results = append(results, result)
+	return results
+}
+
+func buildURL(urlInterface interface{}, envMap map[string]string) string {
+	switch v := urlInterface.(type) {
+	case string:
+		return replaceVariables(v, envMap)
+	case map[string]interface{}:
+		// Handle Postman URL object format
+		if raw, ok := v["raw"].(string); ok {
+			return replaceVariables(raw, envMap)
+		}
+		// Also try "url" field
+		if urlStr, ok := v["url"].(string); ok {
+			return replaceVariables(urlStr, envMap)
+		}
+	}
+	return fmt.Sprintf("%v", urlInterface)
+}
+
+// replaceVariables returns text with every "{{name}}" and "${name}"
+// placeholder replaced by envMap[name]. Placeholders whose name is not a key
+// of envMap are left untouched.
+//
+// The substitution is a single left-to-right pass over text: output produced
+// by one substitution is never scanned again. This keeps the result
+// independent of Go's randomized map iteration order when a value itself
+// contains placeholder syntax, and avoids rescanning text once per key.
+func replaceVariables(text string, envMap map[string]string) string {
+	if len(envMap) == 0 {
+		return text
+	}
+	// Cheap exit for the common case of text without any placeholder opener.
+	if !strings.Contains(text, "{{") && !strings.Contains(text, "${") {
+		return text
+	}
+
+	// Two old/new pairs per variable, one for each placeholder syntax.
+	pairs := make([]string, 0, 4*len(envMap))
+	for key, value := range envMap {
+		pairs = append(pairs, "{{"+key+"}}", value, "${"+key+"}", value)
+	}
+	return strings.NewReplacer(pairs...).Replace(text)
+}
+
+// processCollectionEvents scans collection-level "prerequest" scripts and
+// seeds envMap with generated defaults for the variables those scripts set.
+//
+// The scripts are not executed. A line is only checked for the text
+// pm.collectionVariables.set('<name>' for each known variable name, and a
+// variable that already exists in envMap is never overwritten. The email and
+// pid defaults are derived from test_user_username and are therefore only
+// generated once a username is present in envMap.
+func processCollectionEvents(events []PostmanEvent, envMap map[string]string) {
+	// derivedFromUsername builds a generator that appends suffix to the
+	// current username and reports whether a username was available.
+	derivedFromUsername := func(suffix string) func() (string, bool) {
+		return func() (string, bool) {
+			username, ok := envMap["test_user_username"]
+			return username + suffix, ok
+		}
+	}
+
+	// Known variables in the order they are checked on every script line.
+	defaults := []struct {
+		name     string
+		generate func() (string, bool)
+	}{
+		{"test_user_username", func() (string, bool) {
+			return fmt.Sprintf("automation-user-%d", time.Now().UnixNano()), true
+		}},
+		{"test_user_password", func() (string, bool) {
+			return fmt.Sprintf("Passw0rd!%d", time.Now().UnixNano()%10000), true
+		}},
+		{"test_user_email", derivedFromUsername("@example.com")},
+		{"test_user_pid", derivedFromUsername("")},
+		{"collection_timestamp", func() (string, bool) {
+			return time.Now().Format(time.RFC3339), true
+		}},
+	}
+
+	for _, event := range events {
+		if event.Listen != "prerequest" {
+			continue
+		}
+
+		// Exec entries may themselves contain newlines, so join before
+		// splitting into individual lines.
+		source := strings.Join(event.Script.Exec, "\n")
+		for _, rawLine := range strings.Split(source, "\n") {
+			line := strings.TrimSpace(rawLine)
+
+			for _, entry := range defaults {
+				if !strings.Contains(line, "pm.collectionVariables.set('"+entry.name+"'") {
+					continue
+				}
+				if _, exists := envMap[entry.name]; exists {
+					continue
+				}
+				if value, ok := entry.generate(); ok {
+					envMap[entry.name] = value
+				}
+			}
+		}
+	}
+}
+
+// extractVariablesFromScripts scans the item's "test" scripts for
+// pm.collectionVariables.set calls and stores the matching values in envMap.
+//
+// The scripts are not executed. Two sources are supported:
+//   - the response's Location header: when a script mentions both
+//     pm.response.headers.get('Location') and the test_user_id variable, and
+//     the status is 201 or 204, the last path segment of the header is stored
+//     as test_user_id and teardown_user_id;
+//   - the JSON response body: for every other set call the variable name and
+//     JSON path are obtained from extractVarSetPattern and resolved with
+//     extractJSONValue; empty results are not stored.
+func extractVariablesFromScripts(item PostmanItem, respBody []byte, resp *http.Response, envMap map[string]string) {
+	if len(item.Event) == 0 {
+		return
+	}
+
+	// Best-effort decode: not every response is a JSON object, in which case
+	// responseData stays nil.
+	var responseData map[string]interface{}
+	_ = json.Unmarshal(respBody, &responseData)
+
+	statusAllowsLocation := resp.StatusCode == 201 || resp.StatusCode == 204
+
+	for _, event := range item.Event {
+		if event.Listen != "test" {
+			continue
+		}
+
+		script := strings.Join(event.Script.Exec, "\n")
+
+		// Location-header based extraction of the created user's ID.
+		idFromLocation := false
+		scriptReadsLocation := strings.Contains(script, "pm.response.headers.get('Location')") &&
+			strings.Contains(script, "pm.collectionVariables.set('test_user_id'")
+		if scriptReadsLocation && statusAllowsLocation {
+			location := resp.Header.Get("Location")
+			// Requires a "/" followed by at least one character; an empty
+			// header yields -1 and is rejected here as well.
+			if slash := strings.LastIndex(location, "/"); slash >= 0 && slash < len(location)-1 {
+				userID := location[slash+1:]
+				envMap["test_user_id"] = userID
+				envMap["teardown_user_id"] = userID
+				idFromLocation = true
+			}
+		}
+
+		// Body based extraction, one script line at a time.
+		// Example line: pm.collectionVariables.set('kc_admin_access_token', data.access_token);
+		for _, rawLine := range strings.Split(script, "\n") {
+			line := strings.TrimSpace(rawLine)
+			if !strings.Contains(line, "pm.collectionVariables.set") {
+				continue
+			}
+
+			varName, jsonPath := extractVarSetPattern(line)
+
+			// The ID taken from the Location header must not be overwritten.
+			if varName == "test_user_id" && idFromLocation {
+				continue
+			}
+			if varName == "" || jsonPath == "" {
+				continue
+			}
+
+			if value := extractJSONValue(responseData, jsonPath); value != "" {
+				envMap[varName] = value
+			}
+		}
+	}
+}
+
+// extractVarSetPattern extracts variable name and JSON path from pm.collectionVariables.set line
+func extractVarSetPattern(line string) (varName string, jsonPath string) {
+	// Remove semicolons and clean up
+	line = strings.TrimSuffix(line, ";")
+	line = strings.TrimSpace(line)
+
+	// Find the pattern: pm.collectionVariables.set('varname', source)
+	if idx := strings.Index(line, "pm.collectionVariables.set("); idx >= 0 {
+		// Extract the arguments
+		argsStart := idx + len("pm.collectionVariables.set(")
+		argsEnd := strings.LastIndex(line, ")")
+		if argsEnd > argsStart {
+			args := line[argsStart:argsEnd]
+			// Split by comma
+			parts := strings.SplitN(args, ",", 2)
+			if len(parts) == 2 {
+				// Extract variable name (remove quotes)
+				varName = strings.Trim(strings.TrimSpace(parts[0]), "'\"")
+				// Extract JSON path (e.g., "data.access_token")
+				jsonPath = strings.TrimSpace(parts[1])
+				// Remove "data." prefix if present
+				jsonPath = strings.TrimPrefix(jsonPath, "data.")
+				jsonPath = strings.Trim(jsonPath, "'\"")
+			}
+		}
+	}
+	return
+}
+
+// extractJSONValue looks up a dot-separated path (e.g. "user.id") in a decoded
+// JSON object and returns the value rendered as a string.
+//
+// It returns "" when any path segment is missing, when an intermediate value
+// is not an object, when data is nil, or when the value is JSON null. Callers
+// treat "" as "nothing extracted", so absent values must never be rendered as
+// the literal text "<nil>".
+//
+// Strings are returned as-is, numbers are formatted without a fractional part,
+// booleans as "true"/"false", and anything else with fmt's %v verb.
+func extractJSONValue(data map[string]interface{}, path string) string {
+	var current interface{} = data
+
+	for _, part := range strings.Split(path, ".") {
+		obj, ok := current.(map[string]interface{})
+		if !ok {
+			return ""
+		}
+		next, found := obj[part]
+		if !found {
+			return ""
+		}
+		current = next
+	}
+
+	switch v := current.(type) {
+	case nil:
+		// JSON null carries no usable value.
+		return ""
+	case string:
+		return v
+	case float64:
+		return fmt.Sprintf("%.0f", v)
+	case bool:
+		return fmt.Sprintf("%t", v)
+	default:
+		return fmt.Sprintf("%v", v)
+	}
+}
+
+func printResults(results []TestResult, totalDuration time.Duration) {
+	passed := 0
+	failed := 0
+
+	for _, result := range results {
+		if result.Passed {
+			passed++
+		} else {
+			failed++
+		}
+	}
+
+	fmt.Printf("\n┌─────────────────────────────────────────────────────────────────────┐\n")
+	fmt.Printf("│ Test Results                                                        │\n")
+	fmt.Printf("└─────────────────────────────────────────────────────────────────────┘\n\n")
+
+	if failed > 0 {
+		fmt.Printf("❌ Failed Tests:\n\n")
+		for _, result := range results {
+			if !result.Passed {
+				fmt.Printf("  ✗ %s\n", result.Name)
+				if result.Error != "" {
+					fmt.Printf("    %s\n", result.Error)
+				}
+				fmt.Printf("    Duration: %dms\n\n", result.Duration.Milliseconds())
+			}
+		}
+	}
+
+	fmt.Printf("Summary:\n")
+	fmt.Printf("  Total:    %d tests\n", len(results))
+	fmt.Printf("  Passed:   %d ✓\n", passed)
+	if failed > 0 {
+		fmt.Printf("  Failed:   %d ✗\n", failed)
+	}
+	fmt.Printf("  Duration: %dms\n\n", totalDuration.Milliseconds())
+
+	if failed == 0 {
+		fmt.Printf("✓ All tests passed!\n\n")
+	} else {
+		fmt.Printf("✗ Some tests failed\n\n")
+	}
+}
diff --git a/cmd/jan-cli/cmd_config.go b/cmd/jan-cli/cmd_config.go
new file mode 100644
index 00000000..6f22dfd2
--- /dev/null
+++ b/cmd/jan-cli/cmd_config.go
@@ -0,0 +1,411 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/janhq/jan-server/pkg/config/codegen"
+	"github.com/spf13/cobra"
+	"gopkg.in/yaml.v3"
+)
+
+// Cobra command definitions for the `config` command group. configCmd is the
+// parent; the remaining commands are attached to it in init.
+var (
+	// configCmd groups all configuration-related subcommands.
+	configCmd = &cobra.Command{
+		Use:   "config",
+		Short: "Configuration management commands",
+		Long:  `Manage Jan Server configuration files, validate, export, and inspect config values.`,
+	}
+
+	// configGenerateCmd produces schema and defaults files (see runConfigGenerate).
+	configGenerateCmd = &cobra.Command{
+		Use:   "generate",
+		Short: "Generate configuration files from Go structs",
+		Long:  `Generate JSON Schema and YAML defaults from Go struct definitions in pkg/config/types.go.`,
+		RunE:  runConfigGenerate,
+	}
+
+	// configValidateCmd checks a config file (see runConfigValidate).
+	configValidateCmd = &cobra.Command{
+		Use:   "validate",
+		Short: "Validate configuration files",
+		Long:  `Validate configuration files for syntax errors and required fields.`,
+		RunE:  runConfigValidate,
+	}
+
+	// configExportCmd renders a config file in another format (see runConfigExport).
+	configExportCmd = &cobra.Command{
+		Use:   "export",
+		Short: "Export configuration in various formats",
+		Long:  `Export configuration as environment variables, JSON, YAML, or docker-compose env file.`,
+		RunE:  runConfigExport,
+	}
+
+	// configShowCmd prints all or part of a config file (see runConfigShow).
+	configShowCmd = &cobra.Command{
+		Use:   "show [service]",
+		Short: "Show configuration values",
+		Long:  `Display configuration values for a specific service or entire config.`,
+		RunE:  runConfigShow,
+	}
+
+	// configK8sCmd emits Helm values (see runConfigK8sValues).
+	configK8sCmd = &cobra.Command{
+		Use:   "k8s-values",
+		Short: "Generate Kubernetes Helm values",
+		Long:  `Generate Kubernetes Helm values file from configuration.`,
+		RunE:  runConfigK8sValues,
+	}
+)
+
+// init attaches the config subcommands to configCmd and declares the flags
+// each subcommand accepts.
+func init() {
+	configCmd.AddCommand(
+		configGenerateCmd,
+		configValidateCmd,
+		configExportCmd,
+		configShowCmd,
+		configK8sCmd,
+	)
+
+	// generate flags
+	generateFlags := configGenerateCmd.Flags()
+	generateFlags.StringP("output", "o", "config", "Output directory for generated files")
+	generateFlags.Bool("schema-only", false, "Generate only JSON schemas")
+	generateFlags.Bool("yaml-only", false, "Generate only YAML defaults")
+
+	// validate flags
+	validateFlags := configValidateCmd.Flags()
+	validateFlags.StringP("file", "f", "config/defaults.yaml", "Config file to validate")
+	validateFlags.String("schema", "", "Schema file to validate against")
+	validateFlags.StringP("env", "e", "", "Environment to validate")
+
+	// export flags
+	exportFlags := configExportCmd.Flags()
+	exportFlags.StringP("file", "f", "config/defaults.yaml", "Config file to export")
+	exportFlags.String("format", "env", "Output format: env, docker-env, json, yaml")
+	exportFlags.String("prefix", "", "Add prefix to exported variables")
+	exportFlags.StringP("output", "o", "", "Output file (default: stdout)")
+
+	// show flags
+	showFlags := configShowCmd.Flags()
+	showFlags.StringP("file", "f", "config/defaults.yaml", "Config file to read")
+	showFlags.String("path", "", "Config path to show (e.g., services.llm-api)")
+	showFlags.String("format", "yaml", "Output format: yaml, json, value")
+
+	// k8s-values flags
+	k8sFlags := configK8sCmd.Flags()
+	k8sFlags.StringP("env", "e", "development", "Environment (development, production, etc.)")
+	k8sFlags.StringP("output", "o", "", "Output file (default: stdout)")
+	k8sFlags.StringSlice("set", []string{}, "Override values (key=value)")
+}
+
+// runConfigGenerate implements `config generate`. It writes JSON Schema files
+// to <output>/schema and YAML defaults to <output>/defaults.yaml using the
+// codegen package.
+//
+// --schema-only skips the YAML defaults and --yaml-only skips the schemas.
+// The two flags are mutually exclusive: together they would suppress both
+// artifacts while still reporting success, so that combination is rejected.
+func runConfigGenerate(cmd *cobra.Command, args []string) error {
+	outputDir, err := resolveOutputDir(cmd)
+	if err != nil {
+		return fmt.Errorf("resolve output directory: %w", err)
+	}
+	schemaOnly, _ := cmd.Flags().GetBool("schema-only")
+	yamlOnly, _ := cmd.Flags().GetBool("yaml-only")
+
+	if schemaOnly && yamlOnly {
+		return fmt.Errorf("--schema-only and --yaml-only are mutually exclusive")
+	}
+
+	fmt.Println("Starting configuration code generation...")
+
+	// Determine what to generate
+	generateSchema := !yamlOnly
+	generateYAML := !schemaOnly
+
+	// Generate JSON Schema
+	if generateSchema {
+		schemaDir := filepath.Join(outputDir, "schema")
+		fmt.Printf("Generating JSON Schema files in %s...\n", schemaDir)
+		if err := codegen.GenerateJSONSchema(schemaDir); err != nil {
+			return fmt.Errorf("generate JSON schema: %w", err)
+		}
+	}
+
+	// Generate YAML defaults
+	if generateYAML {
+		defaultsPath := filepath.Join(outputDir, "defaults.yaml")
+		fmt.Printf("Generating YAML defaults in %s...\n", defaultsPath)
+		if err := codegen.GenerateDefaultsYAML(defaultsPath); err != nil {
+			return fmt.Errorf("generate YAML defaults: %w", err)
+		}
+	}
+
+	fmt.Println("✓ Configuration generation complete!")
+	return nil
+}
+
+// runConfigValidate implements `config validate`. It parses the selected YAML
+// config file, optionally overlays <config-dir>/<env>.yaml when --env is given
+// and that file exists, and then checks that the required top-level keys are
+// present.
+//
+// Returns an error when the file cannot be read or parsed, or when validation
+// finds problems (which are also listed on stdout).
+//
+// NOTE(review): the --schema flag is only echoed back; no schema validation is
+// performed in this function.
+func runConfigValidate(cmd *cobra.Command, args []string) error {
+	configFile, _ := cmd.Flags().GetString("file")
+	schemaFile, _ := cmd.Flags().GetString("schema")
+	env, _ := cmd.Flags().GetString("env")
+
+	configPath, err := resolveConfigFile(cmd, configFile)
+	if err != nil {
+		return fmt.Errorf("resolve config file: %w", err)
+	}
+
+	configDir, err := getConfigDir(cmd)
+	if err != nil {
+		return fmt.Errorf("resolve config directory: %w", err)
+	}
+
+	fmt.Printf("Validating configuration...\n")
+	fmt.Printf("  Config: %s\n", configPath)
+	if env != "" {
+		fmt.Printf("  Environment: %s\n", env)
+	}
+	if schemaFile != "" {
+		fmt.Printf("  Schema: %s\n", schemaFile)
+	}
+
+	// Load config file
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		return fmt.Errorf("read config file: %w", err)
+	}
+
+	// Parse YAML
+	var config map[string]interface{}
+	if err := yaml.Unmarshal(data, &config); err != nil {
+		return fmt.Errorf("parse YAML: %w", err)
+	}
+	// An empty (or comment-only) document leaves the map nil. Merging into a
+	// nil map would panic, so start from an empty one instead.
+	if config == nil {
+		config = map[string]interface{}{}
+	}
+
+	// If environment specified, merge environment overrides
+	if env != "" {
+		envFile := filepath.Join(configDir, env+".yaml")
+		// A missing overlay file is not an error: the base config is validated alone.
+		if _, err := os.Stat(envFile); err == nil {
+			envData, err := os.ReadFile(envFile)
+			if err != nil {
+				return fmt.Errorf("read environment file: %w", err)
+			}
+
+			var envConfig map[string]interface{}
+			if err := yaml.Unmarshal(envData, &envConfig); err != nil {
+				return fmt.Errorf("parse environment YAML: %w", err)
+			}
+
+			// Merge configs
+			mergeMaps(config, envConfig)
+		}
+	}
+
+	// Basic validation
+	problems := []string{}
+
+	// Check required top-level keys
+	requiredKeys := []string{"services"}
+	for _, key := range requiredKeys {
+		if _, ok := config[key]; !ok {
+			problems = append(problems, fmt.Sprintf("missing required key: %s", key))
+		}
+	}
+
+	if len(problems) > 0 {
+		fmt.Println("\n Validation failed:")
+		for _, problem := range problems {
+			fmt.Printf("  - %s\n", problem)
+		}
+		return fmt.Errorf("validation failed with %d errors", len(problems))
+	}
+
+	fmt.Println("\n Configuration is valid")
+	return nil
+}
+
+// runConfigExport implements `config export`. It loads the selected YAML
+// config file and renders it in the format named by --format (env,
+// docker-env, json or yaml). The result goes to the --output file when one is
+// given, otherwise to stdout.
+func runConfigExport(cmd *cobra.Command, args []string) error {
+	flags := cmd.Flags()
+	configFile, _ := flags.GetString("file")
+	format, _ := flags.GetString("format")
+	prefix, _ := flags.GetString("prefix")
+	outputFile, _ := flags.GetString("output")
+
+	configPath, err := resolveConfigFile(cmd, configFile)
+	if err != nil {
+		return fmt.Errorf("resolve config file: %w", err)
+	}
+
+	raw, err := os.ReadFile(configPath)
+	if err != nil {
+		return fmt.Errorf("read config file: %w", err)
+	}
+
+	var config map[string]interface{}
+	if err := yaml.Unmarshal(raw, &config); err != nil {
+		return fmt.Errorf("parse YAML: %w", err)
+	}
+
+	// Render in the requested format.
+	var output string
+	switch format {
+	case "env":
+		output = exportAsEnv(config, prefix)
+	case "docker-env":
+		output = exportAsDockerEnv(config, prefix)
+	case "json":
+		encoded, err := json.MarshalIndent(config, "", "  ")
+		if err != nil {
+			return fmt.Errorf("marshal JSON: %w", err)
+		}
+		output = string(encoded)
+	case "yaml":
+		encoded, err := yaml.Marshal(config)
+		if err != nil {
+			return fmt.Errorf("marshal YAML: %w", err)
+		}
+		output = string(encoded)
+	default:
+		return fmt.Errorf("unsupported format: %s", format)
+	}
+
+	// No output file: stream to stdout and finish.
+	if outputFile == "" {
+		fmt.Print(output)
+		return nil
+	}
+
+	if err := os.WriteFile(outputFile, []byte(output), 0644); err != nil {
+		return fmt.Errorf("write output file: %w", err)
+	}
+	fmt.Printf(" Exported to %s\n", outputFile)
+	return nil
+}
+
+// runConfigShow implements `config show [service]`. It loads the selected
+// YAML config file, optionally descends to the dot-separated --path (or to
+// "services.<service>" when a positional service is given and --path is
+// empty), and prints the resulting value as yaml, json or a plain value.
+func runConfigShow(cmd *cobra.Command, args []string) error {
+	flags := cmd.Flags()
+	configFile, _ := flags.GetString("file")
+	path, _ := flags.GetString("path")
+	format, _ := flags.GetString("format")
+
+	configPath, err := resolveConfigFile(cmd, configFile)
+	if err != nil {
+		return fmt.Errorf("resolve config file: %w", err)
+	}
+
+	// An explicit --path takes precedence over the positional service name.
+	if len(args) > 0 && path == "" {
+		path = "services." + args[0]
+	}
+
+	raw, err := os.ReadFile(configPath)
+	if err != nil {
+		return fmt.Errorf("read config file: %w", err)
+	}
+
+	var config map[string]interface{}
+	if err := yaml.Unmarshal(raw, &config); err != nil {
+		return fmt.Errorf("parse YAML: %w", err)
+	}
+
+	// Walk down the requested path one segment at a time.
+	var value interface{} = config
+	if path != "" {
+		for _, segment := range strings.Split(path, ".") {
+			node, isMap := value.(map[string]interface{})
+			if !isMap {
+				return fmt.Errorf("cannot navigate path: %s", path)
+			}
+			child, found := node[segment]
+			if !found {
+				return fmt.Errorf("path not found: %s", path)
+			}
+			value = child
+		}
+	}
+
+	switch format {
+	case "yaml":
+		rendered, err := yaml.Marshal(value)
+		if err != nil {
+			return fmt.Errorf("marshal YAML: %w", err)
+		}
+		fmt.Print(string(rendered))
+		return nil
+	case "json":
+		rendered, err := json.MarshalIndent(value, "", "  ")
+		if err != nil {
+			return fmt.Errorf("marshal JSON: %w", err)
+		}
+		fmt.Println(string(rendered))
+		return nil
+	case "value":
+		fmt.Println(value)
+		return nil
+	}
+
+	return fmt.Errorf("unsupported format: %s", format)
+}
+
+func runConfigK8sValues(cmd *cobra.Command, args []string) error {
+	env, _ := cmd.Flags().GetString("env")
+	outputFile, _ := cmd.Flags().GetString("output")
+	_, _ = cmd.Flags().GetStringSlice("set") // TODO: use overrides
+
+	fmt.Printf("Generating Kubernetes Helm values for environment: %s\n", env) // TODO: Implement K8s values generation
+	output := fmt.Sprintf("# Generated Helm values for %s environment\n", env)
+	output += "# This is a placeholder - integrate with pkg/config/k8s\n"
+
+	if outputFile != "" {
+		if err := os.WriteFile(outputFile, []byte(output), 0644); err != nil {
+			return fmt.Errorf("write output file: %w", err)
+		}
+		fmt.Printf(" Generated values file: %s\n", outputFile)
+	} else {
+		fmt.Print(output)
+	}
+
+	return nil
+}
+
+// Helper functions
+
+// mergeMaps overlays src onto dst in place. When a key holds a map on both
+// sides the two maps are merged recursively; in every other case the value
+// from src replaces (or adds) the entry in dst.
+func mergeMaps(dst, src map[string]interface{}) {
+	for key, incoming := range src {
+		dstChild, dstIsMap := dst[key].(map[string]interface{})
+		srcChild, srcIsMap := incoming.(map[string]interface{})
+
+		if dstIsMap && srcIsMap {
+			mergeMaps(dstChild, srcChild)
+			continue
+		}
+
+		dst[key] = incoming
+	}
+}
+
+// exportAsEnv renders config as shell `export NAME=value` lines, one per leaf
+// value, using flatten to build the variable names (optionally prefixed).
+//
+// Values made up only of characters that are safe in an unquoted shell word
+// are written verbatim. Any other value (spaces, quotes, `$`, `#`, ...) is
+// wrapped in single quotes with embedded single quotes escaped, so the output
+// can be sourced by a POSIX shell without word splitting or expansion.
+func exportAsEnv(config map[string]interface{}, prefix string) string {
+	var lines []string
+	flatten("", config, prefix, &lines)
+
+	// quote returns value in a form that is one literal shell word.
+	quote := func(value string) string {
+		const safe = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-./:,@%+="
+		for _, r := range value {
+			if !strings.ContainsRune(safe, r) {
+				return "'" + strings.ReplaceAll(value, "'", `'\''`) + "'"
+			}
+		}
+		return value
+	}
+
+	var result strings.Builder
+	for _, line := range lines {
+		// flatten emits NAME=VALUE; split on the first '=' so the value keeps
+		// any further '=' characters.
+		name, value := line, ""
+		if eq := strings.Index(line, "="); eq >= 0 {
+			name, value = line[:eq], line[eq+1:]
+		}
+		result.WriteString("export ")
+		result.WriteString(name)
+		result.WriteString("=")
+		result.WriteString(quote(value))
+		result.WriteString("\n")
+	}
+	return result.String()
+}
+
+// exportAsDockerEnv renders config as plain NAME=value lines (no `export`
+// keyword), one per leaf value, each terminated by a newline. Variable names
+// come from flatten and carry the optional prefix.
+func exportAsDockerEnv(config map[string]interface{}, prefix string) string {
+	var entries []string
+	flatten("", config, prefix, &entries)
+
+	if len(entries) == 0 {
+		return ""
+	}
+	return strings.Join(entries, "\n") + "\n"
+}
+
+func flatten(prefix string, data interface{}, globalPrefix string, lines *[]string) {
+	switch v := data.(type) {
+	case map[string]interface{}:
+		for key, val := range v {
+			newPrefix := key
+			if prefix != "" {
+				newPrefix = prefix + "_" + strings.ToUpper(key)
+			} else {
+				newPrefix = strings.ToUpper(key)
+			}
+			flatten(newPrefix, val, globalPrefix, lines)
+		}
+	default:
+		key := prefix
+		if globalPrefix != "" {
+			key = globalPrefix + "_" + prefix
+		}
+		*lines = append(*lines, fmt.Sprintf("%s=%v", key, v))
+	}
+}
diff --git a/cmd/jan-cli/cmd_dev.go b/cmd/jan-cli/cmd_dev.go
new file mode 100644
index 00000000..8788aa46
--- /dev/null
+++ b/cmd/jan-cli/cmd_dev.go
@@ -0,0 +1,326 @@
+﻿package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/spf13/cobra"
+)
+
+// Cobra command definitions for the `dev` command group. devCmd is the
+// parent; the subcommands are attached to it in init.
+var (
+	// devCmd groups the developer tooling subcommands.
+	devCmd = &cobra.Command{
+		Use:   "dev",
+		Short: "Development tools",
+		Long:  `Development tools for Jan Server - setup, scaffolding, and generators.`,
+	}
+
+	// devSetupCmd prepares a local checkout (see runDevSetup).
+	devSetupCmd = &cobra.Command{
+		Use:   "setup",
+		Short: "Setup development environment",
+		Long:  `Initialize development environment with dependencies and configuration.`,
+		RunE:  runDevSetup,
+	}
+
+	// devScaffoldCmd creates a new service from the template; it requires
+	// exactly one argument, the service name (see runDevScaffold).
+	devScaffoldCmd = &cobra.Command{
+		Use:   "scaffold [service-name]",
+		Short: "Scaffold a new service",
+		Long:  `Generate a new service from the template with proper structure.`,
+		RunE:  runDevScaffold,
+		Args:  cobra.ExactArgs(1),
+	}
+
+	// devRunCmd runs one service on the host; it requires exactly one
+	// argument, the service name (see runDevRun).
+	devRunCmd = &cobra.Command{
+		Use:   "run [service]",
+		Short: "Run a service locally for development",
+		Long: `Run a service on the host (outside Docker) for development/debugging. 
+Kong gateway will automatically route to host.docker.internal.`,
+		RunE: runDevRun,
+		Args: cobra.ExactArgs(1),
+	}
+)
+
+// init attaches the dev subcommands to devCmd and declares the flags for
+// `dev scaffold` and `dev run`.
+func init() {
+	devCmd.AddCommand(devSetupCmd, devScaffoldCmd, devRunCmd)
+
+	// scaffold flags
+	scaffoldFlags := devScaffoldCmd.Flags()
+	scaffoldFlags.StringP("template", "t", "api", "Service template (api, worker)")
+	scaffoldFlags.StringP("port", "p", "", "Service port")
+
+	// run flags
+	runFlags := devRunCmd.Flags()
+	runFlags.StringP("env", "e", ".env", "Environment file to load")
+	runFlags.Bool("build", false, "Build before running")
+}
+
+// runDevSetup implements `dev setup`. Working relative to the current
+// directory it:
+//  1. checks whether Docker is available (optional, only a warning if not);
+//  2. checks Docker Compose when Docker is available (optional);
+//  3. creates .env from .env.template when .env does not exist;
+//  4. creates the docker, backups and logs directories;
+//  5. creates the jan-network and jan-monitoring Docker networks when Docker
+//     is available and they do not already exist.
+//
+// It returns an error only when the template cannot be read, .env cannot be
+// written, or a directory cannot be created.
+func runDevSetup(cmd *cobra.Command, args []string) error {
+	fmt.Println("Setting up development environment...")
+	fmt.Println()
+
+	// 1. Check Docker (optional - warn if not available)
+	dockerAvailable := false
+	fmt.Print("Checking Docker... ")
+	if err := execCommand("docker", "--version"); err != nil {
+		fmt.Println("⚠ (not available - Docker features will be limited)")
+	} else {
+		fmt.Println("✓")
+		dockerAvailable = true
+	}
+
+	// 2. Check Docker Compose (optional - only if Docker is available)
+	if dockerAvailable {
+		fmt.Print("Checking Docker Compose... ")
+		if err := execCommand("docker", "compose", "version"); err != nil {
+			fmt.Println("⚠ (not available - some features may be limited)")
+		} else {
+			fmt.Println("✓")
+		}
+	}
+
+	// 3. Check for .env file
+	fmt.Print("Checking .env file... ")
+	if _, err := os.Stat(".env"); os.IsNotExist(err) {
+		fmt.Println("not found")
+		fmt.Println("Creating .env from template...")
+
+		data, err := os.ReadFile(".env.template")
+		if err != nil {
+			return fmt.Errorf("failed to read .env.template: %w", err)
+		}
+
+		// .env is where secrets and API keys are stored, so keep it readable
+		// by the owner only.
+		if err := os.WriteFile(".env", data, 0600); err != nil {
+			return fmt.Errorf("failed to create .env: %w", err)
+		}
+		fmt.Println("✓ Created .env file")
+	} else {
+		fmt.Println("✓")
+	}
+
+	// 4. Create necessary directories
+	fmt.Print("Creating directories... ")
+	dirs := []string{"docker", "backups", "logs"}
+	for _, dir := range dirs {
+		if err := os.MkdirAll(dir, 0755); err != nil {
+			return fmt.Errorf("failed to create %s: %w", dir, err)
+		}
+	}
+	fmt.Println("✓")
+
+	// 5. Create Docker networks (only if Docker is available)
+	if dockerAvailable {
+		fmt.Print("Creating Docker networks... ")
+		networks := []string{"jan-network", "jan-monitoring"}
+		for _, network := range networks {
+			// A failing inspect is taken to mean the network does not exist yet.
+			if inspectErr := execCommandSilent("docker", "network", "inspect", network); inspectErr != nil {
+				// Creation is best-effort: report the skip and carry on.
+				if err := execCommandSilent("docker", "network", "create", network); err != nil {
+					fmt.Printf("(skipped %s) ", network)
+				}
+			}
+		}
+		fmt.Println("✓")
+	}
+
+	fmt.Println()
+	fmt.Println("✅ Development environment setup complete!")
+	fmt.Println()
+	fmt.Println("Next steps:")
+	fmt.Println("  1. Review .env file and add your API keys")
+	fmt.Println("  2. Start services: make up-full")
+	fmt.Println("  3. Check health: make health-check")
+	fmt.Println()
+
+	return nil
+}
+
+// runDevScaffold implements `dev scaffold <service-name>`. It copies
+// services/template-api to services/<service-name> and rewrites the template
+// placeholders in the copied text files.
+//
+// The name is lower-cased and spaces become hyphens. Names that are empty,
+// are "." or "..", or contain a path separator are rejected so the new
+// service can never be created outside the services directory. The command
+// fails when the template is missing or the destination already exists.
+//
+// NOTE(review): the --template and --port flags are only echoed; the source
+// directory is always services/template-api.
+func runDevScaffold(cmd *cobra.Command, args []string) error {
+	serviceName := args[0]
+	template, _ := cmd.Flags().GetString("template")
+	port, _ := cmd.Flags().GetString("port")
+
+	// Normalize service name
+	serviceName = strings.ToLower(strings.ReplaceAll(serviceName, " ", "-"))
+
+	if serviceName == "" || serviceName == "." || serviceName == ".." || strings.ContainsAny(serviceName, `/\`) {
+		return fmt.Errorf("invalid service name %q: must be a single non-empty path element", serviceName)
+	}
+
+	fmt.Printf("Scaffolding new service: %s\n", serviceName)
+	fmt.Printf("  Template: %s\n", template)
+	if port != "" {
+		fmt.Printf("  Port: %s\n", port)
+	}
+	fmt.Println()
+
+	sourceDir := filepath.Join("services", "template-api")
+	destDir := filepath.Join("services", serviceName)
+
+	// Check if template exists
+	if _, err := os.Stat(sourceDir); os.IsNotExist(err) {
+		return fmt.Errorf("template not found at %s", sourceDir)
+	}
+
+	// Check if destination already exists
+	if _, err := os.Stat(destDir); err == nil {
+		return fmt.Errorf("service '%s' already exists at %s", serviceName, destDir)
+	}
+
+	fmt.Println("Copying template files...")
+	if err := copyDir(sourceDir, destDir); err != nil {
+		return fmt.Errorf("failed to copy template: %w", err)
+	}
+
+	// Build the display name: each hyphen-separated word capitalised, joined
+	// with spaces (it replaces the "Template API" title).
+	var titleParts []string
+	for _, word := range strings.Split(serviceName, "-") {
+		if len(word) > 0 {
+			titleParts = append(titleParts, strings.ToUpper(word[:1])+word[1:])
+		}
+	}
+	displayName := strings.Join(titleParts, " ")
+
+	fmt.Println("Replacing placeholders...")
+	// Replacing "template-api" also rewrites the import path
+	// "jan-server/services/template-api". A separate entry for the full path
+	// would overlap with this one and, depending on the order the map is
+	// walked, could be applied on top of it.
+	if err := replaceInDir(destDir, map[string]string{
+		"Template API": displayName,
+		"template-api": serviceName,
+	}); err != nil {
+		return fmt.Errorf("failed to replace placeholders: %w", err)
+	}
+
+	fmt.Println()
+	fmt.Printf("✓ Service '%s' created successfully!\n", serviceName)
+	fmt.Println()
+	fmt.Println("Next steps:")
+	fmt.Printf("  1. cd %s\n", destDir)
+	fmt.Println("  2. go mod tidy")
+	fmt.Println("  3. Update README.md with service-specific information")
+	fmt.Println("  4. Implement your service logic")
+	fmt.Println("  5. Add service to docker-compose.yml")
+	fmt.Println("  6. Update Kong gateway configuration")
+	fmt.Println()
+
+	return nil
+}
+
+// copyDir mirrors the tree rooted at src under dst. Directories are created
+// with the mode of the source directory; every other entry is handed to
+// copyFile. The first error encountered aborts the walk and is returned.
+func copyDir(src, dst string) error {
+	visit := func(path string, info os.FileInfo, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+
+		// Map the source path onto the destination tree.
+		rel, err := filepath.Rel(src, path)
+		if err != nil {
+			return err
+		}
+		target := filepath.Join(dst, rel)
+
+		if !info.IsDir() {
+			return copyFile(path, target)
+		}
+		return os.MkdirAll(target, info.Mode())
+	}
+
+	return filepath.Walk(src, visit)
+}
+
+// replaceInDir applies the given old -> new substitutions to every text file
+// below dir. A file counts as text when its extension is one of .go, .md,
+// .yaml, .yml, .json, .mod, .sum, or when it has no extension at all (which
+// covers Dockerfile and Makefile).
+//
+// All substitutions are applied in a single pass over the original content,
+// so text produced by one replacement is never matched again by another.
+// Files whose content does not change are left untouched. The first error
+// encountered aborts the walk and is returned.
+//
+// NOTE: when one key is a prefix of another key, which of the two wins at a
+// shared position depends on map iteration order; avoid such key sets.
+func replaceInDir(dir string, replacements map[string]string) error {
+	pairs := make([]string, 0, 2*len(replacements))
+	for old, replacement := range replacements {
+		pairs = append(pairs, old, replacement)
+	}
+	replacer := strings.NewReplacer(pairs...)
+
+	return filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			return nil
+		}
+
+		// Only process text files
+		switch filepath.Ext(path) {
+		case ".go", ".md", ".yaml", ".yml", ".json", ".mod", ".sum", "":
+		default:
+			return nil
+		}
+
+		content, err := os.ReadFile(path)
+		if err != nil {
+			return err
+		}
+
+		original := string(content)
+		updated := replacer.Replace(original)
+		if updated == original {
+			return nil
+		}
+
+		return os.WriteFile(path, []byte(updated), info.Mode())
+	})
+}
+
+// runDevRun implements `dev run <service>`. It asks Docker Compose to stop
+// the service's container (best effort), optionally builds the service with
+// `go build` when --build is set, and then runs it from services/<service>:
+// the built binary when --build was given, `go run ./cmd/server` otherwise.
+//
+// Errors from determining or changing the working directory are returned
+// instead of being ignored, so the build and run steps never execute in the
+// wrong directory.
+//
+// NOTE(review): the --env file is only announced; this function does not read
+// or apply it.
+func runDevRun(cmd *cobra.Command, args []string) error {
+	service := args[0]
+	envFile, _ := cmd.Flags().GetString("env")
+	build, _ := cmd.Flags().GetBool("build")
+
+	fmt.Printf("Running %s in development mode...\n", service)
+	fmt.Println()
+
+	// Stop Docker container for this service. Failure is deliberately
+	// ignored: the container may not exist or Docker may be unavailable.
+	fmt.Printf("Stopping Docker container for %s...\n", service)
+	_ = execCommand("docker", "compose", "stop", service)
+	fmt.Println()
+
+	serviceDir := fmt.Sprintf("services/%s", service)
+
+	// Check if service directory exists
+	if _, err := os.Stat(serviceDir); os.IsNotExist(err) {
+		return fmt.Errorf("service not found: %s", service)
+	}
+
+	originalDir, err := os.Getwd()
+	if err != nil {
+		return fmt.Errorf("get working directory: %w", err)
+	}
+
+	// Build if requested
+	if build {
+		fmt.Println("Building service...")
+		if err := os.Chdir(serviceDir); err != nil {
+			return fmt.Errorf("change to service directory: %w", err)
+		}
+
+		buildErr := execCommand("go", "build", "-o", "bin/"+service, "./cmd/server")
+		// Always go back before reporting, so serviceDir (a relative path)
+		// resolves correctly below.
+		restoreErr := os.Chdir(originalDir)
+		if buildErr != nil {
+			return fmt.Errorf("build failed: %w", buildErr)
+		}
+		if restoreErr != nil {
+			return fmt.Errorf("restore working directory: %w", restoreErr)
+		}
+
+		fmt.Println("✓ Build complete")
+		fmt.Println()
+	}
+
+	// Load environment variables
+	if envFile != "" && envFile != ".env" {
+		fmt.Printf("Loading environment from %s...\n", envFile)
+	}
+
+	fmt.Printf("Starting %s on host...\n", service)
+	fmt.Println("Press Ctrl+C to stop")
+	fmt.Println()
+
+	// Change to service directory and run
+	if err := os.Chdir(serviceDir); err != nil {
+		return fmt.Errorf("change to service directory: %w", err)
+	}
+	defer func() { _ = os.Chdir(originalDir) }()
+
+	// Run the service based on type
+	var runCmd []string
+	if build {
+		runCmd = []string{"./bin/" + service}
+	} else {
+		runCmd = []string{"go", "run", "./cmd/server"}
+	}
+
+	return execCommand(runCmd[0], runCmd[1:]...)
+}
diff --git a/cmd/jan-cli/cmd_install.go b/cmd/jan-cli/cmd_install.go
new file mode 100644
index 00000000..af457aa4
--- /dev/null
+++ b/cmd/jan-cli/cmd_install.go
@@ -0,0 +1,129 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/spf13/cobra"
+)
+
+// installCmd is the `install` command; the work is done by runInstall.
+var installCmd = &cobra.Command{
+	Use:   "install",
+	RunE:  runInstall,
+	Short: "Install jan-cli to system PATH",
+	Long:  `Install jan-cli binary to a location in your system PATH.`,
+}
+
+// init declares the flags accepted by `install`.
+func init() {
+	installFlags := installCmd.Flags()
+	installFlags.Bool("global", false, "Install to system-wide location (requires admin)")
+	installFlags.String("path", "", "Custom installation path")
+}
+
+// runInstall implements `install`. It copies the running jan-cli binary into
+// an installation directory and reports whether that directory is on PATH.
+//
+// The directory is chosen as follows: --path when given; otherwise with
+// --global `C:\Program Files\jan-cli` on Windows or /usr/local/bin elsewhere;
+// otherwise <home>/bin, where the home directory comes from os.UserHomeDir.
+// Failing to determine the home directory is an error, rather than silently
+// installing into a relative "bin" directory.
+//
+// When the destination already is the running binary the copy is skipped, as
+// copying a file onto itself cannot succeed safely.
+func runInstall(cmd *cobra.Command, args []string) error {
+	global, _ := cmd.Flags().GetBool("global")
+	customPath, _ := cmd.Flags().GetString("path")
+
+	// Determine installation directory
+	var binDir string
+	switch {
+	case customPath != "":
+		binDir = customPath
+	case global:
+		if isWindows() {
+			binDir = `C:\Program Files\jan-cli`
+		} else {
+			binDir = "/usr/local/bin"
+		}
+	default:
+		home, err := os.UserHomeDir()
+		if err != nil {
+			return fmt.Errorf("failed to determine home directory: %w", err)
+		}
+		binDir = filepath.Join(home, "bin")
+	}
+
+	// Get source binary path
+	executable, err := os.Executable()
+	if err != nil {
+		return fmt.Errorf("failed to get executable path: %w", err)
+	}
+
+	// Create bin directory if needed
+	if _, err := os.Stat(binDir); os.IsNotExist(err) {
+		fmt.Printf("Creating directory: %s\n", binDir)
+		if err := os.MkdirAll(binDir, 0755); err != nil {
+			return fmt.Errorf("failed to create directory: %w", err)
+		}
+	}
+
+	// Determine destination filename
+	destFile := filepath.Join(binDir, "jan-cli")
+	if isWindows() {
+		destFile += ".exe"
+	}
+
+	// Detect the "already running from the install location" case.
+	alreadyInPlace := false
+	if srcInfo, err := os.Stat(executable); err == nil {
+		if dstInfo, err := os.Stat(destFile); err == nil && os.SameFile(srcInfo, dstInfo) {
+			alreadyInPlace = true
+		}
+	}
+
+	if alreadyInPlace {
+		fmt.Printf("jan-cli is already installed at %s\n", destFile)
+	} else {
+		// Copy binary
+		fmt.Printf("Installing jan-cli to %s...\n", destFile)
+		if err := copyFile(executable, destFile); err != nil {
+			return fmt.Errorf("failed to copy binary: %w", err)
+		}
+
+		// Make executable on Unix
+		if !isWindows() {
+			if err := os.Chmod(destFile, 0755); err != nil {
+				return fmt.Errorf("failed to set executable permission: %w", err)
+			}
+		}
+	}
+
+	fmt.Println()
+	fmt.Println("✓ jan-cli installed successfully!")
+	fmt.Println()
+
+	// Check if in PATH
+	if !isInPath(binDir) {
+		fmt.Printf("⚠ WARNING: %s is not in your PATH\n", binDir)
+		fmt.Println()
+		if isWindows() {
+			fmt.Println("To add to PATH (PowerShell):")
+			fmt.Printf("  $env:PATH += \";%s\"\n", binDir)
+			fmt.Println()
+			fmt.Println("To make permanent, add to your PowerShell profile:")
+			fmt.Println("  notepad $PROFILE")
+		} else {
+			fmt.Println("To add to PATH, add this to ~/.bashrc or ~/.zshrc:")
+			fmt.Printf("  export PATH=\"$PATH:%s\"\n", binDir)
+			fmt.Println()
+			fmt.Println("Then reload your shell:")
+			fmt.Println("  source ~/.bashrc  # or source ~/.zshrc")
+		}
+	} else {
+		fmt.Println("✓ Installation directory is already in PATH")
+	}
+
+	fmt.Println()
+	fmt.Println("You can now run: jan-cli --help")
+
+	return nil
+}
+
+// isInPath reports whether dir is one of the directories listed in the PATH
+// environment variable.
+//
+// PATH is split with filepath.SplitList, which uses the operating system's
+// list separator. Both sides are cleaned with filepath.Clean before comparing,
+// so a trailing separator or a redundant "./" does not cause a false
+// negative. On Windows the comparison is case-insensitive. Empty PATH entries
+// are ignored.
+func isInPath(dir string) bool {
+	normalize := func(p string) string {
+		p = filepath.Clean(strings.TrimSpace(p))
+		if isWindows() {
+			p = strings.ToLower(p)
+		}
+		return p
+	}
+
+	want := normalize(dir)
+	for _, entry := range filepath.SplitList(os.Getenv("PATH")) {
+		if strings.TrimSpace(entry) == "" {
+			continue
+		}
+		if normalize(entry) == want {
+			return true
+		}
+	}
+	return false
+}
diff --git a/cmd/jan-cli/cmd_monitor.go b/cmd/jan-cli/cmd_monitor.go
new file mode 100644
index 00000000..25d417a2
--- /dev/null
+++ b/cmd/jan-cli/cmd_monitor.go
@@ -0,0 +1,599 @@
+package main
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"time"
+
+	"github.com/spf13/cobra"
+)
+
+// Cobra command definitions for the "monitor" command tree. The sub-commands
+// are attached to monitorCmd by this file's init function.
+var (
+	// monitorCmd is the parent command; it defines no Run handler of its own.
+	monitorCmd = &cobra.Command{
+		Use:   "monitor",
+		Short: "Monitoring stack management",
+		Long: `Manage Jan Server's observability stack including Prometheus, Grafana, Jaeger, and OTEL Collector.
+
+Examples:
+  jan-cli monitor up          # Start monitoring stack
+  jan-cli monitor dev         # Start with full sampling for development
+  jan-cli monitor test        # Validate all services are healthy
+  jan-cli monitor status      # Show status and resource usage
+  jan-cli monitor query       # Interactive queries
+  jan-cli monitor down        # Stop monitoring stack`,
+	}
+
+	// monitorUpCmd runs runMonitorUp.
+	monitorUpCmd = &cobra.Command{Use: "up", Short: "Start monitoring stack", Run: runMonitorUp}
+
+	// monitorDevCmd runs runMonitorDev.
+	monitorDevCmd = &cobra.Command{Use: "dev", Short: "Start monitoring stack with full sampling for development", Run: runMonitorDev}
+
+	// monitorDownCmd runs runMonitorDown.
+	monitorDownCmd = &cobra.Command{Use: "down", Short: "Stop monitoring stack", Run: runMonitorDown}
+
+	// monitorTestCmd runs runMonitorTest.
+	monitorTestCmd = &cobra.Command{Use: "test", Short: "Validate monitoring stack health", Run: runMonitorTest}
+
+	// monitorStatusCmd runs runMonitorStatus.
+	monitorStatusCmd = &cobra.Command{Use: "status", Short: "Show monitoring stack status and resource usage", Run: runMonitorStatus}
+
+	// monitorResetCmd runs runMonitorReset, which asks for confirmation first.
+	monitorResetCmd = &cobra.Command{Use: "reset", Short: "Reset monitoring data (destructive)", Run: runMonitorReset}
+
+	// monitorQueryCmd runs runMonitorQuery.
+	monitorQueryCmd = &cobra.Command{Use: "query", Short: "Interactive monitoring queries", Run: runMonitorQuery}
+
+	// monitorExportCmd runs runMonitorExport.
+	monitorExportCmd = &cobra.Command{Use: "export", Short: "Export monitoring configuration", Run: runMonitorExport}
+
+	// monitorSetupCmd runs runMonitorSetup.
+	monitorSetupCmd = &cobra.Command{Use: "setup", Short: "Install monitoring dependencies", Run: runMonitorSetup}
+)
+
+// init attaches every monitor sub-command to monitorCmd, in the same order
+// they are declared.
+func init() {
+	monitorCmd.AddCommand(
+		monitorUpCmd,
+		monitorDevCmd,
+		monitorDownCmd,
+		monitorTestCmd,
+		monitorStatusCmd,
+		monitorResetCmd,
+		monitorQueryCmd,
+		monitorExportCmd,
+		monitorSetupCmd,
+	)
+}
+
+// runMonitorUp starts the stack described by docker/observability.yml in
+// detached mode and prints the dashboard URLs. It exits the process with
+// status 1 when docker compose fails.
+func runMonitorUp(cmd *cobra.Command, args []string) {
+	printInfo("Starting monitoring stack...")
+
+	err := runDockerCompose(filepath.Join("docker", "observability.yml"), "up", "-d")
+	if err != nil {
+		printError("Failed to start monitoring stack: %v", err)
+		os.Exit(1)
+	}
+	printSuccess("Monitoring stack started")
+
+	// The leading empty entry produces the blank separator line.
+	for _, line := range []string{
+		"",
+		"Dashboards:",
+		"  - Grafana:    http://localhost:3331 (admin/admin)",
+		"  - Prometheus: http://localhost:9090",
+		"  - Jaeger:     http://localhost:16686",
+	} {
+		fmt.Println(line)
+	}
+}
+
+// runMonitorDev starts the monitoring stack with OTEL_TRACES_SAMPLER=always_on
+// set in this process's environment, then waits up to 60 seconds for the OTEL
+// Collector health endpoint on localhost:13133.
+//
+// The process exits with status 1 if the environment variable cannot be set
+// or docker compose fails. A health-check timeout only produces a warning.
+func runMonitorDev(cmd *cobra.Command, args []string) {
+	printInfo("Starting monitoring stack with AlwaysSample...")
+
+	// runDockerCompose does not set an explicit environment on the child
+	// process, so docker compose inherits this variable.
+	// NOTE(review): whether observability.yml actually consumes
+	// OTEL_TRACES_SAMPLER is not visible here - confirm.
+	if err := os.Setenv("OTEL_TRACES_SAMPLER", "always_on"); err != nil {
+		printError("Failed to set OTEL_TRACES_SAMPLER: %v", err)
+		os.Exit(1)
+	}
+
+	composeFile := filepath.Join("docker", "observability.yml")
+	if err := runDockerCompose(composeFile, "up", "-d"); err != nil {
+		printError("Failed to start monitoring stack: %v", err)
+		os.Exit(1)
+	}
+
+	printInfo("Waiting for services...")
+	time.Sleep(5 * time.Second)
+
+	// Wait for OTEL Collector health check
+	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+	defer cancel()
+
+	if err := waitForHealthCheck(ctx, "http://localhost:13133/", 2*time.Second); err != nil {
+		printWarning("OTEL Collector health check timeout")
+		return
+	}
+
+	printSuccess("Monitoring stack ready:")
+	fmt.Println("  - Prometheus: http://localhost:9090")
+	fmt.Println("  - Grafana: http://localhost:3331 (admin/admin)")
+	fmt.Println("  - Jaeger: http://localhost:16686")
+	fmt.Println("  - OTEL Collector: http://localhost:13133")
+}
+
+// runMonitorDown stops the stack described by docker/observability.yml and
+// prints a hint about disabling tracing. It exits the process with status 1
+// when docker compose fails.
+func runMonitorDown(cmd *cobra.Command, args []string) {
+	printInfo("Stopping monitoring stack...")
+
+	composePath := filepath.Join("docker", "observability.yml")
+	err := runDockerCompose(composePath, "down")
+	if err != nil {
+		printError("Failed to stop monitoring stack: %v", err)
+		os.Exit(1)
+	}
+
+	printSuccess("Monitoring stack stopped")
+	// Blank line, then the hint.
+	fmt.Printf("\n%s\n", "To fully disable tracing, set ENABLE_TRACING=false in .env")
+}
+
+// runMonitorTest probes the health endpoint of each monitoring service with a
+// 2 second timeout and reports the result per service. It exits the process
+// with status 1 if any probe fails.
+//
+// The services are held in a slice rather than a map so they are always
+// checked and reported in the same order.
+func runMonitorTest(cmd *cobra.Command, args []string) {
+	printInfo("Testing monitoring stack health...")
+	fmt.Println()
+
+	services := []struct {
+		name string
+		url  string
+	}{
+		{"Prometheus", "http://localhost:9090/-/healthy"},
+		{"Grafana", "http://localhost:3331/api/health"},
+		{"OTEL Collector", "http://localhost:13133/"},
+		{"Jaeger", "http://localhost:16686/"},
+	}
+
+	allHealthy := true
+	for _, svc := range services {
+		fmt.Printf("Testing %s...\n", svc.name)
+		if err := checkHealth(svc.url, 2*time.Second); err != nil {
+			printError("  %s unhealthy", svc.name)
+			allHealthy = false
+		} else {
+			printSuccess("  %s healthy", svc.name)
+		}
+	}
+
+	fmt.Println()
+	if !allHealthy {
+		printError("Some monitoring services are unhealthy")
+		os.Exit(1)
+	}
+	printSuccess("All monitoring services healthy")
+}
+
+// runMonitorStatus prints `docker compose ps` for the monitoring stack,
+// followed by a one-shot `docker stats` table for the four monitoring
+// containers. A compose failure exits the process with status 1; a stats
+// failure only prints a warning.
+func runMonitorStatus(cmd *cobra.Command, args []string) {
+	fmt.Println("=== Monitoring Stack Status ===")
+	fmt.Println()
+
+	if err := runDockerCompose(filepath.Join("docker", "observability.yml"), "ps"); err != nil {
+		printError("Failed to get status: %v", err)
+		os.Exit(1)
+	}
+
+	fmt.Println()
+	fmt.Println("=== Resource Usage ===")
+
+	// The format argument contains a literal backslash followed by 't'.
+	// NOTE(review): presumably docker expands that to a tab itself - confirm.
+	statsArgs := []string{
+		"stats", "--no-stream",
+		"--format", "table {{.Name}}\\t{{.CPUPerc}}\\t{{.MemUsage}}",
+		"otel-collector", "prometheus", "grafana", "jaeger",
+	}
+	stats := exec.Command("docker", statsArgs...)
+	stats.Stdout, stats.Stderr = os.Stdout, os.Stderr
+
+	if stats.Run() != nil {
+		printWarning("Could not retrieve resource usage (containers may not be running)")
+	}
+}
+
+// runMonitorReset asks for confirmation and, on "y"/"yes" (case-insensitive),
+// runs `docker compose down -v` for the monitoring stack. Any other answer
+// aborts without touching anything.
+//
+// The process exits with status 1 when stdin cannot be read or docker compose
+// fails.
+func runMonitorReset(cmd *cobra.Command, args []string) {
+	fmt.Print("⚠️  Delete all Prometheus/Jaeger data? [y/N]: ")
+
+	reader := bufio.NewReader(os.Stdin)
+	response, err := reader.ReadString('\n')
+	// ReadString returns io.EOF together with the data read so far when the
+	// input ends without a newline (for example `printf y | jan-cli ...`).
+	// That is still a usable answer; only an empty read is a failure.
+	if err != nil && !(err == io.EOF && response != "") {
+		printError("Failed to read input: %v", err)
+		os.Exit(1)
+	}
+
+	response = strings.TrimSpace(strings.ToLower(response))
+	if response != "y" && response != "yes" {
+		fmt.Println("Aborted")
+		return
+	}
+
+	composeFile := filepath.Join("docker", "observability.yml")
+	if err := runDockerCompose(composeFile, "down", "-v"); err != nil {
+		printError("Failed to reset monitoring data: %v", err)
+		os.Exit(1)
+	}
+
+	printSuccess("Monitoring data cleared")
+}
+
+// runMonitorQuery shows a three-item menu on stdout, reads the choice (and,
+// for choices 1 and 2, one further value) from stdin, and dispatches to
+// queryTraces, queryMetric or queryAlertRules. Read errors are ignored, so an
+// unreadable stdin is handled like an empty answer.
+func runMonitorQuery(cmd *cobra.Command, args []string) {
+	for _, line := range []string{
+		"Select query:",
+		"1) Recent traces for service",
+		"2) Metric current value",
+		"3) Alert rules status",
+	} {
+		fmt.Println(line)
+	}
+
+	stdin := bufio.NewReader(os.Stdin)
+	// ask prints prompt without a newline and returns the trimmed reply.
+	ask := func(prompt string) string {
+		fmt.Print(prompt)
+		reply, _ := stdin.ReadString('\n')
+		return strings.TrimSpace(reply)
+	}
+
+	choice := ask("Choice [1-3]: ")
+	if choice == "1" {
+		queryTraces(ask("Service name: "))
+	} else if choice == "2" {
+		queryMetric(ask("Metric name: "))
+	} else if choice == "3" {
+		queryAlertRules()
+	} else {
+		printError("Invalid choice")
+	}
+}
+
+// runMonitorExport writes the resolved docker compose configuration and
+// copies of the monitoring config files into exports/monitoring.
+//
+// Failing to create the directory, render the compose config, or write it
+// exits the process with status 1. A config file that cannot be copied only
+// produces a warning.
+func runMonitorExport(cmd *cobra.Command, args []string) {
+	printInfo("Exporting monitoring configs...")
+
+	exportDir := filepath.Join("exports", "monitoring")
+	if err := os.MkdirAll(exportDir, 0755); err != nil {
+		printError("Failed to create export directory: %v", err)
+		os.Exit(1)
+	}
+
+	// Export docker-compose config
+	composeFile := filepath.Join("docker", "observability.yml")
+	exportFile := filepath.Join(exportDir, "docker-compose.yml")
+
+	configCmd := exec.Command("docker", "compose", "-f", composeFile, "config")
+	// Forward docker's diagnostics to the user. With Stderr left nil, Output
+	// would keep them inside the returned error and only "exit status N"
+	// would be printed below.
+	configCmd.Stderr = os.Stderr
+	output, err := configCmd.Output()
+	if err != nil {
+		printError("Failed to export compose config: %v", err)
+		os.Exit(1)
+	}
+
+	if err := os.WriteFile(exportFile, output, 0644); err != nil {
+		printError("Failed to write compose config: %v", err)
+		os.Exit(1)
+	}
+
+	// Copy monitoring config files
+	filesToCopy := []struct {
+		src string
+		dst string
+	}{
+		{"monitoring/prometheus.yml", filepath.Join(exportDir, "prometheus.yml")},
+		{"monitoring/otel-collector.yaml", filepath.Join(exportDir, "otel-collector.yaml")},
+		{"monitoring/prometheus-alerts.yml", filepath.Join(exportDir, "prometheus-alerts.yml")},
+	}
+
+	for _, f := range filesToCopy {
+		if err := copyFile(f.src, f.dst); err != nil {
+			printWarning("Could not copy %s: %v", f.src, err)
+		}
+	}
+
+	printSuccess("Configs exported to %s", exportDir)
+}
+
+// runMonitorSetup prepares a checkout for the monitoring stack. In order it:
+//
+//  1. requires the go tool on PATH (exits with status 1 otherwise),
+//  2. runs `go get` for a pinned list of modules,
+//  3. runs `go mod tidy` (exits with status 1 on failure),
+//  4. runs the tests under ./pkg/telemetry (a failure is only a warning),
+//  5. verifies that a fixed list of files exists (exits with status 1 if any
+//     is missing),
+//  6. reports whether docker and `docker compose` are available.
+//
+// A failed `go get` does not stop the run, but the summary after step 2 now
+// states how many modules failed instead of always claiming success.
+func runMonitorSetup(cmd *cobra.Command, args []string) {
+	printInfo("Installing monitoring dependencies...")
+	fmt.Println()
+
+	// Check Go installation
+	if _, err := exec.LookPath("go"); err != nil {
+		printError("Go is not installed. Please install Go 1.21+ first.")
+		os.Exit(1)
+	}
+	printSuccess("Go found: %s", getGoVersion())
+
+	// Install OpenTelemetry dependencies
+	printInfo("Installing OpenTelemetry dependencies...")
+
+	dependencies := []string{
+		"go.opentelemetry.io/otel@v1.21.0",
+		"go.opentelemetry.io/otel/sdk@v1.21.0",
+		"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp@v1.21.0",
+		"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp@v0.44.0",
+		"go.opentelemetry.io/otel/trace@v1.21.0",
+		"go.opentelemetry.io/otel/metric@v1.21.0",
+		"go.opentelemetry.io/otel/propagation@v1.21.0",
+		"go.opentelemetry.io/otel/semconv/v1.21.0@v1.21.0",
+		"github.com/stretchr/testify@v1.8.4",
+	}
+
+	failed := 0
+	for _, dep := range dependencies {
+		if err := runCommand("go", "get", dep); err != nil {
+			printWarning("Failed to install %s", dep)
+			failed++
+		}
+	}
+
+	if failed > 0 {
+		printWarning("%d of %d dependencies could not be installed", failed, len(dependencies))
+	} else {
+		printSuccess("Dependencies installed")
+	}
+
+	// Tidy go.mod
+	printInfo("Tidying go.mod...")
+	if err := runCommand("go", "mod", "tidy"); err != nil {
+		printError("Failed to tidy go.mod: %v", err)
+		os.Exit(1)
+	}
+	printSuccess("go.mod tidied")
+
+	// Run sanitizer tests
+	printInfo("Running sanitizer tests...")
+	testCmd := exec.Command("go", "test", "-v", "./pkg/telemetry/...")
+	testCmd.Stdout = os.Stdout
+	testCmd.Stderr = os.Stderr
+
+	if err := testCmd.Run(); err != nil {
+		printWarning("Sanitizer tests failed (may need service-specific imports)")
+	} else {
+		printSuccess("Sanitizer tests passed")
+	}
+
+	// Verify monitoring files
+	printInfo("Verifying monitoring files...")
+
+	requiredFiles := []string{
+		"pkg/observability/config.go",
+		"pkg/observability/provider.go",
+		"pkg/observability/attributes.go",
+		"pkg/observability/middleware/http.go",
+		"pkg/observability/worker/worker.go",
+		"pkg/telemetry/sanitizer.go",
+		"pkg/telemetry/sanitizer_test.go",
+		"monitoring/prometheus-alerts.yml",
+		"monitoring/otel-collector.yaml",
+	}
+
+	missing := 0
+	for _, file := range requiredFiles {
+		if _, err := os.Stat(file); os.IsNotExist(err) {
+			printError("  ✗ %s (missing)", file)
+			missing++
+		} else {
+			printSuccess("  ✓ %s", file)
+		}
+	}
+
+	fmt.Println()
+	if missing > 0 {
+		printError("%d files missing. Please verify implementation.", missing)
+		os.Exit(1)
+	}
+
+	printSuccess("All monitoring files present")
+
+	// Check Docker
+	printInfo("Checking Docker setup...")
+	if _, err := exec.LookPath("docker"); err != nil {
+		printWarning("Docker not found. Monitoring stack requires Docker.")
+	} else {
+		printSuccess("Docker found: %s", getDockerVersion())
+
+		// Check Docker Compose
+		if err := runCommand("docker", "compose", "version"); err != nil {
+			printWarning("Docker Compose V2 not found. Monitoring stack requires Docker Compose V2.")
+		} else {
+			printSuccess("Docker Compose V2 found")
+		}
+	}
+
+	fmt.Println()
+	printSuccess("Setup complete!")
+	fmt.Println()
+	fmt.Println("Next steps:")
+	fmt.Println("1. Start monitoring stack:  jan-cli monitor dev")
+	fmt.Println("2. Test monitoring health:  jan-cli monitor test")
+	fmt.Println("3. Integrate into services: See MONITORING_IMPLEMENTATION.md")
+}
+
+// Helper functions
+
+// runDockerCompose runs `docker compose -f composeFile args...` with the
+// child's stdout and stderr connected to this process's, and returns the
+// error from running it.
+func runDockerCompose(composeFile string, args ...string) error {
+	argv := make([]string, 0, len(args)+3)
+	argv = append(argv, "compose", "-f", composeFile)
+	argv = append(argv, args...)
+
+	proc := exec.Command("docker", argv...)
+	proc.Stdout, proc.Stderr = os.Stdout, os.Stderr
+	return proc.Run()
+}
+
+// waitForHealthCheck polls url with checkHealth (2 second timeout per probe)
+// until a probe succeeds or ctx is done.
+//
+// The first probe happens immediately, so an already-healthy service returns
+// without waiting; later probes are spaced by interval. A non-positive
+// interval falls back to one second, because time.NewTicker panics on such
+// values. The function returns nil on the first successful probe and
+// ctx.Err() when the context is cancelled or times out.
+func waitForHealthCheck(ctx context.Context, url string, interval time.Duration) error {
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	if checkHealth(url, 2*time.Second) == nil {
+		return nil
+	}
+
+	if interval <= 0 {
+		interval = time.Second
+	}
+	ticker := time.NewTicker(interval)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-ticker.C:
+			if err := checkHealth(url, 2*time.Second); err == nil {
+				return nil
+			}
+		}
+	}
+}
+
+// checkHealth sends one GET request to url, bounded by timeout. It returns
+// nil for any status code in [200, 500) and an error for a transport failure
+// or any other status code.
+//
+// NOTE(review): 4xx responses count as healthy here; confirm that is the
+// intended contract for the endpoints being probed.
+func checkHealth(url string, timeout time.Duration) error {
+	httpClient := http.Client{Timeout: timeout}
+
+	resp, err := httpClient.Get(url)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if code := resp.StatusCode; code < 200 || code >= 500 {
+		return fmt.Errorf("unhealthy status: %d", code)
+	}
+	return nil
+}
+
+// printMonitorJSON performs a GET on endpoint with a 5 second timeout, parses
+// the response body as a JSON object and prints it to stdout indented by two
+// spaces. backend names the queried service in the error message. Failures
+// are reported with printError and do not terminate the process.
+func printMonitorJSON(endpoint, backend string) {
+	client := &http.Client{Timeout: 5 * time.Second}
+	resp, err := client.Get(endpoint)
+	if err != nil {
+		printError("Failed to query %s: %v", backend, err)
+		return
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		printError("Failed to read response: %v", err)
+		return
+	}
+
+	var result map[string]interface{}
+	if err := json.Unmarshal(body, &result); err != nil {
+		printError("Failed to parse response: %v", err)
+		return
+	}
+
+	prettyJSON, err := json.MarshalIndent(result, "", "  ")
+	if err != nil {
+		printError("Failed to format response: %v", err)
+		return
+	}
+	fmt.Println(string(prettyJSON))
+}
+
+// queryTraces prints up to 10 traces for service from the Jaeger HTTP API on
+// localhost:16686. The service name is URL-encoded, so names containing
+// spaces, '&' or '#' are sent intact rather than corrupting the query string.
+func queryTraces(service string) {
+	params := url.Values{}
+	params.Set("service", service)
+	params.Set("limit", "10")
+	printMonitorJSON("http://localhost:16686/api/traces?"+params.Encode(), "Jaeger")
+}
+
+// queryMetric prints the result of an instant query for metric from the
+// Prometheus HTTP API on localhost:9090. The expression is URL-encoded, so
+// input such as `up{job="x"}` or `rate(http_requests_total[5m])` is passed
+// through unchanged.
+func queryMetric(metric string) {
+	params := url.Values{}
+	params.Set("query", metric)
+	printMonitorJSON("http://localhost:9090/api/v1/query?"+params.Encode(), "Prometheus")
+}
+
+// queryAlertRules prints the response of the Prometheus rules endpoint on
+// localhost:9090.
+func queryAlertRules() {
+	printMonitorJSON("http://localhost:9090/api/v1/rules", "Prometheus")
+}
+
+// runCommand runs name with args, with the child's stdout and stderr
+// connected to this process's, and returns the error from running it.
+func runCommand(name string, args ...string) error {
+	proc := exec.Command(name, args...)
+	proc.Stdout, proc.Stderr = os.Stdout, os.Stderr
+	return proc.Run()
+}
+
+// getGoVersion returns the trimmed stdout of `go version`, or "unknown" when
+// the command cannot be run successfully.
+func getGoVersion() string {
+	out, err := exec.Command("go", "version").Output()
+	if err == nil {
+		return strings.TrimSpace(string(out))
+	}
+	return "unknown"
+}
+
+// getDockerVersion returns the trimmed stdout of `docker --version`, or
+// "unknown" when the command cannot be run successfully.
+func getDockerVersion() string {
+	out, err := exec.Command("docker", "--version").Output()
+	if err == nil {
+		return strings.TrimSpace(string(out))
+	}
+	return "unknown"
+}
+
+// printSuccess formats the message like fmt.Sprintf and writes it to stdout
+// on one line, prefixed with "✓" ("[OK]" when runtime.GOOS is "windows").
+func printSuccess(format string, args ...interface{}) {
+	marker := "✓"
+	if runtime.GOOS == "windows" {
+		marker = "[OK]"
+	}
+	// Println separates its operands with one space and appends the newline.
+	fmt.Println(marker, fmt.Sprintf(format, args...))
+}
+
+// printError formats the message like fmt.Sprintf and writes it to stderr on
+// one line, prefixed with "✗" ("[ERROR]" when runtime.GOOS is "windows").
+func printError(format string, args ...interface{}) {
+	marker := "✗"
+	if runtime.GOOS == "windows" {
+		marker = "[ERROR]"
+	}
+	// Fprintln separates its operands with one space and appends the newline.
+	fmt.Fprintln(os.Stderr, marker, fmt.Sprintf(format, args...))
+}
+
+// printWarning formats the message like fmt.Sprintf and writes it to stdout
+// on one line, prefixed with "⚠" ("[WARNING]" when runtime.GOOS is "windows").
+func printWarning(format string, args ...interface{}) {
+	marker := "⚠"
+	if runtime.GOOS == "windows" {
+		marker = "[WARNING]"
+	}
+	// Println separates its operands with one space and appends the newline.
+	fmt.Println(marker, fmt.Sprintf(format, args...))
+}
+
+// printInfo formats the message like fmt.Sprintf and writes it to stdout
+// followed by a newline, without any prefix.
+func printInfo(format string, args ...interface{}) {
+	message := fmt.Sprintf(format, args...)
+	fmt.Println(message)
+}
diff --git a/cmd/jan-cli/cmd_service.go b/cmd/jan-cli/cmd_service.go
new file mode 100644
index 00000000..1327d7ca
--- /dev/null
+++ b/cmd/jan-cli/cmd_service.go
@@ -0,0 +1,119 @@
+﻿package main
+
+import (
+	"fmt"
+
+	"github.com/spf13/cobra"
+)
+
+// Cobra command definitions for the "service" command tree. The sub-commands
+// and the flags of the logs command are registered by this file's init
+// function.
+var (
+	// serviceCmd is the parent command; it defines no handler of its own.
+	serviceCmd = &cobra.Command{
+		Use:   "service",
+		Short: "Service operations",
+		Long:  `Manage Jan Server services - list, start, stop, logs, and status.`,
+	}
+
+	// serviceListCmd runs runServiceList.
+	serviceListCmd = &cobra.Command{
+		Use:   "list",
+		Short: "List all services",
+		Long:  `List all available Jan Server services and their status.`,
+		RunE:  runServiceList,
+	}
+
+	// serviceLogsCmd runs runServiceLogs and requires at least one argument.
+	serviceLogsCmd = &cobra.Command{
+		Use:   "logs [service]",
+		Short: "Show service logs",
+		Long:  `Display logs for a specific service.`,
+		Args:  cobra.MinimumNArgs(1),
+		RunE:  runServiceLogs,
+	}
+
+	// serviceStatusCmd runs runServiceStatus; the service argument is optional.
+	serviceStatusCmd = &cobra.Command{
+		Use:   "status [service]",
+		Short: "Show service status",
+		Long:  `Display status information for a service.`,
+		RunE:  runServiceStatus,
+	}
+)
+
+// init attaches the list, logs and status sub-commands to serviceCmd and
+// declares the --tail/-n and --follow/-f flags of the logs command.
+func init() {
+	serviceCmd.AddCommand(serviceListCmd, serviceLogsCmd, serviceStatusCmd)
+
+	// logs flags
+	logFlags := serviceLogsCmd.Flags()
+	logFlags.IntP("tail", "n", 100, "Number of lines to show")
+	logFlags.BoolP("follow", "f", false, "Follow log output")
+}
+
+func runServiceList(cmd *cobra.Command, args []string) error {
+	fmt.Println("Available services:")
+	services := []struct {
+		Name string
+		Port string
+		Desc string
+	}{
+		{"llm-api", "8080", "LLM API - OpenAI-compatible chat completions"},
+		{"media-api", "8285", "Media API - File upload and management"},
+		{"response-api", "8082", "Response API - Multi-step orchestration"},
+		{"mcp-tools", "8091", "MCP Tools - Model Context Protocol tools"},
+	}
+
+	for _, svc := range services {
+		fmt.Printf("  %-15s :%s  %s\n", svc.Name, svc.Port, svc.Desc)
+	}
+
+	return nil
+}
+
+// runServiceLogs runs `docker compose logs [-f] --tail N <service>` through
+// execCommand for the service named by args[0], taking -f and N from the
+// --follow and --tail flags. It returns execCommand's error.
+func runServiceLogs(cmd *cobra.Command, args []string) error {
+	service := args[0]
+	flags := cmd.Flags()
+	tail, _ := flags.GetInt("tail")
+	follow, _ := flags.GetBool("follow")
+
+	fmt.Printf("Showing logs for %s\n", service)
+	fmt.Println()
+
+	// Assemble the docker argument list; -f must precede --tail as before.
+	dockerArgs := []string{"compose", "logs"}
+	if follow {
+		dockerArgs = append(dockerArgs, "-f")
+	}
+	dockerArgs = append(dockerArgs, "--tail", fmt.Sprint(tail), service)
+
+	return execCommand("docker", dockerArgs...)
+}
+
+// runServiceStatus reports service status.
+//
+// Without a service argument (or with an empty one) it runs
+// `make health-check` and returns its error. With a service name it runs
+// `docker compose ps <service>` and returns an error if that fails. If
+// getHealthURL knows a health endpoint for the service, the endpoint is then
+// probed on a best-effort basis: a failing probe is reported to the user but
+// never turns into a returned error.
+func runServiceStatus(cmd *cobra.Command, args []string) error {
+	if len(args) == 0 || args[0] == "" {
+		// Check all services status
+		return execCommand("make", "health-check")
+	}
+	service := args[0]
+
+	// Check specific service container status
+	fmt.Printf("Checking status for %s:\n", service)
+	fmt.Println()
+
+	// Check if container is running
+	if err := execCommand("docker", "compose", "ps", service); err != nil {
+		return err
+	}
+
+	// Try to check health endpoint based on service
+	healthURL := getHealthURL(service)
+	if healthURL == "" {
+		return nil
+	}
+	fmt.Printf("\nHealth endpoint: %s\n", healthURL)
+
+	if isWindows() {
+		// The PowerShell catch block prints its own failure message, so the
+		// error returned here is deliberately discarded.
+		_ = execCommand("powershell", "-Command",
+			fmt.Sprintf("try { Invoke-WebRequest -Uri %s -UseBasicParsing -TimeoutSec 2 | Select-Object -ExpandProperty Content } catch { Write-Host 'Service not responding' }", healthURL))
+		return nil
+	}
+
+	// `curl -sf` prints nothing on failure; report it here so the output
+	// matches what the Windows branch shows.
+	if err := execCommand("curl", "-sf", healthURL); err != nil {
+		fmt.Printf("Service not responding (%v)\n", err)
+	}
+	return nil
+}
diff --git a/cmd/jan-cli/cmd_setup.go b/cmd/jan-cli/cmd_setup.go
new file mode 100644
index 00000000..83e54f11
--- /dev/null
+++ b/cmd/jan-cli/cmd_setup.go
@@ -0,0 +1,749 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/spf13/cobra"
+)
+
+// setupAndRunCmd is the "setup-and-run" command, handled by runSetupAndRun.
+// Its flags are declared in this file's init function.
+var setupAndRunCmd = &cobra.Command{
+	RunE:  runSetupAndRun,
+	Use:   "setup-and-run",
+	Short: "Interactive setup and run Jan Server",
+	Long:  `Interactively configure environment variables and start Jan Server with all services.`,
+}
+
+// init declares the --skip-prompts and --with-memory-tools boolean flags of
+// setupAndRunCmd; both default to false.
+func init() {
+	flags := setupAndRunCmd.Flags()
+	flags.Bool("skip-prompts", false, "Skip interactive prompts and use existing .env")
+	flags.Bool("with-memory-tools", false, "Enable memory tools profile and defaults during setup")
+}
+
+// runSetupAndRun implements the "setup-and-run" command. In order it:
+//
+//  1. makes sure a .env file exists in the working directory, creating it
+//     from the template and/or running the configuration prompts unless
+//     --skip-prompts is set;
+//  2. runs `make setup` (a failure is returned to the caller);
+//  3. unless --skip-prompts is set, offers to start the monitoring stack via
+//     `make monitor-up` (failures there are only warnings);
+//  4. runs `make up-full`, ignoring its error;
+//  5. waits 30 seconds and prints the service URLs and next steps.
+//
+// It returns an error only when .env cannot be prepared or `make setup`
+// fails.
+func runSetupAndRun(cmd *cobra.Command, args []string) error {
+	// Flag lookup errors are discarded; both flags are declared in init.
+	skipPrompts, _ := cmd.Flags().GetBool("skip-prompts")
+	enableMemory, _ := cmd.Flags().GetBool("with-memory-tools")
+
+	fmt.Println("🚀 Jan Server Setup and Run")
+	fmt.Println("=" + strings.Repeat("=", 50))
+	fmt.Println()
+
+	// Check if .env exists
+	envPath := ".env"
+	envExists := false
+	if _, err := os.Stat(envPath); err == nil {
+		envExists = true
+	}
+
+	if !skipPrompts {
+		// Create or update .env file
+		if envExists {
+			fmt.Println("✓ Found existing .env file")
+			fmt.Print("Do you want to update it? (y/N): ")
+			// NOTE(review): this function creates a separate bufio.Reader for
+			// each prompt. With piped (non-interactive) stdin, input buffered
+			// by one reader is presumably not seen by the next - confirm.
+			reader := bufio.NewReader(os.Stdin)
+			response, _ := reader.ReadString('\n')
+			response = strings.TrimSpace(strings.ToLower(response))
+
+			// Default is No for updating existing config
+			if response != "y" && response != "yes" {
+				fmt.Println("Using existing .env file...")
+			} else {
+				if err := promptForEnvVars(envPath, enableMemory); err != nil {
+					return fmt.Errorf("failed to update .env: %w", err)
+				}
+			}
+		} else {
+			fmt.Println("📝 Creating .env file...")
+			// Copy template
+			if err := copyEnvTemplate(envPath); err != nil {
+				return fmt.Errorf("failed to copy .env template: %w", err)
+			}
+
+			if err := promptForEnvVars(envPath, enableMemory); err != nil {
+				return fmt.Errorf("failed to configure .env: %w", err)
+			}
+		}
+	} else if !envExists {
+		// Skip prompts but no .env exists - copy template
+		fmt.Println("📝 Creating .env from template...")
+		if err := copyEnvTemplate(envPath); err != nil {
+			return fmt.Errorf("failed to copy .env template: %w", err)
+		}
+	}
+
+	// Without prompts, --with-memory-tools is applied straight to .env. With
+	// prompts, the flag was passed to promptForEnvVars above instead.
+	if skipPrompts && enableMemory {
+		if err := applyMemoryDefaults(envPath); err != nil {
+			return fmt.Errorf("failed to enable memory tools defaults: %w", err)
+		}
+	}
+
+	fmt.Println()
+	fmt.Println("=" + strings.Repeat("=", 50))
+	fmt.Println("⚙️  Running setup...")
+	fmt.Println()
+
+	// Run dev setup
+	if err := execCommand("make", "setup"); err != nil {
+		return fmt.Errorf("setup failed: %w", err)
+	}
+
+	// Ask about monitoring setup
+	if !skipPrompts {
+		fmt.Println()
+		fmt.Println("=" + strings.Repeat("=", 50))
+		fmt.Println("📊 Monitoring Stack Setup (Optional)")
+		fmt.Println()
+		fmt.Println("Would you like to set up the monitoring stack?")
+		fmt.Println("This includes:")
+		fmt.Println("  • Prometheus (metrics)")
+		fmt.Println("  • Grafana (dashboards)")
+		fmt.Println("  • Jaeger (distributed tracing)")
+		fmt.Println("  • OpenTelemetry Collector")
+		fmt.Println()
+		fmt.Println()
+		fmt.Print("Set up monitoring? (y/N): ")
+
+		reader := bufio.NewReader(os.Stdin)
+		monitorResponse, _ := reader.ReadString('\n')
+		monitorResponse = strings.TrimSpace(strings.ToLower(monitorResponse))
+
+		// Default is No for monitoring (optional feature)
+		if monitorResponse == "y" || monitorResponse == "yes" {
+			fmt.Println()
+			fmt.Println("🔧 Installing monitoring dependencies...")
+
+			// Enable tracing in .env; a failure is reported but does not stop
+			// the monitoring stack from being started below.
+			if err := updateEnvVariable(envPath, "OTEL_ENABLED", "true"); err != nil {
+				fmt.Println("⚠️  Warning: Failed to enable OTEL_ENABLED in .env")
+			} else {
+				fmt.Println("✓ Enabled telemetry collection (OTEL_ENABLED=true)")
+			}
+
+			if err := execCommand("make", "monitor-up"); err != nil {
+				fmt.Println("⚠️  Warning: Failed to start monitoring stack")
+				fmt.Println("You can set it up later with: jan-cli monitor setup")
+			} else {
+				fmt.Println("✓ Monitoring stack started successfully!")
+				fmt.Println()
+				fmt.Println("Access monitoring dashboards:")
+				fmt.Println("  • Grafana:    http://localhost:3331 (admin/admin)")
+				fmt.Println("  • Prometheus: http://localhost:9090")
+				fmt.Println("  • Jaeger:     http://localhost:16686")
+			}
+		} else {
+			fmt.Println("⏭️  Skipping monitoring setup")
+			fmt.Println("You can set it up later with: jan-cli monitor setup")
+		}
+	}
+
+	fmt.Println()
+	fmt.Println("=" + strings.Repeat("=", 50))
+	fmt.Println("🐳 Starting Docker services...")
+	fmt.Println("This may take 1-2 minutes on first run...")
+	fmt.Println()
+
+	// Start services
+	if err := execCommand("make", "up-full"); err != nil {
+		// Docker compose up -d returns non-zero if services are already running
+		// Check if it's actually an error or just a warning
+		// NOTE(review): the error value is never inspected, so every failure
+		// of `make up-full` ends up here and the function carries on.
+		fmt.Println()
+		fmt.Println("Note: Some services may already be running")
+	}
+
+	fmt.Println()
+	fmt.Println("=" + strings.Repeat("=", 50))
+	fmt.Println("✅ Jan Server is starting!")
+	fmt.Println()
+	fmt.Println("Waiting for services to be ready (30 seconds)...")
+
+	// Wait for services to start - cross-platform
+	// This is a fixed 30 second pause run as an external command; no
+	// readiness check is performed here.
+	if isWindows() {
+		execCommandSilent("powershell", "-Command", "Start-Sleep -Seconds 30")
+	} else {
+		execCommandSilent("sleep", "30")
+	}
+
+	fmt.Println()
+	fmt.Println("Access your services:")
+	fmt.Println("  • API Gateway:      http://localhost:8000")
+	fmt.Println("  • API Docs:         http://localhost:8000/v1/swagger/")
+	fmt.Println("  • LLM API:          http://localhost:8080")
+	fmt.Println("  • Keycloak:         http://localhost:8085 (admin/admin)")
+
+	// Only show vLLM if using local provider
+	// The marker is read from this process's environment; where it gets set
+	// is not visible in this function.
+	if os.Getenv("_USING_LOCAL_VLLM") == "true" {
+		fmt.Println("  • vLLM (Local):     http://localhost:8101")
+	}
+
+	fmt.Println()
+	fmt.Println("Get started:")
+	fmt.Println("  1. Get a token:     curl -X POST http://localhost:8000/llm/auth/guest-login")
+	fmt.Println("  2. Health check:    make health-check")
+	fmt.Println("  3. View logs:       make logs-llm-api")
+	fmt.Println("  4. Stop services:   make down")
+	fmt.Println()
+
+	return nil
+}
+
+// copyEnvTemplate copies .env.template from the working directory to
+// destPath, replacing the content of any existing file.
+//
+// A newly created file gets mode 0600 (before umask): the destination ends up
+// holding tokens and API keys, so it should not be readable by other users.
+// os.WriteFile does not change the mode of a file that already exists.
+//
+// The returned error wraps the underlying read or write failure.
+func copyEnvTemplate(destPath string) error {
+	templatePath := ".env.template"
+
+	// Read template
+	data, err := os.ReadFile(templatePath)
+	if err != nil {
+		return fmt.Errorf("read template: %w", err)
+	}
+
+	// Write to destination, owner read/write only
+	if err := os.WriteFile(destPath, data, 0600); err != nil {
+		return fmt.Errorf("write .env: %w", err)
+	}
+
+	return nil
+}
+
+func promptForEnvVars(envPath string, defaultEnableMemory bool) error {
+	reader := bufio.NewReader(os.Stdin)
+
+	fmt.Println()
+	fmt.Println("=== Configuration Wizard ===")
+	fmt.Println()
+
+	// Read current .env
+	if _, err := os.ReadFile(envPath); err != nil {
+		return fmt.Errorf("read .env: %w", err)
+	}
+
+	updates := make(map[string]string)
+
+	// 1. LLM Provider Configuration
+	fmt.Println("📦 LLM Provider Setup")
+	fmt.Println("Choose your LLM provider:")
+	fmt.Println("  1. Local vLLM (requires GPU, uses HuggingFace token)")
+	fmt.Println("  2. Remote API endpoint (OpenAI-compatible)")
+	fmt.Print("Enter choice [1/2] (default: 1): ")
+
+	providerChoice, _ := reader.ReadString('\n')
+	providerChoice = strings.TrimSpace(providerChoice)
+	if providerChoice == "" {
+		providerChoice = "1"
+	}
+
+	// Track which services to enable
+	useLocalVLLM := false
+	profiles := []string{"infra", "api", "mcp"} // Always include core services
+
+	if providerChoice == "1" {
+		// Local vLLM setup
+		fmt.Println()
+		fmt.Print("HF_TOKEN (get from https://huggingface.co/settings/tokens): ")
+		hfToken, _ := reader.ReadString('\n')
+		hfToken = strings.TrimSpace(hfToken)
+		if hfToken != "" {
+			updates["HF_TOKEN"] = hfToken
+		}
+
+		// Disable remote provider, enable local
+		updates["JAN_PROVIDER_CONFIGS"] = "true"
+		updates["JAN_DEFAULT_NODE_SETUP"] = "false"
+		profiles = append(profiles, "full") // Add vLLM
+		useLocalVLLM = true
+
+		fmt.Println("✓ Will use local vLLM with model: Qwen/Qwen2.5-0.5B-Instruct")
+	} else {
+		// Remote provider setup
+		fmt.Println()
+		fmt.Print("Remote API URL (e.g., https://api.openai.com/v1): ")
+		remoteURL, _ := reader.ReadString('\n')
+		remoteURL = strings.TrimSpace(remoteURL)
+
+		fmt.Print("API Key (press Enter if no key required): ")
+		apiKey, _ := reader.ReadString('\n')
+		apiKey = strings.TrimSpace(apiKey)
+
+		if remoteURL != "" {
+			updates["JAN_DEFAULT_NODE_SETUP"] = "true"
+			updates["JAN_DEFAULT_NODE_URL"] = remoteURL
+			updates["JAN_DEFAULT_NODE_API_KEY"] = apiKey
+			updates["JAN_PROVIDER_CONFIGS"] = "false"
+			updates["HF_TOKEN"] = "not_required_for_remote_provider"
+			// Note: infra, api, mcp already in profiles
+			fmt.Println("✓ Will use remote provider:", remoteURL)
+		}
+	}
+
+	// 2. MCP Search Tool Configuration
+	fmt.Println()
+	fmt.Println("🔍 MCP Search Tool Setup")
+	fmt.Println("Choose search provider for MCP tools:")
+	fmt.Println("  1. Serper (requires API key, Google search)")
+	fmt.Println("  2. SearXNG (local, no API key needed)")
+	fmt.Println("  3. None (disable MCP search, but Vector Store still available)")
+	fmt.Print("Enter choice [1/2/3] (default: 1): ")
+
+	searchChoice, _ := reader.ReadString('\n')
+	searchChoice = strings.TrimSpace(searchChoice)
+	if searchChoice == "" {
+		searchChoice = "1"
+	}
+
+	// MCP profile is already included in the profiles list
+
+	switch searchChoice {
+	case "1":
+		fmt.Println()
+		fmt.Print("SERPER_API_KEY (get from https://serper.dev): ")
+		serperKey, _ := reader.ReadString('\n')
+		serperKey = strings.TrimSpace(serperKey)
+		if serperKey != "" {
+			updates["SERPER_API_KEY"] = serperKey
+			updates["SEARCH_ENGINE"] = "serper"
+			fmt.Println("✓ Will use Serper for search")
+		} else {
+			fmt.Println("⚠️  No API key provided, falling back to SearXNG")
+			updates["SEARCH_ENGINE"] = "searxng"
+		}
+	case "2":
+		updates["SEARCH_ENGINE"] = "searxng"
+		updates["SERPER_API_KEY"] = "not_required_for_searxng"
+		fmt.Println("✓ Will use SearXNG (local) for search")
+	case "3":
+		updates["SEARCH_ENGINE"] = "none"
+		updates["SERPER_API_KEY"] = "mcp_search_disabled"
+		fmt.Println("✓ MCP search disabled (Vector Store still available)")
+	}
+
+	// 3. Memory Tools Configuration
+	fmt.Println()
+	fmt.Println("🧠 Memory Tools Setup")
+	fmt.Println("Enable memory tools for long-term context and retrieval.")
+	memoryPromptDefault := "Y/n"
+	if !defaultEnableMemory {
+		memoryPromptDefault = "y/N"
+	}
+	fmt.Printf("Enable memory tools? (%s): ", memoryPromptDefault)
+
+	memoryChoice, _ := reader.ReadString('\n')
+	memoryChoice = strings.TrimSpace(strings.ToLower(memoryChoice))
+
+	// Default based on defaultEnableMemory flag (Y/n or y/N)
+	enableMemory := defaultEnableMemory
+	if memoryChoice != "" {
+		enableMemory = memoryChoice != "n" && memoryChoice != "no"
+	}
+
+	externalEmbedding := false
+	useRedis := false
+	if enableMemory {
+		externalEmbedding, useRedis = configureMemoryOptions(reader, updates)
+	}
+	applyMemorySettings(updates, &profiles, enableMemory, externalEmbedding, useRedis)
+
+	// 4. Media API Configuration
+	fmt.Println()
+	fmt.Println("🖼️  Media API Setup")
+
+	// Media API with local storage only works when using local vLLM
+	if !useLocalVLLM {
+		fmt.Println("Note: Media API with local storage requires local vLLM deployment")
+		fmt.Println("      Only S3 storage is available with remote API providers")
+		fmt.Print("Enable Media API with S3 storage? (y/N): ")
+
+		mediaChoice, _ := reader.ReadString('\n')
+		mediaChoice = strings.TrimSpace(strings.ToLower(mediaChoice))
+
+		// Default is No for S3 with remote provider (requires credentials)
+		if mediaChoice == "y" || mediaChoice == "yes" {
+			updates["MEDIA_API_ENABLED"] = "true"
+			updates["MEDIA_STORAGE_BACKEND"] = "s3"
+
+			fmt.Println()
+			fmt.Println("S3-compatible storage configuration:")
+			fmt.Println("(Press Enter to use default Menlo AI settings)")
+
+			fmt.Print("S3 Endpoint URL (default: https://s3.menlo.ai): ")
+			s3Endpoint, _ := reader.ReadString('\n')
+			s3Endpoint = strings.TrimSpace(s3Endpoint)
+			if s3Endpoint == "" {
+				s3Endpoint = "https://s3.menlo.ai"
+			}
+			updates["MEDIA_S3_ENDPOINT"] = s3Endpoint
+
+			fmt.Print("S3 Bucket name (default: platform-dev): ")
+			s3Bucket, _ := reader.ReadString('\n')
+			s3Bucket = strings.TrimSpace(s3Bucket)
+			if s3Bucket == "" {
+				s3Bucket = "platform-dev"
+			}
+			updates["MEDIA_S3_BUCKET"] = s3Bucket
+
+			fmt.Print("S3 Access Key ID (default: 7N33WPTUI1KN99MFILQS): ")
+			s3AccessKey, _ := reader.ReadString('\n')
+			s3AccessKey = strings.TrimSpace(s3AccessKey)
+			if s3AccessKey == "" {
+				s3AccessKey = "7N33WPTUI1KN99MFILQS"
+			}
+			updates["MEDIA_S3_ACCESS_KEY_ID"] = s3AccessKey
+
+			fmt.Print("S3 Secret Access Key (default: ppxQsHpnfDSewYZD065aGjQeEQ0nTFA7c2aHNPz5): ")
+			s3SecretKey, _ := reader.ReadString('\n')
+			s3SecretKey = strings.TrimSpace(s3SecretKey)
+			if s3SecretKey == "" {
+				s3SecretKey = "ppxQsHpnfDSewYZD065aGjQeEQ0nTFA7c2aHNPz5"
+			}
+			updates["MEDIA_S3_SECRET_ACCESS_KEY"] = s3SecretKey
+
+			fmt.Print("S3 Region (default: us-west-2): ")
+			s3Region, _ := reader.ReadString('\n')
+			s3Region = strings.TrimSpace(s3Region)
+			if s3Region == "" {
+				s3Region = "us-west-2"
+			}
+			updates["MEDIA_S3_REGION"] = s3Region
+
+			// Set media API URLs
+			updates["MEDIA_API_URL"] = "http://media-api:8285"
+			updates["MEDIA_RESOLVE_URL"] = "http://media-api:8285/v1/media/resolve"
+
+			fmt.Println("✓ Media API enabled with S3 storage")
+		} else {
+			updates["MEDIA_API_ENABLED"] = "false"
+			fmt.Println("✓ Media API disabled")
+		}
+	} else {
+		// Local vLLM - offer both storage options
+		fmt.Print("Enable Media API? (Y/n): ")
+
+		mediaChoice, _ := reader.ReadString('\n')
+		mediaChoice = strings.TrimSpace(strings.ToLower(mediaChoice))
+
+		// Default is Yes for Media API with local vLLM
+		if mediaChoice == "n" || mediaChoice == "no" {
+			updates["MEDIA_API_ENABLED"] = "false"
+			fmt.Println("✓ Media API disabled")
+		} else {
+			updates["MEDIA_API_ENABLED"] = "true"
+
+			// Ask for storage backend
+			fmt.Println()
+			fmt.Println("Choose Media storage backend:")
+			fmt.Println("  1. Local file system (default, stores files locally)")
+			fmt.Println("  2. S3-compatible storage (requires credentials)")
+			fmt.Print("Enter choice [1/2] (default: 1): ")
+
+			storageChoice, _ := reader.ReadString('\n')
+			storageChoice = strings.TrimSpace(storageChoice)
+			if storageChoice == "" {
+				storageChoice = "1"
+			}
+
+			if storageChoice == "2" {
+				// S3 Configuration
+				updates["MEDIA_STORAGE_BACKEND"] = "s3"
+
+				fmt.Println()
+				fmt.Println("S3-compatible storage configuration:")
+				fmt.Println("(Press Enter to use default Menlo AI settings)")
+
+				fmt.Print("S3 Endpoint URL (default: https://s3.menlo.ai): ")
+				s3Endpoint, _ := reader.ReadString('\n')
+				s3Endpoint = strings.TrimSpace(s3Endpoint)
+				if s3Endpoint == "" {
+					s3Endpoint = "https://s3.menlo.ai"
+				}
+				updates["MEDIA_S3_ENDPOINT"] = s3Endpoint
+
+				fmt.Print("S3 Bucket name (default: platform-dev): ")
+				s3Bucket, _ := reader.ReadString('\n')
+				s3Bucket = strings.TrimSpace(s3Bucket)
+				if s3Bucket == "" {
+					s3Bucket = "platform-dev"
+				}
+				updates["MEDIA_S3_BUCKET"] = s3Bucket
+
+				fmt.Print("S3 Access Key ID (default: 7N33WPTUI1KN99MFILQS): ")
+				s3AccessKey, _ := reader.ReadString('\n')
+				s3AccessKey = strings.TrimSpace(s3AccessKey)
+				if s3AccessKey == "" {
+					s3AccessKey = "7N33WPTUI1KN99MFILQS"
+				}
+				updates["MEDIA_S3_ACCESS_KEY_ID"] = s3AccessKey
+
+				fmt.Print("S3 Secret Access Key (default: ppxQsHpnfDSewYZD065aGjQeEQ0nTFA7c2aHNPz5): ")
+				s3SecretKey, _ := reader.ReadString('\n')
+				s3SecretKey = strings.TrimSpace(s3SecretKey)
+				if s3SecretKey == "" {
+					s3SecretKey = "ppxQsHpnfDSewYZD065aGjQeEQ0nTFA7c2aHNPz5"
+				}
+				updates["MEDIA_S3_SECRET_ACCESS_KEY"] = s3SecretKey
+
+				fmt.Print("S3 Region (default: us-west-2): ")
+				s3Region, _ := reader.ReadString('\n')
+				s3Region = strings.TrimSpace(s3Region)
+				if s3Region == "" {
+					s3Region = "us-west-2"
+				}
+				updates["MEDIA_S3_REGION"] = s3Region
+
+				fmt.Println("✓ Media API enabled with S3 storage")
+			} else {
+				// Local file system storage (default)
+				updates["MEDIA_STORAGE_BACKEND"] = "local"
+				updates["MEDIA_LOCAL_STORAGE_PATH"] = "./media-data"
+				updates["MEDIA_LOCAL_STORAGE_BASE_URL"] = "http://localhost:8285/v1/files"
+				fmt.Println("✓ Media API enabled with local file system storage")
+			}
+
+			// Set media API URLs (common for both backends)
+			updates["MEDIA_API_URL"] = "http://media-api:8285"
+			updates["MEDIA_RESOLVE_URL"] = "http://media-api:8285/v1/media/resolve"
+		}
+	}
+
+	// Apply all updates
+	fmt.Println()
+
+	// Ensure Keycloak URLs are properly set for browser access
+	if _, exists := updates["KEYCLOAK_PUBLIC_URL"]; !exists {
+		updates["KEYCLOAK_PUBLIC_URL"] = "http://localhost:8085"
+	}
+	if _, exists := updates["KEYCLOAK_ADMIN_URL"]; !exists {
+		updates["KEYCLOAK_ADMIN_URL"] = "http://localhost:8085"
+	}
+	if _, exists := updates["KEYCLOAK_BASE_URL"]; !exists {
+		updates["KEYCLOAK_BASE_URL"] = "http://keycloak:8085"
+	}
+	if _, exists := updates["ISSUER"]; !exists {
+		updates["ISSUER"] = "http://localhost:8085/realms/jan"
+	}
+
+	// Set COMPOSE_PROFILES based on enabled services
+	if len(profiles) > 0 {
+		updates["COMPOSE_PROFILES"] = strings.Join(profiles, ",")
+	}
+
+	// Store provider choice for return value (used later for conditional output)
+	if useLocalVLLM {
+		updates["_USING_LOCAL_VLLM"] = "true"
+	}
+
+	if len(updates) > 0 {
+		if err := applyEnvUpdates(envPath, updates); err != nil {
+			return err
+		}
+
+		fmt.Println("✓ Configuration saved to .env")
+	} else {
+		fmt.Println("✓ No changes made")
+	}
+
+	// Check if using local vLLM (look in updates or re-read from env)
+	data, _ := os.ReadFile(envPath)
+	if strings.Contains(string(data), "COMPOSE_PROFILES=full") {
+		os.Setenv("_USING_LOCAL_VLLM", "true")
+	}
+
+	return nil
+}
+
+func applyEnvUpdates(envPath string, updates map[string]string) error {
+	if len(updates) == 0 {
+		return nil
+	}
+
+	data, err := os.ReadFile(envPath)
+	if err != nil {
+		return fmt.Errorf("read .env: %w", err)
+	}
+
+	lines := strings.Split(string(data), "\n")
+	pending := make(map[string]string, len(updates))
+	for key, value := range updates {
+		pending[key] = value
+	}
+
+	for i, line := range lines {
+		trimmed := strings.TrimSpace(line)
+		if strings.HasPrefix(trimmed, "#") || trimmed == "" {
+			continue
+		}
+
+		for key, value := range pending {
+			if strings.HasPrefix(trimmed, key+"=") {
+				lines[i] = fmt.Sprintf("%s=%s", key, value)
+				delete(pending, key)
+			}
+		}
+	}
+
+	for key, value := range pending {
+		lines = append(lines, fmt.Sprintf("%s=%s", key, value))
+	}
+
+	newContent := strings.Join(lines, "\n")
+	if err := os.WriteFile(envPath, []byte(newContent), 0644); err != nil {
+		return fmt.Errorf("write .env: %w", err)
+	}
+
+	return nil
+}
+
+// applyMemoryDefaults enables the memory tools in the .env file at envPath
+// using the built-in defaults: setMemoryDefaults is called with
+// externalEmbedding=false and useRedis=false, starting from the compose
+// profiles currently found in the file. The resulting settings and the
+// updated COMPOSE_PROFILES value are written back through applyEnvUpdates.
+func applyMemoryDefaults(envPath string) error {
+	raw, readErr := os.ReadFile(envPath)
+	if readErr != nil {
+		return fmt.Errorf("read .env: %w", readErr)
+	}
+
+	envLines := strings.Split(string(raw), "\n")
+	activeProfiles := parseProfiles(envLines)
+
+	changes := map[string]string{}
+	setMemoryDefaults(changes, &activeProfiles, false, false)
+	if len(activeProfiles) != 0 {
+		changes["COMPOSE_PROFILES"] = strings.Join(activeProfiles, ",")
+	}
+
+	return applyEnvUpdates(envPath, changes)
+}
+
+// applyMemorySettings records the memory-tools decision in updates.
+//
+// When enable is false only MEMORY_TOOLS_ENABLED=false is recorded and
+// profiles is left untouched. When enable is true the work is delegated to
+// setMemoryDefaults, which also extends *profiles. A one-line status message
+// is printed to stdout in both cases.
+func applyMemorySettings(updates map[string]string, profiles *[]string, enable bool, externalEmbedding bool, useRedis bool) {
+	if !enable {
+		updates["MEMORY_TOOLS_ENABLED"] = "false"
+		fmt.Println("Memory tools disabled (enable later by editing .env)")
+		return
+	}
+
+	setMemoryDefaults(updates, profiles, externalEmbedding, useRedis)
+	fmt.Println("Memory tools enabled (profile: memory)")
+}
+
+// setMemoryDefaults fills updates with the settings needed to run the memory
+// tools and, when profiles is non-nil, makes sure the matching compose
+// profiles are present in *profiles (existing entries are never duplicated):
+//
+//   - "memory" is always added;
+//   - "memory-mock" is added unless externalEmbedding is true;
+//   - "memory-redis" is added when useRedis is true.
+//
+// MEMORY_TOOLS_PORT and EMBEDDING_SERVICE_URL are only defaulted when the
+// caller has not already provided them. EMBEDDING_CACHE_TYPE follows useRedis
+// ("redis" or "memory") so that a Redis choice made earlier by the caller is
+// not silently reverted to the in-memory cache.
+func setMemoryDefaults(updates map[string]string, profiles *[]string, externalEmbedding bool, useRedis bool) {
+	if profiles != nil {
+		present := make(map[string]bool, len(*profiles))
+		for _, profile := range *profiles {
+			present[profile] = true
+		}
+		// ensure appends name once, keeping the existing order intact.
+		ensure := func(name string) {
+			if !present[name] {
+				*profiles = append(*profiles, name)
+				present[name] = true
+			}
+		}
+
+		ensure("memory")
+		if !externalEmbedding {
+			ensure("memory-mock")
+		}
+		if useRedis {
+			ensure("memory-redis")
+		}
+	}
+
+	if _, exists := updates["MEMORY_TOOLS_PORT"]; !exists {
+		updates["MEMORY_TOOLS_PORT"] = "8090"
+	}
+
+	if !externalEmbedding && updates["EMBEDDING_SERVICE_URL"] == "" {
+		updates["EMBEDDING_SERVICE_URL"] = "http://bge-m3:8091"
+	}
+
+	// The cache type must agree with the memory-redis profile decision above.
+	cacheType := "memory"
+	if useRedis {
+		cacheType = "redis"
+	}
+
+	updates["MEMORY_TOOLS_ENABLED"] = "true"
+	updates["EMBEDDING_CACHE_TYPE"] = cacheType
+	updates["PROMPT_ORCHESTRATION_MEMORY"] = "true"
+}
+
+// configureMemoryOptions interactively asks for the embedding service URL and
+// the embedding cache backend, reading answers line by line from reader and
+// storing the results in updates.
+//
+// It returns (external, useRedis): external is true when a custom embedding
+// URL was entered, useRedis is true when the Redis cache was selected. Blank
+// answers select the defaults shown in the prompts; read errors are ignored,
+// so whatever was read before the error is used as the answer.
+func configureMemoryOptions(reader *bufio.Reader, updates map[string]string) (bool, bool) {
+	// readLine prints the prompt and returns the raw answer line.
+	readLine := func(prompt string) string {
+		fmt.Print(prompt)
+		raw, _ := reader.ReadString('\n')
+		return raw
+	}
+
+	fmt.Println()
+	fmt.Println("Memory Embedding Service")
+	fmt.Println("Use the built-in BGE-M3 mock (default) or point to your own embedding endpoint.")
+	customURL := strings.TrimSpace(readLine("Custom embedding service URL (leave blank for http://bge-m3:8091): "))
+
+	external := customURL != ""
+	if external {
+		updates["EMBEDDING_SERVICE_URL"] = customURL
+	} else if _, already := updates["EMBEDDING_SERVICE_URL"]; !already {
+		updates["EMBEDDING_SERVICE_URL"] = "http://bge-m3:8091"
+	}
+
+	fmt.Println()
+	fmt.Println("Embedding Cache")
+	fmt.Println("Choose Redis for shared cache or in-memory for simplicity.")
+	cacheChoice := strings.TrimSpace(strings.ToLower(readLine("Use Redis cache? (y/N): ")))
+
+	// Anything other than an explicit yes keeps the simpler in-memory cache.
+	switch cacheChoice {
+	case "y", "yes":
+		updates["EMBEDDING_CACHE_TYPE"] = "redis"
+		redisURL := strings.TrimSpace(readLine("Redis URL (default: redis://redis-memory:6379/3): "))
+		if redisURL == "" {
+			redisURL = "redis://redis-memory:6379/3"
+		}
+		updates["EMBEDDING_CACHE_REDIS_URL"] = redisURL
+		return external, true
+	default:
+		updates["EMBEDDING_CACHE_TYPE"] = "memory"
+		return external, false
+	}
+}
+
+// updateEnvVariable sets a single key in the .env file at envPath: an
+// existing "key=..." assignment is replaced in place, otherwise "key=value"
+// is appended.
+//
+// It is a thin wrapper around applyEnvUpdates so that the single-key and the
+// bulk code paths share one implementation of the read/replace/append/write
+// cycle and cannot drift apart. Errors are the wrapped read/write errors
+// returned by applyEnvUpdates.
+func updateEnvVariable(envPath, key, value string) error {
+	return applyEnvUpdates(envPath, map[string]string{key: value})
+}
+
+func parseProfiles(lines []string) []string {
+	for _, line := range lines {
+		trimmed := strings.TrimSpace(line)
+		if strings.HasPrefix(trimmed, "COMPOSE_PROFILES=") {
+			value := strings.TrimPrefix(trimmed, "COMPOSE_PROFILES=")
+			if value != "" {
+				return strings.Split(value, ",")
+			}
+		}
+	}
+	return []string{"infra", "api", "mcp"}
+}
diff --git a/cmd/jan-cli/cmd_swagger.go b/cmd/jan-cli/cmd_swagger.go
new file mode 100644
index 00000000..d436cb9a
--- /dev/null
+++ b/cmd/jan-cli/cmd_swagger.go
@@ -0,0 +1,248 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/spf13/cobra"
+)
+
+// Swagger command tree: "swagger" is the parent command, "generate" and
+// "combine" are its subcommands (attached in init below).
+var (
+	// swaggerCmd groups the Swagger/OpenAPI documentation subcommands.
+	swaggerCmd = &cobra.Command{
+		Use:   "swagger",
+		Short: "Swagger documentation management",
+		Long:  `Generate and combine Swagger/OpenAPI documentation for Jan Server services.`,
+	}
+
+	// swaggerGenerateCmd runs runSwaggerGenerate.
+	swaggerGenerateCmd = &cobra.Command{
+		Use:   "generate",
+		Short: "Generate Swagger documentation",
+		Long:  `Generate Swagger documentation for all services or a specific service.`,
+		RunE:  runSwaggerGenerate,
+	}
+
+	// swaggerCombineCmd runs runSwaggerCombine.
+	swaggerCombineCmd = &cobra.Command{
+		Use:   "combine",
+		Short: "Combine Swagger specs",
+		Long:  `Combine multiple Swagger specifications into a single unified spec.`,
+		RunE:  runSwaggerCombine,
+	}
+)
+
+// init attaches the generate/combine subcommands to swaggerCmd and declares
+// the flags of the generate subcommand.
+func init() {
+	swaggerCmd.AddCommand(swaggerGenerateCmd, swaggerCombineCmd)
+
+	// generate flags
+	generateFlags := swaggerGenerateCmd.Flags()
+	generateFlags.StringP("service", "s", "", "Generate for specific service (llm-api, media-api, response-api, mcp-tools)")
+	generateFlags.Bool("combine", false, "Combine specs after generation")
+}
+
+// runSwaggerGenerate is the RunE handler of "swagger generate".
+//
+// Without --service it generates Swagger docs for llm-api, media-api,
+// response-api and mcp-tools in that order; with --service only the named
+// service is processed. The first failure aborts the run. --combine triggers
+// runSwaggerCombine afterwards, but only when all services were generated
+// (it is ignored together with --service).
+func runSwaggerGenerate(cmd *cobra.Command, args []string) error {
+	flags := cmd.Flags()
+	service, _ := flags.GetString("service")
+	combine, _ := flags.GetBool("combine")
+
+	fmt.Println("Generating Swagger documentation...")
+	fmt.Println()
+
+	targets := []string{service}
+	if service == "" {
+		targets = []string{"llm-api", "media-api", "response-api", "mcp-tools"}
+	}
+
+	for i := range targets {
+		err := generateSwaggerForService(targets[i])
+		if err != nil {
+			return fmt.Errorf("failed to generate swagger for %s: %w", targets[i], err)
+		}
+	}
+
+	if service == "" && combine {
+		fmt.Println()
+		fmt.Println("Combining Swagger specs...")
+		if combineErr := runSwaggerCombine(cmd, args); combineErr != nil {
+			return combineErr
+		}
+	}
+
+	fmt.Println()
+	fmt.Println("✓ Swagger documentation generated successfully!")
+	return nil
+}
+
+// generateSwaggerForService runs "swag init" (via "go run" at a pinned swag
+// version) inside services/<service> and verifies that
+// docs/swagger/swagger.json exists afterwards.
+//
+// The process working directory is switched to the service directory for the
+// duration of the call and restored on return. An error is returned when the
+// service directory does not exist, the service name is not one of the known
+// services, the command fails, or no swagger.json is found.
+func generateSwaggerForService(service string) error {
+	fmt.Printf("Generating swagger for %s...\n", service)
+
+	serviceDir := filepath.Join("services", service)
+	if _, statErr := os.Stat(serviceDir); os.IsNotExist(statErr) {
+		return fmt.Errorf("service directory not found: %s", serviceDir)
+	}
+
+	// Only the scanned directories and the general-info file differ between
+	// services; everything else on the swag command line is shared.
+	var scanDirs, generalInfo string
+	switch service {
+	case "llm-api":
+		scanDirs = "./cmd/server,./internal/interfaces/httpserver/routes"
+		generalInfo = "server.go"
+	case "media-api", "response-api":
+		scanDirs = "./cmd/server,./internal/interfaces/httpserver/handlers,./internal/interfaces/httpserver/routes/v1"
+		generalInfo = "server.go"
+	case "mcp-tools":
+		scanDirs = "."
+		generalInfo = "main.go"
+	default:
+		return fmt.Errorf("unknown service: %s", service)
+	}
+
+	swaggerArgs := []string{
+		"run", "github.com/swaggo/swag/cmd/swag@v1.8.12", "init",
+		"--dir", scanDirs,
+		"--generalInfo", generalInfo,
+		"--output", "./docs/swagger",
+		"--parseDependency",
+		"--parseInternal",
+	}
+
+	// Change to service directory, returning to the starting point on exit.
+	startDir, err := os.Getwd()
+	if err != nil {
+		return err
+	}
+	defer os.Chdir(startDir)
+
+	if chdirErr := os.Chdir(serviceDir); chdirErr != nil {
+		return chdirErr
+	}
+
+	// Run swag init
+	if runErr := execCommand("go", swaggerArgs...); runErr != nil {
+		return runErr
+	}
+
+	// Check if swagger.json was generated (path is relative to serviceDir).
+	generated := filepath.Join("docs", "swagger", "swagger.json")
+	if _, statErr := os.Stat(generated); os.IsNotExist(statErr) {
+		return fmt.Errorf("swagger.json not generated for %s", service)
+	}
+
+	fmt.Printf("  ✓ %s swagger generated\n", service)
+	return nil
+}
+
+// runSwaggerCombine merges the generated llm-api and mcp-tools Swagger specs
+// into services/llm-api/docs/swagger/swagger-combined.json.
+//
+// The llm-api spec is the base document. MCP paths are added under an "/mcp"
+// prefix, MCP definitions under an "MCP_" prefix, and MCP tags are renamed to
+// "MCP: <name>". Because the definitions are renamed, every "$ref" inside the
+// MCP paths and definitions is rewritten to the prefixed name first; otherwise
+// those references would dangle or resolve to an llm-api definition that
+// happens to share the name. When the mcp-tools spec cannot be read, the
+// llm-api spec is written out unchanged.
+//
+// cmd and args are not used; they are present to match cobra's RunE signature.
+func runSwaggerCombine(cmd *cobra.Command, args []string) error {
+	// Prefix applied to MCP definition names and to the $refs that target them.
+	const mcpDefPrefix = "MCP_"
+
+	fmt.Println("Combining Swagger specifications...")
+
+	llmSwagger := filepath.Join("services", "llm-api", "docs", "swagger", "swagger.json")
+	mcpSwagger := filepath.Join("services", "mcp-tools", "docs", "swagger", "swagger.json")
+	outputFile := filepath.Join("services", "llm-api", "docs", "swagger", "swagger-combined.json")
+
+	// Read LLM API swagger
+	llmData, err := os.ReadFile(llmSwagger)
+	if err != nil {
+		return fmt.Errorf("failed to read llm-api swagger: %w", err)
+	}
+
+	var llmSpec map[string]interface{}
+	if err := json.Unmarshal(llmData, &llmSpec); err != nil {
+		return fmt.Errorf("failed to parse llm-api swagger: %w", err)
+	}
+
+	// Read MCP Tools swagger (optional)
+	mcpData, err := os.ReadFile(mcpSwagger)
+	if err != nil {
+		fmt.Println("  ⚠ MCP Tools swagger not found, using LLM API only")
+		// Just write LLM API spec
+		if err := os.WriteFile(outputFile, llmData, 0644); err != nil {
+			return fmt.Errorf("failed to write output: %w", err)
+		}
+		fmt.Printf("  ✓ Combined swagger created (LLM API only)\n")
+		return nil
+	}
+
+	var mcpSpec map[string]interface{}
+	if err := json.Unmarshal(mcpData, &mcpSpec); err != nil {
+		return fmt.Errorf("failed to parse mcp-tools swagger: %w", err)
+	}
+
+	// Merge specs; combined aliases llmSpec, so it is modified in place.
+	combined := llmSpec
+	if info, ok := combined["info"].(map[string]interface{}); ok {
+		info["title"] = "Jan Server API (LLM API + MCP Tools)"
+		info["description"] = "Unified API documentation for Jan Server including LLM API (OpenAI-compatible) and MCP Tools"
+	}
+
+	// Merge paths with /mcp prefix
+	llmPaths, _ := combined["paths"].(map[string]interface{})
+	if llmPaths == nil {
+		llmPaths = make(map[string]interface{})
+	}
+
+	if mcpPaths, ok := mcpSpec["paths"].(map[string]interface{}); ok {
+		// Keep the operations pointing at the renamed MCP definitions.
+		prefixSwaggerRefs(mcpPaths, mcpDefPrefix)
+		for path, methods := range mcpPaths {
+			llmPaths["/mcp"+path] = methods
+		}
+	}
+	combined["paths"] = llmPaths
+
+	// Merge definitions
+	llmDefs, _ := combined["definitions"].(map[string]interface{})
+	if llmDefs == nil {
+		llmDefs = make(map[string]interface{})
+	}
+
+	if mcpDefs, ok := mcpSpec["definitions"].(map[string]interface{}); ok {
+		// Definitions may reference each other, so their $refs move as well.
+		prefixSwaggerRefs(mcpDefs, mcpDefPrefix)
+		for defName, def := range mcpDefs {
+			llmDefs[mcpDefPrefix+defName] = def
+		}
+	}
+	combined["definitions"] = llmDefs
+
+	// Merge tags
+	llmTags, _ := combined["tags"].([]interface{})
+	mcpTag := map[string]interface{}{
+		"name":        "MCP Tools",
+		"description": "Model Context Protocol tools",
+	}
+	llmTags = append(llmTags, mcpTag)
+
+	if mcpTags, ok := mcpSpec["tags"].([]interface{}); ok {
+		for _, tag := range mcpTags {
+			if tagMap, ok := tag.(map[string]interface{}); ok {
+				if name, ok := tagMap["name"].(string); ok {
+					tagMap["name"] = "MCP: " + name
+				}
+				llmTags = append(llmTags, tagMap)
+			}
+		}
+	}
+	combined["tags"] = llmTags
+
+	// Write combined spec
+	combinedData, err := json.MarshalIndent(combined, "", "  ")
+	if err != nil {
+		return fmt.Errorf("failed to marshal combined spec: %w", err)
+	}
+
+	if err := os.WriteFile(outputFile, combinedData, 0644); err != nil {
+		return fmt.Errorf("failed to write output: %w", err)
+	}
+
+	fmt.Printf("  ✓ Combined swagger created at: %s\n", outputFile)
+	return nil
+}
+
+// prefixSwaggerRefs walks a decoded JSON value in place and rewrites every
+// string-valued "$ref" of the form "#/definitions/<name>" to
+// "#/definitions/<prefix><name>". Other references are left untouched.
+func prefixSwaggerRefs(node interface{}, prefix string) {
+	const defsRoot = "#/definitions/"
+
+	switch typed := node.(type) {
+	case map[string]interface{}:
+		for key, child := range typed {
+			if ref, ok := child.(string); ok && key == "$ref" {
+				// Manual prefix test keeps this file's import list unchanged.
+				if len(ref) > len(defsRoot) && ref[:len(defsRoot)] == defsRoot {
+					typed[key] = defsRoot + prefix + ref[len(defsRoot):]
+				}
+				continue
+			}
+			prefixSwaggerRefs(child, prefix)
+		}
+	case []interface{}:
+		for _, child := range typed {
+			prefixSwaggerRefs(child, prefix)
+		}
+	}
+}
diff --git a/cmd/jan-cli/go.mod b/cmd/jan-cli/go.mod
new file mode 100644
index 00000000..0c05cb33
--- /dev/null
+++ b/cmd/jan-cli/go.mod
@@ -0,0 +1,21 @@
+module jan-server/cmd/jan-cli
+
+go 1.25.0
+
+require (
+	github.com/janhq/jan-server v0.0.0
+	github.com/spf13/cobra v1.8.1
+	gopkg.in/yaml.v3 v3.0.1
+)
+
+require (
+	github.com/bahlo/generic-list-go v0.2.0 // indirect
+	github.com/buger/jsonparser v1.1.1 // indirect
+	github.com/inconshreveable/mousetrap v1.1.0 // indirect
+	github.com/invopop/jsonschema v0.13.0 // indirect
+	github.com/mailru/easyjson v0.9.0 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/wk8/go-ordered-map/v2 v2.1.9-0.20240816141633-0a40785b4f41 // indirect
+)
+
+replace github.com/janhq/jan-server => ../..
diff --git a/cmd/jan-cli/go.sum b/cmd/jan-cli/go.sum
new file mode 100644
index 00000000..1697c338
--- /dev/null
+++ b/cmd/jan-cli/go.sum
@@ -0,0 +1,28 @@
+github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
+github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
+github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
+github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
+github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
+github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
+github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
+github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
+github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/wk8/go-ordered-map/v2 v2.1.9-0.20240816141633-0a40785b4f41 h1:rnB8ZLMeAr3VcqjfRkAm27qb8y6zFKNfuHvy1Gfe7KI=
+github.com/wk8/go-ordered-map/v2 v2.1.9-0.20240816141633-0a40785b4f41/go.mod h1:DbzwytT4g/odXquuOCqroKvtxxldI4nb3nuesHF/Exo=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/cmd/jan-cli/main.go b/cmd/jan-cli/main.go
new file mode 100644
index 00000000..7eef27d2
--- /dev/null
+++ b/cmd/jan-cli/main.go
@@ -0,0 +1,57 @@
+﻿package main
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/spf13/cobra"
+)
+
+// version is the CLI version string; it is exposed through rootCmd's Version field.
+var version = "1.0.0"
+
+// main executes the root command; a returned error is reported on stderr and
+// the process exits with status 1.
+func main() {
+	err := rootCmd.Execute()
+	if err == nil {
+		return
+	}
+
+	fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+	os.Exit(1)
+}
+
+// rootCmd is the top-level jan-cli command; subcommands and persistent flags
+// are attached in init.
+//
+// SilenceErrors is set because main already reports the error returned by
+// Execute on stderr. Without it cobra prints the same "Error: ..." line
+// itself and every failure is reported twice.
+var rootCmd = &cobra.Command{
+	Use:   "jan-cli",
+	Short: "Jan Server CLI - Unified command-line tool for Jan Server",
+	Long: `jan-cli is the official command-line interface for Jan Server.
+
+It provides tools for configuration management, service operations,
+database management, deployment, and development workflows.
+
+Quick Start:
+  jan-cli setup-and-run          # Interactive setup and start all services
+
+Examples:
+  # Configuration management
+  jan-cli config validate
+  jan-cli config export --format env
+  
+  # Service operations
+  jan-cli service list
+  jan-cli service logs llm-api
+  
+  # Development tools
+  jan-cli dev setup
+  jan-cli dev scaffold my-service`,
+	Version:       version,
+	SilenceErrors: true,
+}
+
+// init registers every top-level subcommand on rootCmd and declares the
+// persistent flags shared by all of them.
+func init() {
+	rootCmd.AddCommand(
+		configCmd,
+		serviceCmd,
+		devCmd,
+		swaggerCmd,
+		installCmd,
+		setupAndRunCmd,
+		monitorCmd,
+		apiTestCmd,
+	)
+
+	persistent := rootCmd.PersistentFlags()
+	persistent.BoolP("verbose", "v", false, "Enable verbose output")
+	persistent.String("config-dir", "config", "Configuration directory")
+}
diff --git a/cmd/jan-cli/paths.go b/cmd/jan-cli/paths.go
new file mode 100644
index 00000000..f5780313
--- /dev/null
+++ b/cmd/jan-cli/paths.go
@@ -0,0 +1,133 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/spf13/cobra"
+)
+
+// projectModuleName is the Go module path that findProjectRoot looks for in
+// go.mod files to recognise the jan-server repository root.
+const projectModuleName = "github.com/janhq/jan-server"
+
+// getConfigDir returns the absolute path of the configuration directory
+// selected by the root command's "config-dir" flag. The value is resolved by
+// resolvePathForOutput, which treats an explicitly set flag differently from
+// the default. An error is returned when the flag is not defined.
+func getConfigDir(cmd *cobra.Command) (string, error) {
+	configFlag := cmd.Root().Flag("config-dir")
+	if configFlag != nil {
+		return resolvePathForOutput(configFlag.Value.String(), configFlag.Changed)
+	}
+	return "", fmt.Errorf("config-dir flag not found")
+}
+
+// resolveOutputDir returns the absolute path for the command's own "output"
+// flag, resolved by resolvePathForOutput. An error is returned when the
+// command does not define that flag.
+func resolveOutputDir(cmd *cobra.Command) (string, error) {
+	outputFlag := cmd.Flags().Lookup("output")
+	if outputFlag != nil {
+		return resolvePathForOutput(outputFlag.Value.String(), outputFlag.Changed)
+	}
+	return "", fmt.Errorf("output flag not found")
+}
+
+// resolvePathForOutput turns path into an absolute path.
+//
+//   - An empty path is an error.
+//   - An absolute path is returned cleaned.
+//   - A relative path the user set explicitly (flagChanged) is resolved
+//     against the current working directory.
+//   - A relative default value is resolved against the project root when
+//     findProjectRoot succeeds, and against the working directory otherwise.
+func resolvePathForOutput(path string, flagChanged bool) (string, error) {
+	switch {
+	case path == "":
+		return "", fmt.Errorf("path cannot be empty")
+	case filepath.IsAbs(path):
+		return filepath.Clean(path), nil
+	}
+
+	// Defaults are anchored at the project root whenever one can be located.
+	if !flagChanged {
+		if projectRoot, rootErr := findProjectRoot(); rootErr == nil {
+			return filepath.Join(projectRoot, path), nil
+		}
+	}
+
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(cwd, path), nil
+}
+
+// resolveConfigFile resolves file to an absolute path.
+//
+// Absolute paths are returned cleaned. For a relative path the configuration
+// directory is tried first (after stripping a leading "config/" component so
+// it is not applied twice); if that candidate exists it is returned.
+// Otherwise the path is resolved against the current working directory,
+// whether or not the file exists there. An empty file name is an error.
+func resolveConfigFile(cmd *cobra.Command, file string) (string, error) {
+	if file == "" {
+		return "", fmt.Errorf("config file must be specified")
+	}
+	if filepath.IsAbs(file) {
+		return filepath.Clean(file), nil
+	}
+
+	normalized := filepath.Clean(file)
+
+	configDir, dirErr := getConfigDir(cmd)
+	if dirErr == nil {
+		inConfigDir := filepath.Join(configDir, trimLeadingConfigDir(normalized))
+		if _, statErr := os.Stat(inConfigDir); statErr == nil {
+			return inConfigDir, nil
+		}
+	}
+
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "", err
+	}
+	return filepath.Join(cwd, normalized), nil
+}
+
+// trimLeadingConfigDir cleans pathValue and removes a leading "config"
+// directory component: "config" itself becomes "", "config/<rest>" becomes
+// "<rest>", and any other path is returned cleaned but otherwise unchanged.
+func trimLeadingConfigDir(pathValue string) string {
+	normalized := filepath.Clean(pathValue)
+	if normalized == "config" {
+		return ""
+	}
+
+	dirPrefix := "config" + string(os.PathSeparator)
+	if strings.HasPrefix(normalized, dirPrefix) {
+		return normalized[len(dirPrefix):]
+	}
+	return normalized
+}
+
+// findProjectRoot walks upwards from the current working directory and
+// returns the first directory whose go.mod declares projectModuleName.
+// Directories without a readable go.mod, or with a different module name, are
+// skipped. An error is returned once the filesystem root is reached without a
+// match.
+func findProjectRoot() (string, error) {
+	current, err := os.Getwd()
+	if err != nil {
+		return "", err
+	}
+
+	for {
+		contents, readErr := os.ReadFile(filepath.Join(current, "go.mod"))
+		if readErr == nil && moduleNameFromGoMod(contents) == projectModuleName {
+			return current, nil
+		}
+
+		// filepath.Dir returns its argument unchanged at the root.
+		next := filepath.Dir(current)
+		if next == current {
+			return "", fmt.Errorf("project root not found")
+		}
+		current = next
+	}
+}
+
+// moduleNameFromGoMod returns the module path declared by the first
+// single-line "module" directive in the go.mod content, or "" when there is
+// none.
+//
+// The directive is tokenised on whitespace, so tabs or repeated spaces after
+// the keyword, Windows line endings and a trailing "// comment" do not end up
+// in the result. Surrounding quotes around the module path are removed.
+func moduleNameFromGoMod(data []byte) string {
+	for _, line := range strings.Split(string(data), "\n") {
+		fields := strings.Fields(line)
+		if len(fields) < 2 || fields[0] != "module" {
+			continue
+		}
+		return strings.Trim(fields[1], "\"`")
+	}
+	return ""
+}
diff --git a/cmd/jan-cli/utils.go b/cmd/jan-cli/utils.go
new file mode 100644
index 00000000..fc8b5060
--- /dev/null
+++ b/cmd/jan-cli/utils.go
@@ -0,0 +1,58 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"runtime"
+)
+
+// isWindows returns true if running on Windows
+func isWindows() bool {
+	return runtime.GOOS == "windows"
+}
+
+// execCommand runs name with args, wiring the child's stdin, stdout and
+// stderr to those of the current process, and waits for it to finish. A
+// failure to start or a non-zero exit is returned wrapped as
+// "command failed: ...".
+func execCommand(name string, args ...string) error {
+	child := exec.Command(name, args...)
+	child.Stdout, child.Stderr, child.Stdin = os.Stdout, os.Stderr, os.Stdin
+
+	err := child.Run()
+	if err == nil {
+		return nil
+	}
+	return fmt.Errorf("command failed: %w", err)
+}
+
+// execCommandSilent runs name with args without attaching any of the standard
+// streams, so the child's output is discarded. The error from Run is returned
+// unwrapped.
+func execCommandSilent(name string, args ...string) error {
+	return exec.Command(name, args...).Run()
+}
+
+// copyFile copies the contents of src to dst, creating dst if necessary and
+// truncating it otherwise.
+//
+// A newly created dst receives the permission bits of src (subject to the
+// process umask) instead of a fixed executable mode, so copied data files
+// are not marked executable while copied scripts keep their mode. The whole
+// file is read into memory, which is intended for small files.
+func copyFile(src, dst string) error {
+	info, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+
+	data, err := os.ReadFile(src)
+	if err != nil {
+		return err
+	}
+	return os.WriteFile(dst, data, info.Mode().Perm())
+}
+
+// getHealthURL returns the localhost URL used to probe the given service, or
+// the empty string when the service name is not known.
+func getHealthURL(service string) string {
+	switch service {
+	case "llm-api":
+		return "http://localhost:8080/healthz"
+	case "media-api":
+		return "http://localhost:8285/healthz"
+	case "response-api":
+		return "http://localhost:8082/healthz"
+	case "mcp-tools":
+		return "http://localhost:8091/healthz"
+	case "keycloak":
+		return "http://localhost:8085"
+	case "kong":
+		return "http://localhost:8000"
+	case "searxng":
+		return "http://localhost:8086"
+	case "vector-store":
+		return "http://localhost:3015/healthz"
+	case "sandboxfusion":
+		return "http://localhost:3010"
+	default:
+		return ""
+	}
+}
diff --git a/config/README.md b/config/README.md
new file mode 100644
index 00000000..9cb338ca
--- /dev/null
+++ b/config/README.md
@@ -0,0 +1,360 @@
+# Environment Configuration Guide
+
+## Overview
+
+Jan Server uses environment variables for configuration. This directory contains environment-specific configuration files that override the base template.
+
+## Quick Start
+
+```bash
+# 1. Create .env from template
+make env-create
+
+# 2. Edit .env and set required secrets:
+#    - HF_TOKEN (HuggingFace)
+#    - SERPER_API_KEY (Serper)
+#    - Update passwords/secrets
+
+# 3. Choose your environment
+make env-switch ENV=development   # Docker development (default)
+make env-switch ENV=testing       # Integration testing
+```
+
+## Environment Files
+
+### `.env.template` (Root)
+- **Purpose**: Comprehensive template with documentation
+- **Usage**: Copy to `.env` to get started
+- **Includes**: All variables with explanations and defaults
+- **This is the ONLY template** - use it instead of the old `.env.example`
+
+### `config/defaults.env`
+- **Purpose**: Base defaults inherited by all environments
+- **DO NOT** modify unless changing defaults globally
+- **Includes**: Non-sensitive defaults, ports, flags
+
+### `config/development.env`
+- **Purpose**: Full Docker development (all services containerized)
+- **Use when**: Running everything in Docker
+- **URLs**: Use Docker internal DNS (e.g., `keycloak:8085`)
+- **Command**: `make env-switch ENV=development` or `make up-full`
+
+### `config/testing.env`
+- **Purpose**: Integration testing with jan-cli api-test
+- **Use when**: Running `make test-all`
+- **URLs**: Use localhost for test access
+- **Command**: Used automatically by test targets
+
+### `config/production.env.example`
+- **Purpose**: Production deployment template
+- **Use when**: Deploying to production
+- **Security**: Copy to `config/production.env` and customize
+- **WARNING**: NEVER commit actual production.env
+
+### `config/secrets.env.example`
+- **Purpose**: List of all required secrets
+- **Use when**: Setting up new environment
+- **Security**: Reference for secret management setup
+
+## Environment Patterns
+
+### URL Patterns by Environment
+
+| Environment | Database | Keycloak | MCP Tools | Media API |
+|-------------|----------|----------|-----------|-----------|
+| **Development** | `api-db:5432` | `keycloak:8085` | `searxng:8080` | Docker internal |
+| **Hybrid** | `localhost:5432` | `localhost:8085` | `localhost:8086` | `localhost:8285` |
+| **Testing** | `localhost:5432` | `localhost:8085` | `localhost:8086` | `localhost:8285` |
+| **Production** | External DB URL | External Keycloak | External URLs | External URL |
+
+### Required Secrets
+
+All environments require these secrets (set in `.env`):
+
+```bash
+# API Keys
+HF_TOKEN=hf_xxxxx                    # HuggingFace token
+SERPER_API_KEY=xxxxx                 # Serper API key
+
+# Security
+POSTGRES_PASSWORD=xxxxx              # Database password
+KEYCLOAK_ADMIN_PASSWORD=xxxxx        # Keycloak admin password
+BACKEND_CLIENT_SECRET=xxxxx          # OAuth client secret
+VLLM_INTERNAL_KEY=xxxxx              # vLLM API key
+MODEL_PROVIDER_SECRET=xxxxx          # Model provider secret
+```
+
+## Switching Environments
+
+### Method 1: Makefile (Recommended)
+
+```bash
+# Switch back to Docker development
+make env-switch ENV=development
+
+# Switch to testing
+make env-switch ENV=testing
+```
+
+## Validation
+
+### Check Current Environment
+
+```bash
+make env-validate
+```
+
+### Verify Required Variables
+
+```bash
+make check-deps
+```
+
+## Best Practices
+
+### 1. Never Commit Secrets
+- `.env` is gitignored
+- Only commit `.env.template` and `config/*.env.example`
+- Use secret management in production
+
+### 2. Use Environment Switcher
+```bash
+make env-switch ENV=development  # Preferred
+# Avoid
+vi .env  # Manual editing is error-prone
+```
+
+### 3. Document Custom Variables
+```bash
+# Add to .env.template with comments
+# MY_CUSTOM_VAR=default_value  # Description of what this does
+```
+
+### 4. Separate Secrets from Config
+- Configuration: In version control (config/*.env)
+- Secrets: In .env (not in version control)
+- Production: Use secret management (Vault, AWS Secrets Manager, etc.)
+
+## Troubleshooting
+
+### Keycloak JWT Validation Fails
+
+**Symptom**: `401 Unauthorized`
+
+**Solution**: Check JWKS_URL matches your environment
+- Development: `http://keycloak:8085/...`
+- Hybrid: `http://localhost:8085/...`
+
+```bash
+# Verify URLs
+grep JWKS_URL .env
+grep KEYCLOAK_BASE_URL .env
+```
+
+### MCP Tools Not Found
+
+**Symptom**: MCP tool calls time out or tools are not found
+
+**Solution**: Check MCP provider URLs match your environment
+```bash
+# Development
+SEARXNG_URL=http://searxng:8080
+
+# Hybrid
+SEARXNG_URL=http://localhost:8086
+```
+
+### Environment Switch Not Working
+
+**Symptom**: Changes not reflected after `make env-switch`
+
+**Solution**:
+```bash
+# Restart services to pick up new .env
+make restart
+
+# Or full restart
+make down && make up-full
+```
+
+### Missing Required Secrets
+
+**Symptom**: Services fail to start, missing API keys
+
+**Solution**:
+```bash
+# Check what secrets are needed
+cat config/secrets.env.example
+
+# Set in .env
+vi .env  # Add HF_TOKEN, SERPER_API_KEY, etc.
+```
+
+## Example Workflows
+
+### Docker Development
+```bash
+make setup
+make env-switch ENV=development
+make up-full
+# All services running in Docker
+```
+
+### Hybrid Development
+```bash
+make setup
+make env-switch ENV=hybrid
+make up-infra             # Start only infrastructure
+cd services/llm-api
+air                       # Run API natively with hot reload
+```
+
+### Integration Testing
+```bash
+make setup
+make env-switch ENV=testing
+make test-setup
+make test-all
+```
+
+### Production Deployment
+```bash
+# 1. Copy production template
+cp config/production.env.example config/production.env
+
+# 2. Edit with production values
+vi config/production.env
+
+# 3. Set secrets (use secret manager in real production)
+# Set HF_TOKEN, SERPER_API_KEY, passwords, etc.
+
+# 4. Use in deployment
+cp config/production.env .env
+# Deploy with your orchestration tool (Docker Swarm, Kubernetes, etc.)
+```
+
+## Migration from Old Structure
+
+### Removed Files
+
+The following files were removed in the restructure:
+
+| Old File | Replacement |
+|----------|-------------|
+| `.env.example` | Use `.env.template` |
+| `.env.docker` | Use `config/development.env` |
+| `.env.local` | Use `config/hybrid.env` |
+| `.env.mcp.example` | Merged into `.env.template` |
+
+### Migration Steps
+
+If you were using old files:
+
+```bash
+# 1. Backup current .env
+cp .env .env.backup
+
+# 2. Create new .env from template
+make env-create
+
+# 3. Restore your secrets from backup
+# Copy: HF_TOKEN, SERPER_API_KEY, passwords, etc.
+# Use: vi .env or your preferred editor
+
+# 4. Choose environment
+make env-switch ENV=development  # or hybrid, testing
+```
+
+## Advanced Topics
+
+### Provider Bootstrap (llm-api)
+
+The llm-api service can preload providers from a YAML manifest. Enable it via:
+
+```bash
+JAN_PROVIDER_CONFIGS=true
+JAN_PROVIDER_CONFIGS_FILE=config/providers.yml
+JAN_PROVIDER_CONFIG_SET=default
+```
+
+`JAN_PROVIDER_CONFIGS_FILE` defaults to `config/providers.yml` inside `services/llm-api` (copied to `/app/config/providers.yml` in Docker). Each set under the `providers` key defines one or more providers:
+
+```yaml
+providers:
+  default:
+    - name: Local vLLM Provider
+      type: jan
+      url: http://vllm-jan-gpu:8101/v1
+      api_key: ${VLLM_INTERNAL_KEY}
+      auto_enable_new_models: true
+      sync_models: true
+```
+
+Environment variables (e.g., `${VLLM_INTERNAL_KEY}`) are expanded at load time, so secrets stay in `.env`. Create multiple sets such as `default`, `production`, etc., and select one with `JAN_PROVIDER_CONFIG_SET`. When the YAML flag is disabled, llm-api falls back to the legacy `JAN_DEFAULT_NODE_*` variables.
+
+### Adding a New Environment
+
+1. Create `config/myenv.env`:
+```bash
+# My Custom Environment
+DB_DSN=postgres://user:pass@custom-db:5432/dbname
+KEYCLOAK_BASE_URL=http://custom-keycloak:8085
+# ... other overrides
+```
+
+2. Switch to it:
+```bash
+make env-switch ENV=myenv
+```
+
+### Using Multiple Environments Simultaneously
+
+```bash
+# Terminal 1: Development environment
+ENV_FILE=config/development.env docker-compose -p dev up
+
+# Terminal 2: Testing environment
+ENV_FILE=config/testing.env docker-compose -p test up
+```
+
+### Environment Variable Precedence
+
+1. Shell environment variables (highest priority)
+2. `.env` file
+3. `config/<environment>.env` (when explicitly loaded)
+4. `config/defaults.env`
+5. Defaults in docker-compose.yml (lowest priority)
+
+## Security Checklist
+
+- [ ] `.env` is in `.gitignore`
+- [ ] Never commit `.env` with real secrets
+- [ ] Change default passwords in production
+- [ ] Use strong passwords (20+ characters)
+- [ ] Use secret management in production (Vault, AWS Secrets Manager)
+- [ ] Rotate secrets regularly
+- [ ] Limit access to production `.env` files
+- [ ] Use read-only secrets in containers when possible
+
+## Reference
+
+### All Environment Variables
+
+For a complete list of variables, see:
+- `.env.template` - Full template with documentation
+- `config/secrets.env.example` - Required secrets list
+- `config/defaults.env` - Default values
+
+### Commands
+
+```bash
+make env-create          # Create .env from template
+make env-switch ENV=X    # Switch to environment X
+make env-validate        # Validate current .env
+make env-list            # List available environments
+make check-deps          # Check required tools
+```
+
+---
+
+**Quick Reference**: `make help-env` | **Validate**: `make env-validate` | **Switch**: `make env-switch ENV=<environment>`
diff --git a/config/defaults.yaml b/config/defaults.yaml
new file mode 100644
index 00000000..f7b3464a
--- /dev/null
+++ b/config/defaults.yaml
@@ -0,0 +1,168 @@
+# Jan Server Default Configuration
+# Generated from pkg/config/types.go
+# DO NOT EDIT MANUALLY - this file is auto-generated
+#
+# To customize, create environment-specific overrides in:
+#   - config/environments/development.yaml
+#   - config/environments/staging.yaml
+#   - config/environments/production.yaml
+
+meta:
+  version: 1.0.0
+  environment: development
+infrastructure:
+  database:
+    postgres:
+      host: api-db
+      port: 5432
+      user: jan_user
+      database: jan_llm_api
+      ssl_mode: disable
+      max_connections: 100
+      max_idle_conns: 5
+      max_open_conns: 15
+      conn_max_lifetime: 30m0s
+  auth:
+    keycloak:
+      base_url: http://keycloak:8085
+      public_url: ""
+      realm: jan
+      http_port: 8085
+      admin_user: admin
+      admin_realm: master
+      admin_client_id: admin-cli
+      backend_client_id: backend
+      client: jan-client
+      oauth_redirect_uri: http://localhost:8000/auth/callback
+      jwks_url: ""
+      oidc_discovery_url: ""
+      issuer: http://localhost:8085/realms/jan
+      account: account
+      refresh_jwks_interval: 5m0s
+      auth_clock_skew: 1m0s
+      guest_role: guest
+      features:
+        - token-exchange
+        - preview
+  gateway:
+    kong:
+      http_port: 8000
+      admin_port: 8001
+      admin_url: http://kong:8001
+      log_level: info
+services:
+  llm_api:
+    http_port: 8080
+    metrics_port: 9091
+    log_level: info
+    log_format: json
+    auto_migrate: true
+    provider_config_file: config/providers.yml
+    provider_config_set: default
+    provider_configs_enabled: true
+    api_key:
+      prefix: sk_live
+      default_ttl: 2160h0m0s
+      max_ttl: 2160h0m0s
+      max_per_user: 5
+    model_provider_secret: jan-model-provider-secret-2024
+    model_sync_enabled: true
+    model_sync_interval_minutes: 60
+    prompt_orchestration:
+      enabled: false
+      enable_memory: false
+      enable_templates: false
+      enable_tools: false
+      default_persona: ""
+    media_resolve_url: http://kong:8000/media/v1/media/resolve
+    media_resolve_timeout: 5s
+  mcp_tools:
+    http_port: 8091
+    log_level: info
+    log_format: json
+    search_engine: serper
+    searxng_url: http://searxng:8080
+    vector_store_url: http://vector-store:3015
+    sandbox_fusion_url: http://sandboxfusion:8080
+    sandbox_require_approval: true
+    mcp_config_file: configs/mcp-providers.yml
+  media_api:
+    http_port: 8285
+    log_level: info
+    max_upload_bytes: 20971520
+    retention_days: 30
+    proxy_download: true
+    remote_fetch_timeout: 15s
+    s3:
+      endpoint: https://s3.menlo.ai
+      public_endpoint: ""
+      region: us-west-2
+      bucket: platform-dev
+      use_path_style: true
+      presign_ttl: 5m0s
+  response_api:
+    http_port: 8082
+    log_level: info
+    llm_api_url: http://llm-api:8080
+    mcp_tools_url: http://mcp-tools:8091
+    max_tool_depth: 8
+    tool_timeout: 45s
+  memory_tools:
+    enabled: false
+    http_port: 0
+    embedding:
+      base_url: ""
+      timeout: 0s
+      validate_on_startup: false
+      expected_model: ""
+      expected_dimension: 0
+      retry:
+        enabled: false
+        max_attempts: 0
+        initial_backoff: 0s
+        max_backoff: 0s
+      cache:
+        enabled: false
+        type: ""
+        redis:
+          url: ""
+          key_prefix: ""
+          ttl: 0s
+        memory:
+          max_size: 0
+          ttl: 0s
+      batch:
+        enabled: false
+        max_size: 0
+        timeout: 0s
+      circuit_breaker:
+        enabled: false
+        threshold: 0
+        timeout: 0s
+        max_concurrent: 0
+inference:
+  vllm:
+    enabled: true
+    port: 8101
+    model: Qwen/Qwen2.5-0.5B-Instruct
+    served_name: qwen2.5-0.5b-instruct
+    gpu_utilization: 0.66
+monitoring:
+  otel:
+    enabled: false
+    tracing_enabled: false
+    service_name: llm-api
+    service_version: ""
+    endpoint: http://otel-collector:4318
+    sampling_rate: 0
+    pii_level: ""
+    metric_interval: ""
+    http_port: 4318
+    grpc_port: 4317
+  prometheus:
+    port: 9090
+  grafana:
+    port: 3001
+    admin_user: admin
+  jaeger:
+    ui_port: 16686
diff --git a/config/production.env.example b/config/production.env.example
new file mode 100644
index 00000000..ce8c256b
--- /dev/null
+++ b/config/production.env.example
@@ -0,0 +1,186 @@
+# Production Environment Configuration
+# COPY THIS FILE TO config/production.env AND CUSTOMIZE
+# NEVER COMMIT production.env TO VERSION CONTROL
+
+# ============================================================================
+# Database (Production)
+# ============================================================================
+# Use managed database service (RDS, Cloud SQL, Azure Database, etc.)
+# Primary database for llm-api, media-api, response-api, template-api, memory-tools
+DB_POSTGRESQL_WRITE_DSN=postgres://prod_user:CHANGE_ME_STRONG_PASSWORD@prod-db.example.com:5432/jan_llm_api?sslmode=require
+
+# Optional: Read replica for scaling read operations
+# DB_POSTGRESQL_READ1_DSN=postgres://prod_user:CHANGE_ME_STRONG_PASSWORD@prod-db-replica.example.com:5432/jan_llm_api?sslmode=require
+
+# Memory Tools Database (can use same database or separate)
+MEMORY_DB_POSTGRESQL_WRITE_DSN=${DB_POSTGRESQL_WRITE_DSN}
+# MEMORY_DB_POSTGRESQL_READ1_DSN=${DB_POSTGRESQL_READ1_DSN}
+
+# PostgreSQL container settings (for local docker)
+POSTGRES_USER=prod_user
+POSTGRES_PASSWORD=CHANGE_ME_STRONG_PASSWORD
+POSTGRES_DB=jan_llm_api
+
+# ============================================================================
+# Authentication (Production Keycloak)
+# ============================================================================
+KEYCLOAK_BASE_URL=https://auth.yourdomain.com
+JWKS_URL=https://auth.yourdomain.com/realms/jan/protocol/openid-connect/certs
+ISSUER=https://auth.yourdomain.com/realms/jan
+OAUTH_REDIRECT_URI=https://api.yourdomain.com/auth/callback
+KEYCLOAK_ADMIN=admin
+KEYCLOAK_ADMIN_PASSWORD=CHANGE_ME_STRONG_PASSWORD
+KEYCLOAK_REALM=jan
+ACCOUNT=account
+REFRESH_JWKS_INTERVAL=5m
+
+# Guest provisioning
+BACKEND_CLIENT_ID=backend
+BACKEND_CLIENT_SECRET=CHANGE_ME_STRONG_PASSWORD
+CLIENT=jan-client
+GUEST_ROLE=guest
+
+# ============================================================================
+# API Service
+# ============================================================================
+HTTP_PORT=8080
+LOG_LEVEL=info
+LOG_FORMAT=json
+AUTO_MIGRATE=false  # Run migrations separately in production
+
+# ============================================================================
+# Kong API Gateway
+# ============================================================================
+KONG_HTTP_PORT=8000
+
+# ============================================================================
+# vLLM Inference
+# ============================================================================
+VLLM_PORT=8101
+VLLM_MODEL=Qwen/Qwen2.5-3B-Instruct-AWQ
+VLLM_SERVED_NAME=qwen2.5-3b-instruct-awq
+VLLM_GPU_UTIL=0.95
+VLLM_INTERNAL_KEY=CHANGE_ME_STRONG_PASSWORD
+
+# Model provider
+MODEL_PROVIDER_SECRET=CHANGE_ME_STRONG_PASSWORD
+JAN_PROVIDER_CONFIGS=true
+JAN_PROVIDER_CONFIGS_FILE=config/providers.yml
+JAN_PROVIDER_CONFIG_SET=production
+# Legacy fallback (single provider)
+# JAN_DEFAULT_NODE_SETUP=true
+# JAN_DEFAULT_NODE_URL=https://inference.yourdomain.com/v1
+# JAN_DEFAULT_NODE_API_KEY=${VLLM_INTERNAL_KEY}
+
+# ============================================================================
+# MCP Tools Service
+# ============================================================================
+MCP_TOOLS_HTTP_PORT=8091
+
+# Search configuration
+SEARCH_ENGINE=serper
+SERPER_DOMAIN_FILTER=
+SERPER_LOCATION_HINT=
+SERPER_OFFLINE_MODE=false
+
+# MCP Provider URLs (Production - use HTTPS)
+SEARXNG_URL=https://search.yourdomain.com
+VECTOR_STORE_URL=https://vector.yourdomain.com
+SANDBOXFUSION_URL=https://sandbox.yourdomain.com
+SANDBOX_FUSION_REQUIRE_APPROVAL=true
+MCP_ENABLE_PYTHON_EXEC=true
+MCP_ENABLE_MEMORY_RETRIEVE=true
+
+# Code sandbox and browser automation
+CODE_SANDBOX_ENABLED=true
+CODE_SANDBOX_URL=https://code-sandbox.yourdomain.com/mcp
+PLAYWRIGHT_ENABLED=true
+PLAYWRIGHT_URL=https://playwright.yourdomain.com
+
+# MCP debugging (disable in production)
+MCP_PROVIDER_DEBUG=false
+
+# ============================================================================
+# Memory Tools Service
+# ============================================================================
+MEMORY_TOOLS_PORT=8090
+
+# Embedding Service (BGE-M3)
+EMBEDDING_SERVICE_URL=http://bge-m3:8091
+
+# Embedding Cache Configuration
+EMBEDDING_CACHE_TYPE=redis  # Options: redis, memory, noop
+EMBEDDING_CACHE_REDIS_URL=redis://redis:6379/3
+EMBEDDING_CACHE_KEY_PREFIX=emb:
+EMBEDDING_CACHE_MAX_SIZE=10000
+EMBEDDING_CACHE_TTL=1h
+
+# Logging
+MEMORY_LOG_LEVEL=info
+MEMORY_LOG_FORMAT=json
+
+# ============================================================================
+# Media API - S3 Storage Configuration
+# ============================================================================
+# S3-compatible storage for media uploads (AWS S3, MinIO, etc.)
+MEDIA_S3_ENDPOINT=https://s3.amazonaws.com
+MEDIA_S3_REGION=us-east-1
+MEDIA_S3_BUCKET=jan-production-media
+MEDIA_S3_ACCESS_KEY_ID=CHANGE_ME_AWS_ACCESS_KEY_ID
+MEDIA_S3_SECRET_ACCESS_KEY=CHANGE_ME_AWS_SECRET_ACCESS_KEY
+MEDIA_S3_USE_PATH_STYLE=false
+MEDIA_S3_PRESIGN_TTL=5m
+
+# ============================================================================
+# Observability (Production)
+# ============================================================================
+OTEL_ENABLED=true
+OTEL_SERVICE_NAME=llm-api
+OTEL_EXPORTER_OTLP_ENDPOINT=https://otel-collector.yourdomain.com:4318
+OTEL_HTTP_PORT=4318
+OTEL_GRPC_PORT=4317
+
+# Monitoring stack
+PROMETHEUS_PORT=9090
+JAEGER_UI_PORT=16686
+GRAFANA_PORT=3001
+GRAFANA_ADMIN_USER=admin
+GRAFANA_ADMIN_PASSWORD=CHANGE_ME_STRONG_PASSWORD
+
+# ============================================================================
+# Secrets (Use Secret Manager in Production)
+# ============================================================================
+# In production, these should come from:
+# - AWS Secrets Manager
+# - Azure Key Vault
+# - HashiCorp Vault
+# - Google Secret Manager
+# - Kubernetes Secrets
+#
+# Examples:
+# HF_TOKEN=${SECRET_MANAGER_HF_TOKEN}
+# SERPER_API_KEY=${SECRET_MANAGER_SERPER_KEY}
+
+HF_TOKEN=CHANGE_ME_USE_SECRET_MANAGER
+SERPER_API_KEY=CHANGE_ME_USE_SECRET_MANAGER
+
+# ============================================================================
+# Production Checklist
+# ============================================================================
+# Before deploying to production:
+# [ ] Change ALL passwords to strong values (20+ characters)
+# [ ] Set up secret management (Vault, AWS Secrets Manager, etc.)
+# [ ] Use managed database service with SSL
+# [ ] Enable HTTPS for all services
+# [ ] Set AUTO_MIGRATE=false (run migrations manually)
+# [ ] Enable observability (OTEL_ENABLED=true)
+# [ ] Set LOG_LEVEL=info (not debug)
+# [ ] Use production-grade Keycloak deployment
+# [ ] Configure backup strategy for database
+# [ ] Set up monitoring alerts
+# [ ] Review and test disaster recovery plan
+# [ ] Enable rate limiting in Kong
+# [ ] Configure CORS properly
+# [ ] Review security headers
+# [ ] Enable audit logging
+# ============================================================================
diff --git a/config/schema/config.schema.json b/config/schema/config.schema.json
new file mode 100644
index 00000000..8f227aa3
--- /dev/null
+++ b/config/schema/config.schema.json
@@ -0,0 +1 @@
+{"$schema":"https://json-schema.org/draft/2020-12/schema","$id":"https://github.com/janhq/jan-server/pkg/config/config","$defs":{"APIKeyConfig":{"properties":{"prefix":{"type":"string"},"default_ttl":{"type":"integer"},"max_ttl":{"type":"integer"},"max_per_user":{"type":"integer","minimum":1}},"additionalProperties":false,"type":"object","required":["prefix","default_ttl","max_ttl","max_per_user"]},"AuthConfig":{"properties":{"keycloak":{"$ref":"#/$defs/KeycloakConfig"}},"additionalProperties":false,"type":"object","required":["keycloak"]},"BatchConfig":{"properties":{"enabled":{"type":"boolean"},"max_size":{"type":"integer","minimum":1},"timeout":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["enabled","max_size","timeout"]},"CacheConfig":{"properties":{"enabled":{"type":"boolean"},"type":{"type":"string","enum":["redis","memory","noop"]},"redis":{"$ref":"#/$defs/RedisCacheConfig"},"memory":{"$ref":"#/$defs/MemoryCacheConfig"}},"additionalProperties":false,"type":"object","required":["enabled","type","redis","memory"]},"CircuitBreakerConfig":{"properties":{"enabled":{"type":"boolean"},"threshold":{"type":"integer","minimum":1},"timeout":{"type":"integer"},"max_concurrent":{"type":"integer","minimum":1}},"additionalProperties":false,"type":"object","required":["enabled","threshold","timeout","max_concurrent"]},"DatabaseConfig":{"properties":{"postgres":{"$ref":"#/$defs/PostgresConfig"}},"additionalProperties":false,"type":"object","required":["postgres"]},"EmbeddingConfig":{"properties":{"base_url":{"type":"string","format":"uri"},"api_key":{"type":"string"},"timeout":{"type":"integer"},"validate_on_startup":{"type":"boolean"},"expected_model":{"type":"string"},"expected_dimension":{"type":"integer","minimum":1},"retry":{"$ref":"#/$defs/RetryConfig"},"cache":{"$ref":"#/$defs/CacheConfig"},"batch":{"$ref":"#/$defs/BatchConfig"},"circuit_breaker":{"$ref":"#/$defs/CircuitBreakerConfig"}},"additionalProperties":false,"type":"object","required":["base_url","timeout","validate_on
_startup","expected_model","expected_dimension","retry","cache","batch","circuit_breaker"]},"GatewayConfig":{"properties":{"kong":{"$ref":"#/$defs/KongConfig"}},"additionalProperties":false,"type":"object","required":["kong"]},"GrafanaConfig":{"properties":{"port":{"type":"integer","maximum":65535,"minimum":1},"admin_user":{"type":"string"},"admin_password":{"type":"string"}},"additionalProperties":false,"type":"object","required":["port","admin_user"]},"InferenceConfig":{"properties":{"vllm":{"$ref":"#/$defs/VLLMConfig"}},"additionalProperties":false,"type":"object","required":["vllm"]},"InfrastructureConfig":{"properties":{"database":{"$ref":"#/$defs/DatabaseConfig"},"auth":{"$ref":"#/$defs/AuthConfig"},"gateway":{"$ref":"#/$defs/GatewayConfig"}},"additionalProperties":false,"type":"object","required":["database","auth","gateway"]},"JaegerConfig":{"properties":{"ui_port":{"type":"integer","maximum":65535,"minimum":1}},"additionalProperties":false,"type":"object","required":["ui_port"]},"KeycloakConfig":{"properties":{"base_url":{"type":"string","format":"uri"},"public_url":{"type":"string","format":"uri"},"realm":{"type":"string"},"http_port":{"type":"integer","maximum":65535,"minimum":1},"admin_user":{"type":"string"},"admin_password":{"type":"string"},"admin_realm":{"type":"string"},"admin_client_id":{"type":"string"},"backend_client_id":{"type":"string"},"backend_client_secret":{"type":"string"},"client":{"type":"string"},"oauth_redirect_uri":{"type":"string","format":"uri"},"jwks_url":{"type":"string","format":"uri"},"oidc_discovery_url":{"type":"string","format":"uri"},"issuer":{"type":"string","format":"uri"},"account":{"type":"string"},"refresh_jwks_interval":{"type":"integer"},"auth_clock_skew":{"type":"integer"},"guest_role":{"type":"string"},"features":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object","required":["base_url","public_url","realm","http_port","admin_user","admin_password","admin_realm","admin_client_id
","backend_client_id","backend_client_secret","client","oauth_redirect_uri","jwks_url","oidc_discovery_url","issuer","account","refresh_jwks_interval","auth_clock_skew","guest_role","features"]},"KongConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"admin_port":{"type":"integer","maximum":65535,"minimum":1},"admin_url":{"type":"string","format":"uri"},"log_level":{"type":"string","enum":["debug","info","warn","error"]}},"additionalProperties":false,"type":"object","required":["http_port","admin_port","admin_url","log_level"]},"LLMAPIConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"metrics_port":{"type":"integer","maximum":65535,"minimum":1},"log_level":{"type":"string","enum":["debug","info","warn","error"]},"log_format":{"type":"string","enum":["json","console"]},"auto_migrate":{"type":"boolean"},"provider_config_file":{"type":"string"},"provider_config_set":{"type":"string"},"provider_configs_enabled":{"type":"boolean"},"api_key":{"$ref":"#/$defs/APIKeyConfig"},"model_provider_secret":{"type":"string"},"model_sync_enabled":{"type":"boolean"},"model_sync_interval_minutes":{"type":"integer","minimum":1},"prompt_orchestration":{"$ref":"#/$defs/PromptOrchestrationConfig"},"media_resolve_url":{"type":"string","format":"uri"},"media_resolve_timeout":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["http_port","metrics_port","log_level","log_format","auto_migrate","provider_config_file","provider_config_set","provider_configs_enabled","api_key","model_sync_enabled","model_sync_interval_minutes","prompt_orchestration","media_resolve_url","media_resolve_timeout"]},"MCPToolsConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"log_level":{"type":"string","enum":["debug","info","warn","error"]},"log_format":{"type":"string","enum":["json","console"]},"search_engine":{"type":"string","enum":["serper","searxng"]},"serper_api_key":{"type":"string"},"searxn
g_url":{"type":"string","format":"uri"},"vector_store_url":{"type":"string","format":"uri"},"sandbox_fusion_url":{"type":"string","format":"uri"},"sandbox_require_approval":{"type":"boolean"},"mcp_config_file":{"type":"string"}},"additionalProperties":false,"type":"object","required":["http_port","log_level","log_format","search_engine","searxng_url","vector_store_url","sandbox_fusion_url","sandbox_require_approval","mcp_config_file"]},"MediaAPIConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"log_level":{"type":"string","enum":["debug","info","warn","error"]},"max_upload_bytes":{"type":"integer","minimum":1},"retention_days":{"type":"integer","minimum":1},"proxy_download":{"type":"boolean"},"remote_fetch_timeout":{"type":"integer"},"s3":{"$ref":"#/$defs/S3Config"}},"additionalProperties":false,"type":"object","required":["http_port","log_level","max_upload_bytes","retention_days","proxy_download","remote_fetch_timeout","s3"]},"MemoryCacheConfig":{"properties":{"max_size":{"type":"integer","minimum":1},"ttl":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["max_size","ttl"]},"MemoryToolsConfig":{"properties":{"enabled":{"type":"boolean"},"http_port":{"type":"integer","maximum":65535,"minimum":1},"embedding":{"$ref":"#/$defs/EmbeddingConfig"}},"additionalProperties":false,"type":"object","required":["enabled","http_port","embedding"]},"MetaConfig":{"properties":{"version":{"type":"string"},"environment":{"type":"string"}},"additionalProperties":false,"type":"object","required":["version","environment"]},"MonitoringConfig":{"properties":{"otel":{"$ref":"#/$defs/OTELConfig"},"prometheus":{"$ref":"#/$defs/PrometheusConfig"},"grafana":{"$ref":"#/$defs/GrafanaConfig"},"jaeger":{"$ref":"#/$defs/JaegerConfig"}},"additionalProperties":false,"type":"object","required":["otel","prometheus","grafana","jaeger"]},"OTELConfig":{"properties":{"enabled":{"type":"boolean"},"tracing_enabled":{"type":"boolean"},"service_name"
:{"type":"string"},"service_version":{"type":"string"},"endpoint":{"type":"string","format":"uri"},"headers":{"additionalProperties":{"type":"string"},"type":"object"},"sampling_rate":{"type":"number","maximum":1,"minimum":0},"pii_level":{"type":"string","enum":["none","hashed","full"]},"metric_interval":{"type":"string"},"http_port":{"type":"integer","maximum":65535,"minimum":1},"grpc_port":{"type":"integer","maximum":65535,"minimum":1}},"additionalProperties":false,"type":"object","required":["enabled","tracing_enabled","service_name","service_version","endpoint","sampling_rate","pii_level","metric_interval","http_port","grpc_port"]},"PostgresConfig":{"properties":{"host":{"type":"string"},"port":{"type":"integer","maximum":65535,"minimum":1},"user":{"type":"string"},"database":{"type":"string"},"password":{"type":"string"},"ssl_mode":{"type":"string","enum":["disable","require","verify-ca","verify-full"]},"max_connections":{"type":"integer","maximum":1000,"minimum":1},"max_idle_conns":{"type":"integer","minimum":1},"max_open_conns":{"type":"integer","minimum":1},"conn_max_lifetime":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["host","port","user","database","password","ssl_mode","max_connections","max_idle_conns","max_open_conns","conn_max_lifetime"]},"PrometheusConfig":{"properties":{"port":{"type":"integer","maximum":65535,"minimum":1}},"additionalProperties":false,"type":"object","required":["port"]},"PromptOrchestrationConfig":{"properties":{"enabled":{"type":"boolean"},"enable_memory":{"type":"boolean"},"enable_templates":{"type":"boolean"},"enable_tools":{"type":"boolean"},"default_persona":{"type":"string"}},"additionalProperties":false,"type":"object","required":["enabled","enable_memory","enable_templates","enable_tools","default_persona"]},"RedisCacheConfig":{"properties":{"url":{"type":"string","format":"uri"},"key_prefix":{"type":"string"},"ttl":{"type":"integer"}},"additionalProperties":false,"type":"object","required"
:["url","key_prefix","ttl"]},"ResponseAPIConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"log_level":{"type":"string","enum":["debug","info","warn","error"]},"llm_api_url":{"type":"string","format":"uri"},"mcp_tools_url":{"type":"string","format":"uri"},"max_tool_depth":{"type":"integer","maximum":20,"minimum":1},"tool_timeout":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["http_port","log_level","llm_api_url","mcp_tools_url","max_tool_depth","tool_timeout"]},"RetryConfig":{"properties":{"enabled":{"type":"boolean"},"max_attempts":{"type":"integer","minimum":1},"initial_backoff":{"type":"integer"},"max_backoff":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["enabled","max_attempts","initial_backoff","max_backoff"]},"S3Config":{"properties":{"endpoint":{"type":"string","format":"uri"},"public_endpoint":{"type":"string","format":"uri"},"region":{"type":"string"},"bucket":{"type":"string"},"access_key":{"type":"string"},"secret_key":{"type":"string"},"use_path_style":{"type":"boolean"},"presign_ttl":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["endpoint","public_endpoint","region","bucket","use_path_style","presign_ttl"]},"ServicesConfig":{"properties":{"llm_api":{"$ref":"#/$defs/LLMAPIConfig"},"mcp_tools":{"$ref":"#/$defs/MCPToolsConfig"},"media_api":{"$ref":"#/$defs/MediaAPIConfig"},"response_api":{"$ref":"#/$defs/ResponseAPIConfig"},"memory_tools":{"$ref":"#/$defs/MemoryToolsConfig"}},"additionalProperties":false,"type":"object","required":["llm_api","mcp_tools","media_api","response_api","memory_tools"]},"VLLMConfig":{"properties":{"enabled":{"type":"boolean"},"port":{"type":"integer","maximum":65535,"minimum":1},"model":{"type":"string"},"served_name":{"type":"string"},"gpu_utilization":{"type":"number","maximum":1,"minimum":0},"internal_key":{"type":"string"},"hf_token":{"type":"string"}},"additionalProperties":false,"type":"object","requ
ired":["enabled","port","model","served_name","gpu_utilization"]}},"properties":{"meta":{"$ref":"#/$defs/MetaConfig"},"infrastructure":{"$ref":"#/$defs/InfrastructureConfig"},"services":{"$ref":"#/$defs/ServicesConfig"},"inference":{"$ref":"#/$defs/InferenceConfig"},"monitoring":{"$ref":"#/$defs/MonitoringConfig"}},"additionalProperties":false,"type":"object","required":["meta","infrastructure","services","inference","monitoring"],"title":"Jan Server Configuration","description":"Complete configuration schema for Jan Server infrastructure and services"}
\ No newline at end of file
diff --git a/config/schema/inference.schema.json b/config/schema/inference.schema.json
new file mode 100644
index 00000000..e12ca8c8
--- /dev/null
+++ b/config/schema/inference.schema.json
@@ -0,0 +1 @@
+{"$schema":"https://json-schema.org/draft/2020-12/schema","$id":"https://github.com/janhq/jan-server/pkg/config/inference-config","$defs":{"VLLMConfig":{"properties":{"enabled":{"type":"boolean"},"port":{"type":"integer","maximum":65535,"minimum":1},"model":{"type":"string"},"served_name":{"type":"string"},"gpu_utilization":{"type":"number","maximum":1,"minimum":0},"internal_key":{"type":"string"},"hf_token":{"type":"string"}},"additionalProperties":false,"type":"object","required":["enabled","port","model","served_name","gpu_utilization"]}},"properties":{"vllm":{"$ref":"#/$defs/VLLMConfig"}},"additionalProperties":false,"type":"object","required":["vllm"]}
\ No newline at end of file
diff --git a/config/schema/infrastructure.schema.json b/config/schema/infrastructure.schema.json
new file mode 100644
index 00000000..a091f715
--- /dev/null
+++ b/config/schema/infrastructure.schema.json
@@ -0,0 +1 @@
+{"$schema":"https://json-schema.org/draft/2020-12/schema","$id":"https://github.com/janhq/jan-server/pkg/config/infrastructure-config","$defs":{"AuthConfig":{"properties":{"keycloak":{"$ref":"#/$defs/KeycloakConfig"}},"additionalProperties":false,"type":"object","required":["keycloak"]},"DatabaseConfig":{"properties":{"postgres":{"$ref":"#/$defs/PostgresConfig"}},"additionalProperties":false,"type":"object","required":["postgres"]},"GatewayConfig":{"properties":{"kong":{"$ref":"#/$defs/KongConfig"}},"additionalProperties":false,"type":"object","required":["kong"]},"KeycloakConfig":{"properties":{"base_url":{"type":"string","format":"uri"},"public_url":{"type":"string","format":"uri"},"realm":{"type":"string"},"http_port":{"type":"integer","maximum":65535,"minimum":1},"admin_user":{"type":"string"},"admin_password":{"type":"string"},"admin_realm":{"type":"string"},"admin_client_id":{"type":"string"},"backend_client_id":{"type":"string"},"backend_client_secret":{"type":"string"},"client":{"type":"string"},"oauth_redirect_uri":{"type":"string","format":"uri"},"jwks_url":{"type":"string","format":"uri"},"oidc_discovery_url":{"type":"string","format":"uri"},"issuer":{"type":"string","format":"uri"},"account":{"type":"string"},"refresh_jwks_interval":{"type":"integer"},"auth_clock_skew":{"type":"integer"},"guest_role":{"type":"string"},"features":{"items":{"type":"string"},"type":"array"}},"additionalProperties":false,"type":"object","required":["base_url","public_url","realm","http_port","admin_user","admin_password","admin_realm","admin_client_id","backend_client_id","backend_client_secret","client","oauth_redirect_uri","jwks_url","oidc_discovery_url","issuer","account","refresh_jwks_interval","auth_clock_skew","guest_role","features"]},"KongConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"admin_port":{"type":"integer","maximum":65535,"minimum":1},"admin_url":{"type":"string","format":"uri"},"log_level":{"type":"string","enum":["debug"
,"info","warn","error"]}},"additionalProperties":false,"type":"object","required":["http_port","admin_port","admin_url","log_level"]},"PostgresConfig":{"properties":{"host":{"type":"string"},"port":{"type":"integer","maximum":65535,"minimum":1},"user":{"type":"string"},"database":{"type":"string"},"password":{"type":"string"},"ssl_mode":{"type":"string","enum":["disable","require","verify-ca","verify-full"]},"max_connections":{"type":"integer","maximum":1000,"minimum":1},"max_idle_conns":{"type":"integer","minimum":1},"max_open_conns":{"type":"integer","minimum":1},"conn_max_lifetime":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["host","port","user","database","password","ssl_mode","max_connections","max_idle_conns","max_open_conns","conn_max_lifetime"]}},"properties":{"database":{"$ref":"#/$defs/DatabaseConfig"},"auth":{"$ref":"#/$defs/AuthConfig"},"gateway":{"$ref":"#/$defs/GatewayConfig"}},"additionalProperties":false,"type":"object","required":["database","auth","gateway"]}
\ No newline at end of file
diff --git a/config/schema/monitoring.schema.json b/config/schema/monitoring.schema.json
new file mode 100644
index 00000000..01bf2660
--- /dev/null
+++ b/config/schema/monitoring.schema.json
@@ -0,0 +1 @@
+{"$schema":"https://json-schema.org/draft/2020-12/schema","$id":"https://github.com/janhq/jan-server/pkg/config/monitoring-config","$defs":{"GrafanaConfig":{"properties":{"port":{"type":"integer","maximum":65535,"minimum":1},"admin_user":{"type":"string"},"admin_password":{"type":"string"}},"additionalProperties":false,"type":"object","required":["port","admin_user"]},"JaegerConfig":{"properties":{"ui_port":{"type":"integer","maximum":65535,"minimum":1}},"additionalProperties":false,"type":"object","required":["ui_port"]},"OTELConfig":{"properties":{"enabled":{"type":"boolean"},"tracing_enabled":{"type":"boolean"},"service_name":{"type":"string"},"service_version":{"type":"string"},"endpoint":{"type":"string","format":"uri"},"headers":{"additionalProperties":{"type":"string"},"type":"object"},"sampling_rate":{"type":"number","maximum":1,"minimum":0},"pii_level":{"type":"string","enum":["none","hashed","full"]},"metric_interval":{"type":"string"},"http_port":{"type":"integer","maximum":65535,"minimum":1},"grpc_port":{"type":"integer","maximum":65535,"minimum":1}},"additionalProperties":false,"type":"object","required":["enabled","tracing_enabled","service_name","service_version","endpoint","sampling_rate","pii_level","metric_interval","http_port","grpc_port"]},"PrometheusConfig":{"properties":{"port":{"type":"integer","maximum":65535,"minimum":1}},"additionalProperties":false,"type":"object","required":["port"]}},"properties":{"otel":{"$ref":"#/$defs/OTELConfig"},"prometheus":{"$ref":"#/$defs/PrometheusConfig"},"grafana":{"$ref":"#/$defs/GrafanaConfig"},"jaeger":{"$ref":"#/$defs/JaegerConfig"}},"additionalProperties":false,"type":"object","required":["otel","prometheus","grafana","jaeger"]}
\ No newline at end of file
diff --git a/config/schema/services.schema.json b/config/schema/services.schema.json
new file mode 100644
index 00000000..f7a3d9fe
--- /dev/null
+++ b/config/schema/services.schema.json
@@ -0,0 +1 @@
+{"$schema":"https://json-schema.org/draft/2020-12/schema","$id":"https://github.com/janhq/jan-server/pkg/config/services-config","$defs":{"APIKeyConfig":{"properties":{"prefix":{"type":"string"},"default_ttl":{"type":"integer"},"max_ttl":{"type":"integer"},"max_per_user":{"type":"integer","minimum":1}},"additionalProperties":false,"type":"object","required":["prefix","default_ttl","max_ttl","max_per_user"]},"BatchConfig":{"properties":{"enabled":{"type":"boolean"},"max_size":{"type":"integer","minimum":1},"timeout":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["enabled","max_size","timeout"]},"CacheConfig":{"properties":{"enabled":{"type":"boolean"},"type":{"type":"string","enum":["redis","memory","noop"]},"redis":{"$ref":"#/$defs/RedisCacheConfig"},"memory":{"$ref":"#/$defs/MemoryCacheConfig"}},"additionalProperties":false,"type":"object","required":["enabled","type","redis","memory"]},"CircuitBreakerConfig":{"properties":{"enabled":{"type":"boolean"},"threshold":{"type":"integer","minimum":1},"timeout":{"type":"integer"},"max_concurrent":{"type":"integer","minimum":1}},"additionalProperties":false,"type":"object","required":["enabled","threshold","timeout","max_concurrent"]},"EmbeddingConfig":{"properties":{"base_url":{"type":"string","format":"uri"},"api_key":{"type":"string"},"timeout":{"type":"integer"},"validate_on_startup":{"type":"boolean"},"expected_model":{"type":"string"},"expected_dimension":{"type":"integer","minimum":1},"retry":{"$ref":"#/$defs/RetryConfig"},"cache":{"$ref":"#/$defs/CacheConfig"},"batch":{"$ref":"#/$defs/BatchConfig"},"circuit_breaker":{"$ref":"#/$defs/CircuitBreakerConfig"}},"additionalProperties":false,"type":"object","required":["base_url","timeout","validate_on_startup","expected_model","expected_dimension","retry","cache","batch","circuit_breaker"]},"LLMAPIConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"metrics_port":{"type":"integer","maximum":65535,"minimum":1},"
log_level":{"type":"string","enum":["debug","info","warn","error"]},"log_format":{"type":"string","enum":["json","console"]},"auto_migrate":{"type":"boolean"},"provider_config_file":{"type":"string"},"provider_config_set":{"type":"string"},"provider_configs_enabled":{"type":"boolean"},"api_key":{"$ref":"#/$defs/APIKeyConfig"},"model_provider_secret":{"type":"string"},"model_sync_enabled":{"type":"boolean"},"model_sync_interval_minutes":{"type":"integer","minimum":1},"prompt_orchestration":{"$ref":"#/$defs/PromptOrchestrationConfig"},"media_resolve_url":{"type":"string","format":"uri"},"media_resolve_timeout":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["http_port","metrics_port","log_level","log_format","auto_migrate","provider_config_file","provider_config_set","provider_configs_enabled","api_key","model_sync_enabled","model_sync_interval_minutes","prompt_orchestration","media_resolve_url","media_resolve_timeout"]},"MCPToolsConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"log_level":{"type":"string","enum":["debug","info","warn","error"]},"log_format":{"type":"string","enum":["json","console"]},"search_engine":{"type":"string","enum":["serper","searxng"]},"serper_api_key":{"type":"string"},"searxng_url":{"type":"string","format":"uri"},"vector_store_url":{"type":"string","format":"uri"},"sandbox_fusion_url":{"type":"string","format":"uri"},"sandbox_require_approval":{"type":"boolean"},"mcp_config_file":{"type":"string"}},"additionalProperties":false,"type":"object","required":["http_port","log_level","log_format","search_engine","searxng_url","vector_store_url","sandbox_fusion_url","sandbox_require_approval","mcp_config_file"]},"MediaAPIConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"log_level":{"type":"string","enum":["debug","info","warn","error"]},"max_upload_bytes":{"type":"integer","minimum":1},"retention_days":{"type":"integer","minimum":1},"proxy_download":{"
type":"boolean"},"remote_fetch_timeout":{"type":"integer"},"s3":{"$ref":"#/$defs/S3Config"}},"additionalProperties":false,"type":"object","required":["http_port","log_level","max_upload_bytes","retention_days","proxy_download","remote_fetch_timeout","s3"]},"MemoryCacheConfig":{"properties":{"max_size":{"type":"integer","minimum":1},"ttl":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["max_size","ttl"]},"MemoryToolsConfig":{"properties":{"enabled":{"type":"boolean"},"http_port":{"type":"integer","maximum":65535,"minimum":1},"embedding":{"$ref":"#/$defs/EmbeddingConfig"}},"additionalProperties":false,"type":"object","required":["enabled","http_port","embedding"]},"PromptOrchestrationConfig":{"properties":{"enabled":{"type":"boolean"},"enable_memory":{"type":"boolean"},"enable_templates":{"type":"boolean"},"enable_tools":{"type":"boolean"},"default_persona":{"type":"string"}},"additionalProperties":false,"type":"object","required":["enabled","enable_memory","enable_templates","enable_tools","default_persona"]},"RedisCacheConfig":{"properties":{"url":{"type":"string","format":"uri"},"key_prefix":{"type":"string"},"ttl":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["url","key_prefix","ttl"]},"ResponseAPIConfig":{"properties":{"http_port":{"type":"integer","maximum":65535,"minimum":1},"log_level":{"type":"string","enum":["debug","info","warn","error"]},"llm_api_url":{"type":"string","format":"uri"},"mcp_tools_url":{"type":"string","format":"uri"},"max_tool_depth":{"type":"integer","maximum":20,"minimum":1},"tool_timeout":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["http_port","log_level","llm_api_url","mcp_tools_url","max_tool_depth","tool_timeout"]},"RetryConfig":{"properties":{"enabled":{"type":"boolean"},"max_attempts":{"type":"integer","minimum":1},"initial_backoff":{"type":"integer"},"max_backoff":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["
enabled","max_attempts","initial_backoff","max_backoff"]},"S3Config":{"properties":{"endpoint":{"type":"string","format":"uri"},"public_endpoint":{"type":"string","format":"uri"},"region":{"type":"string"},"bucket":{"type":"string"},"access_key":{"type":"string"},"secret_key":{"type":"string"},"use_path_style":{"type":"boolean"},"presign_ttl":{"type":"integer"}},"additionalProperties":false,"type":"object","required":["endpoint","public_endpoint","region","bucket","use_path_style","presign_ttl"]}},"properties":{"llm_api":{"$ref":"#/$defs/LLMAPIConfig"},"mcp_tools":{"$ref":"#/$defs/MCPToolsConfig"},"media_api":{"$ref":"#/$defs/MediaAPIConfig"},"response_api":{"$ref":"#/$defs/ResponseAPIConfig"},"memory_tools":{"$ref":"#/$defs/MemoryToolsConfig"}},"additionalProperties":false,"type":"object","required":["llm_api","mcp_tools","media_api","response_api","memory_tools"]}
\ No newline at end of file
diff --git a/config/secrets.env.example b/config/secrets.env.example
new file mode 100644
index 00000000..db25b959
--- /dev/null
+++ b/config/secrets.env.example
@@ -0,0 +1,171 @@
+# Secrets Template
+# This file lists all secret variables required by Jan Server
+# DO NOT commit actual secrets to version control
+
+# ============================================================================
+# API Keys & Tokens
+# ============================================================================
+
+# HuggingFace API token (required for model downloads)
+# Get from: https://huggingface.co/settings/tokens
+# Permissions needed: Read access to models
+HF_TOKEN=
+
+# Serper API key (required for MCP google_search tool)
+# Get from: https://serper.dev
+# Free tier: 2,500 searches/month
+SERPER_API_KEY=
+
+# ============================================================================
+# Database Passwords
+# ============================================================================
+
+# PostgreSQL password for LLM API database
+# Minimum 20 characters in production (12 in development); use: letters, numbers, special characters
+POSTGRES_PASSWORD=
+
+# Keycloak admin console password
+# Minimum 20 characters in production (12 in development); use: letters, numbers, special characters
+KEYCLOAK_ADMIN_PASSWORD=
+
+# ============================================================================
+# Service Secrets
+# ============================================================================
+
+# vLLM internal API authentication key
+# Used for API calls between llm-api and vLLM inference server
+# Minimum 32 characters recommended
+VLLM_INTERNAL_KEY=
+
+# Model provider authentication secret
+# Used for securing model provider endpoints
+# Minimum 32 characters recommended
+MODEL_PROVIDER_SECRET=
+
+# OAuth backend client secret
+# Used for Keycloak service account authentication
+# Minimum 32 characters recommended
+BACKEND_CLIENT_SECRET=
+
+# ============================================================================
+# S3 Storage Credentials
+# ============================================================================
+
+# S3-compatible storage credentials (AWS S3, MinIO, etc.)
+# Required for Media API file uploads
+# AWS S3: Get from IAM user or role credentials
+# MinIO: Default is minioadmin/minioadmin (change in production)
+MEDIA_S3_ACCESS_KEY_ID=
+MEDIA_S3_SECRET_ACCESS_KEY=
+
+# ============================================================================
+# Monitoring & Observability
+# ============================================================================
+
+# Grafana admin password
+# Used for Grafana dashboard access
+# Minimum 20 characters recommended
+GRAFANA_ADMIN_PASSWORD=
+
+# ============================================================================
+# Secret Management by Environment
+# ============================================================================
+
+# Development:
+#   - Store in .env file (gitignored)
+#   - Use simple passwords for testing
+#   - Keep HF_TOKEN and SERPER_API_KEY valid for real API calls
+
+# Production:
+#   - Use secret management service:
+#     * AWS Secrets Manager
+#     * Azure Key Vault
+#     * HashiCorp Vault
+#     * Google Secret Manager
+#     * Kubernetes Secrets
+#   - Never store secrets in .env in production
+#   - Use secret rotation
+#   - Audit secret access
+
+# ============================================================================
+# Password Requirements
+# ============================================================================
+
+# Development (Minimum):
+#   - Length: 12 characters
+#   - Complexity: Mix of letters and numbers
+
+# Production (Required):
+#   - Length: 20+ characters
+#   - Complexity: Uppercase, lowercase, numbers, special characters
+#   - Uniqueness: Different for each service
+#   - Rotation: Every 90 days
+
+# ============================================================================
+# Example Secret Generation
+# ============================================================================
+
+# Generate strong passwords:
+#
+# Linux/macOS:
+#   openssl rand -base64 32
+#   pwgen -s 32 1
+#
+# PowerShell:
+#   -join ((48..57) + (65..90) + (97..122) | Get-Random -Count 32 | % {[char]$_})
+#
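+# Online (use with caution; the value is generated by a third party, so prefer the offline commands above for production secrets):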
+#   https://passwordsgenerator.net/
+
+# ============================================================================
+# Security Checklist
+# ============================================================================
+
+# Before deploying:
+# [ ] All secrets are set (no empty values)
+# [ ] Passwords meet minimum requirements
+# [ ] Different passwords for each service
+# [ ] Secrets are stored securely (not in code)
+# [ ] .env file is in .gitignore
+# [ ] Production uses secret management service
+# [ ] Secret rotation policy is in place
+# [ ] Access to secrets is logged and audited
+# [ ] Secrets are encrypted at rest
+# [ ] Secrets are encrypted in transit
+
+# ============================================================================
+# Getting API Keys
+# ============================================================================
+
+# HuggingFace Token:
+#   1. Go to https://huggingface.co/settings/tokens
+#   2. Click "New token"
+#   3. Select "Read" permissions
+#   4. Copy the token (starts with hf_)
+
+# Serper API Key:
+#   1. Go to https://serper.dev
+#   2. Sign up for free account
+#   3. Go to API Keys section
+#   4. Copy your API key
+
+# ============================================================================
+# Troubleshooting
+# ============================================================================
+
+# "Invalid API key" errors:
+#   - Check key is copied correctly (no extra spaces)
+#   - Verify key hasn't expired
+#   - Confirm key has necessary permissions
+
+# "Authentication failed" errors:
+#   - Check password is correct
+#   - Ensure no special characters are breaking shell parsing
+#   - Try wrapping value in quotes if it contains special chars
+
+# Services won't start:
+#   - Verify all required secrets are set
+#   - Check .env file exists and is readable
+#   - Confirm secrets meet minimum length requirements
+
+# ============================================================================
diff --git a/docker-compose.dev-full.yml b/docker-compose.dev-full.yml
new file mode 100644
index 00000000..f2f46337
--- /dev/null
+++ b/docker-compose.dev-full.yml
@@ -0,0 +1,21 @@
+# Docker Compose override for dev-full mode
+# When running services natively on host, Kong needs to route to host.docker.internal
+# Usage: docker compose -f docker-compose.yml -f docker-compose.dev-full.yml up -d
+
+services:
+  llm-api:
+    environment:
+      # Use Docker-internal hostname for server-to-server communication
+      KEYCLOAK_BASE_URL: http://keycloak:8085
+      # Use localhost for browser redirects (public-facing)
+      KEYCLOAK_PUBLIC_URL: http://localhost:8085
+      ISSUER: http://localhost:8085/realms/jan
+      # JWKS_URL uses Docker-internal hostname for token validation
+      JWKS_URL: http://keycloak:8085/realms/jan/protocol/openid-connect/certs
+
+  kong:
+    environment:
+      KONG_DECLARATIVE_CONFIG: /kong/kong-dev-full.yml
+    volumes:
+      - ./kong/kong-dev-full.yml:/kong/kong-dev-full.yml:ro
+      - ./kong/plugins/keycloak-apikey:/opt/kong-plugins/kong/plugins/keycloak-apikey:ro
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..66846654
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,6 @@
+﻿include:
+  - path: docker/infrastructure.yml
+  - path: docker/services-api.yml
+  - path: docker/services-mcp.yml
+  - path: docker/services-memory.yml
+  - path: docker/inference.yml
diff --git a/docker/bge-m3-mock/Dockerfile b/docker/bge-m3-mock/Dockerfile
new file mode 100644
index 00000000..104e85bb
--- /dev/null
+++ b/docker/bge-m3-mock/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+ENV PYTHONUNBUFFERED=1
+ENV PORT=8091
+
+RUN pip install --no-cache-dir flask gunicorn
+
+COPY server.py .
+
+EXPOSE 8091
+# Run via sh so ${PORT} is expanded at start-up (exec form alone does no expansion); exec hands PID 1 to gunicorn.
+CMD ["sh", "-c", "exec gunicorn -b 0.0.0.0:${PORT} server:app"]
diff --git a/docker/bge-m3-mock/server.py b/docker/bge-m3-mock/server.py
new file mode 100644
index 00000000..f9046d47
--- /dev/null
+++ b/docker/bge-m3-mock/server.py
@@ -0,0 +1,57 @@
+import hashlib
+import os
+import random
+from typing import List, Union
+
+from flask import Flask, jsonify, request
+
+app = Flask(__name__)
+
+EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", "1024"))  # vector length; overridable via environment
+
+
+def generate_embedding(text: str) -> List[float]:
+    """Derive a repeatable vector from text: its SHA-256 digest seeds a private PRNG."""
+    digest_hex = hashlib.sha256(text.encode("utf-8")).hexdigest()
+    generator = random.Random(int(digest_hex, 16))
+    return [generator.uniform(-1.0, 1.0) for _ in range(EMBEDDING_DIM)]
+
+
+def normalize_inputs(payload: dict) -> List[str]:
+    """Coerce payload["inputs"] (a string or a list) to a list of strings; anything else yields []."""
+    # A JSON body that is not an object (e.g. a bare list) has no .get(); treat it as "no inputs".
+    inputs: Union[str, List[str]] = payload.get("inputs", []) if isinstance(payload, dict) else []
+    if isinstance(inputs, str):
+        return [inputs]
+    return [str(item) for item in inputs] if isinstance(inputs, list) else []
+
+
+@app.get("/health")
+def health() -> tuple[str, int]:
+    return ("ok", 200)  # plain-text body plus explicit HTTP 200 status
+
+
+@app.get("/info")
+def info():
+    return jsonify(model_id="BAAI/bge-m3")  # fixed model identifier served by this mock
+
+
+@app.post("/embed")
+def embed():
+    """Respond with one deterministic dense vector per entry of the request's "inputs"."""
+    body = request.get_json(force=True, silent=True) or {}
+    vectors = [generate_embedding(item) for item in normalize_inputs(body)]
+    return jsonify(vectors)
+
+
+@app.post("/embed_sparse")
+def embed_sparse():
+    """Respond with a placeholder sparse vector per input; the client only needs valid JSON back."""
+    body = request.get_json(force=True, silent=True) or {}
+    texts = normalize_inputs(body)
+    placeholders = [[{"index": 0, "value": 0.0}] for _ in texts]
+    return jsonify(placeholders)
+
+
+if __name__ == "__main__":  # direct execution: serve on all interfaces at $PORT (default 8091)
+    app.run(host="0.0.0.0", port=int(os.getenv("PORT", "8091")))
diff --git a/docker/dev-full.yml b/docker/dev-full.yml
new file mode 100644
index 00000000..7aa41225
--- /dev/null
+++ b/docker/dev-full.yml
@@ -0,0 +1,79 @@
+# Development Mode Configuration
+# Extends existing services with host.docker.internal networking
+# Allows stopping any service and running it manually on the host for testing
+# Use profile 'dev-full' which is equivalent to 'full' but with dev configuration
+
+services:
+  # Infrastructure - add host.docker.internal
+  api-db:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  keycloak-db:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  keycloak:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  # Kong - use dev-full configuration
+  kong:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      KONG_DATABASE: "off"
+      KONG_DECLARATIVE_CONFIG: /kong/kong-dev-full.yml
+      KONG_LOG_LEVEL: info
+      KONG_PROXY_LISTEN: 0.0.0.0:8000, 0.0.0.0:8443 ssl
+      KONG_ADMIN_LISTEN: 0.0.0.0:8001, 0.0.0.0:8444 ssl
+      KONG_PLUGINS: bundled,keycloak-apikey
+      KONG_LUA_PACKAGE_PATH: /opt/kong-plugins/?.lua;/opt/kong-plugins/?/init.lua;;
+    volumes:
+      - ../kong/kong-dev-full.yml:/kong/kong-dev-full.yml:ro
+      - ../kong/plugins/keycloak-apikey:/opt/kong-plugins/kong/plugins/keycloak-apikey:ro
+
+  # API Services - add host.docker.internal
+  llm-api:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      LOG_LEVEL: ${LOG_LEVEL:-debug}
+      LOG_FORMAT: ${LOG_FORMAT:-console}
+
+  media-api:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      LOG_LEVEL: ${LOG_LEVEL:-debug}
+      LOG_FORMAT: ${LOG_FORMAT:-console}
+
+  response-api:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      LOG_LEVEL: ${LOG_LEVEL:-debug}
+
+  # MCP Services - add host.docker.internal
+  # redis-searxng:
+  #   extra_hosts:
+  #     - "host.docker.internal:host-gateway"
+
+  # searxng:
+  #   extra_hosts:
+  #     - "host.docker.internal:host-gateway"
+
+  vector-store:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  # sandboxfusion:
+  #   extra_hosts:
+  #     - "host.docker.internal:host-gateway"
+
+  mcp-tools:
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    environment:
+      LOG_LEVEL: ${LOG_LEVEL:-debug}
+      LOG_FORMAT: ${LOG_FORMAT:-console}
diff --git a/docker/inference.yml b/docker/inference.yml
new file mode 100644
index 00000000..661379df
--- /dev/null
+++ b/docker/inference.yml
@@ -0,0 +1,99 @@
+# vLLM Inference Services
+# GPU and CPU model inference
+
+volumes:
+  hf-cache:
+  models-cache:
+
+services:
+  # vLLM GPU Inference (requires NVIDIA GPU)
+  vllm-jan-gpu:
+    image: vllm/vllm-openai:v0.11.2
+    pull_policy: always
+    restart: unless-stopped
+    profiles: ["gpu", "full"]
+    env_file:
+      - ${ENV_FILE:-../.env}
+    command: >
+      --model ${VLLM_MODEL:-janhq/Jan-v1-4B}
+      --host ${VLLM_HOST:-0.0.0.0}
+      --port ${VLLM_PORT:-8101}
+      --dtype ${VLLM_DTYPE:-float16}
+      --api-key ${VLLM_API_KEY:-${VLLM_INTERNAL_KEY:-changeme}}
+      --gpu-memory-utilization ${VLLM_GPU_MEMORY_UTIL:-0.66}
+      --enforce-eager
+      --download-dir /models
+    environment:
+      # Hugging Face
+      HF_TOKEN: ${HF_TOKEN}
+      
+      # Sampling Configuration
+      VLLM_USE_FLASHINFER_SAMPLER: ${VLLM_USE_FLASHINFER_SAMPLER:-0}
+      VLLM_DISABLE_FLASHINFER_PREFILL: ${VLLM_DISABLE_FLASHINFER_PREFILL:-1}
+      
+      # Attention Backend
+      # TORCH_SDPA is not registered in vLLM v1; use Triton by default so
+      # older (<SM80) GPUs can start without backend errors.
+      VLLM_ATTENTION_BACKEND: ${VLLM_ATTENTION_BACKEND:-TRITON_ATTN}
+      
+      # Compilation Configuration
+      VLLM_USE_STANDALONE_COMPILE: ${VLLM_USE_STANDALONE_COMPILE:-0}
+      VLLM_DISABLE_COMPILE_CACHE: ${VLLM_DISABLE_COMPILE_CACHE:-1}
+      VLLM_TORCH_COMPILE: ${VLLM_TORCH_COMPILE:-0}
+      
+      # Flash Attention
+      VLLM_USE_FLASH_ATTENTION: ${VLLM_USE_FLASH_ATTENTION:-0}
+      VLLM_USE_FLASHINFER: ${VLLM_USE_FLASHINFER:-0}
+    volumes:
+      - hf-cache:/root/.cache/huggingface
+      - models-cache:/models
+    shm_size: "4g"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    healthcheck:  # uses the same key fallback chain and port as the --api-key/--port flags above
+      test: ["CMD", "curl", "-fsS", "-H", "Authorization: Bearer ${VLLM_API_KEY:-${VLLM_INTERNAL_KEY:-changeme}}", "http://localhost:${VLLM_PORT:-8101}/v1/models"]
+      interval: 10s
+      timeout: 5s
+      retries: 60
+    ports:
+      - "${VLLM_PORT:-8101}:${VLLM_PORT:-8101}"
+
+  # vLLM CPU Inference (slower, no GPU required)
+  vllm-jan-cpu:
+    image: vllm/vllm-openai:latest
+    pull_policy: always
+    restart: unless-stopped
+    profiles: ["cpu"]
+    env_file:
+      - ${ENV_FILE:-../.env}
+    command: >
+      --model ${VLLM_CPU_MODEL:-janhq/Jan-v1-4B}
+      --served-model-name ${VLLM_CPU_SERVED_NAME:-jan-v1-4b}
+      --host ${VLLM_HOST:-0.0.0.0}
+      --port ${VLLM_PORT:-8101}
+      --dtype ${VLLM_CPU_DTYPE:-float32}
+      --api-key ${VLLM_API_KEY:-${VLLM_INTERNAL_KEY:-changeme}}
+      --download-dir /models
+      --enforce-eager
+    environment:
+      # Hugging Face
+      HF_TOKEN: ${HF_TOKEN}
+      
+      # Flash Attention (disabled for CPU)
+      VLLM_USE_FLASHINFER: ${VLLM_USE_FLASHINFER:-0}
+      VLLM_USE_FLASH_ATTENTION: ${VLLM_USE_FLASH_ATTENTION:-0}
+    volumes:
+      - hf-cache:/root/.cache/huggingface
+      - models-cache:/models
+    healthcheck:  # uses the same key fallback chain and port as the --api-key/--port flags above
+      test: ["CMD", "curl", "-fsS", "-H", "Authorization: Bearer ${VLLM_API_KEY:-${VLLM_INTERNAL_KEY:-changeme}}", "http://localhost:${VLLM_PORT:-8101}/v1/models"]
+      interval: 10s
+      timeout: 5s
+      retries: 60
+    ports:
+      - "${VLLM_PORT:-8101}:${VLLM_PORT:-8101}"
diff --git a/docker/infrastructure.yml b/docker/infrastructure.yml
new file mode 100644
index 00000000..14284157
--- /dev/null
+++ b/docker/infrastructure.yml
@@ -0,0 +1,155 @@
+# Core infrastructure services
+# PostgreSQL, Keycloak, Kong
+
+volumes:
+  api-db-data:
+  keycloak-db-data:
+
+networks:
+  default:
+    name: jan-server_default
+  mcp-network:
+    driver: bridge
+    name: jan-server_mcp-network
+
+services:
+  # PostgreSQL Database for LLM API
+  api-db:
+    image: ankane/pgvector:latest
+    restart: unless-stopped
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER:-jan_user}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-jan_password}
+      POSTGRES_DB: ${POSTGRES_DB:-jan_llm_api}
+    ports:
+      - "${POSTGRES_PORT:-5432}:5432"
+    volumes:
+      # Mount at the data directory itself; assumes this image uses a pre-18 postgres base (PGDATA=/var/lib/postgresql/data) - TODO confirm.
+      - api-db-data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-jan_user} -d ${POSTGRES_DB:-jan_llm_api}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    profiles: ["infra", "full"]
+
+  # PostgreSQL Database for Keycloak
+  keycloak-db:
+    image: postgres:18
+    restart: unless-stopped
+    environment:
+      POSTGRES_USER: keycloak
+      POSTGRES_PASSWORD: keycloak
+      POSTGRES_DB: keycloak
+    ports:
+      - "${KEYCLOAK_DB_PORT:-5433}:5432"
+    volumes:
+      - keycloak-db-data:/var/lib/postgresql
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U keycloak"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    profiles: ["infra", "full"]
+
+  # Keycloak Authentication Server
+  keycloak:
+    image: quay.io/keycloak/keycloak:24.0.5
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      # Database
+      KC_DB: postgres
+      KC_DB_URL_HOST: keycloak-db
+      KC_DB_USERNAME: keycloak
+      KC_DB_PASSWORD: keycloak
+      KC_DB_URL_DATABASE: keycloak
+      
+      # Features (aligned with KEYCLOAK_FEATURES in centralized config)
+      KC_FEATURES: ${KEYCLOAK_FEATURES:-token-exchange,preview,admin-fine-grained-authz}
+      
+      # HTTP Configuration
+      KC_HTTP_PORT: ${KEYCLOAK_HTTP_PORT:-8085}
+      KC_HOSTNAME_URL: ${KEYCLOAK_PUBLIC_URL:-http://localhost:8085}
+      KC_HOSTNAME_ADMIN_URL: ${KEYCLOAK_ADMIN_URL:-http://localhost:8085}
+      KC_HOSTNAME_STRICT: "false"
+      KC_HOSTNAME_STRICT_HTTPS: "false"
+      KC_HOSTNAME_STRICT_BACKCHANNEL: "false"
+      KC_PROXY: "edge"
+      KC_HEALTH_ENABLED: "true"
+      KC_HTTP_RELATIVE_PATH: /
+      
+      # Admin Credentials
+      KEYCLOAK_ADMIN: ${KEYCLOAK_ADMIN:-admin}
+      KEYCLOAK_ADMIN_PASSWORD: ${KEYCLOAK_ADMIN_PASSWORD:-admin}
+      
+      # OpenID Connect
+      KC_SPI_LOGIN_PROTOCOL_OPENID_CONNECT_LEGACY_LOGOUT_REDIRECT_URI: "true"
+      
+      # CORS Configuration - Disabled for local development to avoid admin console issues
+      # Note: For production, configure proper CORS origins
+      QUARKUS_HTTP_CORS: "false"
+      
+      # Logging / Debug Configuration
+      QUARKUS_LOG_LEVEL: ${KEYCLOAK_LOG_LEVEL:-INFO}
+      QUARKUS_LOG_CATEGORY__ORG_KEYCLOAK__LEVEL: ${KEYCLOAK_LOG_LEVEL:-DEBUG}
+      QUARKUS_LOG_CONSOLE_ENABLE: "true"
+    command:
+      - start
+      - --import-realm
+      - --hostname-strict=false
+      - --hostname-strict-backchannel=false
+      - --http-enabled=true
+      - --proxy=edge
+      - --proxy-headers=xforwarded
+      - --spi-login-protocol-openid-connect-suppress-logout-confirmation-screen=true
+      - --log-level=${KEYCLOAK_LOG_LEVEL:-INFO}
+    depends_on:
+      keycloak-db:
+        condition: service_healthy
+    ports:
+      - "${KEYCLOAK_HTTP_PORT:-8085}:${KEYCLOAK_HTTP_PORT:-8085}"
+    volumes:
+      - ../keycloak/import:/opt/keycloak/data/import:ro
+      - ../keycloak/init:/opt/keycloak/init:ro
+    healthcheck:  # raw-TCP HTTP probe of /health/ready on the configured KC_HTTP_PORT
+      test: ["CMD-SHELL", "exec 3<>/dev/tcp/localhost/${KEYCLOAK_HTTP_PORT:-8085} && echo -e 'GET /health/ready HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n' >&3 && cat <&3 | grep -q '200 OK'"]
+      interval: 10s
+      timeout: 5s
+      retries: 30
+      start_period: 60s
+    profiles: ["infra", "full"]
+
+  # Kong API Gateway
+  kong:
+    image: kong:3.5
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      # Database Configuration
+      KONG_DATABASE: "off"
+      KONG_DECLARATIVE_CONFIG: /kong/kong.yml
+      
+      # Logging
+      KONG_LOG_LEVEL: ${KONG_LOG_LEVEL:-info}
+      
+      # Proxy Configuration
+      KONG_PROXY_LISTEN: ${KONG_PROXY_LISTEN:-0.0.0.0:8000, 0.0.0.0:8443 ssl}
+      KONG_ADMIN_LISTEN: ${KONG_ADMIN_LISTEN:-0.0.0.0:8001, 0.0.0.0:8444 ssl}
+      
+      # Plugin Configuration
+      KONG_PLUGINS: ${KONG_PLUGINS:-bundled,keycloak-apikey}
+      KONG_LUA_PACKAGE_PATH: ${KONG_LUA_PACKAGE_PATH:-/opt/kong-plugins/?.lua;/opt/kong-plugins/?/init.lua;;}
+    volumes:
+      - ../kong/kong.yml:/kong/kong.yml:ro
+      - ../kong/plugins/keycloak-apikey:/opt/kong-plugins/kong/plugins/keycloak-apikey:ro
+    ports:
+      - "${KONG_HTTP_PORT:-8000}:8000"
+      - "127.0.0.1:${KONG_ADMIN_PORT:-8001}:8001"
+    healthcheck:
+      test: ["CMD", "kong", "health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    profiles: ["infra", "full"]
diff --git a/docker/observability.yml b/docker/observability.yml
new file mode 100644
index 00000000..7dc583f2
--- /dev/null
+++ b/docker/observability.yml
@@ -0,0 +1,73 @@
+# Observability Stack
+# Prometheus, Grafana, Jaeger, OpenTelemetry Collector
+
+volumes:
+  prometheus-data:
+  grafana-data:
+
+services:
+  otel-collector:
+    image: otel/opentelemetry-collector-contrib:0.90.1
+    restart: unless-stopped
+    environment:
+      - ENVIRONMENT=${ENVIRONMENT:-development}
+    env_file:
+      - ${ENV_FILE:-../.env}
+    volumes:
+      - ../monitoring/otel-collector.yaml:/etc/otel-collector.yaml:ro
+    command: ["--config=/etc/otel-collector.yaml"]
+    ports:
+      - "${OTEL_GRPC_PORT:-4317}:4317"
+      - "${OTEL_HTTP_PORT:-4318}:4318"
+      - "${OTEL_METRICS_PORT:-8889}:8889"
+      - "13133:13133"  # Health check endpoint
+      - "1777:1777"    # pprof endpoint
+    depends_on:
+      - jaeger
+
+  prometheus:
+    image: prom/prometheus:v2.48.0
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    volumes:
+      - ../monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - prometheus-data:/prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--web.console.libraries=/etc/prometheus/console_libraries'
+      - '--web.console.templates=/etc/prometheus/consoles'
+      - '--web.enable-lifecycle'
+    ports:
+      - "${PROMETHEUS_PORT:-9090}:9090"
+
+  jaeger:
+    image: jaegertracing/all-in-one:1.51
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      COLLECTOR_OTLP_ENABLED: ${JAEGER_OTLP_ENABLED:-true}
+    ports:
+      - "${JAEGER_UI_PORT:-16686}:16686"
+      - "${JAEGER_COLLECTOR_PORT:-14250}:14250"
+      - "${JAEGER_UDP_PORT:-6831}:6831/udp"
+
+  grafana:
+    image: grafana/grafana:10.2.2
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
+      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
+      GF_USERS_ALLOW_SIGN_UP: ${GRAFANA_ALLOW_SIGNUP:-false}
+    volumes:
+      - grafana-data:/var/lib/grafana
+      - ../monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
+    ports:
+      - "${GRAFANA_PORT:-3331}:3000"
+    depends_on:
+      - prometheus
+      - jaeger
diff --git a/docker/services-api.yml b/docker/services-api.yml
new file mode 100644
index 00000000..a64e1467
--- /dev/null
+++ b/docker/services-api.yml
@@ -0,0 +1,197 @@
+# API Services: LLM API, Media API, Response API
+
+services:
+  llm-api:
+    build: ../services/llm-api
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      # HTTP Server
+      HTTP_PORT: ${HTTP_PORT:-8080}
+      METRICS_PORT: ${METRICS_PORT:-9091}
+      
+      # Database
+      DB_POSTGRESQL_WRITE_DSN: ${DB_POSTGRESQL_WRITE_DSN:-postgres://${POSTGRES_USER:-jan_user}:${POSTGRES_PASSWORD:-jan_password}@${POSTGRES_HOST:-api-db}:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-jan_llm_api}?sslmode=disable}
+      DB_POSTGRESQL_READ1_DSN: ${DB_POSTGRESQL_READ1_DSN:-}
+      
+      # Keycloak / Auth
+      KEYCLOAK_BASE_URL: ${KEYCLOAK_BASE_URL:-http://keycloak:8085}
+      KEYCLOAK_PUBLIC_URL: ${KEYCLOAK_PUBLIC_URL:-}
+      KEYCLOAK_REALM: ${KEYCLOAK_REALM:-jan}
+      KEYCLOAK_ADMIN: ${KEYCLOAK_ADMIN:-admin}
+      KEYCLOAK_ADMIN_PASSWORD: ${KEYCLOAK_ADMIN_PASSWORD:-admin}
+      KEYCLOAK_ADMIN_REALM: ${KEYCLOAK_ADMIN_REALM:-master}
+      KEYCLOAK_ADMIN_CLIENT_ID: ${KEYCLOAK_ADMIN_CLIENT_ID:-admin-cli}
+      BACKEND_CLIENT_ID: ${BACKEND_CLIENT_ID:-backend}
+      BACKEND_CLIENT_SECRET: ${BACKEND_CLIENT_SECRET:-}
+      CLIENT: ${CLIENT:-jan-client}
+      OAUTH_REDIRECT_URI: ${OAUTH_REDIRECT_URI:-http://localhost:8000/auth/callback}
+      JWKS_URL: ${JWKS_URL:-http://keycloak:8085/realms/jan/protocol/openid-connect/certs}
+      ISSUER: ${ISSUER:-http://localhost:8085/realms/jan}  # same default as media-api/response-api AUTH_ISSUER and Keycloak's public URL
+      ACCOUNT: ${ACCOUNT:-account}
+      JWKS_REFRESH_INTERVAL: ${JWKS_REFRESH_INTERVAL:-5m}
+      AUTH_CLOCK_SKEW: ${AUTH_CLOCK_SKEW:-60s}
+      GUEST_ROLE: ${GUEST_ROLE:-guest}
+      
+      # API Keys
+      API_KEY_PREFIX: ${API_KEY_PREFIX:-sk_live}
+      API_KEY_DEFAULT_TTL: ${API_KEY_DEFAULT_TTL:-2160h}
+      API_KEY_MAX_TTL: ${API_KEY_MAX_TTL:-2160h}
+      API_KEY_MAX_PER_USER: ${API_KEY_MAX_PER_USER:-5}
+      
+      # Gateway
+      KONG_ADMIN_URL: ${KONG_ADMIN_URL:-http://kong:8001}
+      
+      # Model Provider
+      MODEL_PROVIDER_SECRET: ${MODEL_PROVIDER_SECRET:-jan-model-provider-secret-2024}
+      JAN_PROVIDER_CONFIGS: ${JAN_PROVIDER_CONFIGS:-true}
+      JAN_PROVIDER_CONFIG_SET: ${JAN_PROVIDER_CONFIG_SET:-default}
+      JAN_PROVIDER_CONFIGS_FILE: ${JAN_PROVIDER_CONFIGS_FILE:-config/providers.yml}
+      
+      # vLLM Provider
+      VLLM_PROVIDER_URL: ${VLLM_PROVIDER_URL:-http://vllm-jan-gpu:8101/v1}
+      VLLM_INTERNAL_KEY: ${VLLM_INTERNAL_KEY:-changeme}
+      VLLM_TOOL_SUPPORT: ${VLLM_TOOL_SUPPORT:-false}
+      
+      # Model Sync
+      MODEL_SYNC_ENABLED: ${MODEL_SYNC_ENABLED:-true}
+      MODEL_SYNC_INTERVAL_MINUTES: ${MODEL_SYNC_INTERVAL_MINUTES:-60}
+      
+      # Logging
+      LOG_LEVEL: ${LOG_LEVEL:-info}
+      LOG_FORMAT: ${LOG_FORMAT:-json}
+      
+      # Features
+      AUTO_MIGRATE: ${AUTO_MIGRATE:-true}
+      
+      # Observability
+      OTEL_ENABLED: ${OTEL_ENABLED:-false}
+      OTEL_SERVICE_NAME: ${OTEL_SERVICE_NAME:-llm-api}
+      OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4318}
+      
+      # Media Integration
+      MEDIA_RESOLVE_URL: ${MEDIA_RESOLVE_URL:-http://kong:8000/media/v1/media/resolve}
+      MEDIA_RESOLVE_TIMEOUT: ${MEDIA_RESOLVE_TIMEOUT:-5s}
+    ports:
+      - "${HTTP_PORT:-8080}:${HTTP_PORT:-8080}"
+    depends_on:
+      api-db:
+        condition: service_healthy
+      keycloak:
+        condition: service_healthy
+      # vllm-jan-gpu:
+      #   condition: service_healthy
+    volumes:
+      - ../services/llm-api/config:/app/config:ro
+      - ../services/llm-api/migrations:/app/migrations:ro
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:${HTTP_PORT:-8080}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+    profiles: ["api", "full"]
+
+  media-api:
+    build: ../services/media-api
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      # HTTP Server (using service-specific port)
+      MEDIA_API_PORT: ${MEDIA_API_PORT:-8285}
+      
+      # Database (using default account)
+      DB_POSTGRESQL_WRITE_DSN: ${MEDIA_DB_POSTGRESQL_WRITE_DSN:-postgres://${POSTGRES_USER:-jan_user}:${POSTGRES_PASSWORD:-jan_password}@${POSTGRES_HOST:-api-db}:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-jan_llm_api}?sslmode=disable}
+      DB_POSTGRESQL_READ1_DSN: ${MEDIA_DB_POSTGRESQL_READ1_DSN:-}
+      
+      # S3 Storage
+      MEDIA_S3_ENDPOINT: ${MEDIA_S3_ENDPOINT:-https://s3.menlo.ai}
+      MEDIA_S3_PUBLIC_ENDPOINT: ${MEDIA_S3_PUBLIC_ENDPOINT:-}
+      MEDIA_S3_REGION: ${MEDIA_S3_REGION:-us-west-2}
+      MEDIA_S3_BUCKET: ${MEDIA_S3_BUCKET:-platform-dev}
+      MEDIA_S3_ACCESS_KEY_ID: ${MEDIA_S3_ACCESS_KEY_ID:-XXXXX}
+      MEDIA_S3_SECRET_ACCESS_KEY: ${MEDIA_S3_SECRET_ACCESS_KEY:-YYYY}
+      MEDIA_S3_USE_PATH_STYLE: ${MEDIA_S3_USE_PATH_STYLE:-true}
+      MEDIA_S3_PRESIGN_TTL: ${MEDIA_S3_PRESIGN_TTL:-5m}
+      
+      # Media Configuration
+      MEDIA_MAX_UPLOAD_BYTES: ${MEDIA_MAX_UPLOAD_BYTES:-20971520}
+      MEDIA_PROXY_DOWNLOAD: ${MEDIA_PROXY_DOWNLOAD:-true}
+      MEDIA_RETENTION_DAYS: ${MEDIA_RETENTION_DAYS:-30}
+      MEDIA_REMOTE_FETCH_TIMEOUT: ${MEDIA_REMOTE_FETCH_TIMEOUT:-15s}
+      
+      # Logging
+      MEDIA_LOG_LEVEL: ${MEDIA_LOG_LEVEL:-info}
+      
+      # Authentication
+      AUTH_ENABLED: "true"
+      AUTH_ISSUER: ${ISSUER:-http://localhost:8085/realms/jan}
+      ACCOUNT: ${ACCOUNT:-account}
+      AUTH_JWKS_URL: ${JWKS_URL:-http://keycloak:8085/realms/jan/protocol/openid-connect/certs}
+    ports:
+      - "${MEDIA_API_PORT:-8285}:${MEDIA_API_PORT:-8285}"
+    depends_on:
+      api-db:
+        condition: service_healthy
+      keycloak:
+        condition: service_healthy
+    volumes:
+      - ../services/media-api/config:/app/config:ro
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:${MEDIA_API_PORT:-8285}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+    profiles: ["api", "full"]
+
+  response-api:
+    build: ../services/response-api
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      # HTTP Server (using service-specific port)
+      RESPONSE_API_PORT: ${RESPONSE_API_PORT:-8082}
+      
+      # Database (using default account)
+      DB_POSTGRESQL_WRITE_DSN: ${RESPONSE_DB_POSTGRESQL_WRITE_DSN:-postgres://${POSTGRES_USER:-jan_user}:${POSTGRES_PASSWORD:-jan_password}@${POSTGRES_HOST:-api-db}:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-jan_llm_api}?sslmode=disable}
+      DB_POSTGRESQL_READ1_DSN: ${RESPONSE_DB_POSTGRESQL_READ1_DSN:-}
+      
+      # Service URLs (with prefixes for clarity)
+      RESPONSE_LLM_API_URL: ${RESPONSE_LLM_API_URL:-http://llm-api:8080}
+      RESPONSE_MCP_TOOLS_URL: ${RESPONSE_MCP_TOOLS_URL:-http://mcp-tools:8091}
+      
+      # Tool Execution
+      RESPONSE_MAX_TOOL_DEPTH: ${RESPONSE_MAX_TOOL_DEPTH:-8}
+      RESPONSE_TOOL_TIMEOUT: ${RESPONSE_TOOL_TIMEOUT:-45s}
+      
+      # Logging
+      RESPONSE_LOG_LEVEL: ${RESPONSE_LOG_LEVEL:-info}
+      
+      # Authentication
+      AUTH_ENABLED: "true"
+      AUTH_ISSUER: ${ISSUER:-http://localhost:8085/realms/jan}
+      ACCOUNT: ${ACCOUNT:-account}
+      AUTH_JWKS_URL: ${JWKS_URL:-http://keycloak:8085/realms/jan/protocol/openid-connect/certs}
+      
+      # Observability
+      OTEL_ENABLED: ${OTEL_ENABLED:-false}
+      OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4318}
+    ports:
+      - "${RESPONSE_API_PORT:-8082}:${RESPONSE_API_PORT:-8082}"
+    depends_on:
+      api-db:
+        condition: service_healthy
+      keycloak:
+        condition: service_healthy
+      # llm-api:
+      #   condition: service_healthy
+      # mcp-tools:
+      #   condition: service_started
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:${RESPONSE_API_PORT:-8082}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+    profiles: ["api", "full"]
diff --git a/docker/services-mcp.yml b/docker/services-mcp.yml
new file mode 100644
index 00000000..30f88f6b
--- /dev/null
+++ b/docker/services-mcp.yml
@@ -0,0 +1,126 @@
+# MCP Tools and Infrastructure Services
+# SearXNG, Vector Store, SandboxFusion, MCP Tools API
+
+services:
+  # Redis for SearXNG caching and rate limiting
+  # redis-searxng:
+  #   image: redis:7-alpine
+  #   restart: unless-stopped
+  #   command: ["redis-server", "--appendonly", "no"]
+  #   networks:
+  #     - mcp-network
+  #   healthcheck:
+  #     test: ["CMD", "redis-cli", "ping"]
+  #     interval: 10s
+  #     timeout: 5s
+  #     retries: 5
+  #   profiles: ["mcp", "full"]
+
+  # # SearXNG - Meta Search Engine (HTTP on 8080 inside container)
+  # searxng:
+  #   image: searxng/searxng:latest
+  #   restart: unless-stopped
+  #   environment:
+  #     SEARXNG_BASE_URL: http://localhost:${SEARXNG_PORT:-8086}/
+  #     SEARXNG_REDIS_URL: redis://redis-searxng:6379/0
+  #   depends_on:
+  #     redis-searxng:
+  #       condition: service_healthy
+  #   ports:
+  #     - "${SEARXNG_PORT:-8086}:8080"
+  #   networks:
+  #     - mcp-network
+  #   healthcheck:
+  #     test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080"]
+  #     interval: 10s
+  #     timeout: 5s
+  #     retries: 5
+  #   profiles: ["mcp", "full"]
+
+  # Lightweight vector store for MCP file search
+  vector-store:
+    build: ../services/mcp-tools/tools/vector-store-service
+    restart: unless-stopped
+    environment:
+      VECTOR_STORE_PORT: ${VECTOR_STORE_PORT:-3015}
+    ports:
+      - "${VECTOR_STORE_PORT:-3015}:3015"
+    networks:
+      - mcp-network
+    # Distroless image - no healthcheck tools available
+    # healthcheck:
+    #   test: ["CMD", "wget", "--spider", "-q", "http://localhost:3015/health"]
+    #   interval: 10s
+    #   timeout: 5s
+    #   retries: 5
+    profiles: ["mcp", "full"]
+
+  # SandboxFusion code interpreter
+  # sandboxfusion:
+  #   image: volcengine/sandbox-fusion:server-20250609
+  #   restart: unless-stopped
+  #   ports:
+  #     - "${SANDBOXFUSION_PORT:-3010}:8080"
+  #   networks:
+  #     - mcp-network
+  #   healthcheck:
+  #     test: ["CMD", "wget", "--spider", "-q", "http://localhost:8080"]
+  #     interval: 10s
+  #     timeout: 5s
+  #     retries: 5
+  #   profiles: ["mcp", "full"]
+
+  # MCP Tools API - Unified MCP interface
+  mcp-tools:
+    build: ../services/mcp-tools
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      # HTTP Server (service-specific port)
+      MCP_TOOLS_HTTP_PORT: ${MCP_TOOLS_HTTP_PORT:-8091}
+      
+      # Search Configuration
+      MCP_SEARCH_ENGINE: ${MCP_SEARCH_ENGINE:-serper}
+      SERPER_API_KEY: ${SERPER_API_KEY:-}
+      
+      # Logging
+      MCP_TOOLS_LOG_LEVEL: ${MCP_TOOLS_LOG_LEVEL:-info}
+      MCP_TOOLS_LOG_FORMAT: ${MCP_TOOLS_LOG_FORMAT:-json}
+      
+      # MCP Provider endpoints (internal Docker network)
+      SEARXNG_URL: ${SEARXNG_URL:-http://searxng:8080}
+      VECTOR_STORE_URL: ${VECTOR_STORE_URL:-http://vector-store:3015}
+      SANDBOXFUSION_URL: ${SANDBOXFUSION_URL:-http://sandboxfusion:8080}
+      MEMORY_TOOLS_URL: ${MEMORY_TOOLS_URL:-http://memory-tools:8090}
+      
+      # MCP Configuration
+      MCP_CONFIG_FILE: ${MCP_CONFIG_FILE:-configs/mcp-providers.yml}
+      MCP_SANDBOX_REQUIRE_APPROVAL: ${MCP_SANDBOX_REQUIRE_APPROVAL:-true}
+      
+      # Authentication
+      AUTH_ENABLED: ${AUTH_ENABLED:-false}
+    ports:
+      - "${MCP_TOOLS_HTTP_PORT:-8091}:${MCP_TOOLS_HTTP_PORT:-8091}"
+    depends_on:
+      # searxng:
+      #   condition: service_healthy
+      vector-store:
+        condition: service_started
+      memory-tools:
+        condition: service_started
+      # sandboxfusion:
+      #   condition: service_started
+    volumes:
+      - ../services/mcp-tools/configs:/app/configs:ro
+    healthcheck:
+      test: ["CMD", "wget", "--spider", "-q", "http://localhost:${MCP_TOOLS_HTTP_PORT:-8091}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - default
+      - mcp-network
+    profiles: ["mcp", "full"]
+
+# Note: Networks are defined in infrastructure.yml and shared across all compose files
diff --git a/docker/services-memory.yml b/docker/services-memory.yml
new file mode 100644
index 00000000..f50f0a0d
--- /dev/null
+++ b/docker/services-memory.yml
@@ -0,0 +1,80 @@
+# Memory Tools Service
+
+services:
+  # Redis cache for embedding vectors (optional)
+  redis-memory:
+    image: redis:7-alpine
+    restart: unless-stopped
+    command: ["redis-server", "--appendonly", "yes", "--maxmemory", "256mb", "--maxmemory-policy", "allkeys-lru"]
+    ports:
+      - "${REDIS_MEMORY_PORT:-6379}:6379"
+    volumes:
+      - redis-memory-data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+    profiles: ["memory-redis", "full"]
+
+  bge-m3:
+    build:
+      context: ./bge-m3-mock
+    environment:
+      PORT: ${EMBEDDING_PORT:-8091}
+    ports:
+      # Expose embedding mock on a host port that won't collide with MCP tools (8091)
+      - "${EMBEDDING_HOST_PORT:-8092}:${EMBEDDING_PORT:-8091}"
+    healthcheck:  # probe the same container port that PORT and the mapping above are set to
+      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:${EMBEDDING_PORT:-8091}/health')"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+    profiles: ["memory-mock"]
+
+  memory-tools:
+    build: ../services/memory-tools
+    restart: unless-stopped
+    env_file:
+      - ${ENV_FILE:-../.env}
+    environment:
+      # HTTP Server
+      MEMORY_TOOLS_PORT: ${MEMORY_TOOLS_PORT:-8090}
+      
+      # Database (PostgreSQL with pgvector)
+      DB_POSTGRESQL_WRITE_DSN: ${MEMORY_DB_POSTGRESQL_WRITE_DSN:-postgres://${POSTGRES_USER:-jan_user}:${POSTGRES_PASSWORD:-jan_password}@${POSTGRES_HOST:-api-db}:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-jan_llm_api}?sslmode=disable}
+      DB_POSTGRESQL_READ1_DSN: ${MEMORY_DB_POSTGRESQL_READ1_DSN:-}
+      
+      # BGE-M3 Embedding Service
+      EMBEDDING_SERVICE_URL: ${EMBEDDING_SERVICE_URL:-http://bge-m3:8091}
+      
+      # Redis Cache (default host is the redis-memory service defined in this file)
+      EMBEDDING_CACHE_TYPE: ${EMBEDDING_CACHE_TYPE:-memory}
+      EMBEDDING_CACHE_REDIS_URL: ${EMBEDDING_CACHE_REDIS_URL:-redis://redis-memory:6379/3}
+      
+      # Logging
+      LOG_LEVEL: ${MEMORY_LOG_LEVEL:-info}
+      LOG_FORMAT: ${MEMORY_LOG_FORMAT:-json}
+      
+      # Observability
+      OTEL_ENABLED: ${OTEL_ENABLED:-false}
+      OTEL_SERVICE_NAME: ${OTEL_SERVICE_NAME:-memory-tools}
+      OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://otel-collector:4318}
+    ports:
+      - "${MEMORY_TOOLS_PORT:-8090}:${MEMORY_TOOLS_PORT:-8090}"
+    depends_on:
+      api-db:
+        condition: service_healthy
+    volumes:
+      - ../services/memory-tools/config:/app/config:ro
+      - ../services/memory-tools/migrations:/app/migrations:ro
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:${MEMORY_TOOLS_PORT:-8090}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+    profiles: ["memory", "mcp", "full"]  # "mcp" included because mcp-tools has a depends_on on this service
+
+volumes:
+  redis-memory-data:
+
diff --git a/docs/Architect.png b/docs/Architect.png
deleted file mode 100644
index b20b3eff..00000000
Binary files a/docs/Architect.png and /dev/null differ
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 00000000..a5bbc8c7
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,86 @@
+# Documentation Hub
+
+Welcome to the Jan Server documentation. Use this page as a map to the rest of the guides.
+
+> New to the project? Start with the [Documentation Index](index.md). 
+> Need to know what was reviewed? See the [Documentation Quality Report](../DOCUMENTATION_QUALITY_REPORT.md).
+
+## Structure
+
+| Section | Description | Key Files |
+|---------|-------------|-----------|
+| **Getting Started** | Five minute setup for Docker Compose | [getting-started/README.md](getting-started/README.md) |
+| **Configuration** | Centralized config system with YAML + env vars | [configuration/](configuration/) |
+| **Guides** | Development, deployment, monitoring, IDE, troubleshooting | [guides/](guides/) |
+| **API Reference** | LLM, Response, Media, MCP Tools APIs | [api/README.md](api/README.md) |
+| **Services** | Responsibilities, ports, dependencies | [architecture/services.md](architecture/services.md) |
+| **Architecture** | System design, security, observability, data flow | [architecture/](architecture/) |
+| **Conventions** | Code standards and workflows | [conventions/conventions.md](conventions/conventions.md) |
+| **Planning** | Roadmaps, initiatives, and completed plans | [planning/README.md](planning/README.md) |
+| **Audits** | Latest documentation review | [Documentation Quality Report](../DOCUMENTATION_QUALITY_REPORT.md) |
+
+## Quick Links
+
+### New users
+- [Quick Start](getting-started/README.md) - Docker-first setup flow
+- [API Overview](api/README.md) - Authentication model and available services
+- [First Request](api/llm-api/README.md#quick-start) - Sample curl request with tokens
+
+### Developers
+- [Development Guide](guides/development.md) - Run services locally (Docker + hybrid)
+- [Configuration System](configuration/README.md) - Type-safe config rules and precedence
+- [Testing Guide](guides/testing.md) - jan-cli api-test collections, targets, and coverage
+- [Hybrid Mode](guides/hybrid-mode.md) - Mix native binaries with Compose services
+- [Service Template](guides/services-template.md) - Generate a new microservice
+- [IDE Setup](guides/ide/vscode.md) - VS Code debugging and launch configs
+
+### API consumers
+- [LLM API](api/llm-api/README.md) - Chat, models, streaming
+- [Response API](api/response-api/README.md) - Multi-step orchestration and tools
+- [Media API](api/media-api/README.md) - Upload, storage, jan_* ID resolution
+- [MCP Tools](api/mcp-tools/README.md) - JSON-RPC endpoints for MCP providers
+- [LLM Examples](api/llm-api/examples.md) - Ready-to-run curl snippets
+
+### Operators
+- [Deployment Guide](guides/deployment.md) - Docker, Kubernetes, and CI/CD paths
+- [Kubernetes Setup](../k8s/SETUP.md) - Helm chart installation steps
+- [Monitoring Guide](guides/monitoring.md) - Grafana, Jaeger, and OTEL collector
+- [Authentication & Gateway](guides/authentication.md) - Kong + Keycloak configuration
+- [Troubleshooting](guides/troubleshooting.md) - Common failure modes and fixes
+- [Security Policy](architecture/security.md) - Responsible disclosure process
+- [Architecture Security](architecture/security.md) - Keycloak, JWT, and network posture
+- [Observability](architecture/observability.md) - Metrics, tracing, logging sinks
+
+## Need help?
+
+| Issue | Resource |
+|-------|----------|
+| Services fail to start | [Troubleshooting Guide](guides/troubleshooting.md) |
+| API errors | [API Reference](api/README.md) |
+| Auth problems | [LLM API Auth](api/llm-api/README.md#authentication) |
+| Performance issues | [Monitoring Guide](guides/monitoring.md) |
+
+## Contributing and Updates
+- Contribution process: [../CONTRIBUTING.md](../CONTRIBUTING.md)
+- Security process: [architecture/security.md](architecture/security.md)
+- Release notes: [../CHANGELOG.md](../CHANGELOG.md)
+
+## Documentation Philosophy
+
+Our documentation follows these principles:
+- **Single Source of Truth**: No duplicate content - each concept is documented once in the most logical location
+- **Clear Separation**: API docs (for users) vs implementation docs (for contributors) vs guides (for developers)
+- **Consistent Naming**: Lowercase with hyphens (e.g., `service-name.md`), except `README.md` and `CONTRIBUTING.md`
+- **Service-First**: Technical and implementation docs live with the service code in `/services/<service>/README.md` (or adjacent docs) so changes stay close to the implementation
+- **User-First**: Guides and API references live in central `/docs` for easy discovery, with templates in `/docs/templates` to keep structure consistent
+
+### Where to Document What
+
+- **API Reference** -> `/docs/api/` - For external users consuming the APIs
+- **Implementation Details** -> `/services/[service-name]/README.md` (or `/services/[service-name]/docs/`) - For contributors working on services
+- **How-To Guides** -> `/docs/guides/` - For developers setting up and using the system
+- **Architecture** -> `/docs/architecture/` - For technical leads and architects
+- **Configuration** -> `/docs/configuration/` - For DevOps and deployment
+- **Planning** -> `/docs/planning/` - For roadmaps and initiatives
+
+Still lost? Jump to the [Documentation Index](index.md) or search within this directory.
diff --git a/docs/api/README.md b/docs/api/README.md
new file mode 100644
index 00000000..9712b025
--- /dev/null
+++ b/docs/api/README.md
@@ -0,0 +1,328 @@
+# API Reference
+
+Complete API documentation for Jan Server services.
+
+## Available APIs
+
+### 1. LLM API (Port 8080)
+OpenAI-compatible API for chat completions, conversations, and models.
+
+**What it does:**
+- Generate AI responses to user messages
+- Manage conversations and chat history
+- Organize conversations in projects
+- List available AI models
+- Handle user authentication
+- Support images via jan_* IDs
+
+**Documentation:**
+- **[Complete Documentation](llm-api/)** - Full API reference, endpoints, examples
+- **[Authentication](llm-api/#authentication)** - Auth methods, API keys, and token management
+- **[Chat Completions](llm-api/#chat-completions)** - Main completion endpoint
+- **[Conversations](llm-api/#conversations)** - Conversation CRUD operations
+- **[Projects](llm-api/#projects)** - Project management for organizing conversations
+- **[Admin Endpoints](llm-api/examples.md#9-admin---provider-management)** - Provider and model catalog management (see examples 9 and 10)
+- **[With Media](llm-api/#with-media-visual-input)** - Media references using `jan_*` IDs
+- **[Examples](llm-api/examples.md)** - cURL, Python, and JavaScript snippets
+
+### 2. Response API (Port 8082)
+Executes tools and generates AI responses for complex tasks.
+
+**What it does:**
+- Run multiple tools in sequence (up to 8 steps)
+- Chain tool outputs together
+- Generate final answers using LLM
+- Track execution time and status
+
+**Documentation:**
+- **[Complete Documentation](response-api/)** - Full API reference, configuration, examples
+- **[Create Response](response-api/#create-response-multi-step-orchestration)** - Main orchestration endpoint
+- **[Tool Execution Flow](response-api/#tool-execution-flow)** - How tools are executed
+- **[Configuration](response-api/#tool-execution-parameters)** - Depth and timeout settings
+
+### 3. Media API (Port 8285)
+Handles image uploads and storage.
+
+**What it does:**
+- Upload images from URLs or base64 data
+- Store images in S3 cloud storage
+- Generate jan_* IDs for images
+- Create temporary download links
+- Prevent duplicate uploads
+
+**Documentation:**
+- **[Complete Documentation](media-api/)** - Full API reference, storage flow, examples
+- **[Upload Media](media-api/#upload-media)** - Upload from remote URL or data URL
+- **[Presigned URL](media-api/#prepare-upload-presigned-url)** - Client-side S3 upload
+- **[Jan ID System](media-api/#jan-id-system)** - Understanding `jan_*` identifiers
+- **[Resolution](media-api/#resolve-media-ids)** - Convert IDs to presigned URLs
+
+### 4. MCP Tools API (Port 8091)
+Provides Model Context Protocol tools for search, scraping, lightweight vector search, and sandboxed execution.
+
+**Available Tools:**
+- **google_search** - Serper/SearXNG-backed web search with filters and location hints
+- **scrape** - Fetch and parse a web page (optional Markdown output)
+- **file_search_index / file_search_query** - Index custom text into the bundled vector store and run similarity queries
+- **python_exec** - Run trusted code via SandboxFusion, returning stdout/stderr/artifacts
+
+**Documentation:**
+- **[Complete Documentation](mcp-tools/)** - Full API reference, tool descriptions, examples
+- **[JSON-RPC Protocol](mcp-tools/#json-rpc-20-protocol)** - Standard protocol format
+- **[Call Tool](mcp-tools/#call-tool)** - Execute any tool
+- **[List Tools](mcp-tools/#list-tools)** - Discover available tools
+- **[Tool Details](mcp-tools/#available-tools)** - Specific tool parameters
+- **[Providers](../services/mcp-tools/mcp-providers.md)** - MCP provider configuration
+- **[Integration](../services/mcp-tools/integration.md)** - Integration guide
+
+## Quick Reference
+
+### Base URLs
+
+| Environment | LLM API | Response API | Media API | MCP Tools | Gateway |
+|-------------|---------|--------------|-----------|-----------|---------|
+| **Local** | http://localhost:8080 | http://localhost:8082 | http://localhost:8285 | http://localhost:8091 | http://localhost:8000 |
+| **Docker** | http://llm-api:8080 | http://response-api:8082 | http://media-api:8285 | http://mcp-tools:8091 | http://kong:8000 |
+
+**Recommended**: Point all public clients at the Kong gateway (port 8000) so authentication, rate limiting, and routing stay consistent. Direct service ports remain available for internal tests but still require JWT/API key headers.
+
+### Authentication
+
+Most API endpoints require authentication. The Kong gateway (port 8000) validates your credentials.
+
+**Two ways to authenticate:**
+1. **Bearer Token**: Get a token from `/llm/auth/guest-login`, then use `Authorization: Bearer <token>` header
+2. **API Key**: Use `X-API-Key: sk_*` header
+
+> Note: API key + JWT validation happens at the Kong gateway. When you call a service directly (8080/8082/8285/8091) you still need to forward a valid JWT issued by Keycloak.
+
+**Quick guest access:**
+
+```bash
+# Request a guest token
+curl -X POST http://localhost:8000/llm/auth/guest-login
+
+# Response
+{
+ "access_token": "eyJhbGci...",
+ "refresh_token": "eyJhbGci...",
+ "expires_in": 300
+}
+
+# Use the token
+curl -H "Authorization: Bearer <access_token>" \
+ http://localhost:8000/v1/models
+```
+
+### Quick Examples
+
+#### Chat Completion
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jan-v1-4b",
+ "messages": [
+ {"role": "user", "content": "Hello!"}
+ ]
+ }'
+```
+
+#### Google Search (MCP)
+
+```bash
+curl -X POST http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "method": "tools/call",
+ "params": {
+ "name": "google_search",
+ "arguments": {"q": "AI news"}
+ }
+ }'
+```
+
+> Calling MCP Tools directly (e.g., `http://localhost:8091/v1/mcp`) is supported for internal testing. Direct calls bypass Kong but must still carry a valid Keycloak-issued JWT, as described under [Authentication](#authentication).
+
+#### List Models
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/models
+```
+
+## API Conventions
+
+### Response Format
+
+All successful responses return JSON:
+
+```json
+{
+ "data": {...},
+ "meta": {...}
+}
+```
+
+### Error Format
+
+All errors follow this structure:
+
+```json
+{
+ "error": {
+ "type": "invalid_request_error",
+ "code": "invalid_parameter",
+ "message": "Parameter 'model' is required",
+ "param": "model",
+ "request_id": "req_123xyz"
+ }
+}
+```
+
+### Error Types
+
+| Type | Description | HTTP Status |
+|------|-------------|-------------|
+| `invalid_request_error` | Invalid request parameters | 400 |
+| `auth_error` | Authentication failed | 401 |
+| `permission_error` | Insufficient permissions | 403 |
+| `not_found_error` | Resource not found | 404 |
+| `rate_limit_error` | Too many requests | 429 |
+| `internal_error` | Server error | 500 |
+
+### Headers
+
+**Request Headers**:
+- `Authorization: Bearer <token>` - Required for authenticated endpoints
+- `Content-Type: application/json` - For POST/PUT requests
+- `Idempotency-Key: <uuid>` - Optional, for idempotent POST requests
+- `X-Request-Id: <uuid>` - Optional, for request tracing
+
+**Response Headers**:
+- `X-Request-Id` - Request identifier for tracing
+- `X-Auth-Method` - Authentication method used (jwt or api_key)
+- `Content-Type: application/json` - JSON response
+- `Content-Type: text/event-stream` - SSE streaming response
+
+### Pagination
+
+List endpoints support pagination:
+
+```bash
+curl "http://localhost:8000/v1/conversations?limit=10&after=conv_123"
+```
+
+Response:
+```json
+{
+ "data": [...],
+ "next_after": "conv_456"
+}
+```
+
+### Streaming
+
+Chat completions support Server-Sent Events (SSE) streaming:
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer <token>" -H "Content-Type: application/json" \
+ -d '{"model":"jan-v1-4b","messages":[...],"stream":true}'
+```
+
+Response:
+```
+data: {"id":"chat-123","choices":[{"delta":{"content":"Hello"}}]}
+
+data: {"id":"chat-123","choices":[{"delta":{"content":"!"}}]}
+
+data: [DONE]
+```
+
+## Interactive API Documentation
+
+Access the interactive Swagger UI:
+
+**Local**: http://localhost:8000/v1/swagger/index.html
+
+Try API calls directly from your browser with built-in authentication.
+
+## SDK & Client Libraries
+
+### Official SDKs
+
+Official SDKs are coming soon. In the meantime, use OpenAI-compatible clients with the Jan Server base URL.
+
+### Community SDKs
+
+Contributions welcome! Jan Server is OpenAI-compatible, so most OpenAI client libraries work with minor configuration changes.
+
+#### Python Example (OpenAI SDK)
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="http://localhost:8000/v1",
+ api_key="your_guest_token_here"
+)
+
+response = client.chat.completions.create(
+ model="jan-v1-4b",
+ messages=[
+ {"role": "user", "content": "Hello!"}
+ ]
+)
+
+print(response.choices[0].message.content)
+```
+
+#### JavaScript Example (OpenAI SDK)
+
+```javascript
+import OpenAI from 'openai';
+
+const client = new OpenAI({
+ baseURL: 'http://localhost:8000/v1',
+ apiKey: 'your_guest_token_here',
+});
+
+const response = await client.chat.completions.create({
+ model: 'jan-v1-4b',
+ messages: [
+ { role: 'user', content: 'Hello!' }
+ ],
+});
+
+console.log(response.choices[0].message.content);
+```
+
+## Rate Limits
+
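+Jan Server services do not enforce rate limits themselves; limits are applied at the Kong gateway (see [LLM API rate limiting](llm-api/#rate-limiting) for the development defaults).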
+
+Production deployments should configure rate limiting via Kong Gateway.
+
+## API Versioning
+
+All APIs are versioned using URL path versioning:
+
+- Current version: `/v1/`
+- Future versions will be: `/v2/`, `/v3/`, etc.
+
+Breaking changes will only occur in new major versions.
+
+## Support
+
+- Docs: [Full Documentation](../README.md)
+- Bugs: [Report API Issues](https://github.com/janhq/jan-server/issues)
+- Discussion: [API Discussions](https://github.com/janhq/jan-server/discussions)
+
+---
+
+**Explore APIs**: [LLM API ->](llm-api/) | [MCP Tools ->](mcp-tools/) | **Interactive Docs**: [Swagger UI ->](http://localhost:8000/v1/swagger/)
diff --git a/docs/api/llm-api/README.md b/docs/api/llm-api/README.md
new file mode 100644
index 00000000..73a94ba9
--- /dev/null
+++ b/docs/api/llm-api/README.md
@@ -0,0 +1,451 @@
+# LLM API Documentation
+
+The LLM API lets you send messages to AI models and get responses. It works like the OpenAI API.
+
+## Quick Start
+
+### URLs
+- **Direct access**: http://localhost:8080
+- **Through gateway** (recommended): http://localhost:8000
+- **Inside Docker**: http://llm-api:8080
+
+### Authentication
+All endpoints need authentication through the Kong gateway at port 8000.
+
+**Get a guest token:**
+
+```bash
+# Get guest token
+curl -X POST http://localhost:8000/llm/auth/guest-login
+
+# Response:
+{
+ "access_token": "eyJhbGc...",
+ "token_type": "Bearer",
+ "expires_in": 3600,
+ "refresh_token": "..."
+}
+
+# Use token in requests
+curl -H "Authorization: Bearer <token>" http://localhost:8000/v1/models
+```
+
+## What You Can Do
+
+- **Chat with AI** - Send messages and get responses (like ChatGPT)
+- **Stream responses** - Get word-by-word output in real-time
+- **Save conversations** - Keep chat history for later
+- **Add images** - Reference images using jan_* IDs
+- **Multiple models** - Works with vLLM, OpenAI, Anthropic, and others
+- **Track everything** - Built-in logging and monitoring
+- **(Future) Prompt Orchestration** - Dynamic prompt composition with memory, templates, and conditional modules (see `docs/todo/prompt-orchestration-todo.md`)
+
+## Service Ports & Configuration
+
+| Component | Port | Environment Variable |
+|-----------|------|---------------------|
+| **HTTP Server** | 8080 | `HTTP_PORT` |
+| **Database** | 5432 | `DB_DSN` |
+| **Keycloak** | 8085 | `KEYCLOAK_BASE_URL` |
+
+### Required Environment Variables
+
+```bash
+HTTP_PORT=8080 # HTTP listen port
+DB_DSN=postgres://jan_user:password@api-db:5432/jan_llm_api?sslmode=disable
+LOG_LEVEL=info # debug, info, warn, error
+LOG_FORMAT=json # json or text
+KEYCLOAK_BASE_URL=http://keycloak:8085 # Keycloak URL
+JWKS_URL=http://keycloak:8085/realms/jan/protocol/openid-connect/certs
+ISSUER=http://localhost:8090/realms/jan # Token issuer
+ACCOUNT=account # JWT audience/account claim
+```
+
+### Optional Configuration
+
+```bash
+OTEL_ENABLED=false # Enable OpenTelemetry
+OTEL_SERVICE_NAME=llm-api # Service name for tracing
+OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # OTLP exporter endpoint (e.g., OTEL collector or Jaeger)
+MEDIA_RESOLVE_URL=http://kong:8000/media/v1/media/resolve # Default Media API resolver via Kong
+MEDIA_RESOLVE_TIMEOUT=5s # Media resolution timeout
+```
+
+> Override `MEDIA_RESOLVE_URL` only if you need to call the Media API directly (e.g., `http://media-api:8285/v1/media/resolve` inside Docker).
+
+## Main Endpoints
+
+### Chat Completions
+
+**POST** `/v1/chat/completions`
+
+OpenAI-compatible chat completion endpoint.
+
+```bash
+# Simple completion
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jan-v1-4b",
+ "messages": [
+ {"role": "user", "content": "Hello!"}
+ ],
+ "temperature": 0.7,
+ "max_tokens": 100
+ }'
+
+# Streaming completion
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jan-v1-4b",
+ "messages": [
+ {"role": "user", "content": "Hello!"}
+ ],
+ "stream": true
+ }'
+```
+
+**Request Parameters:**
+- `model` (required) - Model identifier (e.g., "jan-v1-4b")
+- `messages` (required) - Array of message objects
+ - `role` - "system", "user", or "assistant"
+ - `content` - Text content (string) or content array (for media)
+- `stream` (optional) - Enable streaming responses (default: false)
+- `temperature` (optional) - 0.0-2.0, controls randomness (default: 0.7)
+- `top_p` (optional) - 0.0-1.0, nucleus sampling (default: 1.0)
+- `max_tokens` (optional) - Maximum response length
+- `stop` (optional) - Stop sequences
+
+**Response:**
+```json
+{
+ "id": "chatcmpl-...",
+ "object": "chat.completion",
+ "created": 1699999999,
+ "model": "jan-v1-4b",
+ "choices": [
+ {
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "Hello! How can I help you today?"
+ },
+ "finish_reason": "stop"
+ }
+ ],
+ "usage": {
+ "prompt_tokens": 10,
+ "completion_tokens": 12,
+ "total_tokens": 22
+ }
+}
+```
+
+### Conversations
+
+**GET** `/v1/conversations`
+
+List all conversations for the authenticated user.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/conversations
+```
+
+**Query Parameters:**
+- `limit` (optional) - Number of conversations to return (default: 20)
+- `after` (optional) - Cursor for pagination
+- `order` (optional) - Sort order: "asc" or "desc" (default: "desc")
+
+**POST** `/v1/conversations`
+
+Create a new conversation.
+
+```bash
+curl -X POST -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "title": "My Conversation",
+ "project_id": "proj_123"
+ }' \
+ http://localhost:8000/v1/conversations
+```
+
+**GET** `/v1/conversations/{conv_public_id}`
+
+Get a specific conversation with its items.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/conversations/conv_123
+```
+
+**POST** `/v1/conversations/{conv_public_id}`
+
+Update a conversation (title, archived status).
+
+```bash
+curl -X POST -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{"title": "Updated Title"}' \
+ http://localhost:8000/v1/conversations/conv_123
+```
+
+**DELETE** `/v1/conversations/{conv_public_id}`
+
+Delete a conversation.
+
+```bash
+curl -X DELETE -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/conversations/conv_123
+```
+
+### Conversation Items (Messages)
+
+**GET** `/v1/conversations/{conv_public_id}/items`
+
+List all items (messages) in a conversation.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/conversations/conv_123/items
+```
+
+**POST** `/v1/conversations/{conv_public_id}/items`
+
+Add items (messages) to a conversation.
+
+```bash
+curl -X POST -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "items": [
+ {
+ "type": "message",
+ "role": "user",
+ "content": [
+ {"type": "input_text", "text": "Hello!"}
+ ]
+ }
+ ]
+ }' \
+ http://localhost:8000/v1/conversations/conv_123/items
+```
+
+**GET** `/v1/conversations/{conv_public_id}/items/{item_id}`
+
+Get a specific item from a conversation.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/conversations/conv_123/items/item_456
+```
+
+**DELETE** `/v1/conversations/{conv_public_id}/items/{item_id}`
+
+Delete an item from a conversation.
+
+```bash
+curl -X DELETE -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/conversations/conv_123/items/item_456
+```
+
+### Projects
+
+Projects help organize conversations into logical groups.
+
+**POST** `/v1/projects`
+
+Create a new project.
+
+```bash
+curl -X POST -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Marketing Campaign",
+ "instruction": "You are a marketing expert."
+ }' \
+ http://localhost:8000/v1/projects
+```
+
+**GET** `/v1/projects`
+
+List all projects for the authenticated user.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/projects
+```
+
+**Query Parameters:**
+- `limit` (optional) - Number of projects to return
+- `after` (optional) - Cursor for pagination
+- `order` (optional) - Sort order: "asc" or "desc"
+
+**GET** `/v1/projects/{project_id}`
+
+Get a specific project by ID.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/projects/proj_123
+```
+
+**PATCH** `/v1/projects/{project_id}`
+
+Update a project's name, instruction, or archived status.
+
+```bash
+curl -X PATCH -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Updated Project Name",
+ "instruction": "New instruction text",
+ "archived": false
+ }' \
+ http://localhost:8000/v1/projects/proj_123
+```
+
+**DELETE** `/v1/projects/{project_id}`
+
+Soft-delete a project.
+
+```bash
+curl -X DELETE -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/projects/proj_123
+```
+
+### Models
+
+**GET** `/v1/models`
+
+List all available models.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/models
+```
+
+**GET** `/v1/models/catalogs/{model_public_id}`
+
+Get details for a specific model from the catalog.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/models/catalogs/jan-v1-4b
+```
+
+**GET** `/v1/models/providers`
+
+List all available model providers.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/models/providers
+```
+
+### Health Checks
+
+**GET** `/v1/healthz`
+
+Basic health check endpoint.
+
+```bash
+curl http://localhost:8080/v1/healthz
+```
+
+**GET** `/v1/readyz`
+
+Readiness check endpoint (service ready to accept traffic).
+
+```bash
+curl http://localhost:8080/v1/readyz
+```
+
+**GET** `/v1/version`
+
+Get API version and build information.
+
+```bash
+curl http://localhost:8080/v1/version
+```
+
+## With Media (Visual Input)
+
+Reference media using `jan_*` IDs from the Media API:
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4o-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What is this?"},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0"
+ }
+ }
+ ]
+ }
+ ]
+ }'
+```
+
+Use any vision-capable model you have configured (for local-only setups, point `jan-cli` at a remote provider such as OpenAI, Anthropic, or Qwen VL).
+
+## Related Services
+
+- **Response API** (Port 8082) - Multi-step orchestration using this service
+- **Media API** (Port 8285) - Media resolution for `jan_*` IDs
+- **MCP Tools** (Port 8091) - Tool integration for LLM responses
+- **Kong Gateway** (Port 8000) - API routing and load balancing
+
+## Error Handling
+
+Common HTTP status codes:
+
+| Code | Meaning |
+|------|---------|
+| 200 | Success |
+| 400 | Invalid request parameters |
+| 401 | Unauthorized (invalid/expired token) |
+| 403 | Forbidden (insufficient permissions) |
+| 404 | Resource not found |
+| 429 | Rate limited |
+| 500 | Server error |
+
+Example error response:
+```json
+{
+ "error": {
+ "message": "Invalid model specified",
+ "type": "invalid_request_error",
+ "code": "invalid_model"
+ }
+}
+```
+
+## Rate Limiting
+
+Requests routed through Kong inherit its rate-limiting plugin:
+- Default (development): 100 requests per minute **per client IP** (`kong/kong-dev-full.yml`)
+- Headers: `X-RateLimit-Limit-minute`, `X-RateLimit-Remaining-minute`
+- Exceeding the limit returns HTTP 429
+
+Calling the service directly on port 8080 bypasses the gateway rate limiter (useful for internal health checks).
+
+## See Also
+
+- [Architecture Overview](../../architecture/)
+- [Development Guide](../../guides/development.md)
+- [Testing Guide](../../guides/testing.md)
+- [Monitoring Guide](../../guides/monitoring.md)
diff --git a/docs/api/llm-api/examples.md b/docs/api/llm-api/examples.md
new file mode 100644
index 00000000..de1aa81f
--- /dev/null
+++ b/docs/api/llm-api/examples.md
@@ -0,0 +1,306 @@
+# LLM API Examples
+
+All examples assume `make up-full` is running locally so that Kong Gateway is available at `http://localhost:8000`.
+
+## Prerequisites
+1. Create `.env` from `.env.template` and run `make setup`.
+2. Start the stack: `make up-full`.
+3. Grab a guest token through the Kong gateway:
+ ```bash
+ ACCESS_TOKEN=$(curl -s -X POST http://localhost:8000/llm/auth/guest-login | jq -r '.access_token')
+ export ACCESS_TOKEN
+ ```
+
+All `/v1/*` requests are routed through Kong, which validates Keycloak JWTs or the custom API key plugin (`X-API-Key: sk_*`) before forwarding to the LLM API.
+
+## 1. Basic Chat Completion (cURL)
+```bash
+curl -s -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jan-v1-4b",
+ "messages": [
+ {"role": "user", "content": "Give me a fun fact about Saturn."}
+ ]
+ }' | jq
+```
+
+## 2. Streaming Response (cURL)
+```bash
+curl -N -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jan-v1-4b",
+ "messages": [
+ {"role": "user", "content": "Explain transformers in two sentences."}
+ ],
+ "stream": true
+ }'
+```
+
+## 3. Conversation Management
+```bash
+# Create a conversation
+curl -s -X POST http://localhost:8000/v1/conversations \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{"title":"Docs Demo"}' | jq
+
+# List conversations
+curl -s http://localhost:8000/v1/conversations \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+```
+
+## 4. Python (openai>=1.0)
+```python
+from openai import OpenAI
+
+client = OpenAI(
+ base_url="http://localhost:8000/v1",
+ api_key="YOUR_GUEST_TOKEN"
+)
+
+response = client.chat.completions.create(
+ model="jan-v1-4b",
+ messages=[{"role": "user", "content": "List three cities in France."}]
+)
+
+print(response.choices[0].message.content)
+```
+
+## 5. JavaScript (openai@4)
+```javascript
+import OpenAI from "openai";
+
+const client = new OpenAI({
+ baseURL: "http://localhost:8000/v1",
+ apiKey: process.env.ACCESS_TOKEN,
+});
+
+const response = await client.chat.completions.create({
+ model: "jan-v1-4b",
+ messages: [{ role: "user", content: "What is the Jan Server stack?" }],
+});
+
+console.log(response.choices[0].message.content);
+```
+
+## 6. With Media (jan_* ID)
+```bash
+curl -s -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4o-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "Describe this image"},
+ {"type": "image_url", "image_url": {"url": "jan_01hr0..." }}
+ ]
+ }
+ ]
+ }'
+```
+Replace `jan_01hr0...` with a real `jan_*` ID from Media API.
+
+Use whichever vision-capable model you configured (for example, `gpt-4o-mini` on OpenAI or another provider added via the admin catalog).
+
+## 7. Projects Management
+```bash
+# Create a project
+curl -s -X POST http://localhost:8000/v1/projects \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Marketing Campaign",
+ "instruction": "You are a marketing expert."
+ }' | jq
+
+# List all projects
+curl -s http://localhost:8000/v1/projects \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+
+# Get a specific project
+PROJECT_ID="proj_123"
+curl -s http://localhost:8000/v1/projects/$PROJECT_ID \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+
+# Update project
+curl -s -X PATCH http://localhost:8000/v1/projects/$PROJECT_ID \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Updated Project Name",
+ "archived": false
+ }' | jq
+
+# Create conversation in project
+curl -s -X POST http://localhost:8000/v1/conversations \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "title": "Project Conversation",
+ "project_id": "'"$PROJECT_ID"'"
+ }' | jq
+```
+
+## 8. API Key Management
+```bash
+# Create an API key
+curl -s -X POST http://localhost:8000/llm/auth/api-keys \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "Production Key",
+ "scopes": ["read", "write"]
+ }' | jq
+
+# Save the returned API key (only shown once)
+API_KEY="sk_test_..."
+
+# Use API key instead of Bearer token
+curl -s http://localhost:8000/v1/models \
+ -H "X-API-Key: $API_KEY" | jq
+
+# List all API keys
+curl -s http://localhost:8000/llm/auth/api-keys \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+
+# Delete an API key
+KEY_ID="key_123"
+curl -s -X DELETE http://localhost:8000/llm/auth/api-keys/$KEY_ID \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+```
+
+## 9. Admin - Provider Management
+```bash
+# Requires admin token
+ADMIN_TOKEN="your_admin_token"
+
+# List all providers
+curl -s http://localhost:8000/v1/admin/providers \
+ -H "Authorization: Bearer $ADMIN_TOKEN" | jq
+
+# Register a new provider
+curl -s -X POST http://localhost:8000/v1/admin/providers \
+ -H "Authorization: Bearer $ADMIN_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "name": "OpenAI",
+ "base_url": "https://api.openai.com",
+ "api_key": "sk-..."
+ }' | jq
+
+# Update provider
+PROVIDER_ID="prov_123"
+curl -s -X PATCH http://localhost:8000/v1/admin/providers/$PROVIDER_ID \
+ -H "Authorization: Bearer $ADMIN_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{"enabled": true}' | jq
+```
+
+## 10. Admin - Model Catalog Management
+```bash
+# List all catalog models
+curl -s http://localhost:8000/v1/admin/models/catalogs \
+ -H "Authorization: Bearer $ADMIN_TOKEN" | jq
+
+# Get specific model
+curl -s http://localhost:8000/v1/admin/models/catalogs/jan-v1-4b \
+ -H "Authorization: Bearer $ADMIN_TOKEN" | jq
+
+# Update model catalog
+curl -s -X PATCH http://localhost:8000/v1/admin/models/catalogs/jan-v1-4b \
+ -H "Authorization: Bearer $ADMIN_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{"enabled": true, "featured": true}' | jq
+
+# Bulk toggle models
+curl -s -X POST http://localhost:8000/v1/admin/models/catalogs/bulk-toggle \
+ -H "Authorization: Bearer $ADMIN_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model_ids": ["jan-v1-4b", "gpt-4"],
+ "enabled": true
+ }' | jq
+```
+
+## 11. Conversation Items (Messages)
+```bash
+# Add items to conversation
+CONV_ID="conv_123"
+curl -s -X POST http://localhost:8000/v1/conversations/$CONV_ID/items \
+ -H "Authorization: Bearer $ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "items": [
+ {
+ "type": "message",
+ "role": "user",
+ "content": [
+ {"type": "input_text", "text": "What is AI?"}
+ ]
+ }
+ ]
+ }' | jq
+
+# List conversation items
+curl -s http://localhost:8000/v1/conversations/$CONV_ID/items \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+
+# Get specific item
+ITEM_ID="item_456"
+curl -s http://localhost:8000/v1/conversations/$CONV_ID/items/$ITEM_ID \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+
+# Delete item
+curl -s -X DELETE http://localhost:8000/v1/conversations/$CONV_ID/items/$ITEM_ID \
+ -H "Authorization: Bearer $ACCESS_TOKEN" | jq
+```
+
+## 12. Python - Projects and Conversations
+```python
+import requests
+
+BASE_URL = "http://localhost:8000"
+headers = {"Authorization": "Bearer YOUR_GUEST_TOKEN"}  # replace with the access_token from Prerequisites
+
+# Create project
+project_resp = requests.post(
+ f"{BASE_URL}/v1/projects",
+ headers=headers,
+ json={"name": "AI Research", "instruction": "Focus on recent developments"}
+)
+project_id = project_resp.json()["id"]
+
+# Create conversation in project
+conv_resp = requests.post(
+ f"{BASE_URL}/v1/conversations",
+ headers=headers,
+ json={"title": "GPT-4 Discussion", "project_id": project_id}
+)
+conv_id = conv_resp.json()["id"]
+
+# Add message to conversation
+requests.post(
+ f"{BASE_URL}/v1/conversations/{conv_id}/items",
+ headers=headers,
+ json={
+ "items": [{
+ "type": "message",
+ "role": "user",
+ "content": [{"type": "input_text", "text": "Explain GPT-4"}]
+ }]
+ }
+)
+
+# Get conversation with messages
+conv = requests.get(f"{BASE_URL}/v1/conversations/{conv_id}", headers=headers)
+print(conv.json())
+```
+
+Use these snippets as templates for SDK integrations and tests.
diff --git a/docs/api/llm-api/user-settings-api.md b/docs/api/llm-api/user-settings-api.md
new file mode 100644
index 00000000..76996726
--- /dev/null
+++ b/docs/api/llm-api/user-settings-api.md
@@ -0,0 +1,534 @@
+# User Settings API
+
+## Overview
+
+The User Settings API allows users to control memory features, profile information, advanced features, and other personalization options. Settings are organized into logical groups using JSONB for flexibility.
+
+Prompt orchestration uses profile settings to shape responses: `base_style` drives tone, `custom_instructions` are injected as system guidance, and `nick_name`/`occupation`/`more_about_you` are provided as user context.
+
+## Endpoints
+
+### Get User Settings
+
+**GET** `/v1/users/me/settings`
+
+Retrieves the current user's settings. If no settings exist, returns defaults.
+
+**Headers:**
+- `Authorization: Bearer <access_token>` (required)
+
+**Response:** `200 OK`
+```json
+{
+  "id": 1,
+  "user_id": 123,
+  "memory_config": {
+    "enabled": true,
+    "observe_enabled": true,
+    "inject_user_core": true,
+    "inject_semantic": true,
+    "inject_episodic": false,
+    "max_user_items": 3,
+    "max_project_items": 5,
+    "max_episodic_items": 3,
+    "min_similarity": 0.75
+  },
+  "profile_settings": {
+    "base_style": "Friendly",
+    "custom_instructions": "",
+    "nick_name": "",
+    "occupation": "",
+    "more_about_you": ""
+  },
+  "advanced_settings": {
+    "web_search": false,
+    "code_enabled": false
+  },
+  "enable_trace": false,
+  "enable_tools": true,
+  "preferences": {},
+  "created_at": "2025-11-24T10:00:00Z",
+  "updated_at": "2025-11-24T10:00:00Z"
+}
+```
+
+---
+
+### Update User Settings
+
+**PATCH** `/v1/users/me/settings`
+
+Updates user settings. Only provided fields are updated (partial update). You can update any combination of settings groups.
+
+Profile personalization now includes:
+- `base_style` enum (`Concise`, `Friendly`, `Professional`)
+- Text fields: `custom_instructions`, `nick_name`, `occupation`, `more_about_you`
+- Legacy alias: the API accepts `profile_settings.nickname` on input, but responses always return `nick_name`.
+
+**Headers:**
+- `Authorization: Bearer <access_token>` (required)
+- `Content-Type: application/json`
+
+**Request Body:**
+```json
+{
+  "memory_config": {
+    "enabled": true,
+    "observe_enabled": true,
+    "max_user_items": 5
+  },
+  "profile_settings": {
+    "base_style": "Professional",
+    "nick_name": "Dev",
+    "occupation": "Software Engineer"
+  },
+  "advanced_settings": {
+    "web_search": true
+  },
+  "enable_trace": false,
+  "enable_tools": true
+}
+```
+
+**Response:** `200 OK`
+```json
+{
+  "id": 1,
+  "user_id": 123,
+  "memory_config": {
+    "enabled": true,
+    "observe_enabled": true,
+    "inject_user_core": true,
+    "inject_semantic": true,
+    "inject_episodic": false,
+    "max_user_items": 5,
+    "max_project_items": 5,
+    "max_episodic_items": 3,
+    "min_similarity": 0.75
+  },
+  "profile_settings": {
+    "base_style": "Professional",
+    "custom_instructions": "",
+    "nick_name": "Dev",
+    "occupation": "Software Engineer",
+    "more_about_you": ""
+  },
+  "advanced_settings": {
+    "web_search": true,
+    "code_enabled": false
+  },
+  "enable_trace": false,
+  "enable_tools": true,
+  "preferences": {},
+  "created_at": "2025-11-24T10:00:00Z",
+  "updated_at": "2025-11-24T12:30:00Z"
+}
+```
+
+---
+
+## Field Descriptions
+
+### Memory Configuration (`memory_config`)
+
+All memory-related settings are grouped in the `memory_config` JSONB object:
+
+| Field | Type | Default | Range | Description |
+|-------|------|---------|-------|-------------|
+| `enabled` | boolean | `true` | - | Master toggle for all memory features (observation and retrieval) |
+| `observe_enabled` | boolean | `true` | - | Automatically observe and learn from conversations |
+| `inject_user_core` | boolean | `true` | - | Include user core facts in memory injection |
+| `inject_semantic` | boolean | `true` | - | Include semantic project facts in memory injection |
+| `inject_episodic` | boolean | `false` | - | Include episodic conversation history in memory injection |
+| `max_user_items` | integer | `3` | 0-20 | Maximum user memory items to retrieve |
+| `max_project_items` | integer | `5` | 0-50 | Maximum project facts to retrieve |
+| `max_episodic_items` | integer | `3` | 0-20 | Maximum episodic events to retrieve |
+| `min_similarity` | float | `0.75` | 0.0-1.0 | Minimum relevance score for memory retrieval |
+
+**Note:** Memory injection is controlled by the application-level `PROMPT_ORCHESTRATION_MEMORY` config. The inject flags above control which types of memory are included when injection is enabled.
+
+### Profile Settings (`profile_settings`)
+
+User profile information stored in the `profile_settings` JSONB object:
+
+| Field | Type | Default | Values | Description |
+|-------|------|---------|--------|-------------|
+| `base_style` | enum | `"Friendly"` | `"Concise"`, `"Friendly"`, `"Professional"` | Conversation style preference |
+| `custom_instructions` | string | `""` | - | Additional behavior, style, and tone preferences for the AI |
+| `nick_name` | string | `""` | - | What should Jan call you? (alias: `nickname` accepted on input) |
+| `occupation` | string | `""` | - | Your occupation or role |
+| `more_about_you` | string | `""` | - | Additional information about yourself |
+
+**Base Style Options:**
+- **Concise**: Short, direct responses focused on efficiency
+- **Friendly**: Warm, conversational tone with more personality
+- **Professional**: Formal, business-appropriate communication
+
+### Advanced Settings (`advanced_settings`)
+
+Advanced feature toggles stored in the `advanced_settings` JSONB object:
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `web_search` | boolean | `false` | Let Jan automatically search the web for answers (privacy consideration) |
+| `code_enabled` | boolean | `false` | Enable code execution features (security consideration) |
+
+### Other Top-Level Settings
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enable_trace` | boolean | `false` | Enable OpenTelemetry tracing for requests (debugging) |
+| `enable_tools` | boolean | `true` | Enable MCP tools and function calling |
+| `preferences` | object | `{}` | Flexible JSON for future extensions |
+
+---
+
+## Memory Architecture
+
+### Three-Layer Control System
+
+Memory in Jan uses a three-layer control architecture:
+
+1. **Application Level** (`MEMORY_ENABLED` config)
+   - Enables memory-tools service integration
+   - When `false`, memory features are completely disabled
+   - When `true`, allows memory observation and retrieval
+
+2. **Prompt Orchestration** (`PROMPT_ORCHESTRATION_MEMORY` config)
+   - Controls whether loaded memory is injected into prompts
+   - When `false`, memory is loaded but not automatically added to prompts
+   - When `true`, memory is injected based on user preferences
+
+3. **User Level** (`memory_config.enabled` in user settings)
+   - User opt-in/out for memory features
+   - Controls both observation and retrieval for this specific user
+   - User injection preferences (`inject_user_core`, `inject_semantic`, `inject_episodic`) filter what types of memory are included
+
+### Memory Flow
+
+```
+User Request → Check MEMORY_ENABLED (app)
+             → Check memory_config.enabled (user)
+             → Load memory from memory-tools service
+             → Filter by user injection preferences
+             → Pass to Prompt Processor
+             → Prompt Processor checks PROMPT_ORCHESTRATION_MEMORY
+             → If enabled, inject filtered memory into prompt
+             → Send to LLM
+```
+
+### Memory Observation
+
+Memory observation (learning from conversations) occurs when:
+- `MEMORY_ENABLED` = true (application level)
+- `memory_config.enabled` = true (user level)
+- `memory_config.observe_enabled` = true (user level)
+- Response has `finish_reason` = "stop" (successful completion)
+
+---
+
+## Usage Examples
+
+### Example 1: Disable All Memory Features for a User
+
+```bash
+curl -X PATCH https://api.jan.ai/v1/users/me/settings \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "memory_config": {
+      "enabled": false
+    }
+  }'
+```
+
+### Example 2: Enable Memory with Custom Retrieval Settings
+
+```bash
+curl -X PATCH https://api.jan.ai/v1/users/me/settings \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "memory_config": {
+      "enabled": true,
+      "observe_enabled": true,
+      "max_user_items": 5,
+      "max_project_items": 10,
+      "min_similarity": 0.80
+    }
+  }'
+```
+
+### Example 3: Update Profile Settings
+
+```bash
+curl -X PATCH https://api.jan.ai/v1/users/me/settings \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "profile_settings": {
+      "base_style": "Professional",
+      "custom_instructions": "Please be concise and use code examples",
+      "nick_name": "Dev",
+      "occupation": "Full Stack Developer",
+      "more_about_you": "I work primarily with Go and TypeScript"
+    }
+  }'
+```
+
+### Example 4: Enable Advanced Features
+
+```bash
+curl -X PATCH https://api.jan.ai/v1/users/me/settings \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "advanced_settings": {
+      "web_search": true,
+      "code_enabled": true
+    }
+  }'
+```
+
+### Example 5: Update Multiple Settings Groups at Once
+
+```bash
+curl -X PATCH https://api.jan.ai/v1/users/me/settings \
+  -H "Authorization: Bearer $TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "memory_config": {
+      "enabled": true,
+      "inject_semantic": true,
+      "inject_episodic": false,
+      "max_user_items": 7
+    },
+    "profile_settings": {
+      "base_style": "Concise",
+      "nick_name": "Alex",
+      "occupation": "DevOps Engineer"
+    },
+    "advanced_settings": {
+      "web_search": true
+    },
+    "enable_tools": true,
+    "enable_trace": false
+  }'
+```
+
+---
+
+## Error Responses
+
+### 400 Bad Request
+Invalid request body or validation failure:
+```json
+{
+  "error": "invalid request body",
+  "message": "memory_config.max_user_items must be between 0 and 20"
+}
+```
+
+Validation rules:
+- `profile_settings.base_style`: Must be one of "Concise", "Friendly", or "Professional"
+- `memory_config.max_user_items`: 0-20
+- `memory_config.max_project_items`: 0-50
+- `memory_config.max_episodic_items`: 0-20
+- `memory_config.min_similarity`: 0.0-1.0
+
+### 401 Unauthorized
+Missing or invalid authentication:
+```json
+{
+  "error": "user not authenticated",
+  "message": "user not authenticated"
+}
+```
+
+### 500 Internal Server Error
+Server-side error:
+```json
+{
+  "error": "failed to update settings",
+  "message": "database error"
+}
+```
+
+---
+
+## Frontend Integration
+
+### Settings Page UI Components
+
+#### Profile Section
+```
+Custom Instructions
+[Text area for custom_instructions]
+What behaviors, style, or tone would you like Jan to follow?
+
+What should Jan call you?
+[Input field for nick_name]
+
+Your occupation
+[Input field for occupation]
+
+More about you
+[Text area for more_about_you]
+Tell Jan more about yourself to personalize responses
+```
+
+#### Memory Section
+```
+[ ] Enable Memory Features (memory_config.enabled)
+    ↳ Allows the system to observe and retrieve context from past conversations
+
+    [ ] Observe conversations (memory_config.observe_enabled)
+        ↳ Automatically learn facts from your conversations
+    
+    Memory Types to Include (when injection is enabled):
+    [ ] User Core Facts (memory_config.inject_user_core)
+        ↳ Your profile, preferences, and personal facts
+    [ ] Semantic/Project Facts (memory_config.inject_semantic)
+        ↳ Project-specific information and documentation
+    [ ] Episodic History (memory_config.inject_episodic)
+        ↳ Conversation history and past interactions
+
+    Retrieval Settings
+    Max user memories: [3] (0-20) (memory_config.max_user_items)
+    Max project facts: [5] (0-50) (memory_config.max_project_items)
+    Max episodic items: [3] (0-20) (memory_config.max_episodic_items)
+    Min relevance score: [0.75] (0.0-1.0) (memory_config.min_similarity)
+```
+
+#### Advanced Settings Section
+```
+[ ] Web Search (advanced_settings.web_search)
+    ↳ Let Jan automatically search the web for answers
+    ⚠️ Privacy consideration: May send queries to external services
+
+[ ] Code Execution (advanced_settings.code_enabled)
+    ↳ Enable code execution features
+    ⚠️ Security consideration: Allows execution of code
+```
+
+#### Tools & Developer Section
+```
+[ ] Enable MCP Tools (enable_tools)
+    ↳ Allows agents to use tools like web search, memory retrieval, code execution
+
+[ ] Enable Request Tracing (enable_trace)
+    ↳ Adds OpenTelemetry traces for debugging (may impact performance)
+```
+
+---
+
+## Migration Notes
+
+### Database Migration
+
+Run the migration to create the `user_settings` table with JSONB columns:
+
+```bash
+# From services/llm-api/
+make migrate-up
+
+# Or manually:
+migrate -path ./migrations -database "$DB_DSN" up
+```
+
+### JSONB Storage Structure
+
+The settings are stored in PostgreSQL with the following columns:
+
+**Scalar columns:**
+- `id` (SERIAL PRIMARY KEY)
+- `user_id` (INTEGER, foreign key to users table)
+- `enable_trace` (BOOLEAN, default: false)
+- `enable_tools` (BOOLEAN, default: true)
+- `created_at` (TIMESTAMPTZ)
+- `updated_at` (TIMESTAMPTZ)
+
+**JSONB columns:**
+- `memory_config` (JSONB) - All memory-related settings in one flexible JSON object
+- `profile_settings` (JSONB) - User profile information
+- `advanced_settings` (JSONB) - Advanced feature toggles
+- `preferences` (JSONB) - Flexible JSON object for future extensions (also kept for backward compatibility)
+
+This JSONB approach provides:
+- **Flexibility**: Add new fields without schema migrations
+- **Organization**: Logical grouping of related settings
+- **Efficiency**: A few JSONB columns per user row instead of 15+ individual columns
+- **Partial Updates**: Update only specific settings groups via PATCH
+
+### Default Behavior After Migration
+
+- **New users**: Get default settings automatically on first GET request
+- **Existing users**: Settings created on first access with safe defaults
+- **Memory defaults**: Enabled with observation ON, injection preferences customizable
+- **Profile defaults**: Empty strings, ready for user input
+- **Advanced defaults**: Both OFF for security/privacy (user must opt-in)
+- **Backward compatible**: System works without settings (uses global defaults)
+
+### API Update Requirements
+
+**Breaking changes from old API:**
+- Individual fields like `memory_enabled`, `memory_max_user_items` are now nested in `memory_config` (as `enabled`, `max_user_items`)
+- New fields: `profile_settings` and `advanced_settings` objects
+- `memory_auto_inject` removed (injection controlled by application config + user injection flags)
+
+**Migration for API clients:**
+```javascript
+// Old API format
+{
+  "memory_enabled": true,
+  "memory_max_user_items": 5
+}
+
+// New API format
+{
+  "memory_config": {
+    "enabled": true,
+    "max_user_items": 5
+  }
+}
+```
+
+---
+
+## Summary
+
+The User Settings API provides comprehensive control over:
+
+1. **Memory Configuration** - Enable/disable memory, control observation, set retrieval limits, choose injection types
+2. **Profile Settings** - Base style, custom instructions, nick_name, occupation, personal information
+3. **Advanced Settings** - Web search, code execution (opt-in for security/privacy)
+4. **Feature Toggles** - Tools, tracing, and other system features
+5. **Flexible Preferences** - Extensible JSON for future additions
+
+All settings support partial updates (PATCH), allowing clients to update only specific fields while preserving others. The JSONB storage provides flexibility for adding new settings without database migrations.
+
+## Related Documentation
+
+- [Memory System Documentation](../../../models/references/todos/memory-working-todo.md) - Complete memory system architecture and status
+- [Memory Improvement TODOs](../../../models/references/todos/memory-improvement-todo.md) - Implementation roadmap and progress
+- [MCP Tools Guide](../../guides/mcp-testing.md) - MCP tools integration and testing
+- [Prompt Orchestration](../../guides/prompt-orchestration.md) - Memory injection and prompt processing
+
+---
+
+## Future Extensions
+
+The `preferences` JSON field allows for future settings without schema changes:
+
+```json
+{
+  "preferences": {
+    "ui_theme": "dark",
+    "language": "vi",
+    "notification_email": "user@example.com",
+    "custom_system_prompt": "You are a helpful assistant..."
+  }
+}
+```
+
+New boolean flags or structured settings can be added to the main schema as needed.
diff --git a/docs/api/mcp-tools/README.md b/docs/api/mcp-tools/README.md
new file mode 100644
index 00000000..81ee4891
--- /dev/null
+++ b/docs/api/mcp-tools/README.md
@@ -0,0 +1,346 @@
+# MCP Tools API Documentation
+
+The MCP Tools API provides AI tools for web search, web scraping, vector-store file search, and code execution.
+
+## Quick Start
+
+### URLs
+- **Direct access**: http://localhost:8091
+- **Through gateway**: http://localhost:8000/v1/mcp (Kong also exposes `/mcp/*` and forwards to `/v1/...`)
+- **Inside Docker**: http://mcp-tools:8091
+
+## Available Tools
+- **google_search** - Serper/SearXNG-backed web search with optional filters and location hints
+- **scrape** - Fetch and parse a web page, optionally returning Markdown
+- **file_search_index** / **file_search_query** - Lightweight vector store to index custom text and run similarity queries
+- **python_exec** - Execute trusted code through SandboxFusion (optional approval flag)
+- **External providers** - Additional tools declared in [`services/mcp-tools/mcp-providers.md`](../../services/mcp-tools/mcp-providers.md) are loaded automatically
+
+## How It Works
+
+All tools use JSON-RPC 2.0 protocol. You send a request with tool name and parameters, get back results.
+
+## Service Ports & Configuration
+
+| Component | Port | Key Environment Variables |
+|-----------|------|--------------------------|
+| **HTTP Server** | 8091 | `MCP_TOOLS_HTTP_PORT` |
+| **Search Providers** | 443 | `SERPER_API_KEY`, `MCP_SEARCH_ENGINE`, `SEARXNG_URL` |
+| **Vector Store** | 3015 | `VECTOR_STORE_URL` |
+| **SandboxFusion** | 8080 | `SANDBOXFUSION_URL`, `MCP_SANDBOX_REQUIRE_APPROVAL` |
+
+### Required Environment Variables
+
+```bash
+MCP_TOOLS_HTTP_PORT=8091
+SERPER_API_KEY=your_serper_api_key
+MCP_SEARCH_ENGINE=serper             # serper | searxng | offline
+SEARXNG_URL=http://searxng:8080      # used when MCP_SEARCH_ENGINE=searxng
+VECTOR_STORE_URL=http://vector-store-mcp:3015
+SANDBOXFUSION_URL=http://sandbox-fusion:8080
+OTEL_ENABLED=false
+
+# Auth (optional)
+AUTH_ENABLED=true
+AUTH_ISSUER=http://localhost:8085/realms/jan
+ACCOUNT=account
+AUTH_JWKS_URL=http://keycloak:8085/realms/jan/protocol/openid-connect/certs
+```
+
+### Optional Configuration
+
+```bash
+MCP_TOOLS_LOG_LEVEL=info
+MCP_TOOLS_LOG_FORMAT=json          # json | console
+SANDBOXFUSION_TIMEOUT=30s
+SERPER_DOMAIN_FILTER=example.com,another.com
+SERPER_LOCATION_HINT="California, United States"
+SERPER_OFFLINE_MODE=false
+MCP_SANDBOX_REQUIRE_APPROVAL=true  # force clients to set `approved: true`
+```
+
+## JSON-RPC 2.0 Protocol
+
+All tool calls use JSON-RPC 2.0 format.
+
+### Request Format
+
+```json
+{
+ "jsonrpc": "2.0",
+ "id": 1,
+ "method": "tools/call",
+ "params": {
+ "name": "tool_name",
+ "arguments": {
+ "arg1": "value1",
+ "arg2": "value2"
+ }
+ }
+}
+```
+
+### Response Format
+
+```json
+{
+ "jsonrpc": "2.0",
+ "id": 1,
+ "result": {
+ "content": "Tool output",
+ "is_error": false
+ }
+}
+```
+
+### Error Response
+
+```json
+{
+ "jsonrpc": "2.0",
+ "id": 1,
+ "error": {
+ "code": -32603,
+ "message": "Internal error",
+ "data": "Tool execution failed"
+ }
+}
+```
+
+## MCP Endpoint (`POST /v1/mcp`)
+
+All JSON-RPC requests (initialize, tools/list, tools/call, prompts/list, etc.) go through a single streaming endpoint. When calling through Kong, use `http://localhost:8000/v1/mcp`; direct calls go to `http://localhost:8091/v1/mcp`.
+
+```bash
+curl -N http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 1,
+ "method": "tools/list"
+ }'
+```
+
+Because the service uses `mcp-go`'s streaming HTTP server, responses are sent as Server-Sent Events (SSE). For simple calls you can omit `-N`, but streaming keeps the connection open for multi-part results (tool deltas, long-running sandbox jobs, etc.).
+
+**Response:**
+```json
+{
+ "tools": [
+ {
+ "name": "google_search",
+ "description": "Search Google for query results",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "q": {"type": "string", "description": "Search query"},
+ "num": {"type": "integer", "description": "Number of results", "default": 10}
+ },
+ "required": ["q"]
+ }
+ },
+ {
+ "name": "scrape",
+ "description": "Extract content from a URL",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "url": {"type": "string", "description": "URL to scrape"}
+ },
+ "required": ["url"]
+ }
+ },
+ {
+ "name": "python_exec",
+ "description": "Execute code in a sandboxed environment",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "code": {"type": "string", "description": "Code to execute"},
+ "language": {"type": "string", "enum": ["python", "javascript"], "default": "python"}
+ },
+ "required": ["code"]
+ }
+ }
+ ]
+}
+```
+
+### Health Check
+
+**GET** `/healthz`
+
+```bash
+# Via gateway
+curl http://localhost:8000/mcp/healthz
+
+# Direct
+curl http://localhost:8091/healthz
+```
+
+## Integration with Response API
+
+The Response API uses MCP Tools for multi-step orchestration:
+
+```bash
+# Response API automatically calls MCP tools
+curl -X POST http://localhost:8000/responses/v1/responses \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4o-mini",
+ "input": "Search for Python async programming and summarize top 3 results",
+ "stream": true
+ }'
+
+# Response API orchestrates:
+# 1. Call google_search
+# 2. Optionally call scrape/file_search_query
+# 3. Stream deltas as the LLM produces the final response
+```
+
+## Tool Chaining (via Response API)
+
+The Response API enables tool chaining:
+
+```
+google_search
+  ↓
+scrape (on each result)
+  ↓
+python_exec (if needed for analysis)
+  ↓
+LLM API (final generation)
+```
+
+- **Max Depth**: 8 tool calls
+- **Timeout per Tool**: 45 seconds
+
+## Error Codes
+
+| Code | Message | Meaning |
+|------|---------|---------|
+| -32700 | Parse error | Invalid JSON |
+| -32600 | Invalid Request | Missing method/params |
+| -32601 | Method not found | Unknown tool |
+| -32602 | Invalid params | Invalid parameters |
+| -32603 | Internal error | Tool execution failed |
+| -32000 | Timeout | Tool execution timeout |
+
+## Related Services
+
+- **Response API** (Port 8082) - Tool orchestration
+- **LLM API** (Port 8080) - Final generation
+- **Kong Gateway** (Port 8000) - API routing
+- **SandboxFusion** - Code execution sandbox
+- **Serper / SearXNG** - Web search providers
+- **Provider Configuration**: [services/mcp-tools/mcp-providers.md](../../services/mcp-tools/mcp-providers.md)
+
+## See Also
+
+- [Response API Documentation](../response-api/)
+- [LLM API Documentation](../llm-api/)
+- [Architecture Overview](../../architecture/)
+- [Provider Configuration](../../services/mcp-tools/mcp-providers.md)
+
+## Examples
+
+### Example: List Available Tools
+
+```bash
+curl -s http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 42,
+ "method": "tools/list"
+ }' | jq
+```
+
+### Example: `google_search`
+
+```bash
+curl -s http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 2,
+ "method": "tools/call",
+ "params": {
+ "name": "google_search",
+ "arguments": {"q": "latest AI news", "num": 5}
+ }
+ }'
+```
+
+### Example: `scrape`
+
+```bash
+curl -s http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 3,
+ "method": "tools/call",
+ "params": {
+ "name": "scrape",
+ "arguments": {"url": "https://docs.python.org/3/", "includeMarkdown": true}
+ }
+ }'
+```
+
+### Example: Vector Store
+
+```bash
+# Index a note
+curl -s http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 4,
+ "method": "tools/call",
+ "params": {
+ "name": "file_search_index",
+ "arguments": {
+   "document_id": "notes-1",
+   "text": "Menlo Platform docs live in jan-server/docs/*",
+   "tags": ["docs","menlo"]
+ }
+ }
+ }'
+
+# Query it later
+curl -s http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 5,
+ "method": "tools/call",
+ "params": {
+ "name": "file_search_query"
+ }
+ }'
+```
+
+### Example: Sandbox (`python_exec`)
+
+```bash
+curl -s http://localhost:8000/v1/mcp \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 6,
+ "method": "tools/call",
+ "params": {
+ "name": "python_exec",
+ "arguments": {"code": "import math; print(math.pi)"}
+ }
+ }'
+```
diff --git a/docs/api/media-api/README.md b/docs/api/media-api/README.md
new file mode 100644
index 00000000..d0a54afe
--- /dev/null
+++ b/docs/api/media-api/README.md
@@ -0,0 +1,392 @@
+# Media API Documentation
+
+The Media API handles image uploads and storage.
+
+## Quick Start
+
+### URLs
+- **Direct access**: http://localhost:8285
+- **Through gateway**: http://localhost:8000/media (Kong prefixes `/media` before forwarding)
+- **Inside Docker**: http://media-api:8285
+
+## What You Can Do
+
+- **Upload images** - From URLs or base64 data
+- **Get jan_* IDs** - Unique identifiers for each image
+- **Generate download links** - Temporary URLs that expire after 5 minutes
+- **Prevent duplicates** - Same image uploaded twice gets same ID
+- **Store in S3** - Images saved to cloud storage
+
+## Service Ports & Configuration
+
+| Component | Port | Key Environment Variables |
+|-----------|------|--------------------------|
+| **HTTP Server** | 8285 | `MEDIA_API_PORT` |
+| **Database (PostgreSQL)** | 5432 | `DB_POSTGRESQL_WRITE_DSN`, `DB_POSTGRESQL_READ1_DSN` (optional replica) |
+| **Object Storage (S3-compatible)** | 443 | `MEDIA_STORAGE_BACKEND` (`s3` or `local`), `MEDIA_S3_ENDPOINT`, `MEDIA_S3_BUCKET`, `MEDIA_S3_ACCESS_KEY_ID`, `MEDIA_S3_SECRET_ACCESS_KEY` |
+
+### Required Environment Variables
+
+```bash
+# Core service + database
+MEDIA_API_PORT=8285
+DB_POSTGRESQL_WRITE_DSN=postgres://media:password@api-db:5432/media_api?sslmode=disable
+# Optional read replica
+DB_POSTGRESQL_READ1_DSN=postgres://media_ro:password@api-db-ro:5432/media_api?sslmode=disable
+
+# Auth (enable when fronted by Kong)
+AUTH_ENABLED=true
+AUTH_ISSUER=http://localhost:8085/realms/jan
+ACCOUNT=account
+AUTH_JWKS_URL=http://keycloak:8085/realms/jan/protocol/openid-connect/certs
+
+# Storage backend selection
+MEDIA_STORAGE_BACKEND=s3    # or "local"
+
+# S3 configuration (required when MEDIA_STORAGE_BACKEND=s3)
+MEDIA_S3_BUCKET=platform-dev
+MEDIA_S3_REGION=us-west-2
+MEDIA_S3_ENDPOINT=https://s3.menlo.ai
+MEDIA_S3_ACCESS_KEY_ID=XXXXX
+MEDIA_S3_SECRET_ACCESS_KEY=YYYYY
+MEDIA_S3_USE_PATH_STYLE=true
+```
+
+### Optional Configuration
+
+```bash
+# Public endpoint for download links (falls back to MEDIA_S3_ENDPOINT when empty)
+MEDIA_S3_PUBLIC_ENDPOINT=https://cdn.example.com
+# Presigned URL lifetime
+MEDIA_S3_PRESIGN_TTL=5m
+# Upload limits + retention
+MEDIA_MAX_BYTES=20971520      # 20 MB
+MEDIA_RETENTION_DAYS=30
+MEDIA_REMOTE_FETCH_TIMEOUT=15s
+# Download behavior
+MEDIA_PROXY_DOWNLOAD=true     # stream bytes through the API instead of redirecting
+
+# Local filesystem backend overrides (when MEDIA_STORAGE_BACKEND=local)
+MEDIA_LOCAL_STORAGE_PATH=./media-data
+MEDIA_LOCAL_STORAGE_BASE_URL=http://localhost:8285/v1/files
+```
+
+## Authentication
+
+When accessed through Kong (`http://localhost:8000/media/...`) every request must include either:
+- `Authorization: Bearer <token>` (Keycloak-issued JWT, guest tokens work for dev)
+- `X-API-Key: sk_*` (custom plugin)
+
+Direct calls to port 8285 still honor JWT validation when `AUTH_ENABLED=true` on the service. Use the gateway whenever possible so rate-limiting and CORS policies apply consistently.
+
+## Main Endpoints
+
+### Upload Media
+
+**POST** `/v1/media`
+
+Upload media from a remote URL or base64 data. Examples below go through Kong (recommended); replace the host with `http://localhost:8285` if you need to hit the service directly.
+
+```bash
+# Upload from remote URL
+curl -X POST http://localhost:8000/media/v1/media \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "source": {
+ "type": "remote_url",
+ "url": "https://example.com/image.jpg"
+ },
+ "user_id": "user123"
+ }'
+
+# Upload from data URL (base64 image)
+curl -X POST http://localhost:8000/media/v1/media \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "source": {
+ "type": "data_url",
+ "data_url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
+ },
+ "user_id": "user123"
+ }'
+```
+
+**Response:**
+```json
+{
+ "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "mime": "image/jpeg",
+ "bytes": 45678,
+ "deduped": false,
+ "presigned_url": "https://s3.menlo.ai/platform-dev/images/jan_...?X-Amz-Signature=..."
+}
+```
+
+### Prepare Upload (Presigned URL)
+
+**POST** `/v1/media/prepare-upload`
+
+Get a presigned URL for client-side S3 upload.
+
+```bash
+curl -X POST http://localhost:8000/media/v1/media/prepare-upload \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "content_type": "image/jpeg",
+ "user_id": "user123"
+ }'
+```
+
+### Direct Upload (Local Storage Only)
+
+If `MEDIA_STORAGE_BACKEND=local`, presigned uploads are disabled. Use the multipart endpoint instead:
+
+```bash
+curl -X POST http://localhost:8000/media/v1/media/upload \
+ -H "Authorization: Bearer <token>" \
+ -F "file=@/path/to/image.png" \
+ -F "user_id=user123"
+```
+
+The service converts the upload to a data URL and stores it on disk (`MEDIA_LOCAL_STORAGE_PATH`).
+
+**Response (for `prepare-upload` with the S3 backend):**
+```json
+{
+ "jan_id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "presigned_url": "https://s3.menlo.ai/platform-dev/images/jan_...?X-Amz-Signature=...",
+ "presigned_post": {
+ "url": "https://s3.menlo.ai",
+ "fields": {
+ "key": "images/jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "policy": "...",
+ "x-amz-signature": "...",
+ "x-amz-date": "..."
+ }
+ }
+}
+```
+
+### Resolve Media IDs
+
+**POST** `/v1/media/resolve`
+
+Resolve `jan_*` IDs to presigned URLs.
+
+```bash
+curl -X POST http://localhost:8000/media/v1/media/resolve \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "ids": [
+ "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "jan_01hqr8v9k2x3f4g5h6j7k8m9n1"
+ ]
+ }'
+```
+
+**Response:**
+```json
+{
+ "media": [
+ {
+ "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "presigned_url": "https://s3.menlo.ai/platform-dev/images/jan_...?X-Amz-Signature=...",
+ "expires_at": "2025-11-10T10:35:00Z"
+ }
+ ]
+}
+```
+
+### Get Media
+
+**GET** `/v1/media/{id}`
+
+Retrieve media metadata and presigned URL.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/media/v1/media/jan_01hqr8v9k2x3f4g5h6j7k8m9n0
+```
+
+**Response:**
+```json
+{
+ "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "mime": "image/jpeg",
+ "bytes": 45678,
+ "created_at": "2025-11-10T10:30:00Z",
+ "presigned_url": "https://s3.menlo.ai/...",
+ "expires_at": "2025-11-10T10:35:00Z"
+}
+```
+
+### Get Presigned URL
+
+**GET** `/v1/media/{id}/presign`
+
+Get a temporary signed URL for downloading media by jan_id. This is the dedicated endpoint for obtaining presigned URLs without additional metadata.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/media/v1/media/jan_01hqr8v9k2x3f4g5h6j7k8m9n0/presign
+```
+
+**Response:**
+```json
+{
+ "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "url": "https://s3.menlo.ai/platform-dev/images/jan_...?X-Amz-Signature=...",
+ "expires_in": 300
+}
+```
+
+**Use Cases:**
+- Get download URL after client-side upload via `prepare-upload`
+- Refresh expired presigned URLs
+- Obtain direct S3 access for large file downloads
+- Integration with external services requiring temporary URLs
+
+### Health Check
+
+**GET** `/healthz`
+
+```bash
+# Via gateway
+curl http://localhost:8000/media/healthz
+
+# Direct service port
+curl http://localhost:8285/healthz
+```
+
+## Jan ID System
+
+**Format**: `jan_` prefix + 26-character base32 identifier
+
+### Characteristics
+- **Globally Unique**: No collision across instances
+- **Sortable**: Sequential generation ensures chronological ordering
+- **Opaque**: No encoded information (privacy-preserving)
+- **Example**: `jan_01hqr8v9k2x3f4g5h6j7k8m9n0`
+
+### Usage in Other Services
+
+Reference `jan_*` IDs in LLM API for media:
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4o-mini",
+ "messages": [{
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What is this?"},
+ {
+ "type": "image_url",
+ "image_url": {"url": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0"}
+ }
+ ]
+ }]
+ }'
+```
+
+## Deduplication
+
+Media is deduplicated by content hash (SHA-256):
+
+- **First Upload**: Stored in S3, new `jan_*` ID created
+- **Duplicate Upload**: Returns existing `jan_*` ID, skips S3 storage
+- **Response**: `"deduped": true` indicates existing media
+
+```json
+{
+ "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "deduped": true
+}
+```
+
+## Presigned URL Management
+
+### TTL Configuration
+Default: 5 minutes (300 seconds)
+
+```bash
+MEDIA_S3_PRESIGN_TTL=5m # 5 minutes
+MEDIA_S3_PRESIGN_TTL=30m # 30 minutes
+MEDIA_S3_PRESIGN_TTL=1h # 1 hour
+```
+
+### Expiration
+- URLs are valid for the configured TTL
+- Each resolve/get request generates a new presigned URL
+- Expired URLs are no longer valid; request a fresh one
+
+## Storage Flow
+
+### 1. Remote URL Upload
+```
+Client -> Media API (remote_url)
+ v
+Media API -> Remote Server (fetch)
+ v
+Media API -> S3 (upload)
+ v
+Media API <- S3 (confirmed)
+ v
+Client <- Media API (jan_id + presigned_url)
+```
+
+### 2. Client-Side Direct Upload
+```
+Client -> Media API (prepare-upload request)
+ v
+Media API -> Client (presigned_url + jan_id)
+ v
+Client -> S3 (direct upload using presigned_url)
+ v
+Client <- S3 (upload confirmed)
+ v
+Client -> Media API GET /v1/media/{jan_id}/presign
+ v
+Client <- Media API (download presigned_url)
+```
+
+## Error Handling
+
+| Status | Error | Cause |
+|--------|-------|-------|
+| 400 | Invalid request | Malformed parameters |
+| 401 | Unauthorized | Missing/invalid bearer token |
+| 404 | Not found | Media ID doesn't exist |
+| 413 | Payload too large | Exceeds max file size |
+| 500 | S3 error | Storage operation failed |
+
+Example error:
+```json
+{
+ "error": {
+ "message": "File size exceeds maximum allowed",
+ "type": "size_error",
+ "code": "max_size_exceeded"
+ }
+}
+```
+
+## Related Services
+
+- **LLM API** (Port 8080) - Media resolution
+- **Response API** (Port 8082) - Tool outputs
+- **Kong Gateway** (Port 8000) - API routing
+- **PostgreSQL** - Metadata storage
+- **Menlo S3** - Media storage
+
+## See Also
+
+- [LLM API Documentation](../llm-api/)
+- [Architecture Overview](../../architecture/)
+- [Development Guide](../../guides/development.md)
diff --git a/docs/api/response-api/README.md b/docs/api/response-api/README.md
new file mode 100644
index 00000000..898b4bdb
--- /dev/null
+++ b/docs/api/response-api/README.md
@@ -0,0 +1,689 @@
+# Response API Documentation
+
+The Response API executes tools and generates AI responses for complex tasks.
+
+## Quick Start
+
+### URLs
+- **Direct access**: http://localhost:8082
+- **Through gateway**: http://localhost:8000/responses (Kong prefixes `/responses`)
+- **Inside Docker**: http://response-api:8082
+
+## What You Can Do
+
+- **Run tools automatically** - AI decides which tools to use
+- **Chain tools together** - Use output from one tool as input to another (up to 8 steps)
+- **Get final answers** - LLM generates natural language response from tool results
+- **Track execution** - See which tools ran and how long they took
+
+## Service Ports & Configuration
+
+| Component | Port | Key Environment Variables |
+|-----------|------|--------------------------|
+| **HTTP Server** | 8082 | `RESPONSE_API_PORT` |
+| **Database (PostgreSQL)** | 5432 | `DB_POSTGRESQL_WRITE_DSN`, `DB_POSTGRESQL_READ1_DSN` |
+| **LLM API upstream** | 8080 | `RESPONSE_LLM_API_URL` |
+| **MCP Tools upstream** | 8091 | `RESPONSE_MCP_TOOLS_URL` |
+
+### Required Environment Variables
+
+```bash
+RESPONSE_API_PORT=8082
+DB_POSTGRESQL_WRITE_DSN=postgres://response_api:password@api-db:5432/response_api?sslmode=disable
+# Optional read replica
+DB_POSTGRESQL_READ1_DSN=postgres://response_ro:password@api-db-ro:5432/response_api?sslmode=disable
+
+# Upstream services
+RESPONSE_LLM_API_URL=http://llm-api:8080
+RESPONSE_MCP_TOOLS_URL=http://mcp-tools:8091
+
+# Tool execution limits
+RESPONSE_MAX_TOOL_DEPTH=8
+TOOL_EXECUTION_TIMEOUT=45s
+```
+
+### Optional Configuration
+
+```bash
+RESPONSE_LOG_LEVEL=info
+ENABLE_TRACING=false
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
+
+# Auth (when fronted by Kong or called directly with JWT)
+AUTH_ENABLED=true
+AUTH_ISSUER=http://localhost:8085/realms/jan
+ACCOUNT=account
+AUTH_JWKS_URL=http://keycloak:8085/realms/jan/protocol/openid-connect/certs
+```
+
+## Main Endpoints
+
+### Create Response (Multi-Step Orchestration)
+
+**POST** `/v1/responses`
+
+Create a new response with automatic tool orchestration.
+
+```bash
+curl -X POST http://localhost:8000/responses/v1/responses \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "gpt-4o-mini",
+ "input": "Search for the latest AI news and summarize the top 3 results",
+ "temperature": 0.3,
+ "tool_choice": {"type": "auto"},
+ "stream": false
+ }'
+```
+
+**Request Body (subset of `CreateResponseRequest`):**
+- `model` *(required)* - Model identifier understood by the LLM API/catalog
+- `input` *(required)* - User prompt (string or structured object)
+- `system_prompt` *(optional)* - Instruction prepended before each run
+- `temperature`, `max_tokens` *(optional)* - Generation controls
+- `tools` *(optional)* - Override available tools (OpenAI-compatible format)
+- `tool_choice` *(optional)* - `{ "type": "auto" | "none" | "required", "function": {"name": "tool"} }`
+- `stream` *(optional)* - `true` to receive SSE events
+- `conversation` *(optional)* - Attach to an existing conversation ID
+- `previous_response_id` *(optional)* - Continue from a prior response
+- `metadata`, `user` *(optional)* - Free-form payload that is persisted with the response
+
+**Response:**
+```json
+{
+ "id": "resp_01hqr8v9k2x3f4g5h6j7k8m9n0",
+ "model": "gpt-4o-mini",
+ "input": "Search for the latest AI news and summarize the top 3 results",
+ "output": "Here are the latest AI news items...",
+ "tool_executions": [
+ {
+ "id": "toolexec_123",
+ "tool": "google_search",
+ "input": {"q": "latest AI news", "num": 3},
+ "output": "...",
+ "duration_ms": 250
+ }
+ ],
+ "execution_metadata": {
+ "max_depth": 8,
+ "actual_depth": 1,
+ "total_duration_ms": 2500,
+ "status": "completed"
+ },
+ "created_at": "2025-11-10T10:30:00Z",
+ "updated_at": "2025-11-10T10:30:02.500Z"
+}
+```
+
+### Streaming Responses
+
+Enable `stream: true` to receive incremental events (`text/event-stream`), matching the SSE observer in `services/response-api/internal/interfaces/httpserver/handlers/response_handler.go`.
+
+```bash
+curl -N http://localhost:8000/responses/v1/responses \
+ -H "Authorization: Bearer <token>" \
+ -H "Content-Type: application/json" \
+ -H "Accept: text/event-stream" \
+ -d '{
+ "model": "gpt-4o-mini",
+ "input": "Search for the latest AI news and summarize the top 3 results",
+ "stream": true
+ }'
+```
+
+The stream emits events such as `response.created`, `response.tool_call`, `response.output_text.delta`, and `response.completed`.
+
+### Get Response
+
+**GET** `/v1/responses/{response_id}`
+
+Retrieve a specific response and its execution metadata.
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/responses/v1/responses/resp_01hqr8v9k2x3f4g5h6j7k8m9n0
+```
+
+### Delete Response
+
+**DELETE** `/v1/responses/{response_id}`
+
+```bash
+curl -X DELETE -H "Authorization: Bearer <token>" \
+ http://localhost:8000/responses/v1/responses/resp_01hqr8v9k2x3f4g5h6j7k8m9n0
+```
+
+### Cancel In-Flight Response
+
+**POST** `/v1/responses/{response_id}/cancel`
+
+```bash
+curl -X POST -H "Authorization: Bearer <token>" \
+ http://localhost:8000/responses/v1/responses/resp_01hqr8v9k2x3f4g5h6j7k8m9n0/cancel
+```
+
+### List Input Items (Conversation Replay)
+
+**GET** `/v1/responses/{response_id}/input_items`
+
+Returns the normalized conversation items that were sent to the LLM (useful for replaying the request or for debugging tool runs).
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/responses/v1/responses/resp_01hqr8v9k2x3f4g5h6j7k8m9n0/input_items
+```
+
+> The Response API does **not** currently expose a list endpoint for all responses. Persisted executions can be queried directly from the service database.
+
+### Health Check
+
+**GET** `/healthz`
+
+```bash
+# Gateway
+curl http://localhost:8000/responses/healthz
+
+# Direct
+curl http://localhost:8082/healthz
+```
+
+## Tool Execution Flow
+
+### 1. Request Processing
+- Validate input parameters
+- Check tool availability via MCP Tools
+
+### 2. Tool Discovery
+- Query MCP Tools for available tools
+- Build tool call graph
+
+### 3. Iterative Execution
+- Execute tools in sequence/parallel as needed
+- Apply depth limit (max 8)
+- Apply timeout per tool (45s)
+
+### 4. LLM Delegation
+- Pass tool results to LLM API
+- Generate final response using context
+
+### 5. Result Storage
+- Store execution trace in PostgreSQL
+- Record tool outputs and timing
+- Return complete execution metadata
+
+## Tool Execution Parameters
+
+### Max Tool Execution Depth
+Limits how deep tool calls can chain:
+- **Value**: 1-15 (default: 8)
+- **Meaning**: Maximum recursive depth of tool calls
+- **Example**: search -> extract -> summarize = depth 2
+
+### Tool Execution Timeout
+Per-tool call timeout:
+- **Value**: Duration string (default: 45s)
+- **Example**: "30s", "1m", "500ms"
+- **Behavior**: Cancels tool if it exceeds timeout
+
+## Error Handling
+
+| Status | Error | Cause |
+|--------|-------|-------|
+| 400 | Invalid request | Missing/malformed parameters |
+| 404 | Response not found | Invalid response ID |
+| 408 | Tool execution timeout | Tool exceeded timeout |
+| 500 | Execution error | Tool or LLM error |
+
+Example error:
+```json
+{
+ "error": {
+ "message": "Tool execution exceeded maximum depth",
+ "type": "execution_error",
+ "code": "max_depth_exceeded"
+ }
+}
+```
+
+## Related Services
+
+- **LLM API** (Port 8080) - Generates final response
+- **MCP Tools** (Port 8091) - Tool execution and discovery
+- **Kong Gateway** (Port 8000) - API routing
+- **PostgreSQL** - Execution storage
+
+## Configuration Examples
+
+### Quick Response (Single Tool)
+```bash
+MAX_TOOL_EXECUTION_DEPTH=1 # Single tool call only
+TOOL_EXECUTION_TIMEOUT=15s # Short timeout
+```
+
+### Complex Workflows (Deep Chains)
+```bash
+MAX_TOOL_EXECUTION_DEPTH=8 # Allow up to 8 levels
+TOOL_EXECUTION_TIMEOUT=120s # Long timeout for complex work
+```
+
+## See Also
+
+- [MCP Tools API](../mcp-tools/)
+- [LLM API](../llm-api/)
+- [Architecture Overview](../../architecture/)
+- [Development Guide](../../guides/development.md)
+## Authentication
+
+Requests routed through Kong (`http://localhost:8000/responses/...`) must include either:
+- `Authorization: Bearer <token>` (Keycloak JWT - guest tokens work for local testing)
+- `X-API-Key: sk_*` (custom plugin managed by Kong)
+
+When `AUTH_ENABLED=true` the service also validates JWTs on port 8082. Use the gateway path whenever possible for rate limiting and centralized logging.
+
+## Background Mode
+
+The Response API supports OpenAI-compatible background mode for asynchronous response generation. This allows clients to submit long-running requests without holding open HTTP connections.
+
+### Architecture
+
+**Components:**
+1. **PostgreSQL-backed Queue**: Uses the `responses` table with `SELECT ... FOR UPDATE SKIP LOCKED` for reliable task distribution
+2. **Worker Pool**: Fixed-size pool of background workers (default: 4) that poll for queued tasks
+3. **Webhook Notifications**: HTTP POST callbacks when tasks complete or fail
+4. **Graceful Cancellation**: Queued or in-progress tasks can be cancelled
+
+**Task Lifecycle:**
+```
+Client Request (background=true, store=true)
+    ↓
+Create Response (status=queued, queued_at=now)
+    ↓
+Return Response Immediately (201 Created)
+    ↓
+Worker Dequeues Task
+    ↓
+Mark Processing (status=in_progress, started_at=now)
+    ↓
+Execute LLM Orchestration with Tool Calls
+    ↓
+Update Status (completed/failed, completed_at=now)
+    ↓
+Send Webhook Notification (async, non-blocking)
+```
+
+### Configuration
+
+Add these environment variables to enable background mode:
+
+```bash
+# Worker Pool
+BACKGROUND_WORKER_COUNT=4        # Number of concurrent workers
+BACKGROUND_POLL_INTERVAL=2s      # How often workers check for queued tasks
+BACKGROUND_TASK_TIMEOUT=600s     # Max execution time per task (10 minutes)
+
+# Webhook Delivery
+WEBHOOK_MAX_RETRIES=3            # Retry attempts for failed webhooks
+WEBHOOK_RETRY_DELAY=2s           # Delay between retry attempts
+WEBHOOK_TIMEOUT=10s              # HTTP timeout per webhook attempt
+WEBHOOK_USER_AGENT=jan-response-api/1.0
+```
+
+**Recommended Settings:**
+
+| Environment | Workers | Poll Interval | Task Timeout | Use Case |
+|-------------|---------|---------------|--------------|----------|
+| Development | 2-4 | 2s | 600s (10m) | Local testing, fast iteration |
+| Production | 8-16 | 5s | 1200s (20m) | High throughput, complex tasks |
+| High-load | 16-32 | 3s | 900s (15m) | Many concurrent tasks |
+
+### API Usage
+
+#### Creating a Background Response
+
+Add `"background": true` and `"store": true` to any response request:
+
+**Request:**
+```bash
+curl -X POST http://localhost:8000/responses/v1/responses \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4",
+    "input": "Write a comprehensive analysis of quantum computing trends",
+    "background": true,
+    "store": true,
+    "metadata": {
+      "webhook_url": "https://example.com/webhooks/responses",
+      "user_id": "user_123"
+    }
+  }'
+```
+
+**Response (201 Created):**
+```json
+{
+  "id": "resp_abc123",
+  "object": "response",
+  "status": "queued",
+  "background": true,
+  "store": true,
+  "queued_at": 1705315800,
+  "created_at": 1705315800,
+  "model": "gpt-4",
+  "input": "Write a comprehensive analysis...",
+  "metadata": {
+    "webhook_url": "https://example.com/webhooks/responses",
+    "user_id": "user_123"
+  }
+}
+```
+
+#### Polling for Status
+
+Use the standard GET endpoint to check task status:
+
+**Request:**
+```bash
+curl -H "Authorization: Bearer <token>" \
+  http://localhost:8000/responses/v1/responses/resp_abc123
+```
+
+**Response (Queued):**
+```json
+{
+  "id": "resp_abc123",
+  "status": "queued",
+  "queued_at": 1705315800,
+  ...
+}
+```
+
+**Response (In Progress):**
+```json
+{
+  "id": "resp_abc123",
+  "status": "in_progress",
+  "queued_at": 1705315800,
+  "started_at": 1705315805,
+  ...
+}
+```
+
+**Response (Completed):**
+```json
+{
+  "id": "resp_abc123",
+  "status": "completed",
+  "output": "The comprehensive analysis of quantum computing trends...",
+  "usage": {
+    "prompt_tokens": 150,
+    "completion_tokens": 500,
+    "total_tokens": 650
+  },
+  "queued_at": 1705315800,
+  "started_at": 1705315805,
+  "completed_at": 1705316122,
+  "tool_executions": [...],
+  ...
+}
+```
+
+#### Cancelling a Background Task
+
+Use the cancel endpoint:
+
+**Request:**
+```bash
+curl -X POST -H "Authorization: Bearer <token>" \
+  http://localhost:8000/responses/v1/responses/resp_abc123/cancel
+```
+
+**Response:**
+```json
+{
+  "id": "resp_abc123",
+  "status": "cancelled",
+  "cancelled_at": 1705315860,
+  ...
+}
+```
+
+**Cancellation Behavior:**
+- If status is `queued`: Immediately marks cancelled, prevents worker pickup
+- If status is `in_progress`: Marks cancelled, but task may complete normally (cooperative cancellation)
+- If status is `completed` or `failed`: No-op, returns current state
+
+### Webhook Notifications
+
+When a background task completes or fails, the Response API sends an HTTP POST to the webhook URL specified in `metadata.webhook_url`.
+
+**Webhook Payload (Completed):**
+```json
+{
+  "id": "resp_abc123",
+  "event": "response.completed",
+  "status": "completed",
+  "output": "The response content...",
+  "usage": {
+    "prompt_tokens": 150,
+    "completion_tokens": 500,
+    "total_tokens": 650
+  },
+  "tool_executions": [...],
+  "metadata": {
+    "webhook_url": "https://example.com/webhooks/responses",
+    "user_id": "user_123"
+  },
+  "queued_at": 1705315800,
+  "started_at": 1705315805,
+  "completed_at": 1705316122
+}
+```
+
+**Webhook Payload (Failed):**
+```json
+{
+  "id": "resp_abc123",
+  "event": "response.failed",
+  "status": "failed",
+  "error": {
+    "code": "execution_failed",
+    "message": "LLM provider timeout after 600s"
+  },
+  "metadata": {
+    "webhook_url": "https://example.com/webhooks/responses",
+    "user_id": "user_123"
+  },
+  "queued_at": 1705315800,
+  "started_at": 1705315805,
+  "completed_at": 1705316405
+}
+```
+
+**Webhook HTTP Headers:**
+- `Content-Type: application/json`
+- `User-Agent: jan-response-api/1.0`
+- `X-Jan-Event: response.completed` (or `response.failed`)
+- `X-Jan-Response-ID: resp_abc123`
+
+**Webhook Delivery:**
+- **Method**: HTTP POST
+- **Retries**: Up to 3 attempts with 2-second delays
+- **Timeout**: 10 seconds per attempt
+- **Non-blocking**: Webhook failures are logged but don't affect task completion
+- **Status Codes**: 2xx considered success, all others trigger retry
+
+### Background Mode Constraints
+
+- **Requires store=true**: Background tasks must be persisted to the database
+- **API Key Storage**: The user's API key (Bearer token or X-API-Key header) is stored securely and used for LLM API calls during background execution
+- **Task Timeout**: Tasks exceeding `BACKGROUND_TASK_TIMEOUT` will be marked as failed
+- **Queue Ordering**: Tasks are processed in FIFO order based on `queued_at` timestamp
+- **No Streaming**: Background mode is incompatible with `stream: true`
+- **Worker Restart**: In-progress tasks may fail if workers restart (status will show `failed`)
+
+### Status Transitions
+
+```
+queued → in_progress → completed
+queued → in_progress → failed
+queued → cancelled
+in_progress → cancelled (cooperative)
+```
+
+**Valid Status Values:**
+- `queued` - Task waiting for worker
+- `in_progress` - Worker currently executing
+- `completed` - Successfully finished
+- `failed` - Error during execution
+- `cancelled` - Cancelled by user
+
+### Testing Background Mode
+
+**Quick Test:**
+```bash
+# 1. Create background task
+RESP_ID=$(curl -s -X POST http://localhost:8000/responses/v1/responses \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4",
+    "input": "Write a haiku about coding",
+    "background": true,
+    "store": true,
+    "metadata": {"webhook_url": "https://webhook.site/your-id"}
+  }' | jq -r '.id')
+
+echo "Created task: $RESP_ID"
+
+# 2. Poll until complete
+while true; do
+  STATUS=$(curl -s -H "Authorization: Bearer <token>" \
+    "http://localhost:8000/responses/v1/responses/$RESP_ID" \
+    | jq -r '.status')
+  echo "Status: $STATUS"
+  [[ "$STATUS" == "completed" ]] || [[ "$STATUS" == "failed" ]] && break
+  sleep 2
+done
+
+# 3. Get final result
+curl -s -H "Authorization: Bearer <token>" \
+  "http://localhost:8000/responses/v1/responses/$RESP_ID" | jq
+```
+
+**Webhook Testing with webhook.site:**
+1. Go to https://webhook.site/ to get a unique URL
+2. Use that URL as `metadata.webhook_url` in your request
+3. View received webhooks in the browser
+
+**Local Webhook Server:**
+```python
+# webhook_server.py
+from flask import Flask, request
+import json
+
+app = Flask(__name__)
+
+@app.route('/webhook', methods=['POST'])
+def webhook():
+    print("\n=== Webhook Received ===")
+    print(f"Event: {request.headers.get('X-Jan-Event')}")
+    print(f"Response ID: {request.headers.get('X-Jan-Response-ID')}")
+    print(json.dumps(request.get_json(), indent=2))
+    return '', 200
+
+if __name__ == '__main__':
+    app.run(port=9000)
+```
+
+```bash
+# Run webhook server
+python webhook_server.py
+
+# Use http://host.docker.internal:9000/webhook in requests
+```
+
+### Automated Testing
+
+Comprehensive test suite at `tests/automation/responses-background-webhook.json`:
+
+**Test Suites:**
+1. Setup & Authentication
+2. Basic Background Mode
+3. Background with Webhooks
+4. Background with Tool Calling
+5. Cancellation
+6. Conversation Continuity
+7. Error Handling
+8. Complex Scenarios
+9. Monitoring & Observability
+10. Long-Running Research Task
+
+**Running Tests:**
+```bash
+# Run all tests
+jan-cli api-test run tests/automation/responses-background-webhook.json \
+  --timeout-request 60000
+
+# Export results
+jan-cli api-test run tests/automation/responses-background-webhook.json \
+  --timeout-request 60000 \
+  --reporters cli,json
+```
+
+### Troubleshooting
+
+#### Tasks Stuck in Queued
+
+**Symptoms**: Tasks remain in `queued` status indefinitely
+
+**Solutions**:
+1. Check worker logs: `docker logs <response-api-container> --tail 100`
+2. Verify workers started: Look for "worker X started" messages
+3. Check that `BACKGROUND_WORKER_COUNT` is set to a value greater than 0
+4. Verify database connectivity
+5. Check for database locks: `SELECT * FROM pg_locks WHERE granted = false;`
+
+#### Workers Not Processing Tasks
+
+**Symptoms**: Workers running but queue depth not decreasing
+
+**Solutions**:
+1. Verify `BACKGROUND_POLL_INTERVAL` setting
+2. Check worker logs for errors
+3. Ensure tasks have `background=true` and `store=true`
+4. Check LLM API availability: `curl http://llm-api:8080/healthz`
+
+#### Webhook Delivery Failures
+
+**Symptoms**: Tasks complete but webhooks not received
+
+**Solutions**:
+1. Test webhook URL: `curl -X POST <webhook_url> -d '{"test":"data"}'`
+2. Use `http://host.docker.internal:<port>` for local development
+3. Check response-api logs for webhook errors
+4. Verify webhook endpoint returns 2xx status
+5. Check firewall/network policies
+
+#### Tasks Timing Out
+
+**Symptoms**: Tasks marked as `failed` with timeout errors
+
+**Solutions**:
+1. Increase `BACKGROUND_TASK_TIMEOUT` (default: 600s)
+2. Optimize prompts to reduce processing time
+3. Check LLM API response times
+4. Monitor tool execution duration in logs
+5. Consider breaking into smaller tasks
+
+#### High Queue Depth
+
+**Symptoms**: Many queued tasks, slow processing
+
+**Solutions**:
+1. Increase `BACKGROUND_WORKER_COUNT`
+2. Scale horizontally: Run multiple response-api instances
+3. Monitor database performance
+4. Check LLM API rate limits
+5. Optimize tool execution times
diff --git a/docs/architect-mermaid.txt b/docs/architect-mermaid.txt
deleted file mode 100644
index 3f913a24..00000000
--- a/docs/architect-mermaid.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-graph TB
-    subgraph "Client Layer"
-        WEB[Web Applications]
-        API_CLIENT[API Clients]
-        MOBILE[Mobile Apps]
-        CLI[CLI Tools]
-    end
-    
-    subgraph "Kubernetes Cluster"
-        subgraph "Jan API Gateway Service"
-            GATEWAY[Jan API Gateway<br/>Port 8080]
-            GATEWAY_FEATURES[Features:<br/>- OpenAI Compatible API<br/>- Multi-tenant Auth<br/>- MCP Protocol<br/>- Web Search<br/>- Profiling]
-        end
-        
-        subgraph "Jan Inference Model Service"
-            INFERENCE[Jan Inference Model<br/>Port 8101]
-            INFERENCE_FEATURES[Features:<br/>- Model Serving<br/>- Health Monitoring<br/>- Load Balancing]
-        end
-        
-        subgraph "Database Service"
-            POSTGRES[PostgreSQL<br/>Port 5432]
-            DB_FEATURES[Features:<br/>- Read/Write Replicas<br/>- Auto Migrations<br/>- Connection Pooling]
-        end
-        
-        subgraph "External Integrations"
-            SERPER[Serper API<br/>Web Search]
-            GOOGLE[Google OAuth2<br/>Authentication]
-            PYROSCOPE[Grafana Pyroscope<br/>Profiling]
-        end
-    end
-    
-    %% Client connections
-    WEB --> GATEWAY
-    API_CLIENT --> GATEWAY
-    MOBILE --> GATEWAY
-    CLI --> GATEWAY
-    
-    %% Service connections
-    GATEWAY --> INFERENCE
-    GATEWAY --> POSTGRES
-    GATEWAY --> SERPER
-    GATEWAY --> GOOGLE
-    GATEWAY --> PYROSCOPE
-    
-    %% Styling
-    classDef clientClass fill:#e1f5fe
-    classDef serviceClass fill:#f3e5f5
-    classDef dbClass fill:#e8f5e8
-    classDef externalClass fill:#fff3e0
-    
-    class WEB,API_CLIENT,MOBILE,CLI clientClass
-    class GATEWAY,INFERENCE,GATEWAY_FEATURES,INFERENCE_FEATURES serviceClass
-    class POSTGRES,DB_FEATURES dbClass
-    class SERPER,GOOGLE,PYROSCOPE externalClass
\ No newline at end of file
diff --git a/docs/architecture/README.md b/docs/architecture/README.md
new file mode 100644
index 00000000..22933967
--- /dev/null
+++ b/docs/architecture/README.md
@@ -0,0 +1,90 @@
+# Jan Server Architecture
+
+## Overview
+
+Jan Server is built from multiple small services (microservices) that work together. Each service has a specific job.
+
+## Main Parts
+
+1. **[System Design](system-design.md)** - How all the pieces fit together
+2. **[Services](services.md)** - What each service does
+3. **[Data Flow](data-flow.md)** - How requests move through the system
+4. **[Security](security.md)** - How we keep things secure
+5. **[Observability](observability.md)** - How we monitor and debug
+6. **[Test Flows](test-flows.md)** - How we test everything
+
+## Quick Reference
+
+### Service Ports (Docker Compose defaults)
+
+| Service | Port | Access Notes |
+|---------|------|-------------|
+| **Kong Gateway** | 8000 | Entry point for `/llm/*`, `/responses/*`, `/media/*`, `/mcp` (routing + auth) |
+| **LLM API** | 8080 | Internal; exposed through Kong routes |
+| **Response API** | 8082 | Internal; streaming SSE via Kong `/responses` |
+| **Media API** | 8285 | Internal; proxied by Kong `/media` |
+| **MCP Tools** | 8091 | Internal; routed through Kong `/mcp` |
+| **Template API** | 8185 | Scaffold service generated from `services/template-api` |
+| **Keycloak** | 8085 | Admin console (protect behind VPN/SSO in production) |
+| **vLLM** | 8101 | Inference backend (local GPU/CPU profile) |
+| **Prometheus** | 9090 | Dev-only monitoring UI (`make monitor-up`) |
+| **Jaeger** | 16686 | Trace UI |
+| **Grafana** | 3331 | Dashboards (admin/admin in dev) |
+
+### Technology Stack
+
+| Component | Technology |
+|-----------------|--------------------------------|
+| API Gateway | Kong 3.5 + `keycloak-apikey` plugin |
+| Services | Go 1.21+ (Gin framework, zerolog, wire DI) |
+| MCP Server | mark3labs/mcp-go v0.7.0 |
+| ORM | GORM + goose migrations |
+| Database | PostgreSQL 15/16 (Docker) / managed service |
+| Auth | Keycloak (OpenID Connect) |
+| Inference | vLLM (OpenAI-compatible) or remote providers |
+| Observability | OpenTelemetry Collector |
+| Metrics | Prometheus 2.48 |
+| Tracing | Jaeger 1.51 |
+| Dashboards | Grafana 10.2 |
+
+## How to Run It
+
+You can run Jan Server in different ways:
+
+### Docker Compose (For Development)
+
+Use Docker Compose to run on your local computer:
+
+- `make quickstart` - interactive wizard (creates `.env`, starts stack)
+- `make up-full` - bring up all services (`docker-compose.yml` + `docker/*.yml`)
+- `make up-vllm-gpu` / `make up-vllm-cpu` - start vLLM profile
+- `make monitor-up` - start Prometheus, Grafana, Jaeger
+- `make down` / `make down-clean` - stop stack (preserve or remove volumes)
+
+### Kubernetes (For Production)
+
+Use Kubernetes to run in the cloud or on servers:
+
+- **Local testing**: Minikube or kind (see `k8s/SETUP.md`)
+- **Production**: `k8s/jan-server` Helm chart + managed Postgres + managed Keycloak
+- **Hybrid**: Run inference locally while other services run in the cluster
+
+**Helpful references:**
+- [Kubernetes Setup Guide](../../k8s/SETUP.md) - minikube/bootstrap walkthrough
+- [Kubernetes README](../../k8s/README.md) - Helm values, ingress, TLS
+- [Deployment Guide](../guides/deployment.md) - Docker, hybrid, CI/CD instructions
+
+## References
+
+- [System Design Details](system-design.md)
+- [Service Configurations](services.md)
+- [Data Flow Patterns](data-flow.md)
+- [Security Model](security.md)
+- [Observability Guide](observability.md)
+- [Test Flows & Diagrams](test-flows.md)
+- [API Reference](../api/README.md)
+- [Development Guide](../guides/development.md)
+
+
+
+
diff --git a/docs/architecture/data-flow.md b/docs/architecture/data-flow.md
new file mode 100644
index 00000000..448a5b98
--- /dev/null
+++ b/docs/architecture/data-flow.md
@@ -0,0 +1,49 @@
+# Data Flow Reference
+
+## 1. Chat Completion (LLM API)
+1. **Client** calls Kong Gateway `POST /v1/chat/completions` with Bearer token.
+2. **Kong** forwards to `llm-api:8080` (internal DNS) and injects request headers.
+3. **LLM API**:
+ - Validates JWT via Keycloak JWKS.
+ - Resolves any `jan_*` media IDs by calling Media API `/v1/media/resolve`.
+ - **(Future) Prompt Orchestration Processor**: Applies conditional modules (memory, templates, tool instructions) to compose the final prompt before inference.
+ - Selects a provider (local vLLM or configured upstream) and forwards the request.
+4. **Provider** (vLLM) streams tokens back to LLM API.
+5. **LLM API** streams data to the client (SSE) via Kong and persists conversation rows in PostgreSQL.
+
+> **Note**: Prompt orchestration (memory injection, template application, conditional prompt assembly) will be implemented as a processor within LLM API, not as a separate service. See `docs/todo/prompt-orchestration-todo.md` for design details.
+
+## 2. Response API Orchestration
+1. **Client** calls `POST /v1/responses`.
+2. **Response API** looks up conversation state and enqueues tool steps.
+3. For each tool call:
+ - Executes JSON-RPC request against MCP Tools (`/v1/mcp`).
+ - Records execution metadata in PostgreSQL.
+ - Applies depth/timeout limits (`MAX_TOOL_EXECUTION_DEPTH`, `TOOL_EXECUTION_TIMEOUT`).
+4. Final synthesis request is sent to LLM API.
+5. Completed response is stored and streamed back to the caller (SSE `response.*` events).
+
+## 3. Media Upload and Resolution
+1. Client uploads via:
+ - `POST /v1/media` (server-proxied, data URL or remote fetch), or
+ - `POST /v1/media/prepare-upload` followed by direct S3 upload.
+2. Media API stores metadata rows and issues `jan_<snowflake>` IDs.
+3. Other services reference those IDs instead of exposing raw S3 URLs.
+4. Before inference, LLM API calls `/v1/media/resolve` with the request payload; Media API rewrites each placeholder with a fresh presigned URL.
+
+## 4. MCP Tool Execution
+1. Response API or external clients send MCP JSON-RPC requests to `mcp-tools:8091`.
+2. MCP Tools selects the proper backend:
+ - Web search -> Serper or SearXNG (via redis-searxng cache)
+ - Scrape -> HTTP fetcher with metadata
+ - File search -> vector-store service
+ - Python exec -> SandboxFusion container
+3. Results are returned synchronously; streaming support is planned via incremental notifications.
+
+## 5. Observability Pipeline
+1. Services emit traces and metrics via OTLP (4317).
+2. The OpenTelemetry Collector forwards metrics to Prometheus and traces to Jaeger.
+3. Logs are structured JSON printed to stdout; Docker/Kubernetes aggregates them for your logging stack.
+4. Grafana dashboards connect to Prometheus and Jaeger for live inspection.
+
+Use this file when onboarding engineers or mapping changes that span multiple services.
diff --git a/docs/architecture/observability.md b/docs/architecture/observability.md
new file mode 100644
index 00000000..056ecda0
--- /dev/null
+++ b/docs/architecture/observability.md
@@ -0,0 +1,49 @@
+# Observability Guide
+
+## Metrics
+- **Prometheus** (http://localhost:9090)
+ - Scrapes Go services via `/metrics`.
+ - Includes default dashboards for request rate, latency, error ratio.
+- **Service metrics**:
+ - LLM API: request duration, token usage, provider latency.
+ - Response API: tool execution counts, depth histogram, orchestration latency.
+ - Media API: upload size, S3 latency, resolution cache hits.
+ - MCP Tools: tool success/failure, backend latency.
+
+## Tracing
+- **OpenTelemetry Collector** listens on `otel-collector:4317`.
+- Services enable tracing by setting `OTEL_ENABLED=true` and `OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317`.
+- **Jaeger** UI (http://localhost:16686):
+ - Search by `service.name`.
+ - Correlate request IDs (`X-Request-Id`) between services.
+
+## Logging
+- All services use structured JSON logs (zerolog).
+- Docker Compose aggregates stdout/stderr; use `make logs-<service>` targets:
+ - `make logs-api`, `make logs-media-api`, `make logs-mcp`, `make logs-infra`.
+- For Kubernetes, send logs to Loki or your aggregator via sidecars/DaemonSets.
+
+## Dashboards
+- **Grafana** (http://localhost:3331, admin/admin by default).
+- Import dashboards from `monitoring/grafana/provisioning/dashboards/`.
+- Suggested panels:
+ - Request/response duration per service.
+ - Database connection pool usage.
+ - MCP tool success/error counts.
+ - Media upload throughput and storage utilisation.
+
+## Alerts
+- Configure Alertmanager rules inside `monitoring/prometheus/alerting-rules.yml` (add file as needed).
+- Recommended alerts:
+ - High error rate (>5% for 5 minutes)
+ - Slow LLM responses (>5s p95)
+ - Media API S3 failures
+ - MCP tool timeout spikes
+
+## Developer Workflow
+1. Start the monitoring stack: `make monitor-up`.
+2. Hit the APIs (curl/Postman/jan-cli api-test).
+3. Inspect metrics/traces/logs using the URLs above.
+4. Tear down with `make monitor-down` (if defined) or `docker compose down` for the monitoring profile.
+
+Update this file if ports or dashboards change.
diff --git a/docs/architecture/security.md b/docs/architecture/security.md
new file mode 100644
index 00000000..d5e18854
--- /dev/null
+++ b/docs/architecture/security.md
@@ -0,0 +1,42 @@
+# Security Architecture
+
+## Identity and Access
+- **OAuth2/OIDC** via Keycloak (`keycloak/` Dockerfile).
+- **Kong gateway** (`http://localhost:8000`) protects every `/llm/*` route using the built-in `jwt` plugin (validating Keycloak tokens) plus the custom `keycloak-apikey` plugin (`X-API-Key: sk_*` -> `POST /auth/validate-api-key`).
+- **Clients** obtain tokens using:
+ - Guest endpoint (`POST /llm/auth/guest-login` via Kong) for quick local access; the LLM API coordinates with Keycloak.
+ - OAuth2 (code/password/device) flows against the `jan` realm in Keycloak for registered users.
+- **Services** validate tokens with:
+ - `AUTH_ENABLED=true`
+ - `AUTH_ISSUER`, `ACCOUNT`, `AUTH_JWKS_URL`
+- **Service auth**: Media API, Response API, and MCP Tools enforce Keycloak-issued JWTs via `AUTH_*` settings and inherit Kong headers when needed.
+- **Kong plugins**: besides jwt/apikey, Kong applies rate limiting, request size limits, and header sanitization at the edge to keep unauthenticated traffic out.
+
+## Network Boundaries
+- **Public**: Kong (8000) and, optionally, Keycloak admin (8085) when protected.
+- **Private**: LLM API (8080), Response API (8082), Media API (8285), MCP Tools (8091), vLLM (8101).
+- **MCP network**: SearXNG, Redis, Vector Store, SandboxFusion run on `jan-server_mcp-network` and are not exposed externally.
+- **Kubernetes**: use NetworkPolicies to isolate namespaces or rely on service mesh if available.
+
+## Data Protection
+- **Databases**: PostgreSQL instances run inside Docker/Kubernetes. Use managed services with TLS for production.
+- **S3 credentials**: stored in `.env` or secret stores, mounted into Media API only.
+- **jan_* identifiers**: act as opaque references; actual S3 URLs are short lived.
+- **Logs**: structured JSON, avoid logging secrets (token middleware redacts sensitive headers).
+
+## Secrets Lifecycle
+1. Add new variables to `.env.template` with clear comments.
+2. Mirror them in `config/secrets.env.example`.
+3. Document usage in `config/README.md` and relevant service README.
+4. For production, load values from secret managers or Kubernetes secrets instead of `.env`.
+
+## Threat Mitigations
+- **JWT validation**: services reject expired or mismatched tokens and refresh their JWKS cache periodically.
+- **Tool execution**: SandboxFusion isolates python code; `SANDBOX_FUSION_REQUIRE_APPROVAL` can force manual approval.
+- **Web fetches**: SearXNG provides result filtering; Response API enforces depth/time budgets.
+- **Media uploads**: requests require a Bearer token plus `MEDIA_MAX_BYTES`/content-type validation before accepting bytes.
+- **Rate limits**: configure Kong plugins per route; Response API also throttles multi-step workflows internally.
+
+## Incident Response
+- Capture request IDs from response headers to trace calls across services.
+- Use Jaeger + Prometheus dashboards for triage.
diff --git a/docs/architecture/services.md b/docs/architecture/services.md
new file mode 100644
index 00000000..ee84c10a
--- /dev/null
+++ b/docs/architecture/services.md
@@ -0,0 +1,90 @@
+# Service Overview
+
+Jan Server ships four core services, a Template API scaffold for new services, and shared infrastructure. Use this document to understand how they fit together and where to look in the codebase.
+
+## Core Services
+
+| Service | Purpose | Port(s) | Source | Primary Docs |
+|---------|---------|---------|--------|--------------|
+| **LLM API** | OpenAI-compatible chat completions, conversation storage, model management | 8080 (direct), 8000 via Kong | `services/llm-api` | [api/llm-api/README.md](../api/llm-api/README.md), [api/llm-api/examples.md](../api/llm-api/examples.md) |
+| **Response API** | Multi-step orchestration, tool chaining, integration with MCP Tools | 8082 | `services/response-api` | [api/response-api/README.md](../api/response-api/README.md) |
+| **Media API** | Binary ingestion, jan_* IDs, S3 storage and resolution | 8285 | `services/media-api` | [api/media-api/README.md](../api/media-api/README.md) |
+| **MCP Tools** | Model Context Protocol tools (web search, scraping, file search, python exec) | 8091 | `services/mcp-tools` | [api/mcp-tools/README.md](../api/mcp-tools/README.md), [services/mcp-tools/README.md](../../services/mcp-tools/README.md) |
+| **Template API** | Go microservice scaffold used by new feature teams | 8185 | `services/template-api` | [services/template-api/README.md](../../services/template-api/README.md) |
+
+## Configuration
+
+All services use the **centralized configuration system** at `pkg/config/`:
+
+- **Type-safe:** Go structs with compile-time validation
+- **YAML defaults:** `config/defaults.yaml` for base configuration
+- **Environment overrides:** Service-specific env vars (e.g., `LLM_API_HTTP_PORT`)
+- **Kubernetes values:** Auto-generated from configuration structs
+- **CLI tool:** `jan-cli config` for validation and inspection
+
+See [Configuration Documentation](../configuration/README.md) for details.
+
+## Infrastructure Components
+- **Kong Gateway (8000)**: exposes public APIs, enforces rate limits, validates Keycloak JWTs/API keys (custom plugin), and proxies `/llm/auth/guest-login` for guest tokens.
+- **Keycloak (8085)**: handles OAuth2/OIDC flows; see `keycloak/`.
+- **PostgreSQL**: `api-db` (LLM/Response/Media data) and `keycloak-db` (Keycloak state).
+- **vLLM (8101)**: inference backend reachable from llm-api.
+- **Observability stack**: Prometheus (9090), Grafana (3331), Jaeger (16686), OpenTelemetry Collector.
+- **MCP support services**: SearXNG (search), Vector Store (file search), SandboxFusion (python execution).
+
+## Creating a New Service
+
+### Quick Start
+
+```powershell
+# Generate from template
+scripts/new-service-from-template.ps1 -Name my-service
+```
+
+### Configuration Setup
+
+New services should use the centralized configuration system:
+
+1. **Define service config in `pkg/config/types.go`:**
+ ```go
+ type ServiceConfig struct {
+ HTTP HTTPConfig `yaml:"http"`
+ Database DatabaseConfig `yaml:"database"`
+ // Add service-specific fields
+ }
+ ```
+
+2. **Regenerate config files:**
+ ```bash
+ make config-generate
+ ```
+
+3. **Load config in your service:**
+ ```go
+ import "jan-server/pkg/config"
+ 
+ cfg, _ := config.Load()
+ serviceCfg, _ := cfg.GetServiceConfig("my-service")
+ ```
+
+4. **Update deployment configs:**
+ - Add service to `docker/services-api.yml`
+ - Generate K8s values: `jan-cli config k8s-values --env production`
+
+See [Configuration System](../configuration/README.md) and [Service Template](../../services/template-api/NEW_SERVICE_GUIDE.md) for the complete guide.
+
+### Documentation Requirements
+
+1. Update `docs/architecture/services.md` (this file) with new service row
+2. Create `docs/api/<service>/README.md` with API reference
+3. Add service to `docs/index.md` navigation
+4. Update `k8s/jan-server/values.yaml` if deploying to Kubernetes
+
+## Service Interactions
+- **LLM API -> Media API**: LLM API resolves `jan_*` IDs before sending payloads to vLLM or upstream providers (`MEDIA_RESOLVE_URL` env var).
+- **Response API -> LLM API**: Response API delegates final language generation to LLM API (`LLM_API_URL`).
+- **Response API -> MCP Tools**: orchestrated tool calls are issued via JSON-RPC (`MCP_TOOLS_URL`).
+- **MCP Tools -> Infrastructure**: uses SearXNG, Vector Store, and SandboxFusion to execute user requests.
+
+Keep this document updated whenever a service is added, renamed, or retired.
+
diff --git a/docs/architecture/system-design.md b/docs/architecture/system-design.md
new file mode 100644
index 00000000..e9ddf7d5
--- /dev/null
+++ b/docs/architecture/system-design.md
@@ -0,0 +1,118 @@
+# System Design
+
+This reference describes how the Jan Server components fit together. Use it when reviewing cross-service changes or planning deployments.
+
+## 1. System Overview
+
+Jan Server is a microservices platform that exposes OpenAI-compatible APIs through Kong. Each service owns a focused domain:
+
+- **LLM API (8080)** - chat completions, conversations, projects, model catalog.
+- **Response API (8082)** - multi-step orchestration and MCP tool coordination.
+- **Media API (8285)** - binary ingestion, jan_* IDs, presigned URL management.
+- **MCP Tools (8091)** - JSON-RPC endpoint that proxies Serper/SearXNG search, scraping, file search, and SandboxFusion execution.
+- **Template API (8185)** - scaffold for new services (not part of the default stack).
+- **Shared infrastructure** - Kong (8000), Keycloak (8085), PostgreSQL, vLLM (8101), observability stack.
+
+Kong terminates TLS (in production), validates JWT/API keys, applies rate limits, and forwards requests to the internal services.
+
+## 2. Architecture Layers
+
+| Layer | Components | Notes |
+|-------|------------|-------|
+| **Edge** | Kong Gateway, Keycloak | Centralized auth, rate limiting, guest-token endpoint. |
+| **Application** | LLM API, Response API, Media API, MCP Tools | Written in Go using Gin + zerolog, configured via `pkg/config`. |
+| **Tooling** | SearXNG, Serper, SandboxFusion, vector-store | Only accessible from MCP Tools. |
+| **Data/Storage** | PostgreSQL (`api-db`, `keycloak-db`), S3-compatible storage | Media files live in object storage; metadata lives in PostgreSQL. |
+| **Inference** | vLLM (local) or remote OpenAI-compatible providers | Selected per request using the provider metadata catalog. |
+| **Observability** | OpenTelemetry Collector, Prometheus, Grafana, Jaeger | Enabled with `OTEL_ENABLED=true` + `make monitor-up`. |
+
+## 3. Component Diagram
+
+```
+             +------------------------------+
+             |  External Clients / SDKs     |
+             +---------------+--------------+
+                             |
+                             v
+                   +-------------------+
+                   |   Kong Gateway    | 8000
+                   +---+---+----+------+ 
+                       |   |    |
+        +--------------+   |    +----------------+
+        |                  |                     |
+        v                  v                     v
+  +-----------+    +---------------+      +---------------+
+  |  LLM API  |    |  Response API |      |   Media API   |
+  | (8080)    |    |    (8082)     |      |    (8285)     |
+  +-----+-----+    +-------+-------+      +-------+-------+
+        |                  |                     |
+        |                  v                     |
+        |        +-------------------+           |
+        +------->|    MCP Tools      |<----------+
+                 |     (8091)        |
+                 +----+---+----+-----+
+                      |   |    |
+                      |   |    +--> SandboxFusion
+                      |   +-------> Vector Store
+                      +-----------> SearXNG / Serper
+
+Shared dependencies (not shown): PostgreSQL (api-db), S3/Object storage, Keycloak (JWT issuer), vLLM (8101).
+```
+
+## 4. Request Lifecycles
+
+### Chat Completions
+1. Client calls `POST /v1/chat/completions` on `http://localhost:8000`.
+2. Kong validates the JWT/API key and forwards to `llm-api:8080`.
+3. LLM API resolves `jan_*` placeholders via Media API, selects a provider (local vLLM or remote), and streams tokens back to the gateway.
+4. Conversations/projects are persisted in PostgreSQL.
+
+### Response Orchestration
+1. Client calls `POST /responses/v1/responses` (streaming optional).
+2. Response API loads the conversation context and iteratively issues `tools/list` / `tools/call` requests to MCP Tools.
+3. Tool executions are capped by `RESPONSE_MAX_TOOL_DEPTH` and `TOOL_EXECUTION_TIMEOUT`.
+4. Final synthesis is delegated to LLM API and streamed back to the caller.
+
+### Media Handling
+1. Upload via `POST /media/v1/media` (remote URL or data URL) or request a presigned upload with `POST /media/v1/media/prepare-upload`.
+2. Media API deduplicates content, issues a `jan_*` ID, and stores metadata in PostgreSQL.
+3. Other services embed the `jan_*` ID; LLM API resolves them to presigned URLs right before inference.
+
+### MCP JSON-RPC
+1. Response API or external automation sends JSON-RPC requests to `POST /v1/mcp`.
+2. MCP Tools validates the method (`tools/list`, `tools/call`, `prompts/*`, `resources/*`) and dispatches to the Serper/SearXNG/SandboxFusion clients.
+3. Results are returned as SSE events (streaming) or plain JSON when the response fits a single chunk.
+
+## 5. Data & Network Topology
+
+- Docker Compose defines two primary networks: `jan-server_default` (Kong + core services + databases) and `jan-server_mcp-network` (MCP-only helpers such as SearXNG, vector store, SandboxFusion).
+- Production deployments should mirror this split using Kubernetes namespaces or NetworkPolicies.
+- Persistent data:
+  - `api-db` (LLM/Response/Media metadata) - each service uses its own schema.
+  - `keycloak-db` - Keycloak realm and client configuration.
+  - Object storage (S3, MinIO, etc.) - Media files and presigned URLs.
+
+## 6. Deployment Modes
+
+| Mode | Description | Commands |
+|------|-------------|----------|
+| **Local (recommended)** | `make quickstart` prompts for providers, writes `.env`, and runs `docker compose up` with all services. | `make quickstart` |
+| **Profiles** | Start a subset of services (API only, MCP only, GPU inference). | `make up-api`, `make up-mcp`, `make up-gpu` |
+| **Monitoring stack** | Optional Prometheus/Grafana/Jaeger. | `make monitor-up` |
+| **Kubernetes** | Use `k8s/jan-server` Helm chart. Values mirror `pkg/config` defaults. | `helm install jan ./k8s/jan-server -f values.yaml` |
+
+## 7. Change Impact Checklist
+
+When modifying the system architecture:
+1. Update the relevant service README and API docs.
+2. Reflect new ports/paths in Kong configuration.
+3. Adjust `docs/architecture/services.md` and `docs/architecture/data-flow.md`.
+4. Regenerate configuration artifacts (`make config-generate`) if `pkg/config` changes.
+5. Update Kubernetes values and Helm defaults as needed.
+
+---
+
+**Maintainer:** Jan Server Architecture Group - **Last Reviewed:** November 2025
+
+
+
diff --git a/docs/architecture/test-flows.md b/docs/architecture/test-flows.md
new file mode 100644
index 00000000..0bd61000
--- /dev/null
+++ b/docs/architecture/test-flows.md
@@ -0,0 +1,606 @@
+# Test Flows Architecture & Diagrams
+
+**Generated**: November 11, 2025
+
+This document provides visual representations of test flows, dependencies, and service interactions across the jan-server test suite. See [System Design](system-design.md) for the complete system architecture.
+
+---
+
+## Overview
+
+The jan-server test suite consists of **6** Postman collections with 100+ individual test cases covering:
+- Authentication flows (guest, JWT, API keys)
+- Conversation management and projects
+- Tool orchestration via MCP
+- Media file operations
+- Response generation with tool calling
+- Full regression sweep (`test-all.postman.json`)
+
+All tests follow dependency chains: Health -> Auth -> Setup -> Main Tests -> Cleanup
+
+For complete system architecture diagrams, see [System Design](system-design.md).
+
+---
+
+## Test Collections Overview
+
+### 1. Auth & LLM API Tests (`auth-postman-scripts.json`)
+
+**Focus**: Authentication flows and LLM API validation
+
+```
+Health Checks
+ v
+Setup [Guest Token] -> [Keycloak Admin] -> [Create User] -> [Set Password]
+ v
+Main Tests (Parallel)
++- LLM API - Guest Token [List Models, Get Details, Chat]
++- LLM API - User Token [List Models]
++- Guest Login Flow [Request Token, Upgrade Account]
++- JWT Login Flow [Keycloak Auth, User Management]
++- API Key Flow [Create, List, Use, Revoke]
+ v
+Cleanup [Delete User]
+```
+
+**8 Flows, 20+ Test Cases**
+
+---
+
+### 2. Conversations Tests (`conversations-postman-scripts.json`)
+
+**Focus**: Conversation and project management
+
+```
+Health & Auth Setup
+ v
+Model Discovery [List Available Models]
+ v
+Project Management (Parallel)
++- Create Projects (3 types)
++- CRUD Operations
++- List & Pagination
++- Update (Name, Instructions, Favorite, Archive)
++- Validation Tests
+ v
+Conversation Flow
++- Create Conversation
++- Verify Title
++- Start Chat (First Message)
++- Continue Chat (Follow-ups)
++- Get Details
++- List Conversations
+ v
+Cleanup [Delete All Resources]
+```
+
+**3 Flows, 30+ Test Cases**
+
+---
+
+### 3. MCP Tools Tests (`mcp-postman-scripts.json`)
+
+**Focus**: Model Context Protocol tool orchestration
+
+```
+Guest Authentication
+ v
+Tool Discovery [List Available Tools]
+ v
+Individual Tool Tests
++- Serper Search [Query with domain filters]
++- Web Scraping [Scrape URLs]
++- File Search Index [Index & Query documents]
++- Python Execution [Sandboxed code execution]
++- SearXNG Direct [Meta-search integration]
+```
+
+**2 Flows, 8+ Test Cases**
+
+---
+
+### 4. Media API Tests (`media-postman-scripts.json`)
+
+**Focus**: File upload, storage, and resolution
+
+```
+Authentication
+ v
+Upload Operations
++- Presigned URL Generation
++- Remote URL Ingestion
++- Data URL Ingestion
++- Deduplication Testing
+ v
+Resolution & Download
++- Payload Resolution (with jan_* placeholders)
++- Direct Stream Download
++- Error Cases (404, 400, 401)
+```
+
+**11 Test Cases**
+
+---
+
+### 5. Response API Tests (`responses-postman-scripts.json`)
+
+**Focus**: Response generation with tool orchestration
+
+```
+Authentication & Setup
+ v
+Health & Service Checks
++- Response API Health
++- MCP Tools Availability
++- LLM API Smoke Test
+ v
+Response Generation (Parallel)
++- Basic Text Responses [No tools]
++- Single Tool Calling [Search integration]
++- Multi-Step Tool Chains [Search + Scrape]
++- File Search Workflows [Index + Query]
++- Conversation Continuity [Multi-turn with context]
++- Error Handling [Invalid tools, missing params]
++- Complex Scenarios [Search + Scrape + Analyze]
+```
+
+**9 Flows, 25+ Test Cases**
+
+---
+
+### 6. Full Regression (`test-all.postman.json`)
+
+**Focus**: Executes all other collections sequentially for CI/regression.
+
+```
+Bootstrap (Health + Auth)
+ v
+[Auth Collection]
+ v
+[Conversations Collection]
+ v
+[Media Collection]
+ v
+[MCP Tools Collection]
+ v
+[Response API Collection]
+ v
+Cleanup + Report
+```
+
+**1 Flow, 100+ Assertions**
+
+Use this collection when running `make test-all` or in CI pipelines; it reuses the shared environment file and preserves the dependency order shown above.
+
+---
+
+## Test Flow Sequence Diagrams
+
+### Authentication Sequence
+
+```
+Test Runner Services
+ | |
+ +--> Health Check -> LLM API
+ <-- 200 OK <--+
+ | |
+ +--> Guest Login -> /auth/guest-login
+ <-- {access_token,...} <--+
+ | |
+ +--> Get Keycloak Token -> Keycloak
+ <-- {admin_token} <--+
+ | |
+ +--> Create User -> /admin/realms/{realm}/users
+ <-- 201 Created <--+
+ | |
+ +--> Set Password -> /admin/realms/{realm}/users/{id}
+ <-- 204 No Content <--+
+ | |
+ +--> Obtain User Token -> /realms/{realm}/token
+ <-- {user_token} <--+
+ | |
+ +--> Main Tests -> [Services]
+```
+
+---
+
+### Conversation Flow
+
+```
+Test Runner Services
+ | |
+ +--> Authenticate -> LLM API
+ <-- {access_token} <--+
+ | |
+ +--> List Models -> /v1/models
+ <-- [{model},...] <--+
+ | |
+ +--> Create Project -> /v1/projects
+ <-- {project_id} <--+
+ | |
+ +--> Create Conversation -> /v1/conversations
+ <-- {conversation_id} <--+
+ | |
+ +--> Start Chat -> /v1/chat/completions
+ | (with conversation) (with conversation.id)
+ <-- {message, choices} <--+
+ | |
+ +--> Continue Chat -> /v1/chat/completions
+ | (with history) (with prior messages)
+ <-- {message, choices} <--+
+ | |
+ +--> Cleanup -> [Delete Resources]
+```
+
+---
+
+### Response API with Tool Calling
+
+```
+Test Runner Services
+ | |
+ +--> Health Checks -> Response API, MCP Tools
+ <-- OK <--+
+ | |
+ +--> Create Response -> Response API
+ | (with tool config) /responses (POST)
+ <-- {response_id} <--+
+ | |
+ | Response Service Calls Tools (Internal)
+ | +-> MCP Tools
+ | | /tools/call (search)
+ | | <- {results}
+ | |
+ | +-> MCP Tools
+ | | /tools/call (scrape)
+ | | <- {content}
+ | |
+ | +-> LLM API
+ | /v1/chat/completions
+ | <- {final_response}
+ |
+ +--> Get Response -> /responses/{id}
+ <-- {id, content,...} <--+
+ | |
+ +--> Verify Results OK Success
+```
+
+---
+
+### Media Processing Flow
+
+```
+Test Runner Services
+ | |
+ +--> Authenticate -> LLM API
+ <-- {access_token} <--+
+ | |
+ +--> Get Presigned URL -> Media API
+ +--> /media/presign (Client uploads to S3/Object Storage)
+ <-- {presigned_url} <--+
+ | |
+ +--> Ingest from URL -> Media API
+ +--> /media/ingest (source=url)
+ <-- {media_id, hash} <--+
+ | |
+ +--> Test Deduplication -> Media API
+ +--> /media/ingest (same url)
+ <-- {media_id: same, deduped: true} <--+
+ | |
+ +--> Resolve Placeholder -> Media API
+ +--> /media/resolve ({{jan_media_{id}}})
+ <-- {content: resolved_url} <--+
+ | |
+ +--> Download -> Media API
+ /media/{id} (Stream binary data)
+```
+
+---
+
+### MCP Tools Workflow
+
+```
+Test Runner Services
+ | |
+ +--> List Tools -> MCP Tools
+ | /tools/list
+ <-- {tools: [...]} <--+
+ | |
+ +--> Execute Serper -> MCP Tools
+ | /tools/call (search) (calls Serper API)
+ <-- {results: [...]} <--+
+ | |
+ +--> Execute Scrape -> MCP Tools
+ | /tools/call (scrape) (fetches URL content)
+ <-- {content} <--+
+ | |
+ +--> Index Documents -> MCP Tools
+ | /tools/call (index) (builds search index)
+ <-- {indexed_id, chunks} <--+
+ | |
+ +--> Query Index -> MCP Tools
+ | /tools/call (query) (searches index)
+ <-- {results: [...]} <--+
+ | |
+ +--> Execute Python -> MCP Tools
+ /tools/call (exec) (sandboxed execution)
+```
+
+---
+
+## Test Dependency Matrix
+
+```
++----------------------------------------------------------------+
+| TEST DEPENDENCY HIERARCHY |
++----------------------------------------------------------------+
+
+Level 0: Health Checks
++- Verify all services are running
+
+Level 1: Authentication
++- Guest Token Generation
++- Keycloak Integration
++- JWT Token Generation
++- API Key Management
+
+Level 2: Resource Discovery & Setup
++- List Available Models
++- Create Projects
++- Create Conversations
++- Initialize Test Data
+
+Level 3: Functional Tests (Can run in parallel)
++- Conversation Operations
++- Chat Completions
++- Tool Calling & Orchestration
++- Media File Operations
++- Response Generation
+
+Level 4: Integration Tests
++- Multi-step Workflows
++- Cross-service Interactions
++- Conversation Continuity
++- Tool Chaining
+
+Level 5: Cleanup
++- Delete All Test Resources
+```
+
+---
+
+## Service Communication Map
+
+```
+ +----------------------+
+ | TEST RUNNER |
+ | (jan-cli api-test) |
+ +----------------------+
+ |
+ +---------------+---------------+
+ | | |
+ v v v
+ +------------+ +----------+ +----------+
+ | Kong | |Keycloak | |SearXNG |
+ | (Gateway) | |(Auth) | |(Search) |
+ +------+-----+ +----+-----+ +----+-----+
+ | | |
+ +----------+--------------+--------------+
+ | | |
+ v v v
++----------+ +----------+ +------------+
+| LLM API |<--> MCP Tools | (external) |
+|:8080 | |:8091 | |
++----+-----+ +-----+----+ +------------+
+ | |
+ | +--------+
+ | v
+ +-> Media API:8285
+ |
+ +-> Response API:8082
+ |
+ +-> PostgreSQL (persistent storage)
+```
+
+---
+
+## Test Data Flow
+
+```
++-----------------------------------------------------------------+
+| POSTMAN COLLECTION VARIABLES |
+| |
+| SETUP PHASE
+| +- guest_access_token <- /auth/guest-login
+| +- test_user_id <- /admin/realms/jan/users
+| +- kc_admin_access_token <- /realms/master/token
+| +- model_id <- /v1/models (first item)
+| |
+| LLM API PHASE
+| +- project_id_1,2,3 <- /v1/projects (POST)
+| +- conversation_id <- /v1/conversations (POST)
+| +- conversation_title <- GET /v1/conversations/{id}
+| |
+| RESPONSE API PHASE
+| +- response_id <- /responses (POST with tools)
+| +- tool_result <- MCP /tools/call
+| +- response_content <- GET /responses/{id}
+| |
+| MEDIA API PHASE
+| +- presigned_url <- /media/presign
+| +- media_id <- /media/ingest
+| +- resolved_content <- /media/resolve
+| |
+| CLEANUP PHASE
+| +- DELETE /v1/conversations/{id}
+| +- DELETE /v1/projects/{id}
+| +- DELETE /users/{test_user_id}
+| |
++-----------------------------------------------------------------+
+```
+
+---
+
+## Error Handling Architecture
+
+```
++------------------------+--------------+-------------------------+
+| Error Scenario | HTTP Status | Test Assertion |
++------------------------+--------------+-------------------------+
+| Service Unavailable | 503 | Retry or Fail Fast |
+| Invalid Token | 401 | Verify Rejection |
+| Insufficient Perms | 403 | Verify Denial |
+| Resource Not Found | 404 | Expected for cleanup |
+| Invalid Input | 400 or 422 | Validate Error Message |
+| Resource Conflict | 409 | Handle Duplicate Create |
+| Rate Limited | 429 | Implement Backoff |
+| Internal Error | 500 | Retry or Report |
+| Timeout | - | Extend Timeout |
+| Connection Refused | - | Ensure Service Running |
++------------------------+--------------+-------------------------+
+```
+
+---
+
+## Workflow State Machine
+
+```
+ START
+ |
+ v
++--------------+
+|Health Check |--NO---> FAIL (Service Down)
++------+-------+
+ |YES
+ v
++------------------+
+|Authenticate |--NO---> FAIL (Auth Error)
+|(Get Token) |
++------+-----------+
+ |YES
+ v
++------------------+
+|Setup Resources |--NO---> FAIL (Setup Error)
+|(Projects, Docs) |
++------+-----------+
+ |YES
+ v
++------------------------------------------+
+|Execute Main Tests (Parallel/Serial) |
+|- Conversations |
+|- Tool Calls |
+|- Media Operations |
+|- Error Scenarios |
++------+-----------------------------------+
+ |
+ +----+------+
+ |YES NO |
+ v v
++--------+ +--------+
+|CLEANUP | |CLEANUP |
+|SUCCESS | |& FAIL |
+| Tests | | Tests |
++----+---+ +----+---+
+ | |
+ +----+-----+
+ v
++------------------+
+|Generate Report |
+|- Pass/Fail |
+|- Assertions |
+|- Timing |
+|- Coverage |
++------+-----------+
+ v
+ +-----+
+ | END |
+ +-----+
+```
+
+---
+
+## Test Execution Timeline
+
+```
+Timeline: 0s 5s 10s 15s 20s
+
+Auth Flow: [Health ]->[Auth ]->[Models ]->[Setup ]
+ v
+Conversations: +-----------------[Project Mgmt ]
+ | v
+ | [Conversation Create->Get->Chat->Continue ]
+ v
+Media API: | [Ingest->Dedup->Resolve->Download ]
+ | v
+Response API: | [Basic Response][Tool Call][Multi-step]
+ | v
+MCP Tools: | [Search][Scrape][IndexQuery][Python]
+ |
+Cleanup: +---------------[Delete Resources]
+
+Total: ~15-25 seconds (depends on service latency)
+```
+
+---
+
+## Test Coverage Summary
+
+| Component | Tests | Coverage |
+|-----------|-------|----------|
+| Authentication | 8 | Guest, JWT, API Keys |
+| Conversations | 14 | CRUD, Pagination, Validation |
+| Projects | 8 | CRUD, State Management |
+| Chat Completion | 3 | Basic Usage, Conversation |
+| Models | 2 | Listing, Details |
+| Tool Calling | 8 | Search, Scrape, Index, Exec |
+| Media Upload | 3 | URL, DataURL, Dedup |
+| Media Download | 2 | Streaming, Error Cases |
+| Error Handling | 5 | Invalid Input, Missing Auth |
+| **TOTAL** | **100+** | **Comprehensive (see `test-all.postman.json`)** |
+
+---
+
+## Integration Points
+
+### With System Design
+See [System Design](system-design.md) for:
+- Architecture layers
+- Service responsibilities
+- Data flow patterns
+- Deployment strategies
+
+### With Services
+See [Services](services.md) for:
+- LLM API details
+- Response API details
+- Media API details
+- MCP Tools details
+
+### With Security
+See [Security](security.md) for:
+- Authentication mechanisms
+- Authorization patterns
+- API key management
+- Token validation
+
+### With Data Flow
+See [Data Flow](data-flow.md) for:
+- Request/response patterns
+- Data transformation
+- Persistence strategies
+
+---
+
+## Related Documentation
+
+- **Main Architecture Index**: See `/docs/architecture/README.md`
+
+---
+
+**Last Updated**: November 11, 2025 
+**Document Type**: Architecture Reference - Testing 
+**Target Audience**: QA Engineers, Developers, DevOps 
+**Maintainer**: Jan-Server Team
+
+
+
diff --git a/docs/configuration/README.md b/docs/configuration/README.md
new file mode 100644
index 00000000..57b555e1
--- /dev/null
+++ b/docs/configuration/README.md
@@ -0,0 +1,394 @@
+# Configuration System
+
+Jan Server uses a simple configuration system with default values that you can override.
+
+## Why This Matters
+
+- **Safe:** Catches configuration errors before startup
+- **Clear:** Easy to see what settings are available
+- **Flexible:** Works in development and production
+- **Validated:** Tells you exactly what's wrong if config is invalid
+
+## How It Works
+
+Configuration is loaded in this order (later overrides earlier):
+
+1. **YAML defaults** - Built-in sensible defaults (`config/defaults.yaml`)
+2. **Environment file** - Your specific settings (`config/environments/<environment>.yaml`, e.g. `config/environments/production.yaml`)
+3. **Environment variables** - Highest priority (great for secrets)
+
+## Quick Commands
+
+```bash
+# Check if your configuration is valid
+jan-cli config validate
+
+# See all current settings
+jan-cli config export
+
+# See settings for one service
+jan-cli config show llm-api
+```
+
+For the full set of commands (for example `config generate`, `config k8s-values`, and `config drift`), see the [Jan CLI Guide](../guides/jan-cli.md).
+
+## Documentation Structure
+
+This directory contains configuration system implementation details:
+
+| Document | Description |
+|----------|-------------|
+| [precedence.md](precedence.md) | Configuration precedence rules and loading order |
+| [env-var-mapping.md](env-var-mapping.md) | Environment variable to config mapping |
+| [docker-compose.md](docker-compose.md) | Docker Compose integration |
+| [kubernetes.md](kubernetes.md) | Kubernetes Helm values generation |
+| [service-migration.md](service-migration.md) | Migrating services to new config system |
+
+**For user-facing documentation:**
+- **[Jan CLI Guide](../guides/jan-cli.md)** - Command-line tool for configuration management
+- **[Testing Guide](../guides/testing.md)** - Cross-platform testing procedures
+
+## Configuration Files
+
+### defaults.yaml
+
+Base configuration with sensible defaults for all environments:
+
+```yaml
+environment: development
+
+services:
+  llm-api:
+    http:
+      port: 8080
+      timeout: 30s
+    database:
+      dsn: postgres://jan_user:jan_password@localhost:5432/jan_llm_api
+      max_idle_conns: 10
+      max_open_conns: 30
+    auth:
+      enabled: true
+      issuer: http://localhost:8085/realms/jan
+```
+
+### Environment-specific YAMLs
+
+Optional overrides live in `config/environments/<environment>.yaml`. The directory is not created by default; add it as needed:
+
+```yaml
+# config/environments/production.yaml
+environment: production
+
+services:
+  llm-api:
+    http:
+      timeout: 60s
+    database:
+      max_idle_conns: 20
+      max_open_conns: 100
+    observability:
+      enabled: true
+      endpoint: https://otel-collector.prod.example.com
+```
+
+When you run the loader with `environment=production`, the stack becomes:
+1. Struct defaults (priority 100)
+2. `config/defaults.yaml` (priority 200)
+3. `config/environments/production.yaml` (priority 300) - **you must create this file**
+4. Environment variables (priority 500)
+
+### Environment Files (`.env`)
+
+The repo ships with ready-made `.env` templates for Docker workflows:
+
+| File | Purpose |
+|------|---------|
+| `.env.template` | Base template used by `make quickstart` |
+| `.env` | Generated interactive configuration (git-ignored) |
+| `config/production.env.example` | Example values for production CI/CD |
+| `config/secrets.env.example` | Placeholder for sensitive values (copy to `config/secrets.env`) |
+
+Use `make quickstart` or `make setup` to populate `.env`; copy the example files when preparing staging/production pipelines.
+
+### Environment Variables
+
+Highest priority - override any YAML setting:
+
+```bash
+# Override HTTP port
+export LLM_API_HTTP_PORT=9090
+
+# Override database connection
+export LLM_API_DATABASE_DSN=postgres://user:pass@prod-db:5432/db
+
+# Override observability
+export LLM_API_OBSERVABILITY_ENABLED=true
+```
+
+## Common Tasks
+
+### Adding a New Configuration Field
+
+1. **Update Go struct** in `pkg/config/types.go`:
+ ```go
+ type HTTPConfig struct {
+ Port int `yaml:"port" env:"HTTP_PORT" default:"8080"`
+ Timeout time.Duration `yaml:"timeout" env:"HTTP_TIMEOUT" default:"30s"`
+ // Add new field
+ MaxBodySize int64 `yaml:"max_body_size" env:"HTTP_MAX_BODY_SIZE" default:"10485760"`
+ }
+ ```
+
+2. **Regenerate config files**:
+ ```bash
+ make config-generate
+ ```
+
+3. **Update defaults.yaml** (if auto-generated values aren't sufficient):
+ ```yaml
+ services:
+ llm-api:
+ http:
+ max_body_size: 10485760 # 10 MB
+ ```
+
+4. **Test your changes**:
+ ```bash
+ make config-test
+ jan-cli config validate
+ ```
+
+### Validating Configuration
+
+```bash
+# Validate current configuration
+jan-cli config validate
+
+# Validate with specific environment
+ENVIRONMENT=production jan-cli config validate
+
+# Check for configuration drift (CI/CD)
+make config-drift-check
+```
+
+### Generating Kubernetes Values
+
+```bash
+# Generate values for all environments
+jan-cli config k8s-values --env development > k8s/jan-server/values-development.yaml
+jan-cli config k8s-values --env production > k8s/jan-server/values-production.yaml
+
+# Generate with overrides
+jan-cli config k8s-values --env production \
+ --set services.llm-api.replicas=3 \
+ --set services.llm-api.resources.limits.memory=2Gi \
+ > k8s/values-prod-scaled.yaml
+```
+
+## Architecture
+
+### Package Structure
+
+```
+pkg/config/
++-- types.go # Configuration structs (source of truth)
++-- loader.go # YAML + env loading logic
++-- validation.go # Validation rules
++-- provenance.go # Track config source
++-- env.go # Environment variable helpers
++-- k8s/
+| +-- values_generator.go # Helm values generator
++-- testdata/ # Test fixtures
+
+config/
++-- defaults.yaml # Auto-generated base defaults
++-- development.yaml # Dev overrides (optional)
++-- staging.yaml # Staging overrides (optional)
++-- production.yaml # Production overrides (optional)
+
+cmd/jan-cli/
++-- main.go # CLI tool
+```
+
+### Design Principles
+
+1. **Single Source of Truth:** Go structs define all configuration
+2. **Auto-Generation:** YAML, JSON Schema, and docs generated from code
+3. **Fail Fast:** Validation at startup prevents runtime errors
+4. **Environment Parity:** Same config structure across all environments
+5. **Override by Exception:** Defaults work everywhere, override only what's different
+6. **Explicit Over Implicit:** No magic values or hidden defaults
+
+## Migration Guide
+
+Migrating from the old environment-variable-only approach:
+
+### Before (Old Way)
+
+```go
+type Config struct {
+ HTTPPort int `env:"HTTP_PORT" envDefault:"8080"`
+ DatabaseURL string `env:"DATABASE_URL" envDefault:"postgres://..."`
+ LogLevel string `env:"LOG_LEVEL" envDefault:"info"`
+ AuthEnabled bool `env:"AUTH_ENABLED" envDefault:"false"`
+ // ... 50+ more variables
+}
+```
+
+**Problems:**
+- 50+ environment variables per service
+- No validation until runtime
+- Hard to see effective configuration
+- Difficult to manage across environments
+- No documentation of what's actually used
+
+### After (New Way)
+
+```go
+import "jan-server/pkg/config"
+
+cfg, _ := config.Load()
+serviceCfg, _ := cfg.GetServiceConfig("llm-api")
+
+// Type-safe access
+port := serviceCfg.HTTP.Port
+dbDSN := serviceCfg.Database.DSN
+```
+
+**Benefits:**
+- ~10 environment variables per service (only overrides)
+- Validated at load time
+- `jan-cli config export` shows effective config
+- YAML files for environment differences
+- Auto-generated documentation
+
+See [service-migration.md](service-migration.md) for detailed migration steps.
+
+## Best Practices
+
+### 1. Use Defaults for Common Values
+
+Put shared defaults in `config/defaults.yaml`:
+
+```yaml
+# Good: Shared defaults
+services:
+ llm-api:
+ http:
+ timeout: 30s
+ port: 8080
+```
+
+### 2. Override Only What's Different
+
+Environment-specific files should be minimal:
+
+```yaml
+# config/production.yaml - Only overrides
+services:
+ llm-api:
+ database:
+ max_open_conns: 100 # Higher for production
+ observability:
+ enabled: true
+```
+
+### 3. Use Environment Variables for Secrets
+
+Never put secrets in YAML files:
+
+```bash
+# .env or CI/CD secrets
+export LLM_API_DATABASE_DSN=postgres://user:${DB_PASSWORD}@prod-db/db
+export LLM_API_AUTH_CLIENT_SECRET=${KEYCLOAK_CLIENT_SECRET}
+```
+
+### 4. Validate Early
+
+Add validation to your config loading:
+
+```go
+cfg, err := config.Load()
+if err != nil {
+ log.Fatalf("invalid configuration: %v", err)
+}
+```
+
+### 5. Use CLI Tools in CI/CD
+
+Prevent configuration drift:
+
+```yaml
+# .github/workflows/ci.yml
+- name: Validate configuration
+ run: |
+ make config-drift-check
+ jan-cli config validate
+```
+
+## Troubleshooting
+
+### Configuration Not Loading
+
+```bash
+# Check what's being loaded
+jan-cli config show llm-api
+
+# Validate configuration
+jan-cli config validate
+
+# Export effective config
+jan-cli config export
+```
+
+### Environment Variable Not Working
+
+Check the naming convention - use service prefix:
+
+```bash
+# Wrong
+export HTTP_PORT=9090
+
+# Correct
+export LLM_API_HTTP_PORT=9090
+```
+
+### Kubernetes Values Not Applying
+
+Regenerate values after config changes:
+
+```bash
+make config-generate
+jan-cli config k8s-values --env production > k8s/values-prod.yaml
+helm upgrade jan-server k8s/jan-server -f k8s/values-prod.yaml
+```
+
+## Reference
+
+### Documentation
+- **CLI Guide:** [docs/guides/jan-cli.md](../guides/jan-cli.md) - Installation, usage, and examples
+- **CLI Command Reference:** [cmd/jan-cli/README.md](../../cmd/jan-cli/README.md)
+- **Configuration Types:** [pkg/config/README.md](../../pkg/config/README.md)
+
+### Code References
+- **Go Package:** `pkg/config/` in workspace root
+- **Default Config:** [config/defaults.yaml](../../config/defaults.yaml)
+- **JSON Schema:** [config-schema.json](config-schema.json) (auto-generated)
+
+## Examples
+
+See working examples in:
+- **LLM API:** `services/llm-api/internal/config/`
+- **Template API:** `services/template-api/internal/config/` (shows both approaches)
+- **MCP Tools:** `services/mcp-tools/configs/`
+
+---
+
+**Need help?** See [service-migration.md](service-migration.md) or check existing service implementations for patterns.
+
+
+
+
+
+
+
diff --git a/docs/configuration/docker-compose.md b/docs/configuration/docker-compose.md
new file mode 100644
index 00000000..1cae0e42
--- /dev/null
+++ b/docs/configuration/docker-compose.md
@@ -0,0 +1,82 @@
+# Docker Compose Generation
+
+## Overview
+
+This document describes how docker-compose files are managed in relation to the configuration system.
+
+## Current Approach
+
+Instead of generating docker-compose files from YAML config, we maintain the compose files directly with references to the config system:
+
+1. **Infrastructure** (`docker/infrastructure.yml`) - PostgreSQL, Keycloak, Kong, shared networks/volumes.
+2. **API Services** (`docker/services-api.yml`) - llm-api, media-api, response-api.
+3. **MCP Services** (`docker/services-mcp.yml`) - mcp-tools, vector-store, sandbox helpers.
+4. **Observability** (`docker/observability.yml`) - Prometheus, Grafana, Jaeger, OTEL collector.
+5. **Inference** (`docker/inference.yml`) - vLLM GPU/CPU profiles.
+6. **Development overlay** (`docker/dev-full.yml`) - adds `host.docker.internal` mapping for hybrid workflows.
+
+The root `docker-compose.yml` stitches the profiles together (infrastructure + services + MCP + observability). Profiles such as `full`, `mcp`, `monitor`, and `dev-full` map directly to the files above.
+
+## Configuration Integration
+
+Each service in docker-compose references the standardized environment variables. Most values are pulled from `.env` (generated by `make quickstart`) and fall back to the defaults defined in `config/defaults.yaml` and `pkg/config/types.go`:
+
+```yaml
+services:
+ llm-api:
+ environment:
+ # Database - constructed DSN from config defaults
+ DB_POSTGRESQL_WRITE_DSN: "postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@api-db:5432/${POSTGRES_DB}?sslmode=disable"
+ 
+ # All other variables reference config/defaults.yaml structure
+ HTTP_PORT: ${HTTP_PORT:-8080}
+ LOG_LEVEL: ${LOG_LEVEL:-info}
+```
+
+## Why Direct Maintenance?
+
+1. **Simplicity** - Docker Compose is already declarative and easy to read
+2. **Flexibility** - Allows docker-specific optimizations (healthchecks, extra hosts, bind mounts)
+3. **Version Control** - Changes are clearly visible in git diffs
+4. **No Generation Overhead** - No build step required
+5. **Profiles** - Different dev/prod/monitoring stacks can be launched with a single `make` target
+
+## Future: Optional Generation
+
+If needed, a generator can be built using `pkg/config/compose/generator.go` that:
+- Reads `config/defaults.yaml`
+- Applies environment overrides
+- Generates docker-compose YAML files
+- Validates output
+
+## Validation
+
+To validate compose files:
+
+```bash
+# Validate syntax
+docker compose -f docker/infrastructure.yml config
+
+# Validate with current environment
+docker compose -f docker-compose.yml config
+
+# Dry-run full stack
+docker compose --profile full config
+
+# Verify dev overlay
+docker compose --profile dev-full config
+```
+
+Typical networks and volumes:
+- Networks: `jan-server_default` (core), `jan-server_mcp-network` (MCP helpers)
+- Volumes: `api-db-data`, `keycloak-db-data`, `vector-store-data`, `grafana-data`
+
+All of the above are declared in the compose snippets so they can be inspected with `docker compose config`.
+
+## Sprint 4 Status
+
+- OK **COMPLETE** - Docker compose files are consistent with config system
+- OK **COMPLETE** - All services use standardized environment variables
+- OK **COMPLETE** - Validation process documented
+
+**Rationale:** Direct maintenance is simpler and more maintainable than generation for this use case. The generator infrastructure exists in `pkg/config/compose/` if needed in the future.
diff --git a/docs/configuration/env-var-mapping.md b/docs/configuration/env-var-mapping.md
new file mode 100644
index 00000000..bbbfe7e2
--- /dev/null
+++ b/docs/configuration/env-var-mapping.md
@@ -0,0 +1,279 @@
+# Environment Variable Mapping
+
+This document maps centralized configuration (`pkg/config/types.go`) environment variables to service-specific variables, facilitating the Sprint 3 migration.
+
+## Infrastructure
+
+### Database (PostgreSQL)
+
+| Centralized Env Var | Type | Default | Services Using | Notes |
+|---------------------|------|---------|----------------|-------|
+| `POSTGRES_HOST` | string | `api-db` | llm-api | Replaces `DATABASE_URL` component |
+| `POSTGRES_PORT` | int | `5432` | llm-api | Replaces `DATABASE_URL` component |
+| `POSTGRES_USER` | string | `jan_user` | llm-api | Replaces `DATABASE_URL` component |
+| `POSTGRES_PASSWORD` | string | `jan_password` | llm-api | From secrets, replaces `DATABASE_URL` component |
+| `POSTGRES_DB` | string | `jan_llm_api` | llm-api | Replaces `DATABASE_URL` component |
+| `POSTGRES_SSL_MODE` | string | `disable` | llm-api | Replaces `DATABASE_URL` component |
+| `POSTGRES_MAX_CONNECTIONS` | int | `100` | llm-api | New standardized var |
+| `POSTGRES_MAX_IDLE_CONNS` | int | `5` | llm-api | New standardized var |
+| `POSTGRES_MAX_OPEN_CONNS` | int | `15` | llm-api | New standardized var |
+| `DB_CONN_MAX_LIFETIME` | duration | `30m` | llm-api | OK Already aligned |
+
+**Migration Notes:**
+- Services currently using `DATABASE_URL` should transition to component-based env vars
+- Connection URL is built from components: `postgres://user:password@host:port/database?sslmode=disable`
+- This allows better secret management (password separate from URL)
+
+### Authentication (Keycloak)
+
+| Centralized Env Var | Type | Default | Services Using | Notes |
+|---------------------|------|---------|----------------|-------|
+| `KEYCLOAK_BASE_URL` | string | `http://keycloak:8085` | llm-api | OK Already aligned |
+| `KEYCLOAK_REALM` | string | `jan` | llm-api | OK Already aligned |
+| `KEYCLOAK_HTTP_PORT` | int | `8085` | Infrastructure | New standardized var |
+| `KEYCLOAK_ADMIN` | string | `admin` | llm-api | OK Already aligned |
+| `KEYCLOAK_ADMIN_PASSWORD` | string | (secret) | llm-api | OK Already aligned |
+| `KEYCLOAK_ADMIN_REALM` | string | `master` | llm-api | OK Already aligned |
+| `KEYCLOAK_ADMIN_CLIENT_ID` | string | `admin-cli` | llm-api | OK Already aligned |
+| `BACKEND_CLIENT_ID` | string | `backend` | llm-api | OK Already aligned |
+| `BACKEND_CLIENT_SECRET` | string | (secret) | llm-api | OK Already aligned |
+| `CLIENT` | string | `jan-client` | llm-api | OK Already aligned |
+| `OAUTH_REDIRECT_URI` | string | `http://localhost:8000/auth/callback` | llm-api | OK Already aligned |
+| `JWKS_URL` | string | (computed) | llm-api | OK Already aligned |
+| `OIDC_DISCOVERY_URL` | string | (computed) | llm-api | New standardized var |
+| `ISSUER` | string | `http://localhost:8085/realms/jan` | llm-api | OK Already aligned |
+| `ACCOUNT` | string | `account` | llm-api | OK Already aligned |
+| `JWKS_REFRESH_INTERVAL` | duration | `5m` | llm-api | OK Already aligned |
+| `AUTH_CLOCK_SKEW` | duration | `60s` | llm-api | OK Already aligned |
+| `GUEST_ROLE` | string | `guest` | llm-api | OK Already aligned |
+| `KEYCLOAK_FEATURES` | []string | `token-exchange,preview` | Infrastructure | New standardized var |
+
+### Gateway (Kong)
+
+| Centralized Env Var | Type | Default | Services Using | Notes |
+|---------------------|------|---------|----------------|-------|
+| `KONG_HTTP_PORT` | int | `8000` | Infrastructure | New standardized var |
+| `KONG_ADMIN_PORT` | int | `8001` | Infrastructure | New standardized var |
+| `KONG_ADMIN_URL` | string | `http://kong:8001` | llm-api | OK Already aligned |
+| `KONG_LOG_LEVEL` | string | `info` | Infrastructure | New standardized var |
+
+## Services
+
+### LLM API
+
+| Centralized Env Var | Type | Default | Current Var | Status |
+|---------------------|------|---------|-------------|--------|
+| `HTTP_PORT` | int | `8080` | `HTTP_PORT` | OK Aligned |
+| `METRICS_PORT` | int | `9091` | `METRICS_PORT` | OK Aligned |
+| `LOG_LEVEL` | string | `info` | `LOG_LEVEL` | OK Aligned |
+| `LOG_FORMAT` | string | `json` | `LOG_FORMAT` | OK Aligned |
+| `AUTO_MIGRATE` | bool | `true` | `AUTO_MIGRATE` | OK Aligned |
+| `API_KEY_PREFIX` | string | `sk_live` | `API_KEY_PREFIX` | OK Aligned |
+| `API_KEY_DEFAULT_TTL` | duration | `2160h` | `API_KEY_DEFAULT_TTL` | OK Aligned |
+| `API_KEY_MAX_TTL` | duration | `2160h` | `API_KEY_MAX_TTL` | OK Aligned |
+| `API_KEY_MAX_PER_USER` | int | `5` | `API_KEY_MAX_PER_USER` | OK Aligned |
+| `MODEL_PROVIDER_SECRET` | string | `jan-model-provider-secret-2024` | `MODEL_PROVIDER_SECRET` | OK Aligned |
+| `MODEL_SYNC_ENABLED` | bool | `true` | `MODEL_SYNC_ENABLED` | OK Aligned |
+| `MODEL_SYNC_INTERVAL_MINUTES` | int | `60` | `MODEL_SYNC_INTERVAL_MINUTES` | OK Aligned |
+| `MEDIA_RESOLVE_URL` | string | `http://kong:8000/media/v1/media/resolve` | `MEDIA_RESOLVE_URL` | OK Aligned |
+| `MEDIA_RESOLVE_TIMEOUT` | duration | `5s` | `MEDIA_RESOLVE_TIMEOUT` | OK Aligned |
+
+**Provider Config:**
+| Centralized Env Var | Type | Default | Current Var | Status |
+|---------------------|------|---------|-------------|--------|
+| `JAN_PROVIDER_CONFIGS_FILE` | string | `config/providers.yml` | `JAN_PROVIDER_CONFIGS_FILE` | TODO Path may differ |
+| `JAN_PROVIDER_CONFIG_SET` | string | `default` | `JAN_PROVIDER_CONFIG_SET` | OK Aligned |
+| `JAN_PROVIDER_CONFIGS` | bool | `true` | `JAN_PROVIDER_CONFIGS` | OK Aligned |
+
+### MCP Tools
+
+| Centralized Env Var | Type | Default | Current Var | Status |
+|---------------------|------|---------|-------------|--------|
+| `MCP_TOOLS_HTTP_PORT` | int | `8091` | `HTTP_PORT` | TODO Need prefix |
+| `MCP_TOOLS_LOG_LEVEL` | string | `info` | `LOG_LEVEL` | TODO Need prefix |
+| `MCP_TOOLS_LOG_FORMAT` | string | `json` | `LOG_FORMAT` | TODO Need prefix |
+| `MCP_SEARCH_ENGINE` | string | `serper` | `SEARCH_ENGINE` | TODO Need prefix |
+| `SEARXNG_URL` | string | `http://searxng:8080` | `SEARXNG_URL` | OK Aligned |
+| `VECTOR_STORE_URL` | string | `http://vector-store:3015` | `VECTOR_STORE_URL` | OK Aligned |
+| `SANDBOXFUSION_URL` | string | `http://sandboxfusion:8080` | `SANDBOXFUSION_URL` | OK Aligned |
+| `MCP_SANDBOX_REQUIRE_APPROVAL` | bool | `true` | `SANDBOX_REQUIRE_APPROVAL` | TODO Need prefix |
+| `MCP_CONFIG_FILE` | string | `configs/mcp-providers.yml` | `MCP_CONFIG_FILE` | OK Aligned |
+
+**Migration Notes:**
+- Add `MCP_` or `MCP_TOOLS_` prefix to disambiguate from other services
+- `HTTP_PORT` collides with llm-api's `HTTP_PORT` when both services run in the same environment
+
+### Memory Tools
+
+| Centralized Env Var | Type | Default | Current Var | Status |
+|---------------------|------|---------|-------------|--------|
+| `MEMORY_TOOLS_PORT` | int | `8090` | `MEMORY_TOOLS_PORT` | OK Aligned |
+| `DB_POSTGRESQL_WRITE_DSN` | string | (computed) | `DATABASE_URL` | ✅ Migrated |
+| `DB_POSTGRESQL_READ1_DSN` | string | - | - | ✅ New (optional) |
+| `MEMORY_LOG_LEVEL` | string | `info` | `LOG_LEVEL` | TODO Need prefix |
+| `MEMORY_LOG_FORMAT` | string | `json` | `LOG_FORMAT` | TODO Need prefix |
+| `EMBEDDING_SERVICE_URL` | string | - | `EMBEDDING_SERVICE_URL` | OK Aligned |
+| `EMBEDDING_CACHE_TYPE` | string | `memory` | `EMBEDDING_CACHE_TYPE` | OK Aligned |
+| `EMBEDDING_CACHE_REDIS_URL` | string | `redis://redis:6379/3` | `EMBEDDING_CACHE_REDIS_URL` | OK Aligned |
+| `EMBEDDING_CACHE_KEY_PREFIX` | string | `emb:` | `EMBEDDING_CACHE_KEY_PREFIX` | OK Aligned |
+| `EMBEDDING_CACHE_MAX_SIZE` | int | `10000` | `EMBEDDING_CACHE_MAX_SIZE` | OK Aligned |
+| `EMBEDDING_CACHE_TTL` | duration | `1h` | `EMBEDDING_CACHE_TTL` | OK Aligned |
+
+**Migration Notes:**
+- Database configuration migrated from single `DATABASE_URL` to `DB_POSTGRESQL_WRITE_DSN` and optional `DB_POSTGRESQL_READ1_DSN`
+- Supports read/write splitting for better scalability
+- Can share database with other services or use separate database
+- Read replica is optional; falls back to write DSN if not configured
+
+### Media API
+
+| Centralized Env Var | Type | Default | Current Var | Status |
+|---------------------|------|---------|-------------|--------|
+| `MEDIA_API_PORT` | int | `8285` | `HTTP_PORT` | TODO Need rename |
+| `MEDIA_API_LOG_LEVEL` | string | `info` | `LOG_LEVEL` | TODO Need prefix |
+| `MEDIA_MAX_UPLOAD_BYTES` | int | `20971520` | `MAX_UPLOAD_SIZE` | TODO Rename needed |
+| `MEDIA_RETENTION_DAYS` | int | `30` | `RETENTION_DAYS` | TODO Need prefix |
+| `MEDIA_PROXY_DOWNLOAD` | bool | `true` | `PROXY_DOWNLOAD` | TODO Need prefix |
+| `MEDIA_REMOTE_FETCH_TIMEOUT` | duration | `15s` | `FETCH_TIMEOUT` | TODO Rename needed |
+| `MEDIA_S3_ENDPOINT` | string | `https://s3.menlo.ai` | `S3_ENDPOINT` | TODO Need prefix |
+| `MEDIA_S3_REGION` | string | `us-west-2` | `S3_REGION` | TODO Need prefix |
+| `MEDIA_S3_BUCKET` | string | `platform-dev` | `S3_BUCKET` | TODO Need prefix |
+| `MEDIA_S3_USE_PATH_STYLE` | bool | `true` | `S3_PATH_STYLE` | TODO Rename needed |
+| `MEDIA_S3_PRESIGN_TTL` | duration | `5m` | `PRESIGN_TTL` | TODO Need prefix |
+| `MEDIA_S3_ACCESS_KEY_ID` | string | (secret) | `AWS_ACCESS_KEY_ID` | TODO Rename for clarity |
+| `MEDIA_S3_SECRET_ACCESS_KEY` | string | (secret) | `AWS_SECRET_ACCESS_KEY` | TODO Rename for clarity |
+
+**Migration Notes:**
+- Most env vars need `MEDIA_` prefix to avoid conflicts
+- S3 vars should use `MEDIA_S3_` prefix for clarity
+- Consider AWS credential standardization
+
+### Response API
+
+| Centralized Env Var | Type | Default | Current Var | Status |
+|---------------------|------|---------|-------------|--------|
+| `RESPONSE_API_PORT` | int | `8082` | `HTTP_PORT` | TODO Need rename |
+| `RESPONSE_API_LOG_LEVEL` | string | `info` | `LOG_LEVEL` | TODO Need prefix |
+| `RESPONSE_LLM_API_URL` | string | `http://llm-api:8080` | `LLM_API_URL` | TODO Need prefix |
+| `RESPONSE_MCP_TOOLS_URL` | string | `http://mcp-tools:8091` | `MCP_TOOLS_URL` | TODO Need prefix |
+| `RESPONSE_MAX_TOOL_DEPTH` | int | `8` | `MAX_TOOL_DEPTH` | TODO Need prefix |
+| `RESPONSE_TOOL_TIMEOUT` | duration | `45s` | `TOOL_TIMEOUT` | TODO Need prefix |
+
+## Monitoring
+
+### OpenTelemetry
+
+| Centralized Env Var | Type | Default | Services Using | Status |
+|---------------------|------|---------|----------------|--------|
+| `OTEL_ENABLED` | bool | `false` | All services | OK Standard |
+| `OTEL_SERVICE_NAME` | string | `llm-api` | All services | TODO Service-specific |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | string | `http://otel-collector:4318` | All services | OK Standard |
+| `OTEL_HTTP_PORT` | int | `4318` | Infrastructure | New |
+| `OTEL_GRPC_PORT` | int | `4317` | Infrastructure | New |
+
+### Prometheus
+
+| Centralized Env Var | Type | Default | Services Using | Status |
+|---------------------|------|---------|----------------|--------|
+| `PROMETHEUS_PORT` | int | `9090` | Infrastructure | New |
+
+### Grafana
+
+| Centralized Env Var | Type | Default | Services Using | Status |
+|---------------------|------|---------|----------------|--------|
+| `GRAFANA_PORT` | int | `3331` | Infrastructure | New |
+| `GRAFANA_ADMIN_USER` | string | `admin` | Infrastructure | New |
+| `GRAFANA_ADMIN_PASSWORD` | string | (secret) | Infrastructure | New |
+
+### Jaeger
+
+| Centralized Env Var | Type | Default | Services Using | Status |
+|---------------------|------|---------|----------------|--------|
+| `JAEGER_UI_PORT` | int | `16686` | Infrastructure | New |
+
+## Inference
+
+### vLLM
+
+| Centralized Env Var | Type | Default | Services Using | Status |
+|---------------------|------|---------|-------------|--------|
+| `VLLM_ENABLED` | bool | `true` | Infrastructure | New |
+| `VLLM_PORT` | int | `8001` | llm-api | New |
+| `VLLM_MODEL` | string | `Qwen/Qwen2.5-0.5B-Instruct` | Infrastructure | New |
+| `VLLM_SERVED_NAME` | string | `qwen2.5-0.5b-instruct` | Infrastructure | New |
+| `VLLM_GPU_UTILIZATION` | float | `0.66` | Infrastructure | New |
+
+## Migration Priority
+
+### Phase 1: Critical (Sprint 3.1)
+OK **Already Aligned - No Changes Needed:**
+- llm-api authentication vars (Keycloak)
+- llm-api API key management
+- llm-api model sync
+- Database connection timeouts
+
+### Phase 2: High Priority (Sprint 3.2)
+TODO **Requires Prefix/Rename:**
+- Service-specific HTTP_PORT -> {SERVICE}_PORT
+- Service-specific LOG_LEVEL -> {SERVICE}_LOG_LEVEL
+- Database URL components (transition from DATABASE_URL)
+
+### Phase 3: Medium Priority (Sprint 3.3)
+TODO **New Variables - Add Support:**
+- Infrastructure monitoring ports (Prometheus, Grafana, Jaeger)
+- vLLM inference configuration
+- Kong gateway ports
+- Database connection pool settings
+
+### Phase 4: Low Priority (Sprint 3.4)
+TODO **Nice to Have:**
+- Media API S3 prefixing
+- Response API prefixing
+- MCP Tools prefixing
+
+## Testing Strategy
+
+### Per-Service Testing
+
+For each service after env var migration:
+
+1. **Unit Tests:** Verify config loading with new env vars
+2. **Integration Tests:** Test with Docker Compose
+3. **Precedence Tests:** Verify env vars override defaults
+4. **Backward Compatibility:** Old env vars still work (deprecation warnings)
+
+### Test Script Template
+
+```bash
+#!/bin/bash
+# Test service with new env vars
+
+# Set centralized env vars
+export POSTGRES_HOST=testdb
+export POSTGRES_PORT=5432
+export POSTGRES_USER=testuser
+export POSTGRES_PASSWORD=testpass
+export POSTGRES_DB=testdb
+export POSTGRES_SSL_MODE=disable
+
+# Run service
+./service-binary
+
+# Verify config loaded correctly
+curl http://localhost:8080/health
+```
+
+## Rollback Plan
+
+If migration causes issues:
+
+1. **Immediate:** Revert `docker-compose.yml` to use old env vars
+2. **Service-Level:** Keep backward compatibility (read both old and new vars)
+3. **Gradual Migration:** Migrate one service at a time, not all at once
+
+## See Also
+
+- [Service Migration Strategy](./service-migration.md)
+- [Configuration Precedence](./precedence.md)
+- [Configuration Types Reference](../../pkg/config/types.go)
+
diff --git a/docs/configuration/kubernetes.md b/docs/configuration/kubernetes.md
new file mode 100644
index 00000000..1303c72e
--- /dev/null
+++ b/docs/configuration/kubernetes.md
@@ -0,0 +1,329 @@
+# Kubernetes Helm Values Generation
+
+Automatic generation of Kubernetes Helm `values.yaml` files from Jan Server configuration.
+
+## Overview
+
+The K8s values generator maps Jan Server's unified configuration to Helm chart values, enabling:
+
+- **Single source of truth**: Configuration drives both local and K8s deployments
+- **Environment-specific overrides**: Development, staging, production profiles
+- **Consistent deployment**: Same config structure across all environments
+- **Type safety**: Generated from Go structs with validation
+
+## Architecture
+
+```
+config/defaults.yaml
+ v
+ Config Loader
+ v
+Values Generator -----> values-dev.yaml (1 replica, minimal resources)
+ +------------> values-prod.yaml (3 replicas, full resources, persistence)
+(Add values-staging.yaml as needed by copying values-dev.yaml)
+```
+
+## Generated Structure
+
+### Global Values
+```yaml
+global:
+ environment: development
+ imagePullPolicy: IfNotPresent
+ labels:
+ app.kubernetes.io/name: jan-server
+ app.kubernetes.io/version: 1.0.0
+ app.kubernetes.io/environment: development
+```
+
+### Service Values
+Each service gets:
+- **Deployment**: replicas, image, resources
+- **Service**: type, ports
+- **Health Checks**: liveness/readiness probes
+- **Config**: ConfigMaps for non-sensitive config
+- **Secrets**: References to K8s secrets
+
+Example:
+```yaml
+services:
+ llm-api:
+ enabled: true
+ replicaCount: 2
+ image:
+ repository: jan-llm-api
+ tag: 1.0.0
+ service:
+ type: ClusterIP
+ port: 8080
+ targetPort: 8080
+ resources:
+ limits:
+ cpu: 1000m
+ memory: 1Gi
+ requests:
+ cpu: 500m
+ memory: 512Mi
+ healthChecks:
+ livenessProbe:
+ httpGet:
+ path: /health
+ port: 8080
+ initialDelaySeconds: 30
+ readinessProbe:
+ httpGet:
+ path: /health
+ port: 8080
+ initialDelaySeconds: 10
+ configMap:
+ LOG_LEVEL: info
+ LOG_FORMAT: json
+ secrets:
+ - database-credentials
+ - keycloak-credentials
+```
+
+### Infrastructure Values
+Database and auth configuration:
+```yaml
+infrastructure:
+ database:
+ postgres:
+ enabled: true
+ host: api-db
+ port: 5432
+ database: jan_llm_api
+ user: jan_user
+ passwordSecret: postgres-password
+ maxConnections: 100
+ resources:
+ limits:
+ cpu: 2000m
+ memory: 2Gi
+ persistence:
+ enabled: true
+ size: 10Gi
+ auth:
+ keycloak:
+ enabled: true
+ baseURL: http://keycloak:8085
+ adminUser: admin
+ passwordSecret: keycloak-admin-password
+ resources:
+ limits:
+ cpu: 1000m
+ memory: 1Gi
+```
+
+## Environment Profiles
+
+### Development
+- Single replica per service
+- Minimal resources (100m CPU, 128Mi RAM requests)
+- `imagePullPolicy: Never` (use local images)
+- Persistence disabled
+- Lower health check thresholds
+
+### Staging
+- 2 replicas per service
+- Moderate resources (250m CPU, 256Mi RAM requests)
+- `imagePullPolicy: IfNotPresent`
+- Persistence enabled (20Gi)
+- Production-like settings
+
+### Production
+- 3 replicas per service
+- Full resources (500m CPU, 512Mi RAM requests)
+- `imagePullPolicy: Always`
+- Persistence enabled (50Gi)
+- Strict health checks
+- Higher failure thresholds
+
+## Usage
+
+### Programmatic Generation
+
+```go
+package main
+
+import (
+ "context"
+ "github.com/janhq/jan-server/pkg/config"
+ "github.com/janhq/jan-server/pkg/config/k8s"
+)
+
+func main() {
+ // Load configuration
+ loader := config.NewConfigLoader("development", "config/defaults.yaml")
+ cfg, _ := loader.Load(context.Background())
+
+ // Create generator
+ generator := k8s.NewValuesGenerator(cfg)
+
+ // Generate base values
+ generator.GenerateToFile("values.yaml")
+
+ // Generate with environment overrides
+ values, _ := generator.GenerateWithOverrides("production")
+ // Use values...
+}
+```
+
+### Example Program
+
+See `pkg/config/k8s/examples/generate_values.go`:
+
+```bash
+cd pkg/config/k8s/examples
+go run generate_values.go
+```
+
+Generates:
+- `values-dev.yaml` - Development environment
+- `values-prod.yaml` - Production environment
+
+## Configuration Mapping
+
+| Config Path | Helm Values Path | Notes |
+|-------------|-----------------|-------|
+| `meta.version` | `global.labels["app.kubernetes.io/version"]`, `*.image.tag` | Version across all components |
+| `meta.environment` | `global.environment`, `global.labels["app.kubernetes.io/environment"]` | Environment name |
+| `services.llm_api.http_port` | `services.llm-api.service.port` | Service port mapping |
+| `services.llm_api.log_level` | `services.llm-api.configMap.LOG_LEVEL` | Config as ConfigMap |
+| `infrastructure.database.postgres.*` | `infrastructure.database.postgres.*` | Direct mapping |
+| `infrastructure.auth.keycloak.*` | `infrastructure.auth.keycloak.*` | Direct mapping |
+
+## Secret Management
+
+**Important**: The generator creates **references** to secrets, not the secrets themselves.
+
+Secret references in values:
+```yaml
+services:
+ llm-api:
+ secrets:
+ - database-credentials # -> maps to K8s Secret
+ - keycloak-credentials
+```
+
+You must create K8s secrets separately:
+```bash
+kubectl create secret generic database-credentials \
+ --from-literal=password=<db-password>
+
+kubectl create secret generic keycloak-credentials \
+ --from-literal=admin-password=<keycloak-password>
+```
+
+Secrets are managed by DevOps via Kubernetes Secrets, HashiCorp Vault, or environment variables.
+
+## Resource Sizing
+
+### Default Resources
+
+| Service | CPU Request | CPU Limit | Memory Request | Memory Limit |
+|---------|------------|-----------|----------------|--------------|
+| llm-api | 500m | 1000m | 512Mi | 1Gi |
+| mcp-tools | 250m | 500m | 256Mi | 512Mi |
+| media-api | 250m | 500m | 256Mi | 512Mi |
+| response-api | 250m | 500m | 256Mi | 512Mi |
+| postgres | 1000m | 2000m | 1Gi | 2Gi |
+| keycloak | 500m | 1000m | 512Mi | 1Gi |
+
+### Scaling Recommendations
+
+For production workloads:
+- **High traffic**: Increase replicas (3-5+)
+- **Heavy AI workloads**: Increase llm-api resources (2-4 CPU, 2-4Gi RAM)
+- **Large databases**: Increase postgres resources and persistence size
+
+## Health Checks
+
+### Liveness Probes
+- Detect crashed containers
+- Restart unhealthy containers
+- Higher failure threshold (3-5)
+
+### Readiness Probes
+- Detect startup completion
+- Remove from service during issues
+- Lower failure threshold (3)
+
+Generated probes:
+```yaml
+livenessProbe:
+ httpGet:
+ path: /health
+ port: 8080
+ initialDelaySeconds: 30
+ periodSeconds: 10
+ timeoutSeconds: 5
+ failureThreshold: 3
+readinessProbe:
+ httpGet:
+ path: /health
+ port: 8080
+ initialDelaySeconds: 10
+ periodSeconds: 5
+ timeoutSeconds: 3
+ failureThreshold: 3
+```
+
+## Integration with Helm
+
+### Deploy with Generated Values
+
+```bash
+# Generate values
+go run pkg/config/k8s/examples/generate_values.go
+
+# Install chart with generated values
+helm install jan-server k8s/jan-server \
+ -f pkg/config/k8s/examples/values-prod.yaml
+```
+
+### Override Specific Values
+
+```bash
+# Use generated base + custom overrides
+helm install jan-server k8s/jan-server \
+ -f values-prod.yaml \
+ --set services.llm-api.replicaCount=5
+```
+
+### CI/CD Integration
+
+```yaml
+# .github/workflows/deploy.yml
+- name: Generate Helm Values
+ run: |
+ go run pkg/config/k8s/examples/generate_values.go
+ 
+- name: Deploy to K8s
+ run: |
+ helm upgrade --install jan-server k8s/jan-server \
+ -f values-prod.yaml \
+ --namespace jan-server
+```
+
+## Limitations
+
+- **Not a replacement for Helm templating**: Generator creates values, Helm chart still needed
+- **Basic resource sizing**: May need tuning based on actual workload
+- **Secrets not generated**: K8s secrets must be created separately by DevOps team
+- **No autoscaling**: HPA configuration not generated (add manually)
+
+## Future Enhancements
+
+- [ ] Autoscaling (HPA) configuration
+- [ ] Ingress generation
+- [ ] Network policies
+- [ ] Pod disruption budgets
+- [ ] Service mesh integration (Istio, Linkerd)
+- [ ] Custom resource definitions (CRDs)
+
+## See Also
+
+- [Configuration System](README.md)
+- [Helm Chart Documentation](../../k8s/jan-server/README.md)
+- [Deployment Guide](../../docs/guides/deployment.md)
diff --git a/docs/configuration/precedence.md b/docs/configuration/precedence.md
new file mode 100644
index 00000000..451554fe
--- /dev/null
+++ b/docs/configuration/precedence.md
@@ -0,0 +1,498 @@
+# Configuration Precedence
+
+How Jan Server loads configuration values and decides which source wins.
+
+## Quick Summary
+
+| Priority | Source | Notes |
+|----------|--------|-------|
+| **600** | CLI flags (future) | Planned for jan-cli integrations |
+| **500** | Environment variables | Secrets, overrides, CI/CD |
+| **300** | `config/environments/*.yaml` | Per-environment overrides |
+| **200** | `config/defaults.yaml` | Generated defaults (`make config-generate`) |
+| **100** | Struct tags (`envDefault`) | Absolute fallback values |
+
+**Rule**: the highest priority number wins every time. Example: `POSTGRES_PORT=5433` (priority 500) beats `port: 5432` in `defaults.yaml` (priority 200).
+
+## Configuration Sources
+
+### 1. StructDefaultSource (Priority 100)
+
+**Lowest priority**. These are the hardcoded defaults embedded in Go struct tags in `pkg/config/types.go`.
+
+```go
+type PostgresConfig struct {
+ Port int `yaml:"port" json:"port" env:"POSTGRES_PORT" envDefault:"5432"`
+ // ^^^^^^^^^^^^^^^^
+ // Priority 100
+}
+
+// NOTE: Never store secrets in envDefault tags
+```
+
+
+**When to use:** Never set directly - these are fallback defaults.
+
+**Example:**
+```go
+// From types.go
+Host string `envDefault:"api-db"` // Priority 100: "api-db"
+```
+
+### 2. YAMLDefaultSource (Priority 200)
+
+**Second-lowest priority**. Auto-generated `config/defaults.yaml` created by `make config-generate`.
+
+```yaml
+infrastructure:
+ database:
+ postgres:
+ host: "api-db" # Priority 200
+ port: 5432 # Priority 200
+ user: "jan_user"
+```
+
+**When to use:** Baseline configuration for every environment. Generated via `make config-generate`; never edit manually.
+
+**Example:**
+```bash
+# Generate defaults.yaml
+make config-generate
+
+# This creates config/defaults.yaml with all default values
+```
+
+### 3. YAMLEnvSource (Priority 300)
+
+**Middle priority**. Environment-specific configuration files in `config/environments/*.yaml`.
+
+```yaml
+# config/environments/production.yaml
+infrastructure:
+ database:
+ postgres:
+ host: "prod-db.example.com" # Priority 300: overrides defaults.yaml
+ port: 5432 # Priority 300: same as default, redundant
+ max_connections: 500 # Priority 300: production tuning
+```
+
+**When to use:**
+- Production/staging-specific settings
+- Infrastructure endpoints that differ per environment
+- Feature flags per environment
+
+**File naming:**
+- `config/environments/development.yaml`
+- `config/environments/staging.yaml`
+- `config/environments/production.yaml`
+
+**Example:**
+```go
+// Load development environment
+loader:= config.NewConfigLoader("development", "config/defaults.yaml")
+cfg, err:= loader.Load(context.Background())
+
+// Loads in order:
+// 1. Struct defaults (100)
+// 2. config/defaults.yaml (200)
+// 3. config/environments/development.yaml (300) <- Environment-specific
+// 4. Environment variables (500)
+```
+
+### 4. EnvVarSource (Priority 500)
+
+**Second-highest priority** (the highest currently implemented). System environment variables with names matching `env` struct tags.
+
+```bash
+# Priority 500: Overrides everything except CLI flags
+export POSTGRES_HOST=override-db
+export POSTGRES_PORT=5433
+export AUTO_MIGRATE=false
+```
+
+**When to use:**
+- Docker/Kubernetes deployments
+- CI/CD pipelines
+- Local development overrides
+- Secrets and sensitive values (managed by DevOps)
+
+**Tag mapping:**
+```go
+// From types.go
+Host string `env:"POSTGRES_HOST"` // Set with: export POSTGRES_HOST=value
+Port int `env:"POSTGRES_PORT"` // Set with: export POSTGRES_PORT=5433
+```
+
+**Example:**
+```bash
+# Override database host for local testing
+export POSTGRES_HOST=localhost
+export POSTGRES_PORT=5433
+
+# Run service - will use localhost:5433 instead of config values
+./llm-api
+```
+
+### 5. CLI Flags (Priority 600) - Planned
+
+**Highest priority**. Command-line flags (not yet implemented, planned for Sprint 7+).
+
+```bash
+# Planned for future
+./llm-api --db-host=emergency-db --db-port=5434
+```
+
+**When to use:**
+- Emergency overrides
+- One-time testing
+- Troubleshooting in production
+
+## Conflict Resolution
+
+### Merge Strategy
+
+The configuration loader uses a **non-zero override** strategy:
+
+1. Start with an empty `Config` struct
+2. Load sources in ascending priority order (100 -> 200 -> 300 -> 500)
+3. For each source:
+ - Load configuration values
+ - **Only non-zero values** from the source override existing values
+ - Zero values are skipped (don't override with empty strings, 0, false)
+
+### Example: Port Precedence
+
+```go
+// Initial state (empty Config)
+Port: 0
+
+// Load StructDefaultSource (Priority 100)
+Port: 5432 // From envDefault tag
+
+// Load YAMLDefaultSource (Priority 200)
+Port: 5432 // defaults.yaml matches struct default, no change
+
+// Load YAMLEnvSource (Priority 300)
+Port: 5433 // production.yaml overrides to 5433
+
+// Load EnvVarSource (Priority 500)
+Port: 5434 // POSTGRES_PORT env var wins
+ // Final value: 5434
+```
+
+### Provenance Tracking
+
+The loader tracks which source provided each final value:
+
+```go
+loader:= config.NewConfigLoader("production", "config/defaults.yaml")
+cfg, _:= loader.Load(ctx)
+
+// Print configuration sources
+fmt.Println(loader.Provenance())
+
+// Output:
+// Configuration Sources (priority order):
+// [100] struct-defaults
+// [200] yaml-defaults
+// [300] yaml-env-production
+// [500] env-vars
+//
+// Loaded 127 configuration values
+```
+
+### Debug: Finding Value Origin
+
+To debug where a specific value came from:
+
+```go
+info, err:= loader.Provenance("infrastructure.database.postgres.port")
+if err == nil {
+ fmt.Printf("Port came from: %s (priority %d)\n", info.Source, info.Priority)
+ // Output: Port came from: env-vars (priority 500)
+}
+```
+
+## Common Scenarios
+
+### Scenario 1: Development Override
+
+**Goal:** Use localhost database for local development
+
+```bash
+# Set environment variables
+export POSTGRES_HOST=localhost
+export POSTGRES_PORT=5432
+
+# Run service
+go run ./cmd/server
+```
+
+**Result:**
+- `POSTGRES_HOST`: localhost (priority 500, env var)
+- `POSTGRES_PORT`: 5432 (priority 500, env var)
+- All other settings: from defaults.yaml (priority 200)
+
+### Scenario 2: Production Deployment
+
+**Goal:** Use production database with secrets
+
+```yaml
+# config/environments/production.yaml (priority 300)
+infrastructure:
+ database:
+ postgres:
+ host: "prod-db.company.com"
+ ssl_mode: "require"
+```
+
+```bash
+# Kubernetes secret (priority 500)
+export POSTGRES_PASSWORD=<secret-from-vault>
+export POSTGRES_USER=<secret-from-vault>
+
+# Run with production environment
+./llm-api --environment=production
+```
+
+**Result:**
+- `host`: prod-db.company.com (priority 300, environment YAML)
+- `ssl_mode`: require (priority 300, environment YAML)
+- `user`, `password`: from env vars (priority 500, secrets)
+- All other settings: from defaults.yaml (priority 200)
+
+### Scenario 3: Temporary Override
+
+**Goal:** Test with different port without changing config
+
+```bash
+# One-time override for testing
+export POSTGRES_PORT=9999
+
+# Run test
+go test ./...
+
+# Unset when done
+unset POSTGRES_PORT
+```
+
+**Result:**
+- `POSTGRES_PORT`: 9999 (priority 500) during test
+- All other services still use 5432 from defaults
+
+## Data Type Handling
+
+### Strings
+```bash
+export POSTGRES_HOST=localhost # Simple string
+export KEYCLOAK_REALM=jan # No quotes needed
+```
+
+### Integers
+```bash
+export POSTGRES_PORT=5433 # Parsed as int
+export POSTGRES_MAX_CONNECTIONS=200
+```
+
+### Booleans
+```bash
+export AUTO_MIGRATE=true # Accepts: true, false, 1, 0
+export OTEL_ENABLED=false
+```
+
+### Durations
+```bash
+export DB_CONN_MAX_LIFETIME=45m # Go duration format
+export MEDIA_S3_PRESIGN_TTL=10m # Supports: ns, us, ms, s, m, h
+```
+
+### Slices (comma-separated)
+```bash
+export KEYCLOAK_FEATURES=token-exchange,preview,admin-api
+# Parsed as: []string{"token-exchange", "preview", "admin-api"}
+```
+
+## Best Practices
+
+### OK DO
+
+1. **Use environment variables for secrets**
+ ```bash
+ export POSTGRES_PASSWORD=$VAULT_SECRET
+ export AWS_ACCESS_KEY_ID=$AWS_KEY
+ ```
+
+2. **Use environment YAML for per-environment infrastructure**
+ ```yaml
+ # config/environments/staging.yaml
+ infrastructure:
+ database:
+ postgres:
+ host: "staging-db.internal"
+ ```
+
+3. **Keep defaults.yaml comprehensive**
+ ```bash
+ # Regenerate after adding new config fields
+ make config-generate
+ ```
+
+4. **Document precedence in comments**
+ ```yaml
+ # config/environments/production.yaml
+ infrastructure:
+ database:
+ postgres:
+ # Override for production (priority 300)
+ # Can still be overridden by POSTGRES_HOST env var (priority 500)
+ host: "prod-db.company.com"
+ ```
+
+### [X] DON'T
+
+1. **Don't manually edit defaults.yaml**
+ ```bash
+ # [X] WRONG: Manual edits will be overwritten
+ vim config/defaults.yaml
+ 
+ # OK CORRECT: Edit types.go and regenerate
+ vim pkg/config/types.go
+ make config-generate
+ ```
+
+2. **Don't put secrets in YAML files**
+ ```yaml
+ # [X] WRONG: Secret in version control
+ infrastructure:
+ database:
+ postgres:
+ password: "super-secret-123" # DON'T DO THIS
+ 
+ # OK CORRECT: Use environment variable (managed by DevOps)
+ # export POSTGRES_PASSWORD=super-secret-123
+ # Or use K8s Secrets, Vault, etc.
+ ```
+
+3. **Don't override everything in environment YAML**
+ ```yaml
+ # [X] WRONG: Duplicating all defaults
+ infrastructure:
+ database:
+ postgres:
+ host: "prod-db.com"
+ port: 5432 # Redundant with default
+ user: "jan_user" # Redundant with default
+ database: "jan_llm_api" # Redundant with default
+ 
+ # OK CORRECT: Only override what's different
+ infrastructure:
+ database:
+ postgres:
+ host: "prod-db.com" # Only this is different
+ ```
+
+4. **Don't rely on zero-value overrides**
+ ```bash
+ # [X] WRONG: Trying to "unset" a value
+ export AUTO_MIGRATE= # Empty string won't override true
+ 
+ # OK CORRECT: Explicitly set to false
+ export AUTO_MIGRATE=false
+ ```
+
+## Testing Precedence
+
+### Unit Test Example
+
+```go
+func TestConfigPrecedence(t *testing.T) {
+ // Set environment variable (priority 500)
+ os.Setenv("POSTGRES_PORT", "9999")
+ defer os.Clearenv()
+ 
+ // Create YAML file (priority 200)
+ yaml:= `
+infrastructure:
+ database:
+ postgres:
+ port: 5433
+`
+ os.WriteFile("config/defaults.yaml", []byte(yaml), 0644)
+ 
+ // Load configuration
+ loader:= config.NewConfigLoader("development", "config/defaults.yaml")
+ cfg, err:= loader.Load(context.Background())
+ 
+ // Environment variable should win
+ assert.Equal(t, 9999, cfg.Infrastructure.Database.Postgres.Port)
+ 
+ // Check provenance
+ prov:= loader.Provenance()
+ assert.Contains(t, prov, "env-vars") // Confirm env vars were loaded
+}
+```
+
+
+## Troubleshooting
+
+### Problem: Configuration not being applied
+
+**Symptom:** Set `POSTGRES_PORT=9999` but service still uses 5432
+
+**Solution:**
+1. Check environment variable name matches `env` tag:
+ ```go
+ Port int `env:"POSTGRES_PORT"` // Must be exact match
+ ```
+
+2. Verify environment variable is set in correct shell:
+ ```bash
+ printenv | grep POSTGRES_PORT
+ ```
+
+3. Check provenance to see what overrode it:
+ ```go
+ info, _:= loader.Provenance("infrastructure.database.postgres.port")
+ fmt.Printf("Source: %s (priority %d)\n", info.Source, info.Priority)
+ ```
+
+### Problem: Can't override YAML value
+
+**Symptom:** Changed `production.yaml` but using old value
+
+**Solution:**
+1. Check file path matches environment:
+ ```bash
+ ls config/environments/production.yaml # Must exist
+ ```
+
+2. Verify you're loading correct environment:
+ ```go
+ loader:= config.NewConfigLoader("production", "config/defaults.yaml")
+ // ^^^^^^^^^^^ Must match filename
+ ```
+
+3. Check for environment variable override (priority 500 > 300):
+ ```bash
+ unset POSTGRES_HOST # Remove higher-priority override
+ ```
+
+### Problem: Defaults not updating
+
+**Symptom:** Changed `envDefault` tag but `defaults.yaml` unchanged
+
+**Solution:**
+```bash
+# Regenerate defaults.yaml
+make config-generate
+
+# Verify CI drift detection passes
+go test -v ./pkg/config -run TestConfigDrift
+```
+
+## See Also
+
+- [Configuration README](../../pkg/config/README.md) - Implementation details
+- [Config Types Reference](../../pkg/config/types.go) - All configuration fields
+- [Code Generation](../../pkg/config/codegen/) - Schema and YAML generators
+- [Loader Tests](../../pkg/config/loader_test.go) - Precedence test examples
diff --git a/docs/configuration/service-migration.md b/docs/configuration/service-migration.md
new file mode 100644
index 00000000..d410c584
--- /dev/null
+++ b/docs/configuration/service-migration.md
@@ -0,0 +1,165 @@
+# Sprint 3: Service Migration Strategy
+
+## Overview
+
+Sprint 3 involves migrating services (llm-api, mcp-tools, media-api, response-api) to use the centralized configuration system in `pkg/config`.
+
+## Challenge: Module Dependencies
+
+The jan-server project uses a **workspace** structure where each service is a separate Go module:
+- `services/llm-api` has its own `go.mod`
+- `services/mcp-tools` has its own `go.mod`
+- `pkg/config` is at the root workspace level
+
+**Problem:** Services cannot directly import `github.com/janhq/jan-server/pkg/config` without either:
+1. Restructuring into a monorepo with shared pkg/
+2. Publishing pkg/config as a separate module
+3. Using Go workspace features to share the package
+
+## Recommended Approach
+
+### Phase 1: Environment Variable Alignment (Immediate)
+
+**Goal:** Ensure all services use the same environment variable names as defined in `pkg/config/types.go`
+
+**Tasks:**
+1. Audit each service's env tags against pkg/config/types.go
+2. Update service env tags to match centralized naming
+3. Update Docker Compose files to use new env var names
+4. Test each service independently
+
+**Example:**
+```go
+// Before (llm-api):
+HTTPPort int `env:"HTTP_PORT"`
+
+// After (aligned with pkg/config/types.go):
+HTTPPort int `env:"HTTP_PORT"` // OK Already matches!
+
+// Before (llm-api):
+DatabaseURL string `env:"DATABASE_URL"`
+
+// After (should use components):
+// Build from POSTGRES_HOST, POSTGRES_PORT, POSTGRES_USER, etc.
+```
+
+### Phase 2: Configuration Bridge Pattern (Sprint 3-4)
+
+**Goal:** Create bridge functions that convert centralized config to service-specific config
+
+**Implementation:**
+```go
+// In services/llm-api/internal/config/bridge.go
+
+import centralconfig "github.com/janhq/jan-server/pkg/config"
+
+// FromCentralConfig converts pkg/config.Config to llm-api Config
+func FromCentralConfig(central *centralconfig.Config) *Config {
+ return &Config{
+ HTTPPort: central.Services.LLMAPI.HTTPPort,
+ DatabaseURL: buildDatabaseURL(central.Infrastructure.Database.Postgres),
+ //... map all fields
+ }
+}
+```
+
+**Benefits:**
+- Gradual migration (can still use env vars)
+- Backward compatibility
+- Clear mapping between old and new config
+
+### Phase 3: Direct Integration (Sprint 5+)
+
+**Goal:** Services directly use pkg/config types
+
+**Requires:**
+1. Go workspace configuration or monorepo restructuring
+2. Update Wire providers to inject centralconfig.Config
+3. Remove service-specific Config structs
+4. Update all service code to use central types
+
+## Current Status
+
+### Sprint 2 Complete OK
+- pkg/config foundation built
+- Precedence system (100-600) implemented
+- All tests passing
+- Documentation complete
+
+### Sprint 3 Next Steps
+
+**Option A: Environment Variable Alignment (Recommended for Sprint 3)**
+- OK Low risk, immediate value
+- OK No code restructuring needed
+- OK Can be done per-service incrementally
+- Tasks:
+ 1. Create env var mapping document
+ 2. Update docker-compose.yml environment sections
+ 3. Update service env tags
+ 4. Test each service
+
+**Option B: Module Restructuring (Deferred to Sprint 4-5)**
+- WARNING Requires Go workspace setup or monorepo migration
+- WARNING Higher risk, more invasive
+- WARNING Blocks other development during migration
+- Tasks:
+ 1. Set up Go workspace in root
+ 2. Update all go.mod files
+ 3. Implement bridge pattern
+ 4. Migrate services one by one
+ 5. Comprehensive integration testing
+
+## Decision: Sprint 3 Scope
+
+**RECOMMENDATION:** Focus on Option A for Sprint 3
+
+**Rationale:**
+1. **Immediate Value:** Standardizing env vars provides immediate operational benefits
+2. **Low Risk:** No code restructuring - only env tag renames and configuration alignment
+3. **Foundation for Phase 2:** Makes bridge pattern easier in Sprint 4
+4. **Testable:** Can validate each service independently
+
+**Deliverables for Sprint 3:**
+1. Environment variable mapping document
+2. Updated docker-compose.yml with aligned env vars
+3. Service-by-service env var audit
+4. Integration tests validating env var precedence
+
+## Implementation Plan
+
+### Task 3.1: Environment Variable Audit
+
+Create `docs/configuration/env-var-mapping.md` documenting:
+- All env vars from pkg/config/types.go
+- Current env vars in each service
+- Mapping/migration needed
+- Deprecation timeline
+
+### Task 3.2: Docker Compose Updates
+
+Update `docker-compose.yml` and `docker/` files to use:
+- Standardized env var names
+- config/environments/*.yaml for environment-specific overrides
+
+### Task 3.3: Service Validation
+
+For each service (llm-api, mcp-tools, media-api, response-api):
+1. Update internal config env tags to match pkg/config
+2. Run unit tests
+3. Run integration tests
+4. Verify in Docker environment
+
+### Task 3.4: Documentation
+
+1. Update service READMEs with new env var names
+2. Create migration guide for operators
+3. Document any breaking changes
+
+## Sprint 4+ Preview
+
+Once Sprint 3 (env var alignment) is complete, Sprint 4 can tackle:
+- Go workspace setup
+- Bridge pattern implementation
+- Gradual service migration to use pkg/config directly
+
+This two-phase approach minimizes risk while delivering incremental value.
diff --git a/docs/conventions/architecture-patterns.md b/docs/conventions/architecture-patterns.md
new file mode 100644
index 00000000..7cba748a
--- /dev/null
+++ b/docs/conventions/architecture-patterns.md
@@ -0,0 +1,136 @@
+# Architecture & Structure Conventions
+
+> Use this file to understand how Jan Server is organised today. Every example below references the real repository structure under `services/<service>`.
+
+---
+
+## Repository Layout (Top Level)
+
+```
+jan-server/
++-- cmd/jan-cli/                # jan-cli sources + wrappers (jan-cli.sh / jan-cli.ps1)
++-- config/                     # Shared configuration defaults and templates
++-- docker/                     # Compose fragments (infra, services, observability)
++-- docs/                       # Documentation (guides, conventions, templates, etc.)
++-- k8s/                        # Helm chart + Kubernetes manifests
++-- services/
+|   +-- llm-api/
+|   +-- media-api/
+|   +-- response-api/
+|   +-- mcp-tools/
+|   +-- template-api/
++-- tests/                      # jan-cli api-test collections
++-- Makefile                    # Canonical automation entry point
++-- docker-compose.yml          # Root compose file wired to profiles
++-- docker-compose.dev-full.yml # Dev-Full overrides (host routing)
+```
+
+Each service folder contains the same structure:
+
+```
+services/<service>/
++-- cmd/
+|   +-- server/                 # Service entrypoint
+|   +-- gormgen/                # (llm-api) schema generator
++-- config/                     # Service-specific configuration helpers
++-- internal/
+|   +-- domain/                 # Business logic (no HTTP/DB imports)
+|   +-- infrastructure/         # Repositories, cache, provider clients
+|   +-- interfaces/httpserver/  # Gin routes, requests, responses, middlewares
++-- migrations/                 # SQL migrations
++-- swagger/ or docs/swagger/   # Generated OpenAPI files
++-- scripts/                    # Service utilities (optional)
++-- Makefile                    # Service-local helpers (e.g., `make gormgen`)
++-- go.mod / go.sum             # Module definition
+```
+
+> Paths in the other convention documents are relative to `services/<service>/`.
+
+---
+
+## Clean Architecture Layers
+
+```
+Interfaces (routes, cron, event consumers)
+        v
+Domain (entities, services, validation)
+        v
+Infrastructure (repositories, cache, providers)
+```
+
+**Rules:**
+- Domain packages only import other domain packages plus injected interfaces (e.g., repository interfaces).
+- Infrastructure implements those interfaces and may import external drivers (PostgreSQL, Redis, provider SDKs, etc.).
+- Interfaces (HTTP) bind requests to domain services. Do not place business logic in Gin handlers.
+
+---
+
+## File Placement Cheat Sheet
+
+| Task | Location | Example |
+|------|----------|---------|
+| New domain aggregate | `services/<svc>/internal/domain/<aggregate>/` | `services/llm-api/internal/domain/conversation/` |
+| New HTTP endpoint | `services/<svc>/internal/interfaces/httpserver/routes/<area>/` | `services/llm-api/internal/interfaces/httpserver/routes/v1/conversations/` |
+| New schema/table | `services/<svc>/internal/infrastructure/database/dbschema/` |
+| Repository implementation | `services/<svc>/internal/infrastructure/database/repository/<name>/` |
+| Cache / provider client | `services/<svc>/internal/infrastructure/<cache or provider>/` |
+| Shared helper | `services/<svc>/internal/utils/<category>/` |
+
+### Domain Entity Package
+
+```
+services/<svc>/internal/domain/<entity>/
++-- <entity>.go            # Entity struct + methods
++-- service.go             # Business logic / orchestrations
++-- filter.go              # Query filters (optional)
++-- dto.go                 # Converters if needed
+```
+
+### Infrastructure Repository Package
+
+```
+services/<svc>/internal/infrastructure/database/
++-- dbschema/              # Schema structs + EtoD/DToE helpers
++-- repository/
+|   +-- <entity>repo/      # Repository implementation
++-- gormgen/               # Generated query builders (llm-api)
+```
+
+### HTTP Interface Package
+
+```
+services/<svc>/internal/interfaces/httpserver/
++-- routes/v1/<group>/     # Route registration + handlers
++-- requests/<group>/      # Request DTOs + validation
++-- responses/<group>/     # Response DTOs
++-- middlewares/           # Shared middleware
+```
+
+---
+
+## When to Add New Packages
+
+1. **New domain concept** -> create `internal/domain/<concept>` with entity + service.
+2. **New transport handler** -> add to `internal/interfaces/httpserver/routes/v1/<area>` and create `requests/` and `responses/` entries as needed.
+3. **New persistence logic** -> add schema file under `dbschema/` and repository under `repository/<concept>repo/`. Run `make gormgen` afterwards.
+4. **New provider client** -> add package under `internal/infrastructure/<provider>/` and inject through the service constructors.
+
+---
+
+## Anti-Patterns To Avoid
+
+- **Direct DB access from handlers**: always go through domain services.
+- **Fat handlers**: route handlers should validate input, call domain services, and return responses - nothing more.
+- **Storing business logic in `internal/utils`**: keep helpers generic; domain rules belong in domain services.
+- **Creating interfaces "just in case"**: only introduce an interface when multiple implementations exist or tests require it.
+
+---
+
+## Quick Layer Checklist
+
+- Domain packages import only standard library and other domain packages.
+- Infrastructure packages never import HTTP routes.
+- Requests/responses convert to domain types immediately (`req.ToDomain()` / builders for responses).
+- GORM pointer rules enforced in `dbschema/` structs.
+
+See `design-patterns.md` for concrete code samples and `workflow.md` for the commands that keep code generation and testing in sync.
diff --git a/docs/conventions/conventions.md b/docs/conventions/conventions.md
new file mode 100644
index 00000000..1abcca61
--- /dev/null
+++ b/docs/conventions/conventions.md
@@ -0,0 +1,99 @@
+# CONVENTIONS - Quick Reference
+
+> Start here for the current Jan Server standards. Detailed references live in the other files inside `docs/conventions/`.
+
+---
+
+## Documentation Map
+
+| File | Purpose |
+|------|---------|
+| `conventions.md` (this file) | TL;DR and quick links |
+| `architecture-patterns.md` | Repository and service layout patterns |
+| `design-patterns.md` | Code-level guidance (DB, errors, entities) |
+| `workflow.md` | Daily workflow: git, testing, deployments |
+
+---
+
+## TL;DR Rules
+
+### Language & Tooling
+- **Go version:** `1.25` (matches the root `go.mod`). Run `go fmt ./...` and `go test ./...` before committing.
+- **Dependency hygiene:** `go mod tidy` inside the service you changed.
+
+### Architecture
+- Each service lives under `services/<name>/` and follows the same structure (`cmd/`, `internal/`, `migrations/`, etc.).
+- Clean architecture still applies: Interfaces (HTTP) -> Domain -> Infrastructure. Domain packages never import database or HTTP packages.
+
+### Database
+- GORM zero-value issue still exists. Use pointer fields (`*bool`, `*float64`, etc.) in schema structs and convert to plain types in domain models.
+- When schemas change, run `make gormgen` from the service directory (e.g., `cd services/llm-api && make gormgen`).
+
+### Error Handling
+- Trigger point (repository) creates errors via `platformerrors.NewError()`.
+- Handlers call `responses.HandleError()` so every response includes `requestID`.
+- Never log secrets or the raw error from external providers.
+
+### Naming
+- Exported symbols: `PascalCase`. Unexported: `camelCase`.
+- Database columns: `snake_case`.
+- Avoid stuttering (`provider.ID`, not `provider.ProviderID`).
+- Avoid single-word names; names must be meaningful and easy to read and understand.
+
+### Git & Commits
+- Conventional commits: `feat:`, `fix:`, `docs:`, `test:`, `chore:`, etc.
+- Branches: `type/short-description` (e.g., `feat/dev-full-refresh`).
+
+### Security
+- Secrets only live in `.env`/environment variables. `.env` is created from `.env.template` via `make setup` and never committed.
+- Kong + Keycloak handle auth; do not bypass JWT/API-key validation in services.
+
+---
+
+## Common Commands
+
+```bash
+# Setup & environments
+make setup              # Copy .env.template -> .env and docker/.env
+make up-full            # Start infra + APIs + MCP in Docker
+make dev-full           # Start Docker stack with host routing for native services
+./jan-cli.sh dev run llm-api   # Run a service on host (macOS/Linux)
+.\jan-cli.ps1 dev run llm-api  # Same on Windows
+
+# Monitoring & tooling
+make monitor-up         # Prometheus + Grafana + Jaeger
+make monitor-clean      # Stop monitoring and remove volumes
+
+# Testing
+make test-all           # Run every jan-cli api-test collection
+make test-auth          # Focused suite (see Makefile for others)
+go test ./services/llm-api/...    # Service-level unit tests
+
+# Code generation
+(cd services/llm-api && make gormgen)   # Regenerate GORM queries after schema changes
+make swagger            # Rebuild Swagger docs for all services
+
+# Database helpers
+make db-console         # Open psql shell inside api-db
+make db-reset           # Drop + recreate llm-api database
+```
+
+---
+
+## Critical Checklist Before Pushing
+
+1. `go fmt ./...` in every service you touched.
+2. `go test ./...` (unit) and `make test-all` or the relevant jan-cli api-test suites if you changed APIs.
+3. `make swagger` if REST contracts changed.
+4. `(cd services/<name> && make gormgen)` if DB schemas changed.
+5. `.env`/secrets unchanged and never committed.
+6. Conventional commit message, CI passes locally (`make up-full && make health-check`).
+
+---
+
+## Need More Detail?
+- **Structure & file placement:** `architecture-patterns.md`
+- **Code patterns (DB, entities, errors):** `design-patterns.md`
+- **Daily workflow (git, CI/CD, deployment):** `workflow.md`
+
+Always keep docs and commands in sync with the Makefile, jan-cli, and the actual service directories. If a command does not exist locally, update the documentation first.
diff --git a/docs/conventions/design-patterns.md b/docs/conventions/design-patterns.md
new file mode 100644
index 00000000..faccd05d
--- /dev/null
+++ b/docs/conventions/design-patterns.md
@@ -0,0 +1,642 @@
+# Code Patterns & Best Practices
+
+> **When to read this:** Daily reference for AI agents and developers writing code.
+> 
+> **For structure:** See [architecture-patterns.md](architecture-patterns.md) 
+> **For workflow:** See [workflow.md](workflow.md) 
+> **Quick reference:** See [conventions.md](conventions.md)
+
+---
+
+## Table of Contents
+
+1. [Database Patterns](#database-patterns)
+2. [Error Handling](#error-handling)
+3. [Domain Entity Creation](#domain-entity-creation)
+4. [Performance Patterns](#performance-patterns)
+
+> **Path convention:** The examples below assume you are working inside a service directory such as `services/llm-api/`. Adjust paths accordingly for other services.
+
+---
+
+## Database Patterns
+
+### GORM Zero-Value Handling (CRITICAL)
+
+**Problem:** GORM's `.Save()` silently skips fields with zero values (`false`, `0`, `0.0`) to avoid overwriting database data with uninitialized struct fields.
+
+**Solution:** Use pointer types for fields that may legitimately be set to zero values.
+
+```go
+// Bad: Cannot set Enabled to false or Amount to 0.0
+type User struct {
+ BaseModel
+ Enabled bool `gorm:"not null;default:true"`
+ Amount float64 `gorm:"not null"`
+}
+
+// Good: Use pointers for zero-affected fields
+type User struct {
+ BaseModel
+ Enabled *bool `gorm:"not null;default:true"`
+ Amount *float64 `gorm:"not null"`
+}
+
+// Conversion pattern in NewSchemaUser() - create pointer from value
+func NewSchemaUser(u *user.User) *User {
+ enabled:= u.Enabled
+ amount:= u.Amount
+ return &User{
+ Enabled: &enabled, // Always non-nil pointer
+ Amount: &amount,
+ }
+}
+
+// Conversion pattern in EtoD() - dereference with nil-check
+func (u *User) EtoD() *user.User {
+ enabled:= false // Default value
+ if u.Enabled != nil {
+ enabled = *u.Enabled
+ }
+ amount:= 0.0 // Default value
+ if u.Amount != nil {
+ amount = *u.Amount
+ }
+ return &user.User{
+ Enabled: enabled, // Plain type in domain
+ Amount: amount,
+ }
+}
+```
+
+**When to use pointers:**
+- Boolean fields that need to be `false` (e.g., `Enabled`, `Active`, `IsPrivate`)
+- Numeric fields that can be `0` or `0.0` (e.g., `Amount`, `Credits`)
+- Not needed for fields that are always non-zero (e.g., IDs, timestamps)
+- Not needed for counters that only increment (e.g., `ViewCount`)
+
+**Why this works:** The zero value of `*bool` is `nil`, so a non-nil pointer to `false` is NOT a zero value and GORM writes it.
+
+**Common scenarios fixed:**
+- Disabling API keys (`Enabled = false`)
+- Deactivating users/providers (`Active = false`)
+- Recording $0.00 transactions (`Amount = 0.0`)
+- Zero-credit operations (`Credits = 0`)
+
+---
+
+### Schema Definition Pattern
+
+```go
+// internal/infrastructure/database/dbschema/organization.go
+type Organization struct {
+ BaseModel // Must include BaseModel (ID, CreatedAt, UpdatedAt, DeletedAt)
+ PublicID string `gorm:"size:64;not null;uniqueIndex"`
+ Name string `gorm:"size:255;not null"`
+ Active *bool `gorm:"not null;default:true;index"` // Pointer for zero-value
+}
+
+func init() {
+ database.RegisterSchemaForAutoMigrate(Organization{})
+}
+
+// EtoD: Entity to Domain (method on schema struct)
+func (e *Organization) EtoD() *domain.Organization {
+ if e == nil {
+ return nil
+ }
+ active:= true // Default
+ if e.Active != nil {
+ active = *e.Active
+ }
+ return &domain.Organization{
+ ID: e.ID,
+ PublicID: e.PublicID,
+ Name: e.Name,
+ Active: active,
+ CreatedAt: e.CreatedAt,
+ UpdatedAt: e.UpdatedAt,
+ }
+}
+
+// NewSchemaOrganization: Domain to Entity (package-level function)
+func NewSchemaOrganization(d *domain.Organization) *Organization {
+ if d == nil {
+ return nil
+ }
+ active:= d.Active
+ return &Organization{
+ BaseModel: BaseModel{
+ ID: d.ID,
+ CreatedAt: d.CreatedAt,
+ UpdatedAt: d.UpdatedAt,
+ },
+ PublicID: d.PublicID,
+ Name: d.Name,
+ Active: &active,
+ }
+}
+```
+
+**Key points:**
+- `EtoD()` is a method (has receiver)
+- `NewSchema*()` is a function (no receiver)
+- Always nil-check to prevent panics
+- For pointers: create variable, then reference it
+
+---
+
+### Repository Pattern
+
+```go
+type OrganizationRepository struct {
+ db *transaction.Database
+}
+
+// Create returns the created entity with generated fields
+func (r *OrganizationRepository) Create(ctx context.Context, org *domain.Organization) (*domain.Organization, error) {
+ dbOrg:= dbschema.NewSchemaOrganization(org)
+ 
+ if err:= r.db.GetQuery(ctx).Organization.WithContext(ctx).Create(dbOrg); err != nil {
+ return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository,
+ platformerrors.ErrorTypeDatabaseError, "failed to create organization", err, 
+ "c1d2e3f4-a5b6-4c7d-8e9f-0a1b2c3d4e5f") // Unique UUID
+ }
+ 
+ return dbOrg.EtoD(), nil
+}
+
+// FindByID uses GORM gen for type-safe queries
+func (r *OrganizationRepository) FindByID(ctx context.Context, id string) (*domain.Organization, error) {
+ o:= r.db.GetQuery(ctx).Organization
+ dbOrg, err:= o.WithContext(ctx).Where(o.PublicID.Eq(id)).First()
+ 
+ if err != nil {
+ if errors.Is(err, gorm.ErrRecordNotFound) {
+ return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository,
+ platformerrors.ErrorTypeNotFound, "organization not found", err, 
+ "d4e5f6a7-b8c9-4d0e-1f2a-3b4c5d6e7f8a")
+ }
+ return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository,
+ platformerrors.ErrorTypeDatabaseError, "failed to find organization", err, 
+ "e7f8a9b0-c1d2-4e3f-4a5b-6c7d8e9f0a1b")
+ }
+ 
+ return dbOrg.EtoD(), nil
+}
+
+// Update uses .Save() - works correctly with pointer types
+func (r *OrganizationRepository) Update(ctx context.Context, org *domain.Organization) (*domain.Organization, error) {
+ dbOrg:= dbschema.NewSchemaOrganization(org)
+ 
+ if err:= r.db.GetQuery(ctx).Organization.WithContext(ctx).
+ Where(r.db.GetQuery(ctx).Organization.ID.Eq(org.ID)).
+ Save(dbOrg); err != nil {
+ return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository,
+ platformerrors.ErrorTypeDatabaseError, "failed to update organization", err, 
+ "f0a1b2c3-d4e5-4f6a-7b8c-9d0e1f2a3b4c")
+ }
+ 
+ return dbOrg.EtoD(), nil
+}
+
+// List with filters and pagination
+func (r *OrganizationRepository) List(ctx context.Context, filter *OrganizationFilter) ([]*domain.Organization, int64, error) {
+ o:= r.db.GetQuery(ctx).Organization
+ query:= o.WithContext(ctx)
+ 
+ // Apply filters
+ if filter.Active != nil {
+ query = query.Where(o.Active.Is(*filter.Active))
+ }
+ if filter.Name != nil {
+ query = query.Where(o.Name.Like("%" + *filter.Name + "%"))
+ }
+ 
+ // Count total
+ total, err:= query.Count()
+ if err != nil {
+ return nil, 0, platformerrors.NewError(ctx, platformerrors.LayerRepository,
+ platformerrors.ErrorTypeDatabaseError, "failed to count", err, "uuid-here")
+ }
+ 
+ // Apply pagination (cursor-based preferred)
+ if filter.Pagination != nil && filter.Pagination.LastID != "" {
+ query = query.Where(o.ID.Gt(filter.Pagination.LastID))
+ }
+ if filter.Pagination != nil && filter.Pagination.Limit > 0 {
+ query = query.Limit(filter.Pagination.Limit)
+ }
+ 
+ dbOrgs, err:= query.Find()
+ if err != nil {
+ return nil, 0, platformerrors.NewError(ctx, platformerrors.LayerRepository,
+ platformerrors.ErrorTypeDatabaseError, "failed to list", err, "uuid-here")
+ }
+ 
+ // Convert to domain
+ orgs:= make([]*domain.Organization, len(dbOrgs))
+ for i, dbOrg:= range dbOrgs {
+ orgs[i] = dbOrg.EtoD()
+ }
+ 
+ return orgs, total, nil
+}
+```
+
+**Filter signature:**
+```go
+type OrganizationFilter struct {
+ ID *uint
+ PublicID *string
+ Name *string
+ Active *bool
+ Pagination *PaginationFilter
+}
+
+type PaginationFilter struct {
+ Limit int
+ LastID string // For cursor-based pagination
+}
+```
+
+---
+
+### GORM Gen Usage
+
+```bash
+# After schema changes, regenerate queries
+cd services/llm-api
+make gormgen
+```
+
+**Generated queries** live in `services/llm-api/internal/infrastructure/database/gormgen/`
+
+**Type-safe queries:**
+```go
+// Good: Compile-time safe
+o:= query.Use(db).Organization
+orgs, err:= o.WithContext(ctx).
+ Where(o.Active.Is(true)).
+ Order(o.CreatedAt.Desc()).
+ Limit(100).
+ Find()
+
+// Bad: String-based (not type-safe)
+db.Where("active = ?", true).
+ Order("created_at DESC").
+ Find(&orgs)
+```
+
+---
+
+### Transactions
+
+```go
+// Use transaction wrapper
+err:= r.db.Transaction(func(tx *gorm.DB) error {
+ // All operations in this function are transactional
+ if err:= tx.Create(&org).Error; err != nil {
+ return err // Rolls back
+ }
+ 
+ if err:= tx.Create(&user).Error; err != nil {
+ return err // Rolls back
+ }
+ 
+ return nil // Commits
+})
+```
+
+---
+
+## Error Handling
+
+### Error Handling Philosophy
+
+**3-Layer Pattern:**
+1. **Repository (trigger point):** Use `NewError()` with unique UUID
+2. **Domain (business layer):** Use `AsError()` to add context OR pass through
+3. **Route (HTTP layer):** Use `HandleError()` or `HandleNewError()`
+
+### Trigger Point Pattern (Repository)
+
+```go
+func (r *UserRepository) FindByID(ctx context.Context, id string) (*domain.User, error) {
+ u:= query.Use(r.db.GetDB()).User
+ dbUser, err:= u.WithContext(ctx).Where(u.PublicID.Eq(id)).First()
+ 
+ if err != nil {
+ if errors.Is(err, gorm.ErrRecordNotFound) {
+ // NewError at trigger point with unique UUID
+ return nil, platformerrors.NewError(
+ ctx,
+ platformerrors.LayerRepository,
+ platformerrors.ErrorTypeNotFound,
+ "user not found",
+ err,
+ "3e47b618-b750-4064-9b22-ece9e244019d", // Generate unique UUID
+ )
+ }
+ return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository,
+ platformerrors.ErrorTypeDatabaseError, "database query failed", err, 
+ "7f29ac41-8d5e-4a73-b3c1-9e8f2d6a5c4b") // Different UUID per error
+ }
+ return dbUser.EtoD(), nil
+}
+```
+
+**UUID Generation:**
+```bash
+# VS Code: Install "UUID Generator" by netcorext
+# Command: Ctrl+Shift+P -> "Insert UUID"
+# CLI: uuidgen
+# Web: https://www.uuidgenerator.net/
+```
+
+### Domain Layer Pattern
+
+```go
+// Option 1: Add context with AsError()
+func (s *UserService) GetUser(ctx context.Context, id string) (*User, error) {
+ user, err:= s.repo.FindByID(ctx, id)
+ if err != nil {
+ return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, 
+ "failed to retrieve user")
+ }
+ return user, nil
+}
+
+// Option 2: Pass through (no additional context needed)
+func (s *UserService) GetUserSimple(ctx context.Context, id string) (*User, error) {
+ return s.repo.FindByID(ctx, id) // Just pass through
+}
+```
+
+### Route Layer Pattern
+
+```go
+// Use HandleError for errors from services
+func (r *UserRoute) GetUser(c *gin.Context) {
+ id:= c.Param("id")
+ 
+ user, err:= r.service.GetUser(c.Request.Context(), id)
+ if err != nil {
+ responses.HandleError(c, err) // Converts PlatformError to HTTP response
+ return
+ }
+ responses.Success(c, BuildUserResponse(user))
+}
+
+// Use HandleNewError for errors at route level
+func (r *UserRoute) CreateUser(c *gin.Context) {
+ var req CreateUserRequest
+ if err:= c.ShouldBindJSON(&req); err != nil {
+ responses.HandleNewError(c, platformerrors.LayerRoute,
+ platformerrors.ErrorTypeValidation, "invalid request", err)
+ return
+ }
+ 
+ user, err:= r.service.CreateUser(c.Request.Context(), req.ToDomain())
+ if err != nil {
+ responses.HandleError(c, err)
+ return
+ }
+ responses.Success(c, BuildUserResponse(user))
+}
+```
+
+---
+
+## Domain Entity Creation
+
+### Step-by-Step Pattern
+
+When creating a new entity (e.g., `organization`):
+
+#### 1. Domain Entity
+
+```go
+// internal/domain/organization/organization.go
+type Organization struct {
+ ID uint
+ PublicID string
+ Name string
+ Active bool
+ CreatedAt time.Time
+ UpdatedAt time.Time
+}
+
+// Entity validates itself
+func (o *Organization) Normalize() error {
+ o.Name = strings.TrimSpace(o.Name)
+ if o.Name == "" {
+ return errors.New("name required")
+ }
+ return nil
+}
+```
+
+#### 2. Domain Service
+
+```go
+// internal/domain/organization/organizationservice.go
+type OrganizationService struct {
+ repo *organizationrepo.OrganizationRepository
+ cache *organizationcache.OrganizationCache
+}
+
+// Work directly with domain entities
+func (s *OrganizationService) Create(ctx context.Context, org *Organization) (*Organization, error) {
+ if err:= org.Normalize(); err != nil {
+ return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain,
+ platformerrors.ErrorTypeValidation, "invalid organization", err, "uuid-here")
+ }
+ 
+ created, err:= s.repo.Create(ctx, org)
+ if err != nil {
+ return nil, err // Already wrapped at repository
+ }
+ 
+ return created, nil
+}
+```
+
+#### 3. Database Schema
+
+See [Schema Definition Pattern](#schema-definition-pattern) above
+
+#### 4. Repository
+
+See [Repository Pattern](#repository-pattern) above
+
+#### 5. HTTP Route
+
+```go
+// internal/interfaces/httpserver/routes/v1/management/organizations/route.go
+type OrganizationRoute struct {
+ service *organization.OrganizationService
+}
+
+func (r *OrganizationRoute) Create(c *gin.Context) {
+ var req CreateOrganizationRequest
+ if err:= c.ShouldBindJSON(&req); err != nil {
+ responses.HandleNewError(c, platformerrors.LayerRoute,
+ platformerrors.ErrorTypeValidation, "invalid request", err)
+ return
+ }
+ 
+ org, err:= r.service.Create(c.Request.Context(), req.ToDomain())
+ if err != nil {
+ responses.HandleError(c, err)
+ return
+ }
+ 
+ responses.Success(c, BuildOrganizationResponse(org))
+}
+```
+
+---
+
+## Performance Patterns
+
+### Avoid N+1 Queries
+
+```go
+// Bad: N+1 query problem
+users, _:= db.Find(&users)
+for _, user:= range users {
+ db.First(&profile, "user_id = ?", user.ID) // Query per user!
+}
+
+// Good: Preload relationships
+db.Preload("Profile").Find(&users)
+
+// Good: Use joins for filtering
+db.Joins("JOIN profiles ON profiles.user_id = users.id").
+ Where("profiles.verified = ?", true).
+ Find(&users)
+```
+
+### Cursor-Based Pagination
+
+```go
+// Good: Cursor-based (scales well)
+u:= query.Use(db).User
+users, err:= u.WithContext(ctx).
+ Where(u.ID.Gt(lastID)). // lastID from previous page
+ Limit(pageSize).
+ Find()
+
+// Acceptable for small datasets: Offset pagination
+users, err:= u.WithContext(ctx).
+ Offset(page * pageSize).
+ Limit(pageSize).
+ Find()
+```
+
+### Caching Pattern
+
+```go
+func (s *OrganizationService) GetByID(ctx context.Context, id string) (*Organization, error) {
+ // Try cache first
+ cacheKey:= fmt.Sprintf("org:%s", id)
+ if cached, err:= s.cache.Get(ctx, cacheKey); err == nil {
+ return cached, nil
+ }
+ 
+ // Cache miss: fetch from DB
+ org, err:= s.repo.FindByID(ctx, id)
+ if err != nil {
+ return nil, err
+ }
+ 
+ // Store in cache (fire and forget, don't block on cache errors)
+ go s.cache.Set(context.Background(), cacheKey, org, 5*time.Minute)
+ 
+ return org, nil
+}
+
+// Invalidate cache on updates
+func (s *OrganizationService) Update(ctx context.Context, org *Organization) (*Organization, error) {
+ updated, err:= s.repo.Update(ctx, org)
+ if err != nil {
+ return nil, err
+ }
+ 
+ // Invalidate cache
+ cacheKey:= fmt.Sprintf("org:%s", org.PublicID)
+ go s.cache.Delete(context.Background(), cacheKey)
+ 
+ return updated, nil
+}
+```
+
+### Context Timeouts
+
+```go
+// Set timeouts for external calls
+ctx, cancel:= context.WithTimeout(ctx, 10*time.Second)
+defer cancel()
+
+resp, err:= httpClient.Get(ctx, url)
+```
+
+### Batch Operations
+
+```go
+// Good: Batch insert
+db.CreateInBatches(users, 100) // Insert in batches of 100
+
+// Good: Bulk update with IN clause
+u:= query.Use(db).User
+u.WithContext(ctx).
+ Where(u.ID.In(ids...)).
+ Update(u.Active, false)
+```
+
+---
+
+## Common Patterns Reference
+
+### Request -> Domain Conversion
+
+```go
+type CreateOrganizationRequest struct {
+ Name string `json:"name" binding:"required"`
+}
+
+func (r *CreateOrganizationRequest) ToDomain() *organization.Organization {
+ return &organization.Organization{
+ Name: r.Name,
+ Active: true, // Default
+ }
+}
+```
+
+### Domain -> Response Conversion
+
+```go
+type OrganizationResponse struct {
+ ID string `json:"id"`
+ Name string `json:"name"`
+ Active bool `json:"active"`
+ CreatedAt string `json:"created_at"`
+}
+
+func BuildOrganizationResponse(org *organization.Organization) *OrganizationResponse {
+ return &OrganizationResponse{
+ ID: org.PublicID,
+ Name: org.Name,
+ Active: org.Active,
+ CreatedAt: org.CreatedAt.Format(time.RFC3339),
+ }
+}
+```
+
+---
+
+**See also:**
+- [architecture-patterns.md](architecture-patterns.md) - Structure & layers
+- [workflow.md](workflow.md) - Git, testing, deployment
+- [conventions.md](conventions.md) - Quick TL;DR reference
diff --git a/docs/conventions/workflow.md b/docs/conventions/workflow.md
new file mode 100644
index 00000000..a40364b0
--- /dev/null
+++ b/docs/conventions/workflow.md
@@ -0,0 +1,205 @@
+# Development Workflow & Process
+
+> Daily workflow for working on Jan Server. These steps reflect the multi-service layout (`services/<service>/`) and the Makefile targets that exist today.
+
+---
+
+## Table of Contents
+
+1. [Environment & Local Setup](#environment--local-setup)
+2. [Git Workflow](#git-workflow)
+3. [Testing Strategy](#testing-strategy)
+4. [Code Generation](#code-generation)
+5. [Security Practices](#security-practices)
+6. [Logging Standards](#logging-standards)
+7. [Code Review Checklist](#code-review-checklist)
+8. [Common Commands](#common-commands)
+9. [Deployment Checklist](#deployment-checklist)
+
+---
+
+## Environment & Local Setup
+
+```bash
+# 1. Clone
+git clone https://github.com/janhq/jan-server.git
+cd jan-server
+
+# 2. Create .env and docker/.env, verify Docker, etc.
+make setup
+
+# 3. Start full stack (PostgreSQL, Keycloak, Kong, APIs, MCP)
+make up-full
+
+# 4. Check health
+make health-check
+```
+
+### Dev-Full Hybrid Workflow
+
+Use dev-full when you want to run a service natively with IDE tooling while the rest stay in Docker.
+
+```bash
+make dev-full                 # Boot stack with host.docker.internal routing
+./jan-cli.sh dev run llm-api  # macOS/Linux (stops container + runs host process)
+.\jan-cli.ps1 dev run llm-api # Windows PowerShell
+```
+
+Stop dev-full with `make dev-full-stop` (containers paused) or `make dev-full-down` (containers removed).
+
+### Environment Variables
+
+- `.env` (created by `make setup`) is the single source of truth. Copy from `.env.template` and set secrets (HF token, SERPER key, etc.).
+- `docker/.env` is automatically kept in sync so Docker Compose uses the same values.
+- When running a service natively, pass the environment via `jan-cli dev run <service> --env path/to/env` or export manually.
+
+---
+
+## Git Workflow
+
+### Branches & Commits
+
+- Branch format: `type/short-description` (e.g., `feat/dev-full-refresh`).
+- Use [Conventional Commits](https://www.conventionalcommits.org/) for messages (`feat:`, `fix:`, `docs:`, `test:`, `chore:`, `refactor:`, etc.).
+
+### Typical Flow
+
+```bash
+git checkout -b feat/new-flow
+# ...edit files...
+go fmt ./...
+go test ./services/llm-api/...
+make test-all
+
+git add <files>
+git commit -m "feat(llm-api): add new flow"
+git push origin feat/new-flow
+```
+
+### Pull Requests
+
+1. Keep PRs focused on one change.
+2. Mention any manual testing performed (e.g., `make test-all`, `make dev-full`, curl commands).
+3. Update documentation when commands/configuration change.
+4. Wait for CI (GitHub Actions) to pass before merging.
+
+---
+
+## Testing Strategy
+
+| Scope | Command |
+|-------|---------|
+| Unit tests (per service) | `go test ./services/<service>/...` |
+| Full jan-cli api-test suite | `make test-all` |
+| Focused integration suites | `make test-auth`, `make test-conversations`, `make test-media`, `make test-mcp-integration`, etc. |
+| Health checks | `make health-check` |
+
+Guidelines:
+- Run the service-level Go tests before committing.
+- Run the relevant jan-cli api-test collection (or `make test-all`) when changing API routes, Kong config, or MCP tools.
+- For MCP-only work, run `make test-mcp-integration` plus manual curl checks against `http://localhost:8000/mcp`.
+
+---
+
+## Code Generation
+
+| Task | Command |
+|------|---------|
+| Swagger docs (all services) | `make swagger` |
+| GORM queries (llm-api) | `cd services/llm-api && make gormgen` |
+| Other services | Add service-local generators if needed (follow llm-api example) |
+
+Run generators whenever you change schema structs or API contracts, then commit the generated files.
+
+---
+
+## Security Practices
+
+- Secrets (`HF_TOKEN`, `SERPER_API_KEY`, `BACKEND_CLIENT_SECRET`, etc.) stay in `.env` only.
+- Never log access tokens, API keys, or PII. Use structured logging fields (`logger.With(...)`) instead.
+- Kong + Keycloak enforce JWT/API key validation—never bypass them in service routes.
+- Use HTTPS when transmitting secrets externally; assume `.env` values will be different in production.
+
+---
+
+## Logging Standards
+
+- Structured logs with `logger.With()` and key/value pairs.
+- Include `requestID` (propagated via middleware) in every log and error response.
+- Use log levels consistently: `Debug` for development noise, `Info` for state changes, `Warn` for recoverable issues, `Error` for failures.
+- Do not log request/response bodies unless behind a debug flag.
+
+---
+
+## Code Review Checklist
+
+**Architecture**
+- Domain packages contain business rules only.
+- Infrastructure code is injected and has no HTTP imports.
+- Routes remain thin; DTOs convert to domain structs immediately.
+
+**Database**
+- Schema structs use pointers for zero-value fields.
+- `EtoD()` / `NewSchema*()` cover nil vs default conversion.
+- Repositories rely on GORM-gen query builders.
+
+**Errors & Logging**
+- Trigger points create `platformerrors.NewError()` instances with UUIDs.
+- Errors bubble through `AsError()` and are rendered with `responses.HandleError()`.
+- Request IDs are logged and returned.
+
+**Testing**
+- Unit tests cover new code paths.
+- Integration/jan-cli api-test suites updated if APIs change.
+- Feature flags or env toggles documented.
+
+**Security**
+- No credentials committed or logged.
+- Input validation present on HTTP DTOs.
+- Kong/Keycloak configs updated if auth flow changes.
+
+**Docs**
+- README/guides updated when commands or endpoints change.
+- Swagger re-generated for API changes.
+
+---
+
+## Common Commands
+
+```bash
+# Services & infrastructure
+make up-full            # Start everything via Docker
+make down               # Stop and remove containers (keeps volumes)
+make dev-full           # Start Docker stack with host routing
+make dev-full-down      # Remove dev-full containers
+make monitor-up         # Observability stack
+make monitor-clean      # Remove monitoring volumes
+
+# Database helpers
+make db-console         # psql shell into api-db
+make db-reset           # Drop & recreate llm-api database
+
+# Testing
+make test-all           # All jan-cli api-test suites
+make test-auth          # Focused collection
+make test-mcp-integration
+
+# Lint/format (run inside the service module)
+go fmt ./...
+go test ./services/<service>/...
+```
+
+---
+
+## Deployment Checklist
+
+- [ ] `go fmt ./...`
+- [ ] `go test ./services/<service>/...`
+- [ ] `make test-all` (or relevant suites)
+- [ ] `make swagger` (if API contracts changed)
+- [ ] `(cd services/llm-api && make gormgen)` (if schemas changed)
+- [ ] `.env` changes documented but not committed
+- [ ] Docker/Kubernetes manifests updated if ports/env vars changed
+- [ ] PR reviewed and CI green
+
+Once everything passes, follow the platform release process (merge to `main`, tag if needed, then promote via the deployment pipeline).
diff --git a/docs/getting-started/README.md b/docs/getting-started/README.md
new file mode 100644
index 00000000..b8145f81
--- /dev/null
+++ b/docs/getting-started/README.md
@@ -0,0 +1,315 @@
+# Getting Started with Jan Server
+
+Welcome! This guide will help you get Jan Server up and running in minutes.
+
+> **Note:** This guide covers Docker Compose setup for local development. For Kubernetes deployment (production/staging), see:
+> - [Kubernetes Setup Guide](../../k8s/SETUP.md) - Complete step-by-step Kubernetes deployment
+> - [Deployment Guide](../guides/deployment.md) - All deployment options (Kubernetes, Docker Compose, Hybrid)
+
+## Prerequisites
+
+Before you begin, ensure you have:
+
+- **Docker Desktop** (Windows/macOS) or **Docker Engine + Docker Compose** (Linux)
+- **Make** (usually pre-installed on macOS/Linux, [install on Windows](https://gnuwin32.sourceforge.net/packages/make.htm))
+- **Git**
+- At least 8GB RAM available
+- For GPU inference: NVIDIA GPU with CUDA support
+
+Optional (for development):
+- Go 1.21+ 
+- Go 1.23+ (for jan-cli api-test)
+
+## Quick Setup
+
+### 1. Clone the Repository
+
+```bash
+git clone https://github.com/janhq/jan-server.git
+cd jan-server
+```
+
+### 2. Run the Setup Wizard (Recommended)
+
+```bash
+make quickstart
+```
+
+`make quickstart` launches the `jan-cli` wizard. It prompts for your LLM provider (local vLLM vs remote API), MCP search provider, and Media API preference, then writes `.env` plus `config/secrets.env`. When configuration finishes it automatically starts Docker Compose. Re-run the command anytime to update settings (answer **Y** when asked to overwrite `.env`).
+
+### Manual configuration (if you cannot run the wizard)
+
+```bash
+# Copy templates
+cp .env.template .env
+cp config/secrets.env.example config/secrets.env
+
+# Edit with your values
+nano .env
+nano config/secrets.env
+
+# Populate defaults and validate
+make setup
+```
+
+`make setup` uses `jan-cli` in non-interactive mode to check dependencies, ensure directories exist, and pull base images.
+
+**Configuration details:**
+- Canonical defaults live in `config/defaults.yaml` (generated from Go structs)
+- Secrets belong in `config/secrets.env` (copied from `config/secrets.env.example`)
+- Environment templates (Docker/Kubernetes) are documented in [Configuration System](../configuration/README.md)
+
+### 3. Start Services (skip if quickstart already did this)
+
+```bash
+# Start full stack (CPU inference)
+make up-full
+
+# Optional: start monitoring stack
+make monitor-up
+```
+
+Wait for all services to start (30-60 seconds). You can monitor progress with:
+```bash
+make logs
+```
+
+### 4. Verify Installation
+
+```bash
+make health-check
+```
+
+You should see all services reporting as healthy.
+
+## Access Services
+
+Once running, you can access:
+
+| Service | URL | Credentials |
+|---------|-----|-------------|
+| **API Gateway** | http://localhost:8000 | - |
+| **API Documentation** | http://localhost:8000/v1/swagger/ | - |
+| **LLM API** | http://localhost:8080 | `Authorization: Bearer <token>` |
+| **Response API** | http://localhost:8082 | `Authorization: Bearer <token>` |
+| **Media API** | http://localhost:8285 | `Authorization: Bearer <token>` |
+| **MCP Tools** | http://localhost:8091 | `Authorization: Bearer <token>` |
+| **Keycloak Console** | http://localhost:8085 | admin/admin |
+| **Grafana Dashboards** | http://localhost:3331 | admin/admin (after `make monitor-up`) |
+| **Prometheus** | http://localhost:9090 | - (after `make monitor-up`) |
+| **Jaeger Tracing** | http://localhost:16686 | - (after `make monitor-up`) |
+
+## Your First API Call
+
+### 1. Get a Guest Token via Kong
+
+```bash
+curl -X POST http://localhost:8000/llm/auth/guest-login
+```
+
+All traffic to `http://localhost:8000` flows through the Kong gateway, which validates Keycloak-issued JWTs or API keys (use `Authorization: Bearer <token>` or `X-API-Key: sk_*` headers).
+
+Response:
+```json
+{
+ "access_token": "eyJhbGci...",
+ "refresh_token": "eyJhbGci...",
+ "expires_in": 300
+}
+```
+
+### 2. Make a Chat Completion Request
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer YOUR_ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jan-v1-4b",
+ "messages": [
+ {"role": "user", "content": "What is the capital of France?"}
+ ],
+ "stream": false
+ }'
+```
+
+### 3. Try Streaming
+
+```bash
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer YOUR_ACCESS_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "jan-v1-4b",
+ "messages": [
+ {"role": "user", "content": "Tell me a short story"}
+ ],
+ "stream": true
+ }'
+```
+
+### 4. Use MCP Tools
+
+```bash
+# List available tools
+curl -X POST http://localhost:8000/v1/mcp \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 1,
+ "method": "tools/list"
+ }'
+
+# Google search
+curl -X POST http://localhost:8000/v1/mcp \
+ -H "Content-Type: application/json" \
+ -d '{
+ "jsonrpc": "2.0",
+ "id": 2,
+ "method": "tools/call",
+ "params": {
+ "name": "google_search",
+ "arguments": {
+ "q": "latest AI news"
+ }
+ }
+ }'
+```
+
+## Enable Monitoring (Optional)
+
+To enable full observability stack:
+
+```bash
+make monitor-up
+```
+
+Access:
+- **Grafana**: http://localhost:3331 (admin/admin)
+- **Prometheus**: http://localhost:9090
+- **Jaeger**: http://localhost:16686
+
+## Common Commands
+
+```bash
+# View logs
+make logs        # All services
+make logs-api    # API profile (LLM, Response, Media)
+make logs-mcp    # MCP Tools profile
+
+# Check status
+make health-check # Hit health endpoints
+docker compose ps # Container status
+
+# Restart services
+make restart-full   # Restart everything
+make restart-api    # Restart API profile
+
+# Stop services
+make down       # Stop all containers (keeps volumes)
+make down-clean # Stop containers and remove volumes
+```
+
+## Troubleshooting
+
+### Services won't start
+
+```bash
+# Check Docker
+docker --version
+docker compose version
+
+# Check status
+make health-check
+docker compose ps
+
+# View errors
+make logs
+
+# Full reset
+make down
+make down-clean
+make setup
+make up-full
+```
+
+### Port conflicts
+
+If you get port binding errors:
+
+```bash
+# Check what's using ports
+# Windows PowerShell:
+netstat -ano | findstr "8000 8080 8085"
+
+# macOS/Linux:
+lsof -i:8000
+lsof -i:8080
+lsof -i:8085
+
+# Kill conflicting processes or change ports in .env
+```
+
+### vLLM GPU issues
+
+```bash
+# Verify GPU availability
+docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi
+```
+
+If no GPU is detected:
+- Rerun `make quickstart` and choose the remote API option (skips local vLLM)
+- Or run `make up-vllm-cpu` to start the CPU-only vLLM profile when testing locally
+
+### Database connection errors
+
+```bash
+# Reset database
+make db-reset
+
+# Check database logs
+docker compose logs api-db
+
+# Verify connection
+make db-console
+```
+
+### API returns 401 Unauthorized
+
+- Check token hasn't expired (default: 5 minutes)
+- Get new guest token: `curl -X POST http://localhost:8000/llm/auth/guest-login`
+- Check `Authorization: Bearer <token>` header is set
+
+## What's Next?
+
+Now that you have Jan Server running:
+
+1. **Explore the API**:
+ - [API Reference](../api/README.md)
+ - [API Examples](../api/llm-api/examples.md)
+ - [Swagger UI](http://localhost:8000/v1/swagger/)
+
+2. **Learn Development**:
+ - [Development Guide](../guides/development.md)
+ - [Hybrid Mode](../guides/hybrid-mode.md) (recommended for development)
+ - [Testing Guide](../guides/testing.md)
+
+3. **Understand Architecture**:
+ - [Architecture Overview](../architecture/README.md)
+ - [System Design](../architecture/system-design.md)
+ - [Security Model](../architecture/security.md)
+
+4. **Deploy to Production**:
+ - [Deployment Guide](../guides/deployment.md)
+ - [Monitoring Guide](../guides/monitoring.md)
+
+## Need Help?
+
+- [Full Documentation](../README.md)
+- [Report Issues](https://github.com/janhq/jan-server/issues)
+- [Discussions](https://github.com/janhq/jan-server/discussions)
+- [Troubleshooting Guide](../guides/troubleshooting.md)
+
+---
+
+**Quick Reference**: `make help` | **All Commands**: `make help-all`
diff --git a/docs/guides/README.md b/docs/guides/README.md
new file mode 100644
index 00000000..af31df6b
--- /dev/null
+++ b/docs/guides/README.md
@@ -0,0 +1,122 @@
+# Guides
+
+Comprehensive how-to guides for working with Jan Server.
+
+## Available Guides
+
+### Development
+- **[Development Guide](development.md)** - Complete development workflow, setup, and best practices
+- **[VS Code Guide](ide/vscode.md)** - VS Code debugging, tasks, and environment configuration
+- **[Hybrid Mode](hybrid-mode.md)** - Run services natively for faster iteration and debugging
+- **[Testing Guide](testing.md)** - Unit tests, integration tests, and testing best practices
+
+### IDE Setup
+- **[VS Code](ide/vscode.md)** - Complete VS Code configuration, debugging, and tasks
+
+### Operations
+- **[Monitoring](monitoring.md)** - Observability, metrics, traces, and dashboards
+- **[Deployment](deployment.md)** - Kubernetes, Docker Compose, and hybrid deployment strategies
+- **[Troubleshooting](troubleshooting.md)** - Common issues and solutions
+
+### Special Topics
+- **[MCP Testing](mcp-testing.md)** - Testing MCP (Model Context Protocol) integration
+
+## Quick Links
+
+### For Developers
+| Task | Guide |
+|------|-------|
+| Setup local environment | [Development Guide](development.md) |
+| Debug with VS Code | [VS Code Guide](ide/vscode.md) |
+| Run services natively | [Hybrid Mode](hybrid-mode.md) |
+| Write and run tests | [Testing Guide](testing.md) |
+| Debug issues | [Troubleshooting](troubleshooting.md) |
+
+### For DevOps
+| Task | Guide |
+|------|-------|
+| Deploy to production | [Deployment Guide](deployment.md) |
+| Setup monitoring | [Monitoring](monitoring.md) |
+| Troubleshoot issues | [Troubleshooting](troubleshooting.md) |
+
+### For QA
+| Task | Guide |
+|------|-------|
+| Run integration tests | [Testing Guide](testing.md) |
+| Test MCP tools | [MCP Testing](mcp-testing.md) |
+
+## Common Tasks
+
+### Development Workflow
+```bash
+# 1. Setup development environment (.env + docker/.env)
+make setup
+
+# 2. Start everything in Docker
+make up-full
+
+# 3. Switch a service to native mode (optional)
+docker compose stop llm-api
+./jan-cli.sh dev run llm-api   # macOS/Linux
+.\jan-cli.ps1 dev run llm-api  # Windows
+
+# 4. Run automated tests
+make test-all                  # jan-cli api-test integration suites
+go test ./services/llm-api/... # Unit tests from source
+```
+
+Use [Development Guide](development.md) for the end-to-end workflow and [Dev-Full Mode](dev-full-mode.md) when you need host-native debugging.
+
+### Testing Workflow
+
+```bash
+# Integration tests (runs all Postman collections)
+make test-all
+
+# Specific test suites
+make test-auth
+make test-conversations
+make test-mcp
+
+# Unit tests from source
+go test ./...
+```
+
+See [Testing Guide](testing.md) for details.
+
+### Monitoring Setup
+
+```bash
+# Start monitoring stack
+make monitor-up
+
+# Access dashboards
+# - Grafana: http://localhost:3331
+# - Prometheus: http://localhost:9090
+# - Jaeger: http://localhost:16686
+
+# View logs
+make monitor-logs
+```
+
+See [Monitoring Guide](monitoring.md) for details.
+
+## Getting Help
+
+Each guide includes:
+- Step-by-step instructions
+- Code examples
+- Common pitfalls
+- Troubleshooting tips
+- Related resources
+
+### Need More Help?
+
+- Check the [Troubleshooting Guide](troubleshooting.md)
+- Review [Architecture Documentation](../architecture/README.md)
+- See [API Reference](../api/README.md)
+- Ask in [GitHub Discussions](https://github.com/janhq/jan-server/discussions)
+
+---
+
+**Back to**: [Documentation Home](../README.md) | **Next**: Choose a guide above
diff --git a/docs/guides/authentication.md b/docs/guides/authentication.md
new file mode 100644
index 00000000..eb028eff
--- /dev/null
+++ b/docs/guides/authentication.md
@@ -0,0 +1,83 @@
+# Authentication & Gateway
+
+This guide describes the Kong + Keycloak solution that fronts every `/llm/*` request in Jan Server. The implementation uses Kong OSS plugins (`jwt` + `keycloak-apikey`) so the edge accepts Keycloak-issued JWTs or scoped API keys before requests reach the microservices.
+
+## 1. Architectural Overview
+
+- **Kong gateway** (`http://localhost:8000`) is the sole public endpoint for Jan Server. Every API (LLM, Response, Media, MCP, auth) is exposed through Kong routes that perform JWT/API-key validation, rate limiting, request transformation, and header sanitation.
+- **Keycloak** (realm `jan`) issues OAuth2/OIDC tokens. Services and Kong both depend on the Keycloak JWKS endpoint (`http://keycloak:8085/realms/jan/protocol/openid-connect/certs`) for signature validation.
+- **LLM API** is responsible for guest onboarding, API key lifecycle endpoints, and the `/auth/validate-api-key` callback consumed by the Kong plugin.
+- **Custom auth plugin** (`keycloak-apikey`) replaces Kong consumers/credentials in DB-less mode by delegating API key validation to the service layer.
+
+## 2. Kong Authentication Flow
+
+| Plugin | Purpose | Key config |
+| ------ | ------- | ---------- |
+| `jwt` | Validates Keycloak JWTs | `key_claim_name: iss`, `claims_to_verify: ["exp","nbf"]`, `maximum_expiration: 3600`, `anonymous: kong-anon-jwt`, `secret_is_base64: false`, `run_on_preflight: false` |
+| `keycloak-apikey` | Validates API keys via LLM API | `validation_url: http://llm-api:8080/auth/validate-api-key`, `hide_credentials: true`, `validation_timeout: 5000`, `run_on_preflight: false` |
+| `request-termination` (anonymous fallback) | Returns 401 when neither plugin authenticates the request | - |
+
+The routes define **OR logic**: requests are accepted if either the JWT or API key plugin succeeds. Kong also injects `X-Auth-Method` (value `jwt` or `apikey`) and user context headers (`X-User-ID`, `X-User-Subject`, `X-User-Email`, `X-User-Username`) so downstream services know who authenticated the call.
+
+### Flowchart
+
+```
+Client
+ +--> Kong Gateway (`/llm/*`)
+ +-- JWT Plugin (Keycloak)
+ | +--> Valid token -> Add `X-Auth-Method: jwt`, inject user headers -> Upstream
+ +-- API Key Plugin (`keycloak-apikey`)
+ | +--> Forward `X-API-Key` to `llm-api/auth/validate-api-key`
+ | +--> LLM API hashes key, consults Keycloak -> Valid -> Inject headers + `X-Auth-Method: apikey`
+ +-- Request-termination (fallback) -> Return 401
+```
+
+## 3. Guest Tokens
+
+- **Endpoint**: `POST /llm/auth/guest-login` is exposed through Kong (`/llm/auth/guest-login` route). This endpoint creates a temporary Keycloak user and returns `access_token`, `refresh_token`, and metadata. Guest tokens are meant for quick local testing; they honor rate limits and expire after about 5 minutes.
+- **Temporary Email**: Guest users are automatically assigned a temporary email in the format `guest-{uuid}@temp.jan.ai` to satisfy Keycloak's email requirements. This temporary email is replaced with the real email when the guest account is upgraded via `POST /auth/upgrade`.
+- **Usage**: Obtain a token through Kong with `curl -X POST http://localhost:8000/llm/auth/guest-login`, then include `Authorization: Bearer <token>` on `/v1/*` calls. Kong forwards the request to `llm-api` and enforces the auth plugin (JWT may succeed immediately after issuance).
+- **Upgrade**: Call `POST /auth/upgrade` with the guest token to convert to a permanent account. The upgrade endpoint overwrites the temporary email with a real email and marks it as verified, and changes the `guest` attribute from `true` to `false`.
+- **Refresh**: Call `/llm/auth/refresh-token` or rely on Kong's JWT verification for new tokens in production flows.
+
+## 4. API Key Lifecycle
+
+- **Format**: Keys use the `sk_` prefix plus 32 random characters. The shared secret is shown only once (on creation). Services store only the SHA-256 hash inside Keycloak user attributes and PostgreSQL (`api_keys` table from `000001_init_schema.up.sql`).
+- **Endpoints** (require JWT auth):
+ - `POST /auth/api-keys` - Create a new API key tied to the authenticated user.
+ - `GET /auth/api-keys` - List active keys for the calling user.
+ - `DELETE /auth/api-keys/{id}` - Revoke a key.
+ - `POST /auth/validate-api-key` - Public validation endpoint called by Kong's plugin.
+- **Validation Flow**:
+ 1. Kong receives `X-API-Key` from the client.
+ 2. `keycloak-apikey` calls `http://llm-api:8080/auth/validate-api-key`.
+ 3. LLM API hashes the key, compares it against Keycloak attributes, and responds with user data (or `401` when invalid).
+ 4. Kong injects user headers and marks the request as authenticated (can now enforce rate limits per consumer).
+
+## 5. Keycloak Integration Notes
+
+- **JWKS**: The Kong `jwt` plugin fetches the Keycloak JWKS manually (no dynamic JWKS refresh). After rotating Keycloak signing keys, restart Kong manually or redeploy the gateway so it picks up the new keys.
+- **Admin API**: Credentials (JWT secrets) live only in the Kong Admin API and are never committed to Git. The gateway does not create consumers dynamically in DB-less mode, which keeps configuration declarative (`kong.yml`).
+- **Guest users**: Each guest login request creates a temporary Keycloak user with a temporary email (`guest-{uuid}@temp.jan.ai`) and the `guest` attribute set to `true`. These users can be upgraded to permanent accounts via `/auth/upgrade`, which replaces the temporary email with a real one and toggles the `guest` flag to `false`. Upgrade and refresh flows use the same `jan` realm policies as regular users.
+
+## 6. Environment & Deployment Guidance
+
+- **Overlays**: Use environment-specific Kong overlays (`docker`, `k8s/jan-server/templates`, etc.) to toggle TLS verification (`ssl_verify: false` in development, `true` plus CA bundles in staging/prod).
+- **Rate limiting**: Kong enforces per-IP limits at the gateway plus per-consumer bucketed limits where a consumer is resolved either from JWT claims (`iss` -> `keycloak-issuer`) or from API key metadata.
+- **Plugin loading**: Custom `keycloak-apikey` code lives in `kong/plugins/keycloak-apikey/` (handler + schema + README). Compose mounts `../kong/plugins:/usr/local/kong/plugins:ro` and sets `KONG_PLUGINS: bundled,keycloak-apikey`.
+- **Credentials**: The plugin uses `hide_credentials: true` so backend services never see the raw `X-API-Key`.
+
+## 7. Observability & Follow-up
+
+- **Metrics**: Expose plugin-specific stats for auth method usage and failure reasons. Consider adding Redis caching for `validate-api-key` responses to reduce latency.
+- **Logging**: Kong logs record which plugin succeeded; look for `X-Auth-Method` in `request-transformer`-injected headers.
+- **Tests**: jan-cli api-test suites verify `/auth/api-keys`, `/llm/auth/guest-login`, and the `validate-api-key` call. Run `make test-auth` in development.
+
+## 8. Security Hardening Summary
+
+- **Hashed secrets**: API key secrets are hashed with SHA-256 and stored inside Keycloak user attributes to avoid storing plaintext tokens.
+- **Single-use visibility**: Keys are shown only once (creation response) to prevent accidental leaks.
+- **Fallback response**: `request-termination` returns `401` when neither plugin authenticates, preventing unauthenticated requests from reaching services.
+- **Anonymous consumer**: The `kong-anon-jwt` anonymous consumer is configured purely for the OR logic gate; it has no access beyond the gateway.
+
+This document replaces the implementation roadmap formerly captured in `auth-todo.md`. Keep it updated whenever you add authentication routes, adjust Kong plugins, or change Keycloak realms.
diff --git a/docs/guides/background-mode.md b/docs/guides/background-mode.md
new file mode 100644
index 00000000..44b1b98e
--- /dev/null
+++ b/docs/guides/background-mode.md
@@ -0,0 +1,419 @@
+# Background Mode Implementation
+
+## Overview
+
+The Response API now supports OpenAI-compatible background mode for asynchronous response generation. This allows clients to submit long-running requests without holding open HTTP connections.
+
+## Architecture
+
+### Components
+
+1. **PostgreSQL-backed Queue**: Uses the `responses` table with `SELECT FOR UPDATE SKIP LOCKED` for reliable task distribution
+2. **Worker Pool**: Fixed-size pool of background workers (default: 4) that poll for queued tasks
+3. **Webhook Notifications**: HTTP POST callbacks when tasks complete or fail
+4. **Graceful Cancellation**: Queued tasks can be cancelled before execution begins
+
+### Task Lifecycle
+
+```
+Client Request (background=true, store=true)
+    ↓
+Create Response (status=queued, queued_at=now)
+    ↓
+Return Response Immediately
+    ↓
+Worker Dequeues Task
+    ↓
+Mark Processing (status=in_progress, started_at=now)
+    ↓
+Execute LLM Orchestration
+    ↓
+Update Status (completed/failed, completed_at=now)
+    ↓
+Send Webhook Notification (async, non-blocking)
+```
+
+## API Usage
+
+### Creating a Background Response
+
+**Request:**
+```http
+POST /responses
+Content-Type: application/json
+Authorization: Bearer <token>
+
+{
+  "model": "gpt-4",
+  "input": "Write a long article about...",
+  "background": true,
+  "store": true,
+  "metadata": {
+    "webhook_url": "https://example.com/webhooks/responses"
+  }
+}
+```
+
+**Response (201 Created):**
+```json
+{
+  "id": "resp_abc123",
+  "object": "response",
+  "status": "queued",
+  "background": true,
+  "store": true,
+  "queued_at": "2024-01-15T10:30:00Z",
+  "created": "2024-01-15T10:30:00Z",
+  "metadata": {
+    "webhook_url": "https://example.com/webhooks/responses"
+  }
+}
+```
+
+### Polling for Status
+
+**Request:**
+```http
+GET /responses/resp_abc123
+Authorization: Bearer <token>
+```
+
+**Response (In Progress):**
+```json
+{
+  "id": "resp_abc123",
+  "status": "in_progress",
+  "started_at": "2024-01-15T10:30:05Z",
+  ...
+}
+```
+
+**Response (Completed):**
+```json
+{
+  "id": "resp_abc123",
+  "status": "completed",
+  "output": "The article...",
+  "usage": {
+    "prompt_tokens": 150,
+    "completion_tokens": 500,
+    "total_tokens": 650
+  },
+  "started_at": "2024-01-15T10:30:05Z",
+  "completed_at": "2024-01-15T10:35:22Z",
+  ...
+}
+```
+
+### Cancelling a Background Task
+
+**Request:**
+```http
+POST /responses/resp_abc123/cancel
+Authorization: Bearer <token>
+```
+
+**Response:**
+```json
+{
+  "id": "resp_abc123",
+  "status": "cancelled",
+  "cancelled_at": "2024-01-15T10:31:00Z",
+  ...
+}
+```
+
+**Cancellation Behavior:**
+- If status is `queued`: Immediately marks cancelled, prevents worker pickup
+- If status is `in_progress`: Marks cancelled, but task may complete normally (cooperative cancellation)
+- If status is `completed` or `failed`: No-op, returns current state
+
+## Webhook Notifications
+
+### Webhook Payload (Completed)
+
+```json
+{
+  "id": "resp_abc123",
+  "event": "response.completed",
+  "status": "completed",
+  "output": "The response content...",
+  "metadata": {...},
+  "completed_at": "2024-01-15T10:35:22Z"
+}
+```
+
+### Webhook Payload (Failed)
+
+```json
+{
+  "id": "resp_abc123",
+  "event": "response.failed",
+  "status": "failed",
+  "error": {
+    "code": "execution_failed",
+    "message": "LLM provider timeout"
+  },
+  "metadata": {...}
+}
+```
+
+### Webhook Delivery
+
+- **Method**: HTTP POST
+- **Content-Type**: `application/json`
+- **Headers**:
+  - `User-Agent: jan-response-api/1.0`
+  - `X-Jan-Event: response.completed` (or `response.failed`)
+  - `X-Jan-Response-ID: resp_abc123`
+- **Retries**: Up to 3 attempts with 2-second delays
+- **Timeout**: 10 seconds per attempt
+- **Non-blocking**: Webhook failures are logged but don't affect task completion
+
+## Configuration
+
+### Environment Variables
+
+```bash
+# Background Task Processing
+BACKGROUND_WORKER_COUNT=4            # Number of concurrent workers
+BACKGROUND_TASK_TIMEOUT=600s         # Max execution time per task
+BACKGROUND_POLL_INTERVAL=2s          # How often workers poll for tasks
+
+# Webhook Configuration
+WEBHOOK_TIMEOUT=10s                  # HTTP request timeout
+WEBHOOK_MAX_RETRIES=3                # Number of retry attempts
+WEBHOOK_RETRY_DELAY=2s               # Delay between retries
+```
+
+### Recommended Settings
+
+**Development:**
+- `BACKGROUND_WORKER_COUNT=2`
+- `BACKGROUND_TASK_TIMEOUT=300s`
+
+**Production:**
+- `BACKGROUND_WORKER_COUNT=8`
+- `BACKGROUND_TASK_TIMEOUT=600s`
+- Monitor queue depth and adjust worker count as needed
+
+## Constraints
+
+1. **Store Requirement**: `background=true` requires `store=true`
+   - Returns `400 Bad Request` if violated
+   - Rationale: Background responses must be retrievable later
+
+2. **No Streaming**: Background mode never streams
+   - `stream` parameter is ignored for background tasks
+   - Rationale: The client receives the queued response immediately, so there is no open connection on which to consume a stream
+
+3. **Task Timeout**: Tasks exceeding `BACKGROUND_TASK_TIMEOUT` are terminated
+   - Status marked as `failed` with timeout error
+   - Webhook notification sent
+
+## Database Schema
+
+### New Fields in `responses` Table
+
+```sql
+-- Indicates if response was created in background mode
+background BOOLEAN NOT NULL DEFAULT FALSE;
+
+-- Indicates if response should be stored (required for background)
+store BOOLEAN NOT NULL DEFAULT FALSE;
+
+-- Timestamp when task was queued
+queued_at TIMESTAMP;
+
+-- Timestamp when worker began processing
+started_at TIMESTAMP;
+
+-- Index for efficient queue queries
+CREATE INDEX idx_responses_status ON responses(status) WHERE background = TRUE;
+```
+
+### Queue Query
+
+Workers use this query to dequeue tasks:
+
+```sql
+SELECT * FROM responses
+WHERE status = 'queued'
+  AND background = TRUE
+ORDER BY queued_at ASC
+LIMIT 1
+FOR UPDATE SKIP LOCKED;
+```
+
+## Monitoring
+
+### Key Metrics
+
+1. **Queue Depth**: Count of tasks with `status='queued'`
+   ```sql
+   SELECT COUNT(*) FROM responses WHERE status = 'queued' AND background = TRUE;
+   ```
+
+2. **Average Processing Time**: 
+   ```sql
+   SELECT AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
+   FROM responses
+   WHERE status IN ('completed', 'failed')
+     AND background = TRUE
+     AND started_at IS NOT NULL;
+   ```
+
+3. **Worker Utilization**:
+   ```sql
+   SELECT COUNT(*) FROM responses WHERE status = 'in_progress' AND background = TRUE;
+   ```
+
+4. **Failure Rate**:
+   ```sql
+   SELECT 
+     COUNT(CASE WHEN status = 'failed' THEN 1 END) * 100.0 / COUNT(*) as failure_rate
+   FROM responses
+   WHERE background = TRUE AND status IN ('completed', 'failed');
+   ```
+
+### Logging
+
+Workers log structured events:
+
+```json
+{
+  "level": "info",
+  "component": "worker",
+  "worker_id": 2,
+  "response_id": "resp_abc123",
+  "user_id": "user_xyz",
+  "model": "gpt-4",
+  "message": "processing background task"
+}
+```
+
+## Error Handling
+
+### Common Errors
+
+| Error | HTTP Status | Description |
+|-------|-------------|-------------|
+| Missing Store | 400 | `background=true` without `store=true` |
+| Task Timeout | 500 | Task exceeded `BACKGROUND_TASK_TIMEOUT` |
+| LLM Provider Error | 500 | Upstream LLM API failure |
+| Tool Execution Error | 500 | MCP tool call failed |
+
+### Recovery
+
+- **Transient Failures**: Tasks remain queued, workers retry automatically
+- **Persistent Failures**: Status marked `failed`, error details in response
+- **Webhook Failures**: Logged but don't block task completion
+
+## Testing
+
+### jan-cli api-test Collection
+
+Run the Postman collection for background mode:
+
+```bash
+jan-cli api-test run tests/postman/responses-background-webhook.json \
+  --environment tests/postman/environments/local.json \
+  --delay-request 1000 \
+  --timeout-request 60000
+```
+
+### Manual Testing
+
+1. **Create Background Task**:
+   ```bash
+   curl -X POST http://localhost:8082/responses \
+     -H "Content-Type: application/json" \
+     -d '{
+       "model": "gpt-4",
+       "input": "Count to 10 slowly",
+       "background": true,
+       "store": true,
+       "metadata": {"webhook_url": "https://webhook.site/unique-id"}
+     }'
+   ```
+
+2. **Poll Status**:
+   ```bash
+   curl http://localhost:8082/responses/resp_abc123
+   ```
+
+3. **Cancel Task**:
+   ```bash
+   curl -X POST http://localhost:8082/responses/resp_abc123/cancel
+   ```
+
+## Migration Guide
+
+### Upgrading Existing Systems
+
+1. **Database Migration**: Run migrations to add new columns
+2. **Configuration**: Set environment variables for workers
+3. **Deployment**: Rolling update (workers start automatically)
+4. **Verification**: Check worker logs for startup
+
+### Backward Compatibility
+
+- Synchronous mode (`background=false`) unchanged
+- Existing endpoints and behavior preserved
+- Optional feature, no breaking changes
+
+## Troubleshooting
+
+### Queue Stuck
+
+**Symptom**: Tasks remain in `queued` status
+
+**Check**:
+1. Are workers running? Check logs for "worker started"
+2. Database connection healthy?
+3. Any database locks? Check `pg_locks`
+
+**Fix**:
+```bash
+# Restart workers
+docker restart response-api
+```
+
+### Webhooks Not Delivered
+
+**Symptom**: No webhook received despite completed task
+
+**Check**:
+1. Is `webhook_url` in metadata?
+2. Is webhook endpoint reachable?
+3. Check logs for "webhook notification failed"
+
+**Fix**:
+- Verify webhook URL is correct and accessible
+- Check firewall/network rules
+- Webhook failures don't affect task completion; undelivered notifications must be retried manually
+
+### High Queue Depth
+
+**Symptom**: Growing number of queued tasks
+
+**Check**:
+1. Worker utilization (should be near `BACKGROUND_WORKER_COUNT`)
+2. Average processing time increasing?
+3. LLM provider throttling?
+
+**Fix**:
+```bash
+# Increase workers
+export BACKGROUND_WORKER_COUNT=8
+docker restart response-api
+```
+
+## Future Enhancements
+
+- [ ] Redis-backed queue for higher throughput
+- [ ] Priority queuing (high/low priority tasks)
+- [ ] Dead letter queue for failed tasks
+- [ ] Webhook retry with exponential backoff
+- [ ] Prometheus metrics export
+- [ ] Grafana dashboard templates
diff --git a/docs/guides/deployment.md b/docs/guides/deployment.md
new file mode 100644
index 00000000..82c4b6dd
--- /dev/null
+++ b/docs/guides/deployment.md
@@ -0,0 +1,434 @@
+# Deployment Guide
+
+Comprehensive guide for deploying Jan Server to various environments.
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Prerequisites](#prerequisites)
+- [Deployment Options](#deployment-options)
+  - [Kubernetes (Recommended)](#kubernetes-recommended)
+  - [Docker Compose](#docker-compose)
+  - [Hybrid Mode](#hybrid-mode)
+- [Environment Configuration](#environment-configuration)
+- [Security Considerations](#security-considerations)
+- [Monitoring and Observability](#monitoring-and-observability)
+
+## Overview
+
+Jan Server supports multiple deployment strategies to accommodate different use cases:
+
+| Environment | Use Case | Orchestrator | Recommended For |
+|-------------|----------|--------------|-----------------|
+| **Kubernetes** | Production, Staging | Kubernetes/Helm | Scalable production deployments |
+| **Docker Compose** | Development, Testing | Docker Compose | Local development and testing |
+| **Hybrid Mode** | Development | Native + Docker | Fast iteration and debugging |
+
+## Prerequisites
+
+### All Deployments
+
+- Docker 24+ and Docker Compose V2
+- PostgreSQL 18+ (managed or in-cluster)
+- Redis 7+ (managed or in-cluster)
+- S3-compatible storage (for media-api)
+
+### Kubernetes Deployments
+
+- Kubernetes 1.27+
+- Helm 3.12+
+- kubectl configured
+- Sufficient cluster resources (see [Resource Requirements](#resource-requirements))
+
+## Deployment Options
+
+### Kubernetes (Recommended)
+
+Kubernetes deployment uses Helm charts for full orchestration and scalability.
+
+#### 1. Development (Minikube)
+
+For local development and testing:
+
+```bash
+# Prerequisites
+minikube start --cpus=4 --memory=8192 --driver=docker
+
+# Build and load images
+cd services/llm-api && go mod tidy && cd ../..
+cd services/media-api && go mod tidy && cd ../..
+cd services/mcp-tools && go mod tidy && cd ../..
+
+docker build -t jan/llm-api:latest -f services/llm-api/Dockerfile .
+docker build -t jan/media-api:latest -f services/media-api/Dockerfile .
+docker build -t jan/mcp-tools:latest -f services/mcp-tools/Dockerfile .
+
+# Load images into minikube
+minikube image load jan/llm-api:latest jan/media-api:latest jan/mcp-tools:latest
+minikube image load quay.io/keycloak/keycloak:24.0.5
+minikube image load bitnami/postgresql:latest bitnami/redis:latest
+
+# Deploy
+cd k8s
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace
+
+# Create databases
+kubectl exec -n jan-server jan-server-postgresql-0 -- bash -c "PGPASSWORD=postgres psql -U postgres << 'EOF'
+CREATE USER media WITH PASSWORD 'media';
+CREATE DATABASE media_api OWNER media;
+CREATE USER keycloak WITH PASSWORD 'keycloak';
+CREATE DATABASE keycloak OWNER keycloak;
+EOF"
+
+# Verify deployment
+kubectl get pods -n jan-server
+
+# Access services
+kubectl port-forward -n jan-server svc/jan-server-llm-api 8080:8080
+curl http://localhost:8080/healthz
+```
+
+**Complete guide:** See [k8s/SETUP.md](../../k8s/SETUP.md)
+
+#### 2. Cloud Kubernetes (AKS/EKS/GKE)
+
+For production cloud deployments:
+
+```bash
+# Option A: With cloud-managed databases (recommended)
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --set postgresql.enabled=false \
+  --set redis.enabled=false \
+  --set global.postgresql.host=your-managed-postgres.cloud \
+  --set global.redis.host=your-managed-redis.cloud \
+  --set ingress.enabled=true \
+  --set ingress.className=nginx \
+  --set ingress.hosts[0].host=jan.yourdomain.com \
+  --set llmApi.autoscaling.enabled=true \
+  --set llmApi.replicaCount=3 \
+  --set llmApi.image.pullPolicy=Always \
+  --set mediaApi.image.pullPolicy=Always \
+  --set mcpTools.image.pullPolicy=Always
+
+# Option B: With in-cluster databases
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --set postgresql.persistence.enabled=true \
+  --set postgresql.persistence.size=50Gi \
+  --set postgresql.persistence.storageClass=gp3 \
+  --set redis.master.persistence.enabled=true \
+  --set ingress.enabled=true \
+  --set llmApi.autoscaling.enabled=true
+```
+
+**Configuration guide:** See [k8s/README.md](../../k8s/README.md)
+
+#### 3. On-Premises Kubernetes
+
+For on-premises production:
+
+```bash
+# Use production values with external databases
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --values ./jan-server/values-production.yaml \
+  --set postgresql.enabled=false \
+  --set redis.enabled=false \
+  --set global.postgresql.host=postgres.internal \
+  --set global.redis.host=redis.internal
+```
+
+### Docker Compose
+
+For local development and integration testing.
+
+#### Development Mode
+
+```bash
+# Start infrastructure only (PostgreSQL, Keycloak, Kong)
+make up-infra
+
+# With API services (llm-api, media-api, response-api)
+make up-api
+
+# With MCP services (mcp-tools, vector-store)
+make up-mcp
+
+# Full stack with Kong + APIs + MCP
+make up-full
+
+# With GPU inference (local vLLM)
+make up-vllm-gpu
+```
+
+**Complete guide:** See [Development Guide](development.md)
+
+#### Testing Environment
+
+```bash
+cp .env.template .env                # ensure a clean env file
+# Edit .env and set: COMPOSE_PROFILES=infra,api,mcp
+
+make up-full                         # start stack under test
+make test-all                        # run jan-cli api-test suites
+```
+
+### Hybrid Mode
+
+For fast iteration during development:
+
+```bash
+make dev-full                 # start stack with host routing
+
+# Replace a service with a host-native process
+./jan-cli.sh dev run llm-api  # macOS/Linux
+.\jan-cli.ps1 dev run llm-api # Windows PowerShell
+
+# Stop dev-full when done
+make dev-full-stop            # keep containers
+make dev-full-down            # remove containers
+```
+
+**Complete guide:** See [Hybrid Mode Guide](hybrid-mode.md)
+
+## Environment Configuration
+
+### Required Environment Variables
+
+#### LLM API
+
+```bash
+# Database
+DATABASE_URL=postgres://jan_user:jan_password@localhost:5432/jan_llm_api?sslmode=disable
+
+# Keycloak/Auth
+KEYCLOAK_BASE_URL=http://localhost:8085
+BACKEND_CLIENT_ID=llm-api
+BACKEND_CLIENT_SECRET=your-secret
+CLIENT=jan-client
+
+# Optional
+JAN_DEFAULT_NODE_SETUP=false  # Disable if no Jan provider
+HTTP_PORT=8080
+LOG_LEVEL=debug
+```
+
+#### Media API
+
+```bash
+# Database
+DB_POSTGRESQL_WRITE_DSN=postgres://media:media@localhost:5432/media_api?sslmode=disable
+
+# S3 Storage (Required - AWS Standard Naming)
+MEDIA_S3_ENDPOINT=https://s3.amazonaws.com
+MEDIA_S3_REGION=us-east-1
+MEDIA_S3_BUCKET=your-bucket
+MEDIA_S3_ACCESS_KEY_ID=your-access-key-id
+MEDIA_S3_SECRET_ACCESS_KEY=your-secret-access-key
+MEDIA_S3_USE_PATH_STYLE=false
+
+# Server
+MEDIA_API_PORT=8285
+LOG_LEVEL=info
+```
+
+#### MCP Tools
+
+```bash
+# Server
+HTTP_PORT=8091
+LOG_LEVEL=info
+
+# Optional providers
+EXA_API_KEY=your-exa-key
+BRAVE_API_KEY=your-brave-key
+```
+
+### Configuration Files
+
+Environment-specific configuration files in `config/`:
+
+- `defaults.env` - Default values for all environments
+- `development.env` - Local development settings
+- `testing.env` - Test environment settings
+- `production.env.example` - Production template (copy and customize)
+- `secrets.env.example` - Secrets template (never commit actual secrets)
+
+## Security Considerations
+
+### Production Checklist
+
+- [ ] **Secrets Management**
+  - Use external secrets operator (e.g., AWS Secrets Manager, Azure Key Vault)
+  - Never commit secrets to version control
+  - Rotate credentials regularly
+
+- [ ] **Network Security**
+  - Enable network policies to restrict pod-to-pod communication
+  - Use TLS for all external endpoints
+  - Configure ingress with proper SSL certificates
+
+- [ ] **Authentication**
+  - Change default Keycloak admin password
+  - Configure proper realm settings
+  - Enable token exchange for client-to-client auth
+
+- [ ] **Database Security**
+  - Use managed database services when possible
+  - Enable SSL/TLS connections
+  - Implement backup and disaster recovery
+
+- [ ] **Pod Security**
+  - Apply pod security standards (restricted profile)
+  - Use non-root containers
+  - Enable security context constraints
+
+### Example: External Secrets
+
+```bash
+# Install external-secrets operator
+helm repo add external-secrets https://charts.external-secrets.io
+helm install external-secrets external-secrets/external-secrets \
+  --namespace external-secrets-system \
+  --create-namespace
+
+# Create SecretStore for AWS Secrets Manager
+kubectl apply -f - <<EOF
+apiVersion: external-secrets.io/v1beta1
+kind: SecretStore
+metadata:
+  name: aws-secretsmanager
+  namespace: jan-server
+spec:
+  provider:
+    aws:
+      service: SecretsManager
+      region: us-west-2
+EOF
+```
+
+## Resource Requirements
+
+### Minimum (Development)
+
+| Component | CPU | Memory |
+|-----------|-----|--------|
+| LLM API | 250m | 256Mi |
+| Media API | 250m | 256Mi |
+| MCP Tools | 250m | 256Mi |
+| PostgreSQL | 250m | 256Mi |
+| Redis | 100m | 128Mi |
+| Keycloak | 500m | 512Mi |
+| **Total** | **~1.5 CPU** | **~2Gi** |
+
+### Recommended (Production)
+
+| Component | CPU | Memory | Replicas |
+|-----------|-----|--------|----------|
+| LLM API | 1000m | 1Gi | 3 |
+| Media API | 500m | 512Mi | 2 |
+| MCP Tools | 500m | 512Mi | 2 |
+| PostgreSQL | 2000m | 4Gi | 1 (or managed) |
+| Redis | 500m | 1Gi | 3 (cluster) |
+| Keycloak | 1000m | 1Gi | 2 |
+
+### Storage Requirements
+
+- PostgreSQL: 50Gi minimum (100Gi+ for production)
+- Redis: 10Gi for persistence
+- PVCs for media uploads (if not using S3)
+
+## Monitoring and Observability
+
+### Enable Monitoring Stack
+
+```bash
+# Start monitoring services
+docker compose --profile monitoring up -d
+
+# Access dashboards
+# Prometheus: http://localhost:9090
+# Grafana: http://localhost:3331
+# Jaeger: http://localhost:16686
+```
+
+### Key Metrics to Monitor
+
+- **Service Health**: Endpoint availability, response times
+- **Database**: Connection pool usage, query performance
+- **Resource Usage**: CPU, memory, disk I/O
+- **Request Rates**: Throughput, error rates
+- **Authentication**: Token issuance, validation failures
+
+**Complete guide:** See [Monitoring Guide](monitoring.md)
+
+## Troubleshooting
+
+### Common Issues
+
+#### Pods Not Starting
+
+```bash
+# Check pod status
+kubectl get pods -n jan-server
+
+# View pod logs
+kubectl logs -n jan-server <pod-name>
+
+# Describe pod for events
+kubectl describe pod -n jan-server <pod-name>
+```
+
+#### Database Connection Failures
+
+```bash
+# Verify PostgreSQL is running
+kubectl exec -n jan-server jan-server-postgresql-0 -- psql -U postgres -c '\l'
+
+# Check database exists
+kubectl exec -n jan-server jan-server-postgresql-0 -- psql -U postgres -c '\l' | grep media_api
+
+# Test connection from service pod
+kubectl exec -n jan-server <service-pod> -- nc -zv jan-server-postgresql 5432
+```
+
+#### Image Pull Failures
+
+For minikube:
+```bash
+# Verify images are loaded
+minikube image ls | grep jan/
+
+# Reload if missing
+minikube image load jan/llm-api:latest
+```
+
+For production:
+```bash
+# Check image pull policy
+kubectl get deployment -n jan-server jan-server-llm-api -o yaml | grep pullPolicy
+
+# Should be "Always" or "IfNotPresent" for registry images
+```
+
+## Related Documentation
+
+- [Kubernetes Setup Guide](../../k8s/SETUP.md) - Complete k8s deployment steps
+- [Kubernetes Configuration](../../k8s/README.md) - Helm chart configuration reference
+- [Development Guide](development.md) - Local development setup
+- [Hybrid Mode](hybrid-mode.md) - Native service execution
+- [Monitoring Guide](monitoring.md) - Observability setup
+- [Architecture Overview](../architecture/README.md) - System architecture
+
+## Support
+
+For additional help:
+- Review [Getting Started](../getting-started/README.md)
+- Check [Troubleshooting Guide](troubleshooting.md)
+- See [Architecture Documentation](../architecture/README.md)
diff --git a/docs/guides/dev-full-mode.md b/docs/guides/dev-full-mode.md
new file mode 100644
index 00000000..ddc15ce2
--- /dev/null
+++ b/docs/guides/dev-full-mode.md
@@ -0,0 +1,84 @@
+# Dev-Full Mode
+
+Dev-Full mode is the officially supported hybrid workflow for Jan Server. It starts every dependency in Docker, configures Kong with host.docker.internal upstreams, and lets you replace any service with a native process via `jan-cli dev run <service>`.
+
+## Overview
+
+- All services start in Docker first (`make dev-full`)
+- Kong upstreams include both Docker targets and host targets
+- Stop any container and run the same service locally
+- Kong automatically routes requests to the host while `/healthz` stays healthy
+- Works on Windows, macOS, and Linux because the Makefile wraps Docker Compose
+
+## Quick Start
+
+```bash
+make setup         # once per machine
+make dev-full      # start infra + APIs + MCP with host routing
+```
+
+After the stack is up you can replace a service:
+
+```bash
+./jan-cli.sh dev run llm-api   # macOS/Linux
+.\jan-cli.ps1 dev run llm-api # Windows PowerShell
+```
+
+`jan-cli dev run` stops the matching container, loads environment variables from `.env` (override with `--env`), and runs `go run ./cmd/server` inside `services/<name>`.
+
+## Running Services Natively
+
+| Service | Port | Command |
+|---------|------|---------|
+| LLM API | 8080 | `jan-cli dev run llm-api` |
+| Media API | 8285 | `jan-cli dev run media-api` |
+| Response API | 8082 | `jan-cli dev run response-api` |
+| MCP Tools | 8091 | `jan-cli dev run mcp-tools` |
+
+Notes:
+- Use `--build` if you prefer to compile before running (`jan-cli dev run llm-api --build`)
+- Pass `--env config/hybrid.env` if you keep a dedicated env file for host processes
+- To hand control back to Docker, stop the host process and run `docker compose start <service>`
+
+## What make dev-full Does
+
+- Loads `.env` and copies it to `docker/.env` via `ensure-docker-env`
+- Runs `docker compose -f docker-compose.yml -f docker-compose.dev-full.yml --profile full up -d`
+- Prints URLs for PostgreSQL, Keycloak, Kong, and every API/MCP service
+- Keeps the `jan-network`/`jan-monitoring` networks around for fast restarts
+
+Inspect `docker-compose.dev-full.yml` for the `extra_hosts: - "host.docker.internal:host-gateway"` entries and `kong/kong-dev-full.yml` for the dual-target upstream configuration.
+
+## IDE Integration
+
+- VS Code launch configurations can depend on a task that runs `make dev-full`
+- After the task finishes, `jan-cli dev run <service>` is a good preLaunchCommand
+- Debuggers simply connect to the local port (Kong still listens on 8000)
+- Hot reload tools (`air`, `reflex`, etc.) live inside `services/<name>`
+
+## Monitoring and Tooling
+
+You can bring up observability while using dev-full:
+
+```bash
+make monitor-up    # Prometheus + Grafana + Jaeger
+make monitor-logs  # follow collector/datasource logs
+```
+
+Those containers watch the same `jan-network`, so traces and metrics include both Docker and host services (as long as you set `OTEL_ENABLED=true`).
+
+## Cleanup
+
+```bash
+make dev-full-stop   # stop containers but keep them around
+make dev-full-down   # stop + remove containers
+make down            # if you want to switch back to standard docker workflow
+```
+
+If you need a pristine state, run `make down-clean` to remove volumes and networks, then start dev-full again.
+
+## See Also
+
+- [Hybrid Mode](hybrid-mode.md) - deep dive on routing, env files, and troubleshooting
+- [Development Guide](development.md) - complete local workflow overview
+- [Monitoring](monitoring.md) - optional observability stack
diff --git a/docs/guides/development.md b/docs/guides/development.md
new file mode 100644
index 00000000..79801c86
--- /dev/null
+++ b/docs/guides/development.md
@@ -0,0 +1,182 @@
+# Development Guide
+
+How to set up, run, and iterate on Jan Server locally. All commands below are available in the repository today (Makefile + jan-cli), so you can copy/paste them as-is.
+
+## Table of Contents
+
+1. [Prerequisites](#prerequisites)
+2. [Quick Start](#quick-start)
+3. [Access Points](#access-points)
+4. [Project Layout](#project-layout)
+5. [Development Workflows](#development-workflows)
+6. [Configuration](#configuration)
+7. [Database & Tooling](#database--tooling)
+8. [Testing](#testing)
+9. [Troubleshooting & Next Steps](#troubleshooting--next-steps)
+
+## Prerequisites
+
+Install these before running any commands:
+
+- **Docker Desktop 24+** with Docker Compose V2
+- **GNU Make** (built in on macOS/Linux, install via Chocolatey/Brew on Windows)
+- **Go 1.21+** - only required when editing Go code or using `jan-cli`
+
+> Tip: `make setup` uses `jan-cli dev setup` to verify Docker, copy `.env.template` to `.env`, and create `docker/.env` automatically.
+
+## Quick Start
+
+```bash
+# 1. Clone and enter the repo
+git clone https://github.com/janhq/jan-server.git
+cd jan-server
+
+# 2. Create .env, docker/.env, and Docker networks
+make setup
+
+# 3. Start the full stack (infra + APIs + MCP + optional vLLM)
+make up-full
+
+# 4. Verify everything is healthy
+make health-check
+
+# 5. Tail logs or iterate
+make logs          # all containers
+docker compose ps  # status
+```
+
+- **Stop containers**: `make down`
+- **Remove volumes**: `make down-clean`
+- **Restart a single service**: `make restart-api`, `make restart-kong`, `make restart-keycloak`
+
+## Access Points
+
+| Service | URL | Notes |
+|---------|-----|-------|
+| Kong Gateway | http://localhost:8000 | Single entry point for all APIs |
+| LLM API | http://localhost:8080 | OpenAI-compatible API, `/healthz` for checks |
+| Response API | http://localhost:8082 | Multi-step orchestration |
+| Media API | http://localhost:8285 | File upload/management service |
+| MCP Tools | http://localhost:8091 | Native MCP tool bridge |
+| Keycloak | http://localhost:8085 | Admin/Admin in development |
+| PostgreSQL | localhost:5432 | Database user `jan_user` / password from `.env` |
+| Grafana | http://localhost:3331 | Start with `make monitor-up` |
+| Prometheus | http://localhost:9090 | Monitoring profile |
+| Jaeger | http://localhost:16686 | Tracing profile |
+
+## Project Layout
+
+```
+jan-server/
++-- services/              # llm-api, media-api, response-api, mcp-tools, template-api
++-- cmd/jan-cli/           # jan-cli sources (`./jan-cli.sh`, `.\\jan-cli.ps1` wrappers)
++-- pkg/config/            # Single source of truth for config defaults and schema
++-- docker/                # Compose fragments (infrastructure, services, dev-full, observability)
++-- docker-compose.yml     # Root compose file (includes docker/*.yml via profiles)
++-- docker-compose.dev-full.yml # Extra compose overrides for dev-full
++-- kong/                  # Gateway configuration (kong.yml + kong-dev-full.yml)
++-- docs/                  # Documentation (guides, architecture, configuration)
++-- Makefile               # Canonical automation entry point
++-- .env.template          # Copy to .env and edit per environment
+```
+
+## Development Workflows
+
+### Full Docker Stack (default)
+
+```bash
+make up-full        # start everything defined by COMPOSE_PROFILES in .env
+make logs           # follow logs for every service
+make logs-api       # only API services
+make logs-mcp       # MCP stack
+make down           # stop and remove containers
+```
+
+Use this mode for integration testing and parity with CI. `COMPOSE_PROFILES` controls which Compose profiles (`infra,api,mcp,full`) are loaded; edit `.env` if you want to disable GPU/vLLM locally.
+
+### Dev-Full Mode (hybrid debugging)
+
+`dev-full` keeps every dependency in Docker but allows you to stop any container and run the same service on your host.
+
+```bash
+make dev-full                 # start stack with host.docker.internal upstreams
+# stop the Docker container you want to replace
+docker compose stop llm-api
+# run the service from source (wrapper stops the container automatically on start)
+./jan-cli.sh dev run llm-api  # Linux/macOS
+.\jan-cli.ps1 dev run llm-api # Windows PowerShell
+```
+
+- Repeat the same flow for `media-api`, `response-api`, or `mcp-tools`
+- Kong automatically routes requests to `host.docker.internal:<port>` while the host process is healthy
+- Exit with `Ctrl+C`, then `docker compose start llm-api` to hand control back to Docker
+- Use `make dev-full-stop` to stop containers without removing them; `make dev-full-down` removes them
+
+See [Dev-Full Mode](dev-full-mode.md) for deeper explanations and IDE integration tips.
+
+### Running Services Directly (without dev-full)
+
+You can run a service completely outside Docker by providing the same environment variables from `.env`:
+
+```bash
+# Example for llm-api
+docker compose up -d api-db keycloak kong   # ensure infra is running
+cd services/llm-api
+export DB_DSN="postgres://jan_user:${POSTGRES_PASSWORD}@localhost:5432/jan_llm_api?sslmode=disable"
+export KEYCLOAK_BASE_URL="http://localhost:8085"
+export JWKS_URL="http://localhost:8085/realms/jan/protocol/openid-connect/certs"
+export ISSUER="http://localhost:8085/realms/jan"
+export HTTP_PORT=8080
+export LOG_LEVEL=debug
+
+go run ./cmd/server
+```
+
+> Windows users can run `.\jan-cli.ps1 dev run llm-api --env .env` to load variables automatically.
+
+## Configuration
+
+- Copy `.env.template` to `.env` (or run `make setup`) and edit secrets like `HF_TOKEN`, `SERPER_API_KEY`, and `POSTGRES_PASSWORD`
+- `make setup` also writes `docker/.env`, so Compose and jan-cli use the same values
+- `pkg/config/defaults.yaml` is the canonical configuration, generated from Go structs in `pkg/config/types.go`
+- Helpful jan-cli commands:
+
+```bash
+jan-cli config validate --file config/defaults.yaml
+jan-cli config show --path services.llm-api
+jan-cli config export --format env --output config/generated.env
+```
+
+## Database & Tooling
+
+```bash
+make db-migrate    # Apply Go migrations for llm-api
+make db-reset      # Drop + recreate tables (uses docker compose)
+make db-console    # Opens psql inside the api-db container
+
+# Direct Docker examples
+docker compose logs api-db
+psql "postgres://jan_user:${POSTGRES_PASSWORD}@localhost:5432/jan_llm_api?sslmode=disable"
+```
+
+For backups and restores use `make db-backup` / `make db-restore`. The Makefile targets wrap `docker compose` so they work on Windows, macOS, and Linux.
+
+## Testing
+
+- **Full integration suite**: `make test-all` (runs every Postman collection listed in the Makefile)
+- **Focused suites**: `make test-auth`, `make test-conversations`, `make test-response`, `make test-media`, `make test-mcp-integration`, `make test-e2e`
+- **Unit tests**: run them from each service directory (`go test ./...`)
+
+See [Testing Guide](testing.md) for platform details, CI coverage, and troubleshooting tips.
+
+## Troubleshooting & Next Steps
+
+1. `make health-check` - verifies infrastructure, API, MCP, and optional services
+2. `make logs` or `docker compose logs <service>` - inspect failures quickly
+3. `make restart-kong` / `make restart-keycloak` - common fixes for gateway/auth issues
+4. `make monitor-up` - bring up Grafana/Prometheus/Jaeger if you need observability while debugging
+
+Need more help? Review [Hybrid Mode](hybrid-mode.md), [Dev-Full Mode](dev-full-mode.md), [Testing](testing.md), [Troubleshooting](troubleshooting.md), and the configuration docs under `docs/configuration/`.
+
+
+
diff --git a/docs/guides/documentation-cleanup.md b/docs/guides/documentation-cleanup.md
new file mode 100644
index 00000000..695a1d34
--- /dev/null
+++ b/docs/guides/documentation-cleanup.md
@@ -0,0 +1,247 @@
+# Documentation Cleanup Summary
+
+**Date:** January 2025 
+**Status:** OK Complete
+
+---
+
+## Overview
+
+Consolidated and cleaned up Jan Server documentation to eliminate redundancy and improve navigation. The goal was to have "only one document in /docs and their README in dir only" as requested.
+
+---
+
+## Changes Made
+
+### 1. Created New Consolidated Documentation
+
+#### `docs/TESTING.md` (NEW)
+Consolidated three separate testing documents into one comprehensive guide:
+- **Merged from:**
+ - `docs/CROSS_PLATFORM_TESTING.md` (624 lines)
+ - `docs/UNIX_TESTING.md` (384 lines)
+ - `docs/PLATFORM_COMPATIBILITY.md` (242 lines)
+- **Result:** Single 500+ line comprehensive testing guide
+- **Contents:**
+ - Cross-platform testing (Windows, Linux, macOS)
+ - CI/CD testing with GitHub Actions
+ - Local testing scripts and procedures
+ - Docker integration testing
+ - Platform-specific fixes and troubleshooting
+ - Best practices
+
+#### `docs/JAN-CLI.md` (NEW)
+Moved jan-cli documentation to main docs directory:
+- **Moved from:** `docs/configuration/jan-cli.md` (934 lines)
+- **Result:** Comprehensive jan-cli guide in main docs
+- **Contents:**
+ - Installation and setup
+ - Command reference
+ - Configuration management
+ - Service operations
+ - Development tools
+ - Shell completion
+ - Troubleshooting
+ - Technical details
+
+### 2. Updated Existing Documentation
+
+#### `docs/configuration/README.md`
+Updated to reference main documentation:
+- Changed CLI section to reference `../JAN-CLI.md`
+- Added "Documentation Structure" section clearly showing:
+ - Implementation details stay in configuration/
+ - User-facing docs link to main docs/
+- Added references to `TESTING.md` and `JAN-CLI.md`
+
+#### `docs/index.md`
+Updated navigation to reflect new structure:
+- Added `JAN-CLI.md` to "For Developers" section
+- Updated `TESTING.md` references (replaced `guides/testing.md`)
+- Added "use the CLI tool" task with link to `JAN-CLI.md`
+- Updated file listing section
+
+### 3. Removed Redundant Files
+
+Deleted the following files (content consolidated elsewhere):
+- [X] `docs/CROSS_PLATFORM_TESTING.md` -> Merged into `TESTING.md`
+- [X] `docs/UNIX_TESTING.md` -> Merged into `TESTING.md`
+- [X] `docs/PLATFORM_COMPATIBILITY.md` -> Merged into `TESTING.md`
+- [X] `docs/configuration/jan-cli.md` -> Moved to `JAN-CLI.md`
+
+---
+
+## Final Documentation Structure
+
+### Main Documentation (`docs/`)
+
+```
+docs/
++-- index.md # Navigation hub
++-- JAN-CLI.md # Jan CLI tool guide (NEW)
++-- TESTING.md # Cross-platform testing (NEW)
++-- quickstart.md # Quick start guide
++-- README.md # Documentation overview
++-- services.md # Service overview
++-- api/ # API documentation
+| +-- README.md
++-- architecture/ # Architecture docs
+| +-- README.md
++-- configuration/ # Config system details
+| +-- README.md # Links to JAN-CLI.md
++-- conventions/ # Code conventions
+| +-- README.md
++-- getting-started/ # Getting started guide
+| +-- README.md
++-- guides/ # Various guides
+ +-- development.md
+ +-- deployment.md
+ +-- monitoring.md
+ +--...
+```
+
+### Configuration Directory (`docs/configuration/`)
+
+```
+docs/configuration/
++-- README.md # Overview + links to main docs
++-- precedence.md # Config precedence rules
++-- env-var-mapping.md # Environment variable mapping
++-- docker-compose-generation.md # Docker Compose integration
++-- k8s-values-generation.md # Kubernetes values generation
++-- service-migration-strategy.md # Service migration guide
+```
+
+**Key Principle:** `configuration/README.md` serves as directory overview and references main user-facing documentation.
+
+---
+
+## Benefits
+
+### Before Cleanup
+
+**Problems:**
+- 4 separate testing documents (CROSS_PLATFORM_TESTING.md, UNIX_TESTING.md, PLATFORM_COMPATIBILITY.md, guides/testing.md)
+- jan-cli documentation buried in configuration subdirectory
+- Unclear where to find testing or CLI information
+- Duplicate content across multiple files
+- Scattered cross-platform compatibility information
+
+### After Cleanup
+
+**Improvements:**
+OK **Single source of truth:** One TESTING.md for all testing, one JAN-CLI.md for CLI 
+OK **Better navigation:** Clear links from index.md to main guides 
+OK **Logical structure:** User-facing docs in main docs/, implementation details in subdirectories 
+OK **Easier maintenance:** Update one file instead of multiple 
+OK **Clear hierarchy:** Main docs -> subdirectory READMEs -> detailed docs 
+OK **No duplication:** Eliminated redundant content 
+
+---
+
+## Documentation Statistics
+
+### Files Removed
+- 4 files deleted (1,250+ lines consolidated)
+
+### Files Created
+- 2 new main documentation files (900+ lines)
+
+### Files Updated
+- 2 files updated (index.md, configuration/README.md)
+
+### Net Result
+- **Cleaner structure:** 4 fewer files to maintain
+- **Better organization:** Main docs in root, details in subdirectories
+- **Improved navigation:** Clear paths to all documentation
+- **No content loss:** All information preserved and consolidated
+
+---
+
+## Navigation Paths
+
+### For Testing Information
+
+**Old paths:**
+- `docs/CROSS_PLATFORM_TESTING.md`
+- `docs/UNIX_TESTING.md`
+- `docs/PLATFORM_COMPATIBILITY.md`
+- `docs/guides/testing.md`
+
+**New path:**
+- OK `docs/TESTING.md` (single comprehensive guide)
+
+### For jan-cli Information
+
+**Old paths:**
+- `docs/configuration/jan-cli.md` (hard to find)
+- `cmd/jan-cli/README.md` (technical, not user guide)
+
+**New path:**
+- OK `docs/JAN-CLI.md` (prominent location in main docs)
+
+### For Configuration Information
+
+**Old:**
+- Mixed user guide and implementation details in configuration/
+
+**New:**
+- OK User guide: `docs/JAN-CLI.md`
+- OK Implementation details: `docs/configuration/README.md` + subdocs
+- OK Clear separation of concerns
+
+---
+
+## Verification
+
+### Check Structure
+```powershell
+# Main docs
+ls docs/*.md
+# Should show: index.md, JAN-CLI.md, TESTING.md, README.md, etc.
+
+# Configuration directory
+ls docs/configuration/*.md
+# Should NOT have jan-cli.md anymore
+
+# Removed files should not exist
+Test-Path docs/CROSS_PLATFORM_TESTING.md # False
+Test-Path docs/UNIX_TESTING.md # False
+Test-Path docs/PLATFORM_COMPATIBILITY.md # False
+Test-Path docs/configuration/jan-cli.md # False
+```
+
+### Check Links
+All references updated in:
+- OK `docs/index.md` -> Points to TESTING.md and JAN-CLI.md
+- OK `docs/configuration/README.md` -> References `../JAN-CLI.md`
+- OK Internal cross-references updated
+
+---
+
+## Next Steps
+
+### For Users
+1. Use `docs/index.md` as navigation hub
+2. Find testing info in `docs/TESTING.md`
+3. Find jan-cli info in `docs/JAN-CLI.md`
+4. Browse subdirectory READMEs for specialized topics
+
+### For Maintainers
+1. Update `docs/TESTING.md` for testing changes (not multiple files)
+2. Update `docs/JAN-CLI.md` for CLI changes (not configuration/jan-cli.md)
+3. Keep subdirectory READMEs focused on implementation details
+4. Maintain clear separation: user docs in main, technical docs in subdirectories
+
+---
+
+## Summary
+
+OK **Goal achieved:** "only one document in /docs and their README in dir only"
+- OK Single TESTING.md (not 3+ separate testing docs)
+- OK Single JAN-CLI.md in main docs (not buried in subdirectory)
+- OK Configuration README links to main docs (clear hierarchy)
+- OK No duplicate content
+- OK Better navigation and maintainability
+
+**Result:** Cleaner, more maintainable documentation structure with clear paths to all information.
diff --git a/docs/guides/hybrid-mode.md b/docs/guides/hybrid-mode.md
new file mode 100644
index 00000000..ed83eab7
--- /dev/null
+++ b/docs/guides/hybrid-mode.md
@@ -0,0 +1,107 @@
+# Hybrid Development Mode Guide
+
+Hybrid mode keeps infrastructure in Docker while letting you run services directly from source. The supported flow is **`make dev-full` + `jan-cli dev run <service>`**, which mirrors production networking while giving you native tooling (Delve, VS Code, hot reload, etc.).
+
+## Why Use Hybrid Mode?
+
+- Fast iteration without rebuilding Docker images
+- Debug with breakpoints while Kong continues to enforce plugins and auth
+- Keep PostgreSQL, Keycloak, Kong, and monitoring inside Docker for parity
+- Instant rollback: stop your host process and Docker takes over again
+
+## Prerequisites
+
+1. Run `make setup` at least once (creates `.env` and `docker/.env`)
+2. Docker Desktop with Compose V2
+3. Go toolchain (1.21+) for the services you want to run
+4. `jan-cli` wrapper (`./jan-cli.sh` on macOS/Linux, `.\jan-cli.ps1` on Windows)
+
+## Workflow Overview
+
+1. **Start dev-full mode**
+   ```bash
+   make dev-full
+   ```
+   This bootstraps the regular stack plus the overrides in `docker-compose.dev-full.yml` and `kong/kong-dev-full.yml`. Kong exposes both Docker targets and `host.docker.internal:<port>` for every service.
+
+2. **Run a service on your host**
+   ```bash
+   ./jan-cli.sh dev run llm-api        # macOS/Linux
+   .\jan-cli.ps1 dev run llm-api      # Windows PowerShell
+   ```
+   `jan-cli dev run` stops the Docker container for the selected service, loads environment variables from `.env` (override with `--env path/to/file`), and runs `go run ./cmd/server` inside the service directory.
+
+3. **Iterate and debug**
+   - Launch Delve: `dlv debug ./cmd/server --headless --listen=:2345`
+   - Use `air` for hot reload inside `services/<name>`
+   - Observe requests through Kong at http://localhost:8000 exactly as clients would
+
+4. **Hand control back to Docker**
+   - Stop your host process (Ctrl+C)
+   - Restart the container if needed: `docker compose start llm-api`
+
+5. **Stop dev-full** when done:
+   ```bash
+   make dev-full-stop   # keep containers
+   make dev-full-down   # remove containers
+   ```
+
+## Service Reference
+
+| Service | Ports (host) | Run natively | Notes |
+|---------|--------------|--------------|-------|
+| llm-api | 8080 | `jan-cli dev run llm-api` | OpenAI-compatible API |
+| response-api | 8082 | `jan-cli dev run response-api` | Multi-step orchestration |
+| media-api | 8285 | `jan-cli dev run media-api` | Upload and media processing |
+| mcp-tools | 8091 | `jan-cli dev run mcp-tools` | MCP bridge and toolchain |
+
+`jan-cli dev run` accepts `--env` (defaults to `.env`) and `--build` if you prefer to build a binary before execution.
+
+## Environment Variables
+
+| Variable | Purpose |
+|----------|---------|
+| `DB_DSN` / `DATABASE_URL` | PostgreSQL connection string. Use `localhost` when running on host |
+| `KEYCLOAK_BASE_URL` / `ISSUER` / `JWKS_URL` | Auth endpoints (use `http://localhost:8085`) |
+| `HTTP_PORT` | Local service port (8080, 8082, 8285, 8091, etc.) |
+| `LOG_LEVEL` / `LOG_FORMAT` | Logging controls |
+| `MCP_*` / `SEARXNG_URL` / `VECTOR_STORE_URL` | Tool integrations for mcp-tools |
+| `OTEL_*` | Telemetry export (set `OTEL_ENABLED=true` to emit traces) |
+
+Need a dedicated hybrid env file? Create `config/hybrid.env`, copy values from `.env`, then run `jan-cli dev run llm-api --env config/hybrid.env`.
+
+## How Kong Routes to Your Host
+
+`kong/kong-dev-full.yml` and `docker-compose.dev-full.yml` add `host.docker.internal` targets for every service. When Docker shuts down `llm-api`, Kong automatically fails over to the host target:
+
+```yaml
+upstreams:
+  - name: llm-api-upstream
+    targets:
+      - target: llm-api:8080
+      - target: host.docker.internal:8080
+    healthchecks:
+      active:
+        http_path: /healthz
+```
+
+As soon as your host process stops responding to `/healthz`, Kong routes traffic back to the Docker container.
+
+## Debugging Tips
+
+- Run `make health-check` after switching services to confirm Kong sees them as healthy
+- Use `make logs-api` / `make logs-mcp` to monitor containerized dependencies while your host service runs
+- Need database access? `make db-console` opens `psql` using the same credentials set in `.env`
+- Monitoring works the same - `make monitor-up` gives you Grafana/Prometheus/Jaeger pointed at both Docker and host processes
+
+## Troubleshooting
+
+| Symptom | Fix |
+|---------|-----|
+| Kong keeps hitting the Docker container | Ensure the host service listens on the same port and returns 200 on `/healthz` |
+| Service cannot reach PostgreSQL | Update `DB_DSN` to use `localhost` instead of `api-db` when running on host |
+| Environment variables missing | Pass `--env .env` (default) or a custom env file to `jan-cli dev run` |
+| Port already in use | Stop the other listener or change `HTTP_PORT` before running the host service |
+| Want to debug multiple services | Run `jan-cli dev run ...` in multiple terminals; each command stops its corresponding container first |
+
+Need deeper coverage? Pair this guide with [Dev-Full Mode](dev-full-mode.md) for diagrams/IDE integration and [Development Guide](development.md) for the broader workflow.
diff --git a/docs/guides/jan-cli.md b/docs/guides/jan-cli.md
new file mode 100644
index 00000000..b24c5dd1
--- /dev/null
+++ b/docs/guides/jan-cli.md
@@ -0,0 +1,795 @@
+# Jan CLI - Complete Guide
+
+**Last Updated**: January 2025 
+**Status**: Production Ready OK 
+**Version**: 1.0.0
+
+Complete documentation for the Jan CLI tool - installation, usage, commands, and technical details.
+
+---
+
+## Table of Contents
+
+1. [Overview](#overview)
+2. [Quick Start](#quick-start)
+3. [Installation](#installation)
+4. [Commands Reference](#commands-reference)
+5. [Configuration Management](#configuration-management)
+6. [Service Operations](#service-operations)
+7. [Development Tools](#development-tools)
+8. [Troubleshooting](#troubleshooting)
+9. [Shell Completion](#shell-completion)
+10. [Technical Details](#technical-details)
+
+---
+
+## Overview
+
+Jan CLI is the official command-line interface for Jan Server, providing unified access to:
+
+- **Configuration Management** - Validate, export, and inspect configuration
+- **Service Operations** - List services, view logs, check status
+- **Development Tools** - Setup environment, scaffold services
+- **Shell Completion** - Auto-completion for all major shells
+
+Built with the [Cobra framework](https://github.com/spf13/cobra), the industry standard used by kubectl, Docker, and the GitHub CLI.
+
+### Key Features
+
+- OK **Unified Interface** - Single command for all Jan Server operations
+- OK **Professional Structure** - Industry-standard Cobra framework
+- OK **Extensible** - Easy to add new commands
+- OK **Well-Documented** - Comprehensive help and examples
+- OK **Cross-Platform** - Works on Windows, Linux, macOS
+- OK **Shell Completion** - Bash, Zsh, Fish, PowerShell support
+
+---
+
+## Quick Start
+
+### Install Globally (Recommended)
+
+```bash
+# From project root
+make cli-install
+```
+
+This will:
+1. Build the `jan-cli` binary
+2. Install to your user's local bin directory
+3. Display PATH setup instructions
+
+**Installation Locations:**
+- **Linux/macOS:** `~/bin/jan-cli`
+- **Windows:** `%USERPROFILE%\bin\jan-cli.exe`
+
+### Add to PATH
+
+**Windows (PowerShell):**
+```powershell
+# Temporary (current session)
+$env:PATH += ";$env:USERPROFILE\bin"
+
+# Permanent (add to PowerShell profile)
+notepad $PROFILE
+# Add this line:
+$env:PATH += ";$env:USERPROFILE\bin"
+```
+
+**Linux/macOS (Bash/Zsh):**
+```bash
+# Add to ~/.bashrc or ~/.zshrc
+export PATH="$PATH:$HOME/bin"
+
+# Reload your shell
+source ~/.bashrc # or source ~/.zshrc
+```
+
+### Verify Installation
+
+```bash
+jan-cli --version
+# Output: jan-cli version 1.0.0
+
+jan-cli --help
+# Output: Full help text with all commands
+```
+
+### First Commands
+
+```bash
+# List all services
+jan-cli service list
+
+# Validate configuration
+jan-cli config validate
+
+# Show help for any command
+jan-cli config --help
+```
+
+---
+
+## Installation
+
+### Method 1: Global Installation (Recommended)
+
+Use the Makefile target to build and install `jan-cli`:
+
+```bash
+# From project root
+make cli-install
+```
+
+**What it does:**
+1. Builds the binary with `go build`
+2. Creates `~/bin` or `%USERPROFILE%\bin` if needed
+3. Copies binary to bin directory
+4. Sets execute permissions (Unix)
+5. Checks if bin is in PATH
+6. Shows PATH setup instructions if needed
+
+**After installation:**
+```bash
+# Add to PATH (see instructions from install output)
+# Then use from anywhere
+jan-cli --version
+jan-cli config validate
+jan-cli service list
+```
+
+### Method 2: Wrapper Scripts (No Installation)
+
+Run directly from project root using wrapper scripts:
+
+```bash
+# Linux/macOS
+./jan-cli.sh config validate
+./jan-cli.sh service list
+
+# Windows PowerShell
+.\jan-cli.ps1 config validate
+.\jan-cli.ps1 service list
+```
+
+**Advantages:**
+- No installation needed
+- Auto-builds if binary missing or outdated
+- Always uses latest code
+- Good for development
+
+**Disadvantages:**
+- Must be run from project root
+- Requires file extension (.sh or .ps1)
+
+### Method 3: Manual Build
+
+```bash
+# Navigate to CLI directory
+cd cmd/jan-cli
+
+# Build
+go build
+
+# Run
+./jan-cli --help # Linux/macOS
+.\jan-cli.exe --help # Windows
+
+# Optional: Copy to a location in your PATH
+cp jan-cli ~/bin/ # Linux/macOS
+copy jan-cli.exe %USERPROFILE%\bin\ # Windows
+```
+
+### Makefile Targets
+
+```bash
+make cli-build # Build the binary
+make cli-install # Build and install to local bin
+make cli-clean # Remove the binary
+```
+
+**cli-build** - Builds binary in `cmd/jan-cli/`:
+- Linux/macOS: `cmd/jan-cli/jan-cli`
+- Windows: `cmd/jan-cli/jan-cli.exe`
+
+**cli-install** - Builds and installs:
+1. Calls `cli-build`
+2. Creates bin directory if needed
+3. Copies binary
+4. Shows PATH instructions
+
+**cli-clean** - Removes binary:
+- Useful for clean rebuilds
+- Frees disk space
+
+---
+
+## Commands Reference
+
+### Command Hierarchy
+
+```
+jan-cli (root)
++-- config (configuration management)
+| +-- validate - Validate configuration files
+| +-- export - Export configuration
+| +-- show - Display configuration values
+| +-- generate - Generate schemas and defaults
++-- service (service operations)
+| +-- list - List all services
+| +-- logs - Show service logs
+| +-- status - Check service status
++-- dev (development tools)
+| +-- setup - Initialize development environment
+| +-- scaffold - Generate new service from template
++-- swagger (API documentation)
+| +-- generate - Generate OpenAPI documentation
++-- completion (shell completions)
+ +-- bash
+ +-- zsh
+ +-- fish
+ +-- powershell
+```
+
+### Global Flags
+
+Available on all commands:
+
+- `-v, --verbose` - Enable verbose output
+- `--config-dir <path>` - Configuration directory (default: "config")
+- `-h, --help` - Show help
+- `--version` - Show version
+
+---
+
+## Configuration Management
+
+The `config` subcommand manages Jan Server configuration files.
+
+### config validate
+
+Validate configuration files against schema:
+
+```bash
+# Validate with default environment
+jan-cli config validate
+
+# Validate specific environment
+jan-cli config validate --env production
+jan-cli config validate --env development
+
+# Verbose validation
+jan-cli config validate --verbose
+```
+
+**Output:**
+- OK Configuration valid
+- [X] Validation errors with details
+
+### config export
+
+Export configuration in various formats:
+
+```bash
+# Export as environment variables
+jan-cli config export --format env
+
+# Export as Docker env file
+jan-cli config export --format docker-env --output .env
+
+# Export as JSON
+jan-cli config export --format json --output config.json
+
+# Export as YAML
+jan-cli config export --format yaml --output config.yaml
+
+# Export for specific environment
+jan-cli config export --env production --format env
+```
+
+**Formats:**
+- `env` - Shell environment variables (`KEY=value`)
+- `docker-env` - Docker Compose env file
+- `json` - JSON format
+- `yaml` - YAML format
+
+**Flags:**
+- `--format <format>` - Output format (required)
+- `--output <file>` - Output file (default: stdout)
+- `--env <environment>` - Environment to export
+
+### config show
+
+Display configuration values with path navigation:
+
+```bash
+# Show all configuration
+jan-cli config show
+
+# Show specific service
+jan-cli config show llm-api
+jan-cli config show media-api
+
+# Show as JSON
+jan-cli config show llm-api --format json
+
+# Show with specific environment
+jan-cli config show llm-api --env production
+```
+
+**Flags:**
+- `<service>` - Service name (optional)
+- `--format <format>` - Output format (yaml, json)
+- `--env <environment>` - Environment
+
+### config generate
+
+Generate JSON schemas and defaults.yaml:
+
+```bash
+# Generate all schemas
+jan-cli config generate
+
+# Generates:
+# - config/schema/config.schema.json
+# - config/schema/inference.schema.json
+# - config/schema/infrastructure.schema.json
+# - config/schema/monitoring.schema.json
+# - config/schema/services.schema.json
+# - config/defaults.yaml
+```
+
+---
+
+## Service Operations
+
+The `service` subcommand manages Jan Server services.
+
+### service list
+
+List all available services:
+
+```bash
+jan-cli service list
+```
+
+**Output:**
+```
+Available services:
+ llm-api:8080 LLM API - OpenAI-compatible chat completions
+ media-api:8285 Media API - File upload and management
+ response-api:8082 Response API - Multi-step orchestration
+ mcp-tools:8091 MCP Tools - Model Context Protocol tools
+```
+
+### service logs
+
+Show Docker Compose logs for a specific service:
+
+```bash
+# View logs for a service
+jan-cli service logs llm-api
+
+# Follow logs
+jan-cli service logs llm-api --follow
+
+# Show last N lines
+jan-cli service logs llm-api --tail 50
+```
+
+`jan-cli service logs` wraps `docker compose logs`, so it works on every platform where Docker with the Compose plugin is installed (for example, Docker Desktop on Windows/macOS or Docker Engine on Linux).
+
+### service status
+
+Check container status (and optionally health endpoints):
+
+```bash
+# Check all services via Makefile health check
+jan-cli service status
+
+# Check specific service
+jan-cli service status llm-api
+```
+
+- `jan-cli service status` without arguments runs `make health-check`
+- With a service argument it shows `docker compose ps <service>` and invokes the service-specific `/healthz` endpoint (PowerShell `Invoke-WebRequest` on Windows or `curl` on macOS/Linux)
+
+---
+
+## Development Tools
+
+The `dev` subcommand provides development utilities.
+
+### dev setup
+
+Initialize development environment:
+
+```bash
+jan-cli dev setup
+```
+
+**What it does:**
+1. Creates required directories (logs/, tmp/, uploads/)
+2. Creates Docker networks (jan-network, jan-dev)
+3. Generates `.env` file from templates
+4. Optional: Sets up Docker environment
+
+**Features:**
+- OK Cross-platform (Windows, Linux, macOS)
+- OK Docker optional (warns if not available)
+- OK Idempotent (safe to run multiple times)
+
+### dev scaffold
+
+Generate a new service from `services/template-api`:
+
+```bash
+# Create new API service
+jan-cli dev scaffold my-service
+
+# Specify template/port (future templates can be added later)
+jan-cli dev scaffold worker-service --template api --port 8999
+```
+
+What it does today:
+- Copies `services/template-api` to `services/<name>`
+- Replaces placeholders (module import paths, README text, comments)
+- Prints next steps (run `go mod tidy`, update docker-compose, add Kong routes)
+
+If the destination already exists the command aborts without touching files.
+
+---
+
+## Swagger Documentation
+
+The `swagger` subcommand generates OpenAPI documentation.
+
+### swagger generate
+
+Generate OpenAPI/Swagger documentation for services:
+
+```bash
+# Generate for specific service
+jan-cli swagger generate --service llm-api
+jan-cli swagger generate --service media-api
+
+# Generates:
+# - services/llm-api/docs/swagger.yaml
+# - services/llm-api/docs/swagger.json
+```
+
+**Requirements:**
+- Service must have Swagger annotations in code
+- `swag` CLI tool must be installed (`go install github.com/swaggo/swag/cmd/swag@latest`)
+
+---
+
+## Troubleshooting
+
+### "jan-cli: command not found" (Linux/macOS)
+
+**Problem:** The bin directory is not in your PATH.
+
+**Solution:**
+1. Check if `~/bin` exists:
+ ```bash
+ ls ~/bin/jan-cli
+ ```
+
+2. Add to PATH:
+ ```bash
+ export PATH="$PATH:$HOME/bin"
+ ```
+
+3. Make permanent by adding to `~/.bashrc` or `~/.zshrc`:
+ ```bash
+ echo 'export PATH="$PATH:$HOME/bin"' >> ~/.bashrc
+ source ~/.bashrc
+ ```
+
+### "jan-cli is not recognized" (Windows)
+
+**Problem:** The bin directory is not in your PATH.
+
+**Solution:**
+1. Check if file exists:
+ ```powershell
+ Test-Path $env:USERPROFILE\bin\jan-cli.exe
+ ```
+
+2. Add to PATH (temporary):
+ ```powershell
+ $env:PATH += ";$env:USERPROFILE\bin"
+ ```
+
+3. Make permanent:
+ ```powershell
+ notepad $PROFILE
+ # Add this line:
+ $env:PATH += ";$env:USERPROFILE\bin"
+ ```
+
+4. Restart PowerShell
+
+### "Permission denied" (Linux/macOS)
+
+**Problem:** The binary is not executable.
+
+**Solution:**
+```bash
+chmod +x ~/bin/jan-cli
+```
+
+The `make cli-install` target handles this automatically, but if you installed manually, you may need to set execute permissions.
+
+### Binary Not Updated After Code Changes
+
+**Problem:** Installed binary is outdated after modifying source code.
+
+**Solution:**
+```bash
+# Rebuild and reinstall
+make cli-install
+
+# Or clean and rebuild
+make cli-clean
+make cli-install
+```
+
+### Wrapper Scripts Don't Work
+
+**Problem:** Wrapper script shows errors or doesn't build.
+
+**Solution:**
+1. Ensure Go is installed:
+ ```bash
+ go version
+ ```
+
+2. Ensure in project root:
+ ```bash
+ pwd # Should show jan-server directory
+ ```
+
+3. Check script is executable (Linux/macOS):
+ ```bash
+ chmod +x jan-cli.sh
+ ```
+
+4. Try manual build:
+ ```bash
+ cd cmd/jan-cli && go build
+ ```
+
+---
+
+## Shell Completion
+
+Jan CLI supports shell completion for bash, zsh, fish, and PowerShell.
+
+### Generate Completion Script
+
+```bash
+# Bash
+jan-cli completion bash > /etc/bash_completion.d/jan-cli
+
+# Zsh
+jan-cli completion zsh > "${fpath[1]}/_jan-cli"
+
+# Fish
+jan-cli completion fish > ~/.config/fish/completions/jan-cli.fish
+
+# PowerShell
+jan-cli completion powershell > jan-cli.ps1
+# Then source it in your profile
+```
+
+### Enable Completion
+
+**Bash:**
+```bash
+# Add to ~/.bashrc
+source /etc/bash_completion.d/jan-cli
+```
+
+**Zsh:**
+```zsh
+# Add to ~/.zshrc
+autoload -U compinit
+compinit
+```
+
+**Fish:**
+```fish
+# Completion is auto-loaded from ~/.config/fish/completions/
+```
+
+**PowerShell:**
+```powershell
+# Add to $PROFILE
+. /path/to/jan-cli.ps1
+```
+
+---
+
+## Technical Details
+
+### Framework: Cobra
+
+Jan CLI uses [spf13/cobra](https://github.com/spf13/cobra) v1.8.1, the industry-standard CLI framework.
+
+**Why Cobra:**
+- Used by kubectl, docker, gh, helm
+- Auto-generated help text
+- Built-in completion generation
+- Nested subcommand support
+- Flag parsing and validation
+- POSIX-compliant
+
+**Dependencies:**
+```go
+require (
+ github.com/spf13/cobra v1.8.1
+ gopkg.in/yaml.v3 v3.0.1
+)
+```
+
+### Project Structure
+
+```
+cmd/jan-cli/
++-- main.go # Root command and initialization
++-- cmd_config.go # Configuration management
++-- cmd_service.go # Service operations
++-- cmd_dev.go # Development tools
++-- cmd_setup.go # Interactive setup wizard
++-- cmd_swagger.go # Swagger generation
++-- utils.go # Utility functions
++-- go.mod # Go module dependencies
++-- README.md # CLI documentation
+```
+
+### Build Details
+
+**Build Command:**
+```bash
+cd cmd/jan-cli
+go build -o jan-cli
+```
+
+**Cross-Platform Builds:**
+```bash
+# Linux
+GOOS=linux GOARCH=amd64 go build -o jan-cli-linux
+
+# macOS
+GOOS=darwin GOARCH=amd64 go build -o jan-cli-darwin
+
+# Windows
+GOOS=windows GOARCH=amd64 go build -o jan-cli.exe
+```
+
+**Binary Size:** ~10MB (includes dependencies)
+
+### Wrapper Scripts
+
+**PowerShell (jan-cli.ps1):**
+- Auto-builds if binary missing or outdated
+- Checks all `*.go` files for changes
+- Supports all jan-cli commands
+- Works on Windows PowerShell 5.1+
+
+**Bash (jan-cli.sh):**
+- Auto-builds if binary missing or outdated
+- Checks all `*.go` files for changes
+- Supports all jan-cli commands
+- Works on Linux/macOS with Bash 3.2+
+
+---
+
+## Examples
+
+### Configuration Workflow
+
+```bash
+# Generate schemas and defaults
+jan-cli config generate
+
+# Validate configuration
+jan-cli config validate --env production
+
+# Export as environment variables
+jan-cli config export --format env --env production > .env.production
+
+# Show specific service config
+jan-cli config show llm-api --format json
+```
+
+### Service Management
+
+```bash
+# List all services
+jan-cli service list
+
+# View logs
+jan-cli service logs llm-api --follow
+
+# Check health
+jan-cli service status
+```
+
+### Development Setup
+
+```bash
+# Setup environment
+jan-cli dev setup
+
+# Create new service from template
+jan-cli dev scaffold worker-service --template api
+
+# Generate API documentation
+jan-cli swagger generate --service llm-api
+```
+
+---
+
+## Best Practices
+
+### For Daily Use
+
+1. Install globally with `make cli-install`
+2. Add to PATH once
+3. Use `jan-cli` from anywhere
+4. Run `make cli-install` after pulling updates
+
+### For Development
+
+1. Use wrapper scripts (`./jan-cli.sh` or `.\jan-cli.ps1`)
+2. Always uses latest code
+3. Auto-builds if needed
+4. Good for testing changes
+
+### For CI/CD
+
+1. Use wrapper scripts (no installation needed)
+2. Or install and add to PATH
+3. Verify with `jan-cli --version`
+4. Run commands directly
+
+---
+
+## Summary
+
+**Quick Reference:**
+- **Build:** `make cli-build`
+- **Install:** `make cli-install`
+- **Clean:** `make cli-clean`
+- **Use:** `jan-cli <command>`
+
+**Recommended Workflow:**
+1. Run `make cli-install` once
+2. Add to PATH as instructed
+3. Use `jan-cli` from anywhere
+4. Run `make cli-install` again after updates
+
+**Key Commands:**
+- `jan-cli config validate` - Validate configuration
+- `jan-cli config generate` - Generate schemas
+- `jan-cli service list` - List services
+- `jan-cli dev setup` - Setup environment
+- `jan-cli swagger generate --service <name>` - Generate API docs
+
+---
+
+## Related Documentation
+
+- [Testing Guide](testing.md) - Cross-platform testing procedures
+- [Configuration System](../configuration/README.md) - Configuration management
+- [Development Guide](development.md) - Local development setup
+- [Architecture Overview](../architecture/README.md) - System design
+
+---
+
+**Status:** Production Ready OK 
+**Version:** 1.0.0 
+**Cross-Platform:** Windows, Linux, macOS
diff --git a/docs/guides/kong-plugins.md b/docs/guides/kong-plugins.md
new file mode 100644
index 00000000..c4bbff6a
--- /dev/null
+++ b/docs/guides/kong-plugins.md
@@ -0,0 +1,275 @@
+# Kong Custom Plugin Setup Guide
+
+This guide explains how to set up and use custom Kong plugins in the jan-server project.
+
+## Directory Structure
+
+```
+kong/
++-- kong.yml # Main Kong declarative config
++-- kong-dev-full.yml # Dev-Full/Hybrid mode config (host routing)
++-- plugins/ # Custom plugins directory
+    +-- keycloak-apikey/ # API key validation plugin
+        +-- handler.lua # Plugin logic
+        +-- schema.lua # Configuration schema
+        +-- README.md # Plugin documentation
+```
+
+## Plugin Loading
+
+### Docker Configuration
+
+Kong is configured to load custom plugins via environment variables:
+
+```yaml
+environment:
+ KONG_PLUGINS: bundled,keycloak-apikey # Load bundled + custom plugins
+ KONG_LUA_PACKAGE_PATH: /usr/local/kong/plugins/?.lua;; # Plugin search path
+
+volumes:
+ - ../kong/plugins:/usr/local/kong/plugins:ro # Mount plugins directory
+```
+
+### Verification
+
+After starting Kong, verify plugins are loaded:
+
+```bash
+# List all enabled plugins
+curl http://localhost:8001/plugins/enabled
+
+# Should include:
+# - bundled plugins (jwt, rate-limiting, cors, etc.)
+# - keycloak-apikey (custom)
+```
+
+## Creating New Plugins
+
+### 1. Create Plugin Directory
+
+```bash
+mkdir -p kong/plugins/my-plugin
+```
+
+### 2. Create handler.lua
+
+```lua
+local MyPluginHandler = {
+ PRIORITY = 1000, -- Plugin execution priority
+ VERSION = "1.0.0",
+}
+
+function MyPluginHandler:access(conf)
+ -- Your plugin logic here
+ kong.log.info("My plugin executed!")
+end
+
+return MyPluginHandler
+```
+
+### 3. Create schema.lua
+
+```lua
+return {
+ name = "my-plugin",
+ fields = {
+ { config = {
+ type = "record",
+ fields = {
+ { my_setting = {
+ type = "string",
+ required = true,
+ default = "default_value",
+ }},
+ }
+ }},
+ },
+}
+```
+
+### 4. Register Plugin
+
+Update `docker/infrastructure.yml`:
+
+```yaml
+environment:
+ KONG_PLUGINS: bundled,keycloak-apikey,my-plugin # Add your plugin
+```
+
+### 5. Use in kong.yml
+
+```yaml
+plugins:
+  - name: my-plugin
+    tags: [custom]
+    config:
+      my_setting: "value"
+```
+
+## Plugin Development Tips
+
+### Debugging
+
+1. **Enable debug logging:**
+```yaml
+environment:
+ KONG_LOG_LEVEL: debug
+```
+
+2. **Watch logs in real-time:**
+```bash
+docker logs kong -f
+```
+
+3. **Add debug statements:**
+```lua
+kong.log.debug("Variable value: ", some_variable)
+kong.log.err("Error occurred: ", error_message)
+```
+
+### Testing Locally
+
+1. **Reload Kong after changes:**
+```bash
+docker restart kong
+```
+
+2. **Test plugin behavior:**
+```bash
+# Make test request
+curl -v http://localhost:8000/your-endpoint \
+ -H "X-Custom-Header: value"
+
+# Check response headers
+curl -I http://localhost:8000/your-endpoint
+```
+
+### Plugin Priority
+
+Kong executes plugins in priority order (higher = earlier):
+
+```
+2000+ - Pre-processing (e.g., request transformation)
+1000+ - Authentication (e.g., jwt: 1005, keycloak-apikey: 1002)
+500+ - Authorization
+100+ - Post-processing
+```
+
+Set priority in `handler.lua`:
+```lua
+local MyPluginHandler = {
+ PRIORITY = 1002, -- Your priority
+}
+```
+
+## Common Patterns
+
+### HTTP Requests
+
+```lua
+local http = require "resty.http"
+local httpc = http.new()
+
+local res, err = httpc:request_uri("http://service:8080/endpoint", {
+ method = "POST",
+ body = "data",
+ headers = {
+ ["Content-Type"] = "application/json",
+ },
+})
+
+if res and res.status == 200 then -- res is nil (and err is set) when the request itself fails
+ kong.log.info("Request successful")
+end
+```
+
+### Header Manipulation
+
+```lua
+-- Read headers
+local api_key = kong.request.get_header("X-API-Key")
+
+-- Set request headers (to upstream)
+kong.service.request.set_header("X-User-ID", "123")
+
+-- Set response headers (to client)
+kong.response.set_header("X-Custom", "value")
+
+-- Remove headers
+kong.service.request.clear_header("Authorization")
+```
+
+### Authentication
+
+```lua
+-- Authenticate consumer for rate limiting
+kong.client.authenticate({
+ id = user_id,
+ custom_id = user_subject,
+})
+```
+
+### Error Responses
+
+```lua
+-- Return error to client
+return kong.response.exit(401, {
+ message = "Unauthorized"
+})
+```
+
+## Best Practices
+
+1. **Error Handling**: Always handle HTTP errors gracefully
+2. **Logging**: Use appropriate log levels (debug, info, warn, err)
+3. **Performance**: Cache expensive operations, reuse HTTP connections
+4. **Security**: Validate all inputs, sanitize data
+5. **Configuration**: Use schema.lua for type-safe config
+6. **Testing**: Test with both valid and invalid inputs
+
+## Resources
+
+- [Kong Plugin Development Guide](https://docs.konghq.com/gateway/latest/plugin-development/)
+- [Kong PDK Reference](https://docs.konghq.com/gateway/latest/plugin-development/pdk/)
+- [Lua Reference](https://www.lua.org/manual/5.1/)
+
+## Troubleshooting
+
+### Plugin Not Loaded
+
+**Symptom**: Plugin not in `/plugins/enabled`
+
+**Solutions**:
+1. Check `KONG_PLUGINS` includes your plugin name
+2. Verify plugin files are mounted correctly
+3. Check file permissions (must be readable)
+4. Restart Kong container
+
+### Syntax Errors
+
+**Symptom**: Kong fails to start
+
+**Solutions**:
+1. Check Kong logs: `docker logs kong`
+2. Validate Lua syntax: `luac -p handler.lua`
+3. Check schema format matches Kong requirements
+
+### Plugin Not Executing
+
+**Symptom**: Plugin loaded but not running
+
+**Solutions**:
+1. Verify plugin is configured in `kong.yml`
+2. Check route/service matches request
+3. Ensure priority doesn't conflict with other plugins
+4. Add debug logging to verify execution
+
+### Performance Issues
+
+**Symptom**: Slow response times
+
+**Solutions**:
+1. Profile plugin execution time
+2. Add caching for expensive operations
+3. Use connection pooling for HTTP requests
+4. Consider async operations if possible
diff --git a/docs/guides/mcp-testing.md b/docs/guides/mcp-testing.md
new file mode 100644
index 00000000..6098ec6b
--- /dev/null
+++ b/docs/guides/mcp-testing.md
@@ -0,0 +1,124 @@
+# MCP Testing Guide
+
+Validate the MCP (Model Context Protocol) toolchain end to end. Every command below maps directly to current Makefile targets and compose services, so you can run it without editing scripts.
+
+## 1. Prerequisites
+
+- `make up-full` (or `make up-mcp` + `make up-api`) so Kong, MCP Tools, and vector-store are running
+- `SERPER_API_KEY` and other MCP-related env vars set in `.env`
+- Services reachable on:
+  - Kong Gateway: http://localhost:8000
+  - MCP Tools: http://localhost:8091 (direct) or http://localhost:8000/mcp (via Kong)
+  - Vector Store: http://localhost:3015
+  - SandboxFusion (optional): http://localhost:3010
+
+Check health quickly:
+```bash
+make health-check       # full stack health summary
+curl http://localhost:8091/healthz
+curl http://localhost:3015/healthz || true   # returns 404 because the vector store uses custom routes
+```
+
+## 2. Automated Suite (jan-cli api-test)
+
+Run everything through the Makefile target:
+
+```bash
+make test-mcp-integration
+```
+
+The target executes:
+```bash
+jan-cli api-test run tests/automation/mcp-postman-scripts.json \
+  --env-var "kong_url=http://localhost:8000" \
+  --env-var "mcp_tools_url=http://localhost:8000/mcp" \
+  --verbose --reporters cli
+```
+
+Expectations:
+- Guest token requests succeed (`/llm/auth/guest-login`)
+- MCP search variants (domain filter, offline) return structured JSON
+- Tool list includes `google_search`, `scrape`, `file_search_index`, `file_search_query`, `python_exec`
+- File index/query flows return 200 and include the previously indexed document
+- SandboxFusion executions return stdout/stderr
+
+## 3. Manual Checks
+
+### 3.1 Kong -> MCP Tools
+
+```bash
+# list tools through Kong (authenticated by the gateway)
+curl -s http://localhost:8000/mcp -X POST -H "Content-Type: application/json" -d '{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "method": "tools/list"
+}' | jq .
+
+# call python_exec via Kong
+curl -s http://localhost:8000/mcp -X POST -H "Content-Type: application/json" -d '{
+  "jsonrpc": "2.0",
+  "id": 2,
+  "method": "tools/call",
+  "params": {
+    "name": "python_exec",
+    "arguments": {
+      "code": "print(\"Hello from MCP\")",
+      "language": "python",
+      "approved": true
+    }
+  }
+}' | jq .
+```
+
+### 3.2 Direct Service Endpoints
+
+```bash
+# MCP Tools (direct port)
+curl -s http://localhost:8091/v1/mcp -H "Content-Type: application/json" -d '{
+  "jsonrpc": "2.0",
+  "id": 3,
+  "method": "tools/call",
+  "params": {"name": "file_search_index", "arguments": {"document_id": "cli-doc", "text": "CLI test"}}
+}' | jq .
+
+# Vector store
+curl -s http://localhost:3015/documents -X POST -H "Content-Type: application/json" -d '{
+  "document_id": "curl-doc",
+  "text": "Curl-based MCP test",
+  "metadata": {"owner": "qa"}
+}'
+
+curl -s http://localhost:3015/query -X POST -H "Content-Type: application/json" -d '{
+  "text": "MCP test",
+  "top_k": 3
+}' | jq .
+
+# SandboxFusion (optional)
+curl -s http://localhost:3010/run_code -H "Content-Type: application/json" -d '{
+  "code": "print(\"sandbox\")",
+  "language": "python"
+}' | jq .
+```
+
+## 4. Logs and Troubleshooting
+
+| Component | Logs | Notes |
+|-----------|------|-------|
+| Kong | `make logs` or `docker compose logs kong` | Confirms `/mcp` route, auth headers, upstream failures |
+| MCP Tools | `make logs-mcp` | Watch tool dispatch, vector store responses, sandbox output |
+| Vector Store | `docker compose logs vector-store` | Service name is `vector-store` in `docker/services-mcp.yml` |
+| SandboxFusion | `docker compose logs sandboxfusion` (if enabled) | Verify HTTP 200s and stdout capturing |
+
+Common fixes:
+- **401/403**: ensure guest token exists or provide API key headers when hitting Kong
+- **Timeouts to vector store**: confirm service is part of the `mcp` profile (`COMPOSE_PROFILES` includes `mcp`)
+- **Sandbox errors**: include the required `language` parameter; see `services/mcp-tools/internal/sandboxfusion`
+
+## 5. Summary Checklist
+
+- [ ] `make up-full` (or `make up-mcp` + `make up-api`) running
+- [ ] `make test-mcp-integration` passes locally
+- [ ] Manual curl checks through Kong and direct service succeed
+- [ ] Logs show healthy MCP tool executions
+
+Document these results in your PR or QA notes so MCP coverage stays verifiable.
diff --git a/docs/guides/monitoring.md b/docs/guides/monitoring.md
new file mode 100644
index 00000000..0c701f01
--- /dev/null
+++ b/docs/guides/monitoring.md
@@ -0,0 +1,354 @@
+# Observability & Monitoring Stack
+
+This document describes the optional observability stack for Jan Server, which provides comprehensive monitoring, metrics, and distributed tracing capabilities.
+
+## Overview
+
+The monitoring stack is completely optional and runs separately from the main Jan Server services. It consists of:
+
+- **OpenTelemetry Collector**: Telemetry data collection and forwarding
+- **Prometheus**: Metrics storage and querying
+- **Jaeger**: Distributed tracing backend
+- **Grafana**: Unified visualization dashboard
+
+## Quick Start
+
+### Start Monitoring Stack
+```bash
+make monitor-up
+```
+
+This command will:
+1. Start all monitoring services (Prometheus, Jaeger, Grafana, OpenTelemetry Collector)
+2. Display access URLs for each dashboard
+3. Run in the background
+
+### Access Dashboards
+
+- **Grafana** (Unified Dashboard): http://localhost:3331
+ - Username: `admin`
+ - Password: `admin`
+ - Pre-configured with Prometheus and Jaeger datasources
+
+- **Prometheus** (Metrics): http://localhost:9090
+ - Direct PromQL queries
+ - Service discovery status
+ - Target health monitoring
+
+- **Jaeger** (Traces): http://localhost:16686
+ - Distributed trace search
+ - Service dependency graph
+ - Performance analysis
+
+### Stop Monitoring Stack
+```bash
+# Stop but keep data
+make monitor-down
+
+# Stop and remove all data volumes (fresh start)
+make monitor-clean
+```
+
+### View Logs
+```bash
+make monitor-logs
+```
+
+## Architecture
+
+```
++-------------------------------------------------------------+
+| Jan Server Services |
+| (llm-api, mcp-tools, etc.) |
++----------------+--------------------------------------------+
+ | OpenTelemetry Protocol (OTLP)
+ | Ports: 4318 (HTTP), 4317 (gRPC)
+ v
++-------------------------------------------------------------+
+| OpenTelemetry Collector |
+| - Receives metrics and traces from services |
+| - Processes and enriches telemetry data |
+| - Exports to Prometheus (metrics) and Jaeger (traces) |
+| - Uses OTLP exporter for Jaeger (not deprecated Jaeger) |
++------------+------------------------------+-----------------+
+ | |
+ | Metrics | Traces (OTLP)
+ v v
++------------------------+ +--------------------------------+
+| Prometheus | | Jaeger |
+| - Time-series DB | | - Trace storage |
+| - 15s scrape interval | | - Service dependency graph |
+| - PromQL queries | | - Performance insights |
++------------+-----------+ +------------+-------------------+
+ | |
+ +--------------+---------------+
+ v
+ +------------------------+
+ | Grafana |
+ | - Unified dashboards |
+ | - Metrics + Traces |
+ | - Alerting |
+ +------------------------+
+```
+
+## Configuration
+
+### Environment Variables
+
+Set these in your `.env.docker` or `.env.local` file:
+
+```bash
+# Prometheus
+PROMETHEUS_PORT=9090
+
+# Jaeger
+JAEGER_UI_PORT=16686
+
+# Grafana
+GRAFANA_PORT=3331
+GRAFANA_ADMIN_USER=admin
+GRAFANA_ADMIN_PASSWORD=admin
+
+# OpenTelemetry
+OTEL_GRPC_PORT=4317
+OTEL_HTTP_PORT=4318
+```
+
+### Enable Telemetry in Services
+
+To send metrics and traces from Jan Server services:
+
+```bash
+# In llm-api environment
+OTEL_ENABLED=true
+OTEL_SERVICE_NAME=llm-api
+OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
+```
+
+### Prometheus Configuration
+
+The `monitoring/prometheus.yml` file defines scrape targets:
+
+```yaml
+scrape_configs:
+  - job_name: 'otel-collector'
+    static_configs:
+      - targets: ['otel-collector:8889']
+
+  - job_name: 'llm-api'
+    static_configs:
+      - targets: ['llm-api:8080']
+
+  - job_name: 'mcp-tools'
+    static_configs:
+      - targets: ['mcp-tools:8091']
+```
+
+### Grafana Datasources
+
+Datasources are auto-provisioned from `monitoring/grafana/provisioning/datasources/datasources.yml`:
+
+- **Prometheus**: Default datasource for metrics
+- **Jaeger**: Datasource for distributed traces
+
+## Usage
+
+### Viewing Metrics in Prometheus
+
+1. Navigate to http://localhost:9090
+2. Use the "Graph" tab for queries
+3. Example PromQL queries:
+ ```promql
+ # Request rate
+ rate(http_requests_total[5m])
+ 
+ # Error rate
+ rate(http_requests_total{status=~"5.."}[5m])
+ 
+ # Response time (95th percentile)
+ histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))
+ ```
+
+### Viewing Traces in Jaeger
+
+1. Navigate to http://localhost:16686
+2. Select a service (e.g., `llm-api`)
+3. Search for traces by:
+ - Time range
+ - Duration
+ - Tags
+4. Click on a trace to view:
+ - Span timeline
+ - Service dependencies
+ - Tags and logs
+
+### Creating Grafana Dashboards
+
+1. Navigate to http://localhost:3331 (admin/admin)
+2. Click "+" -> "Create Dashboard"
+3. Add panels with queries from Prometheus or Jaeger
+4. Save the dashboard
+
+To persist dashboards:
+1. Export as JSON
+2. Save to `monitoring/grafana/provisioning/dashboards/json/`
+3. Restart Grafana: `make monitor-down && make monitor-up`
+
+## Data Persistence
+
+The monitoring stack uses Docker volumes for data persistence:
+
+- `prometheus-data`: Stores metrics time-series data
+- `grafana-data`: Stores dashboards, users, and settings
+
+### Backup Data
+
+```bash
+# Backup Prometheus data
+docker run --rm -v jan-server_prometheus-data:/data -v $(pwd):/backup alpine tar czf /backup/prometheus-backup.tar.gz -C /data .
+
+# Backup Grafana data
+docker run --rm -v jan-server_grafana-data:/data -v $(pwd):/backup alpine tar czf /backup/grafana-backup.tar.gz -C /data .
+```
+
+### Restore Data
+
+```bash
+# Restore Prometheus data
+docker run --rm -v jan-server_prometheus-data:/data -v $(pwd):/backup alpine sh -c "cd /data && tar xzf /backup/prometheus-backup.tar.gz"
+
+# Restore Grafana data
+docker run --rm -v jan-server_grafana-data:/data -v $(pwd):/backup alpine sh -c "cd /data && tar xzf /backup/grafana-backup.tar.gz"
+```
+
+## Troubleshooting
+
+### Monitoring Stack Won't Start
+
+```bash
+# Check if services are running
+docker compose -f docker/observability.yml ps
+
+# View logs
+make monitor-logs
+
+# Restart with fresh data
+make monitor-clean && make monitor-up
+```
+
+### No Metrics in Prometheus
+
+1. Check if OpenTelemetry Collector is running:
+ ```bash
+ docker compose -f docker/observability.yml ps otel-collector
+ ```
+
+2. Verify Prometheus targets are healthy:
+ - Navigate to http://localhost:9090/targets
+ - All targets should show "UP" status
+
+3. Ensure services are exporting metrics:
+ - Set `OTEL_ENABLED=true` in service environment
+ - Restart the service
+
+### No Traces in Jaeger
+
+1. Check Jaeger is receiving data:
+ ```bash
+ make monitor-logs | grep jaeger
+ ```
+
+2. Verify OpenTelemetry Collector is exporting to Jaeger:
+ ```bash
+ make monitor-logs | grep "jaeger.*exporter"
+ ```
+
+3. Ensure services are generating traces:
+ - Check service logs for trace IDs
+ - Verify OTLP endpoint is correct
+
+### Grafana Datasources Not Working
+
+1. Check datasource configuration:
+ - Login to Grafana
+ - Go to Configuration -> Data Sources
+ - Test each datasource
+
+2. Verify provisioning:
+ ```bash
+ docker compose -f docker/observability.yml exec grafana ls -la /etc/grafana/provisioning/datasources
+ ```
+
+3. Restart Grafana:
+ ```bash
+ docker compose -f docker/observability.yml restart grafana
+ ```
+
+## Advanced Configuration
+
+### Custom Prometheus Retention
+
+Edit `docker/observability.yml`:
+
+```yaml
+prometheus:
+ command:
+ - '--storage.tsdb.retention.time=30d' # Keep data for 30 days
+ - '--storage.tsdb.retention.size=10GB' # Max 10GB storage
+```
+
+### Custom Grafana Plugins
+
+Edit `docker/observability.yml`:
+
+```yaml
+grafana:
+  environment:
+    GF_INSTALL_PLUGINS: 'grafana-clock-panel,grafana-simple-json-datasource'
+```
+
+### Enable Jaeger Sampling
+
+Edit `docker/observability.yml`:
+
+```yaml
+jaeger:
+  environment:
+    COLLECTOR_OTLP_ENABLED: "true"
+    SAMPLING_STRATEGIES_FILE: /etc/jaeger/sampling.json
+  volumes:
+    - ./docs/jaeger-sampling.json:/etc/jaeger/sampling.json:ro
+```
+
+## Production Recommendations
+
+1. **Change default Grafana password**:
+ ```bash
+ GRAFANA_ADMIN_PASSWORD=<secure-password>
+ ```
+
+2. **Configure retention policies**:
+ - Prometheus: Set appropriate retention based on storage
+ - Jaeger: Configure sampling to reduce data volume
+
+3. **Set up alerting**:
+ - Configure Prometheus alert rules
+ - Set up Grafana alert notifications (email, Slack, etc.)
+
+4. **Secure access**:
+ - Use reverse proxy (nginx/traefik) with TLS
+ - Implement authentication/authorization
+ - Restrict network access to monitoring ports
+
+5. **Scale for production**:
+ - Use external storage for Prometheus (remote write)
+ - Use production-grade Jaeger backend (Elasticsearch, Cassandra)
+ - Enable Grafana HA mode
+
+## Resources
+
+- [Prometheus Documentation](https://prometheus.io/docs/)
+- [Grafana Documentation](https://grafana.com/docs/)
+- [Jaeger Documentation](https://www.jaegertracing.io/docs/)
+- [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/)
+- [PromQL Cheat Sheet](https://promlabs.com/promql-cheat-sheet/)
diff --git a/docs/guides/prompt-orchestration.md b/docs/guides/prompt-orchestration.md
new file mode 100644
index 00000000..37681d54
--- /dev/null
+++ b/docs/guides/prompt-orchestration.md
@@ -0,0 +1,448 @@
+# Prompt Orchestration
+
+## Overview
+
+The Prompt Orchestration system is a pipeline component within the LLM API service that dynamically composes and enhances prompts before they are sent to inference providers. It applies conditional modules based on context, user preferences, and conversation history.
+
+**Architecture Decision**: Prompt orchestration is implemented as a **processor within the LLM API service**, not as an isolated microservice. This gives you dynamic control at runtime to add memory, tools, templates, customize tone, and assemble final prompts automatically.
+
+---
+
+## What is a Prompt Orchestration Processor?
+
+A **Prompt Orchestration Processor** is a processing layer within LLM API that:
+
+1. Takes a user's raw input (before it reaches the inference engine)
+2. Checks conditions (flags, context, user settings, memory, etc.)
+3. Composes a final prompt programmatically by applying conditional modules
+4. Passes that composed prompt to the inference provider (vLLM or remote)
+
+The processor sits in the request pipeline within `llm-api`, between the HTTP handler and the inference provider client.
+
+---
+
+## Architecture
+
+```
+HTTP Request (POST /v1/chat/completions)
+    ↓
+Gin Handler (llm-api/internal/interfaces/httpserver/handlers/chathandler)
+    ↓
+Prompt Orchestration Processor
+    - Check context & user preferences
+    - Check conversation memory
+    - Apply conditional rules
+    - Retrieve and inject memory
+    - Add tool instructions
+    - Apply templates
+    - Assemble final system + user prompts
+    ↓
+Inference Provider Client (internal/infrastructure/inference)
+    ↓
+vLLM or Remote Provider
+```
+
+### Package Structure
+
+```
+services/llm-api/internal/domain/prompt/
+├── types.go          # Core interfaces and types
+├── modules.go        # Built-in module implementations
+├── processor.go      # Main processor implementation
+└── processor_test.go # Unit tests
+```
+
+---
+
+## Features & Capabilities
+
+### What the Processor Can Do
+
+The processor can automatically attach optional modules as part of the LLM API request pipeline:
+
+#### Memory
+If the user enables memory, insert memory instructions into the prompt.
+
+#### Tool Usage
+Conditionally include instructions like:
+- "use the retrieval tool when needed"
+- "use the calculator tool if numbers appear"
+
+#### Templates / Prompt Patterns
+For example:
+- Chain-of-Thought structure
+- Output format
+- Persona / role descriptions
+- "First think step-by-step, then answer"
+
+#### Safety Rules
+Add system-level constraints when specific topics appear.
+
+#### Output Shapers
+Like "respond in JSON", "respond concisely", "use a teacher tone", etc.
+
+#### Conditional Behaviors
+- If question is about code → add code assistant template
+- If question mentions "summarize" → add summary template
+- If user speaks Vietnamese → switch language automatically
+
+---
+
+## Built-in Modules
+
+The processor includes several built-in modules that are automatically applied based on context:
+
+### 0. Persona Module (Always Active)
+- **Purpose**: Ensures a base system prompt/persona is present
+- **Activation**: Always registered; uses `X-Prompt-Persona` header or `PROMPT_ORCHESTRATION_PERSONA`
+- **Adds**: Persona/system instructions to the system prompt
+- **Priority**: 0 (runs first)
+
+### 1. Memory Module (Optional)
+- **Purpose**: Injects user-specific memory/preferences into prompts
+- **Activation**: Enabled via `PROMPT_ORCHESTRATION_MEMORY=true`
+- **Adds**: Memory hints stitched into the system prompt
+- **Priority**: 10
+
+### 2. Tool Instructions Module (Optional)
+- **Purpose**: Adds instructions for tool usage
+- **Activation**: `PROMPT_ORCHESTRATION_TOOLS=true` and preferences indicate tool usage (tools present on request or `use_tools` preference)
+- **Adds**: Tool selection and usage guidelines
+- **Priority**: 20
+
+### 3. Code Assistant Module (Template-Gated)
+- **Purpose**: Enhances prompts for code-related questions
+- **Activation**: `PROMPT_ORCHESTRATION_TEMPLATES=true` and code keywords present
+- **Adds**: Code formatting guidelines, best practices, error handling tips
+- **Priority**: 30
+
+### 4. Chain-of-Thought Module (Template-Gated)
+- **Purpose**: Encourages step-by-step reasoning for complex questions
+- **Activation**: `PROMPT_ORCHESTRATION_TEMPLATES=true` and reasoning signals (why/how/long form questions)
+- **Adds**: Instructions to break down problems and think systematically
+- **Priority**: 40
+
+---
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `PROMPT_ORCHESTRATION_ENABLED` | `true` | Enable/disable the processor |
+| `PROMPT_ORCHESTRATION_MEMORY` | `false` | Enable memory injection |
+| `PROMPT_ORCHESTRATION_TEMPLATES` | `true` | Enable template-based prompts (CoT + code assistant) |
+| `PROMPT_ORCHESTRATION_TOOLS` | `false` | Enable tool usage instructions |
+| `PROMPT_ORCHESTRATION_PERSONA` | `helpful assistant` | Default assistant persona |
+
+### YAML Configuration
+
+In `config/defaults.yaml`:
+
+```yaml
+services:
+  llm_api:
+    prompt_orchestration:
+      enabled: true
+      enable_memory: false
+      enable_templates: true
+      enable_tools: false
+      default_persona: helpful assistant
+```
+
+### Wire Integration
+
+The processor is integrated via dependency injection in `services/llm-api/cmd/server/wire.go`:
+
+```go
+// Prompt processor configuration
+wire.Bind(new(prompt.Processor), new(*prompt.ProcessorImpl)),
+prompt.NewProcessor,
+```
+
+---
+
+## Implementation Details
+
+### Module Interface
+
+Each module implements the `Module` interface:
+
+```go
+type Module interface {
+    Name() string
+    ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool
+    Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error)
+}
+```
+
+### Processing Flow
+
+1. **Context Building**: Create a `prompt.Context` with user ID, conversation ID, preferences, and memory (headers, conversation metadata, or recent turns)
+2. **Module Evaluation**: Each registered module checks if it should apply via `ShouldApply()`
+3. **Module Application**: Applicable modules modify messages via `Apply()` in priority order
+4. **Result**: Enhanced messages are passed to the inference provider
+
+### Module Priority System
+
+Modules are executed in priority order to ensure correct composition:
+- **Priority 0**: Persona Module (creates base system prompt)
+- **Priority 10**: Memory Module (adds user context)
+- **Priority 20**: Tool Instructions (adds tool capabilities)
+- **Priority 30**: Code Assistant (adds code-specific guidance)
+- **Priority 40**: Chain-of-Thought (adds reasoning structure)
+
+---
+
+## Usage Example
+
+The processor is automatically integrated into the chat completion flow:
+
+```go
+// In ChatHandler.CreateChatCompletion()
+promptCtx := &prompt.Context{
+    UserID:         userID,
+    ConversationID: conversationID,
+    Language:       strings.TrimSpace(reqCtx.GetHeader("Accept-Language")),
+    Preferences: map[string]interface{}{
+        "persona":   reqCtx.GetHeader("X-Prompt-Persona"),
+        "use_tools": len(request.Tools) > 0 || request.ToolChoice != nil,
+    },
+    Memory: h.collectPromptMemory(conv, reqCtx), // header X-Prompt-Memory, conversation metadata, or recent turns
+}
+
+processedMessages, err := h.promptProcessor.Process(ctx, promptCtx, request.Messages)
+if err != nil {
+    // Log and continue with original messages
+    log.Warn().Err(err).Msg("prompt processing failed")
+} else {
+    request.Messages = processedMessages
+    reqCtx.Header("X-Applied-Prompt-Modules", strings.Join(promptCtx.AppliedModules, ","))
+}
+```
+
+---
+
+## Example Transformations
+
+### Before Processing
+```json
+{
+  "messages": [
+    {"role": "user", "content": "How do I implement binary search in Go?"}
+  ]
+}
+```
+
+### After Processing
+*With Persona + Code Assistant + Memory modules applied:*
+
+```json
+{
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant. Follow the rules strictly.\n\nUse the following personal memory for this user:\n- User prefers detailed code examples\n- User is learning Go\n\nWhen providing code assistance:\n1. Provide clear, well-commented code\n2. Explain your approach and reasoning\n3. Include error handling where appropriate\n4. Follow best practices and conventions\n5. Suggest testing approaches when relevant"
+    },
+    {"role": "user", "content": "How do I implement binary search in Go?"}
+  ]
+}
+```
+
+### Another Example: Combined Prompt
+
+With multiple modules enabled, the final system prompt might look like:
+
+```
+You are a helpful assistant.
+
+Use the following memory for this user:
+- wife prefers female voice
+- avoid parentheses in Mermaid diagrams
+
+Respond in a structured style:
+1. Explanation
+2. Output
+3. Notes
+
+When providing code assistance:
+1. Provide clear, well-commented code
+2. Explain your approach and reasoning
+3. Include error handling where appropriate
+4. Follow best practices and conventions
+5. Suggest testing approaches when relevant
+
+User request:
+"How do I build a pricing model for my SaaS?"
+```
+
+---
+
+## Module Examples
+
+### Base System Prompt (Persona Module)
+```
+You are a helpful assistant. Follow the rules strictly.
+```
+
+### Memory Module
+```
+Use the following personal memory for this user:
+{{memory}}
+```
+
+### Tool Instructions Module
+```
+You have access to the following tools: {{tools}}
+Always choose the best tool for the task.
+```
+
+### Style / Persona Module
+```
+Respond in friendly tone unless user asks otherwise.
+```
+
+### Task Templates
+- Writing template
+- Analysis template
+- Translation template
+- Technical breakdown template
+
+---
+
+## Conditional Logic Pattern
+
+The processor applies modules conditionally based on context:
+
+```python
+# Pseudocode example
+prompt = BASE_SYSTEM_PROMPT
+
+if use_memory:
+    prompt += MEMORY_MODULE.replace("{{memory}}", retrieved_memory)
+
+if question_is_code:
+    prompt += CODE_ASSISTANT_TEMPLATE
+
+if user_language == "vi":
+    prompt += VIETNAMESE_STYLE_TEMPLATE
+
+if use_tools:
+    prompt += TOOL_INSTRUCTIONS_MODULE
+```
+
+---
+
+## Testing
+
+Run the test suite:
+
+```bash
+cd services/llm-api
+go test ./internal/domain/prompt/... -v
+```
+
+Tests cover:
+- Individual module behavior
+- Module conditional logic
+- Processor integration
+- Configuration handling
+- Persona defaults and template gating
+- Module priority ordering
+
+---
+
+## Observability
+
+The processor emits:
+- **OTEL events**: `processing_prompts`, `prompts_processed`
+- **Logs**: Applied module list, processing errors, module priority order
+- **HTTP header**: `X-Applied-Prompt-Modules` (comma-separated) for debugging
+
+Example log output:
+```json
+{
+  "level": "debug",
+  "component": "prompt-processor",
+  "conversation_id": "conv-123",
+  "applied_modules": ["persona", "memory", "code_assistant"],
+  "message": "applied prompt orchestration modules"
+}
+```
+
+---
+
+## Disabling Modules
+
+Users can disable specific modules via preferences:
+
+```go
+promptCtx := &prompt.Context{
+    Preferences: map[string]interface{}{
+        "disable_modules": []string{"chain_of_thought", "code_assistant"},
+    },
+}
+```
+
+Or via the helper function:
+```go
+promptCtx = prompt.WithDisabledModules(promptCtx, []string{"memory"})
+```
+
+---
+
+## Future Enhancements
+
+Potential additions to the processor:
+
+1. **Template Library**: Pre-built templates for common tasks (writing, analysis, translation)
+2. **User Memory Store**: Persistent storage for user preferences and memory
+3. **Dynamic Persona**: Adjust assistant personality based on context
+4. **Language Detection**: Automatically adapt to user's language
+5. **Safety Filters**: Add content moderation and safety rules
+6. **A/B Testing**: Compare different prompt strategies
+7. **Custom Module Registry**: Allow users to register custom modules
+8. **Module Composition Rules**: Define dependencies and conflicts between modules
+9. **Prompt Versioning**: Track and version prompt templates
+10. **Performance Optimization**: Cache compiled prompts for common scenarios
+
+---
+
+## Related Documentation
+
+- [Data Flow Reference](../architecture/data-flow.md)
+- [LLM API Documentation](../api/llm-api/README.md)
+- [Development Guide](./development.md)
+- [Testing Guide](./testing.md)
+
+---
+
+## Troubleshooting
+
+### Modules Not Applying
+
+**Check:**
+1. Is `PROMPT_ORCHESTRATION_ENABLED=true`?
+2. Are specific module flags enabled (`MEMORY`, `TEMPLATES`, `TOOLS`)?
+3. Does the module's `ShouldApply()` logic match your request?
+4. Check the `X-Applied-Prompt-Modules` response header (or the processor's debug logs) to see which modules were applied
+
+### Module Order Issues
+
+**Solution:**
+Modules execute in priority order (0, 10, 20, 30, 40). Persona always runs first to establish the base system prompt.
+
+### Memory Not Loading
+
+**Check:**
+1. Is memory provided via `X-Prompt-Memory` header or conversation metadata?
+2. Is `promptCtx.Memory` populated with items?
+3. Is `PROMPT_ORCHESTRATION_MEMORY=true`?
+
+### Performance Concerns
+
+**Optimization:**
+- Modules are sorted once during processor initialization
+- Each module only applies if `ShouldApply()` returns true
+- Consider caching compiled prompts for frequently used patterns
diff --git a/docs/guides/services-template.md b/docs/guides/services-template.md
new file mode 100644
index 00000000..39ee6bd4
--- /dev/null
+++ b/docs/guides/services-template.md
@@ -0,0 +1,21 @@
+# Service Template Overview
+
+The `services/template-api` directory contains a production-ready skeleton for new Jan microservices. Highlights:
+
+- Go module with config/logger/observability/http packages mirroring established patterns.
+- GORM/PostgreSQL wiring (connection pool, migrations, seed data, repository example).
+- Optional Keycloak JWT guard controlled via `AUTH_ENABLED`.
+- Makefile + Dockerfile for local dev and CI.
+- Wire entrypoint plus example env and docs.
+- Use `jan-cli dev scaffold <service-name>` to copy the template with placeholders replaced.
+
+## Getting Started
+1. Run `jan-cli dev scaffold my-service` (or copy `services/template-api` manually).
+2. Update `go.mod`, the service section inside `.env.template`, and `cmd/server/server.go` with your service-specific names and dependencies.
+3. Configure the database DSN (`DB_POSTGRESQL_WRITE_DSN`) and run `go run ./cmd/server` once so migrations seed the database.
+4. Decide whether to enable JWT auth (`AUTH_ENABLED`, `AUTH_ISSUER`, `ACCOUNT`, `AUTH_JWKS_URL`).
+5. Register your handlers inside `internal/interfaces/httpserver`.
+6. Add domain packages and migrations as needed.
+7. Update root `.env.template`, README, and deployment manifests to include your service.
+
+This guide provides a detailed checklist covering both greenfield and migration workflows for creating new services.
diff --git a/docs/guides/testing.md b/docs/guides/testing.md
new file mode 100644
index 00000000..85683940
--- /dev/null
+++ b/docs/guides/testing.md
@@ -0,0 +1,506 @@
+# Testing Guide
+
+**Last Updated**: January 2025
+
+How to test Jan Server on Windows, Linux, and macOS.
+
+---
+
+## Quick Summary
+
+Jan Server works on all major platforms:
+- **Windows** - PowerShell scripts and build tools
+- **Linux** - Full Docker support
+- **macOS** - Full Docker support
+
+### What We Test
+
+- Command-line tools (jan-cli)
+- Build process (Makefile)
+- Docker containers
+- Authentication (tokens, API keys)
+- Cross-platform compatibility
+
+---
+
+## Quick Start
+
+### Manual Testing
+
+**Unix/Linux/macOS:**
+```bash
+# Basic jan-cli commands
+./jan-cli.sh --help
+./jan-cli.sh dev setup
+./jan-cli.sh config generate
+./jan-cli.sh config validate
+./jan-cli.sh config show
+./jan-cli.sh service list
+./jan-cli.sh swagger generate --service llm-api
+
+# Makefile targets
+make setup
+make build-llm-api
+make build-media-api
+make build-mcp
+make clean-build
+```
+
+**Windows:**
+```powershell
+# Basic jan-cli commands
+.\jan-cli.ps1 --help
+.\jan-cli.ps1 dev setup
+.\jan-cli.ps1 config generate
+.\jan-cli.ps1 config validate
+.\jan-cli.ps1 config show
+.\jan-cli.ps1 service list
+.\jan-cli.ps1 swagger generate --service llm-api
+
+# Makefile targets (requires Git Bash or WSL)
+make setup
+make build-llm-api
+make build-media-api
+make build-mcp
+make clean-build
+```
+
+### Windows
+
+**Supported:**
+- OK jan-cli commands (all)
+- OK Makefile build targets
+- OK Configuration management
+- OK Local Docker Desktop
+
+**Not Supported in CI:**
+- Docker integration (GitHub Actions limitation)
+- Docker tests run on Ubuntu CI instead
+
+**Shell:** PowerShell 5.1+ or Git Bash
+
+### Linux (Ubuntu, Debian, etc.)
+
+**Supported:**
+- OK jan-cli commands (all)
+- OK Makefile targets (all)
+- OK Docker integration (native)
+- OK Full authentication testing
+
+**Shell:** Bash 4.0+
+
+### macOS
+
+**Supported:**
+- OK jan-cli commands (all)
+- OK Makefile targets (all)
+- OK Docker integration (via Docker Desktop or Colima)
+
+**Limitations in CI:**
+- Docker setup on GitHub Actions runners is optional (may fail)
+- Primary Docker testing happens on Ubuntu
+- macOS CI focuses on CLI/build verification
+
+**Shell:** Bash 3.2+ or Zsh
+
+---
+
+## Integration Testing
+
+### jan-cli api-test Collections
+
+**Authentication Tests:** run `make test-auth` (see [Docker Testing](#docker-testing) below).
+
+---
+
+## Manual Testing
+
+### Testing Checklist
+
+#### jan-cli Commands (All Platforms)
+
+| Command | Windows | Linux | macOS | Notes |
+|---------|---------|-------|-------|-------|
+| `jan-cli --help` | OK | OK | OK | Shows all commands |
+| `jan-cli dev setup` | OK | OK | OK | Creates directories, networks, `.env` |
+| `jan-cli config generate` | OK | OK | OK | Generates schemas and defaults.yaml |
+| `jan-cli config validate` | OK | OK | OK | Validates YAML configuration |
+| `jan-cli config show` | OK | OK | OK | Displays merged configuration |
+| `jan-cli config export --format env` | OK | OK | OK | Exports as environment variables |
+| `jan-cli service list` | OK | OK | OK | Lists all services with ports |
+| `jan-cli swagger generate --service llm-api` | OK | OK | OK | Generates OpenAPI docs |
+
+#### Makefile Targets (All Platforms)
+
+| Target | Windows | Linux | macOS | Notes |
+|--------|---------|-------|-------|-------|
+| `make setup` | OK | OK | OK | Delegates to jan-cli dev setup |
+| `make config-generate` | OK | OK | OK | Uses jan-cli config generate |
+| `make build-llm-api` | OK | OK | OK | Cross-platform build |
+| `make build-media-api` | OK | OK | OK | Cross-platform build |
+| `make build-mcp` | OK | OK | OK | Cross-platform build |
+| `make clean-build` | OK | OK | OK | Platform-specific cleanup |
+
+---
+
+## Docker Testing
+
+### Full Stack Tests (Linux/macOS)
+
+**Authentication Tests:**
+```bash
+make test-auth
+```
+
+Tests:
+- JWT token generation and validation
+- API key authentication
+- OAuth/OIDC flows with Keycloak
+- Token refresh endpoint
+
+**Conversation Tests:**
+```bash
+make test-conversations
+```
+
+Tests:
+- Create, read, update, delete conversations
+- Message history
+- Conversation metadata
+
+**Response API Tests:**
+```bash
+make test-response
+```
+
+**Media API Tests:**
+```bash
+make test-media
+```
+
+**MCP Integration Tests:**
+```bash
+make test-mcp-integration
+```
+
+### Docker Setup by Platform
+
+#### Linux (Ubuntu/Debian)
+
+```bash
+# Install Docker Engine
+curl -fsSL https://get.docker.com -o get-docker.sh
+sudo sh get-docker.sh
+
+# Add user to docker group
+sudo usermod -aG docker $USER
+newgrp docker
+
+# Install Docker Compose
+sudo apt-get update
+sudo apt-get install docker-compose-plugin
+```
+
+#### macOS
+
+**Option 1: Docker Desktop (Recommended for local development)**
+- Download: https://www.docker.com/products/docker-desktop
+- Pros: Easy to use, integrated Kubernetes
+- Cons: Resource heavy, requires license for enterprise
+
+**Option 2: Colima (Lightweight alternative)**
+```bash
+# Install via Homebrew
+brew install docker colima
+
+# Start with appropriate resources
+colima start --cpu 4 --memory 8 --disk 100
+
+# For CI/CD (conservative settings)
+colima start \
+ --cpu 2 \
+ --memory 4 \
+ --disk 20 \
+ --vm-type=vz \
+ --mount-type=virtiofs
+
+# Verify
+docker info
+docker compose version
+```
+
+#### Windows
+
+**Local Development:**
+- Install Docker Desktop: https://www.docker.com/products/docker-desktop
+- Requires WSL2 for best performance
+
+**GitHub Actions CI:**
+- Docker not available on Windows runners
+- Full stack tests run on Ubuntu instead
+- Windows CI focuses on CLI and build tests
+
+---
+
+## Platform-Specific Fixes
+
+### 1. Makefile Path Separators
+
+**Issue:** GitHub Actions Windows runners use Git bash, which doesn't support backslash paths.
+
+**Fix:** Use forward slashes universally (works in bash, PowerShell, and CMD):
+
+```makefile
+build-llm-api:
+	@echo "Building LLM API..."
+ifeq ($(OS),Windows_NT)
+	@cd services/llm-api && go build -o bin/llm-api.exe ./cmd/server
+else
+	@cd services/llm-api && go build -o bin/llm-api ./cmd/server
+endif
+```
+
+**Key Insight:** Forward slashes work on all platforms in modern shells.
+
+### 2. Clean Target Platform Commands
+
+**Issue:** `rm -rf` doesn't exist on Windows.
+
+**Fix:** Platform-specific directory removal:
+
+```makefile
+clean-build:
+	@echo "Cleaning build artifacts..."
+ifeq ($(OS),Windows_NT)
+	@if exist services\llm-api\bin rd /s /q services\llm-api\bin >nul 2>&1
+	@if exist services\media-api\bin rd /s /q services\media-api\bin >nul 2>&1
+	@if exist services\mcp-tools\bin rd /s /q services\mcp-tools\bin >nul 2>&1
+else
+	@rm -rf services/llm-api/bin
+	@rm -rf services/media-api/bin
+	@rm -rf services/mcp-tools/bin
+endif
+```
+
+### 3. Auto-Rebuild Detection
+
+**Issue:** Wrapper scripts only checked `main.go`, missing changes in other source files.
+
+**Fix:** Check all `*.go` files recursively:
+
+**Windows (jan-cli.ps1):**
+```powershell
+$needsRebuild = $false
+Get-ChildItem -Path $CLIDir -Filter "*.go" -Recurse | ForEach-Object {
+ if ($_.LastWriteTime -gt $binaryTime) {
+ $needsRebuild = $true
+ }
+}
+```
+
+**Unix (jan-cli.sh):**
+```bash
+if find "$CLI_DIR" -name "*.go" -type f -newer "$BINARY" | grep -q .; then
+ echo "Detected changes in source files. Rebuilding..."
+fi
+```
+
+### 4. Cross-Platform Sleep Commands
+
+**Issue:** Interactive setup needs platform-specific sleep commands.
+
+**Fix:** Platform detection in Go:
+
+```go
+func execCommandSilent(name string, args ...string) error {
+	cmd := exec.Command(name, args...)
+	return cmd.Run()
+}
+
+// Platform-specific sleep
+if isWindows() {
+	execCommandSilent("powershell", "-Command", "Start-Sleep -Seconds 2")
+} else {
+	execCommandSilent("sleep", "2")
+}
+```
+
+### 5. Optional Docker Dependency
+
+**Issue:** Docker not available on Windows CI and macOS CI may have Colima startup failures.
+
+**Fix:** Made Docker checks optional in `cmd/jan-cli/cmd_dev.go`:
+
+```go
+dockerAvailable := isDockerAvailable()
+if !dockerAvailable {
+	fmt.Println("WARNING: Docker is not available")
+	fmt.Println(" Some features will be skipped")
+	// Continue with CLI-only setup
+}
+
+// Conditionally create Docker network
+if dockerAvailable {
+	createDockerNetwork()
+}
+```
+
+---
+
+## Troubleshooting
+
+### Permission Denied on jan-cli.sh
+
+```bash
+chmod +x jan-cli.sh
+```
+
+### Docker Commands Fail
+
+```bash
+# Check Docker is running
+docker ps
+
+# Linux: Add user to docker group
+sudo usermod -aG docker $USER
+# Then logout and login
+
+# macOS: Start Docker Desktop or Colima
+open -a Docker # Docker Desktop
+colima start # Colima
+```
+
+### Go Not Found
+
+```bash
+# Check Go installation
+which go
+go version
+
+# If not installed:
+# Linux: sudo apt install golang-go
+# macOS: brew install go
+# Windows: Download from https://go.dev/dl/
+```
+
+### Build Failures
+
+```bash
+# Clean and rebuild
+make clean-build
+make build-llm-api
+
+# Check Go modules
+go mod download
+go mod verify
+```
+
+### Makefile Not Found (Windows)
+
+Make requires Git Bash or WSL on Windows:
+- Install Git for Windows: https://git-scm.com/download/win
+- Or use WSL: https://docs.microsoft.com/en-us/windows/wsl/install
+
+### Path Issues on Windows
+
+GitHub Actions Windows runners use Git bash. Ensure:
+- Use forward slashes in Makefile paths
+- Use `ifeq ($(OS),Windows_NT)` branches for Windows-specific commands
+- Binary names include `.exe` extension for Windows
+
+---
+
+## Best Practices
+
+### 1. Use jan-cli for Complex Operations
+
+Prefer `jan-cli` commands for:
+- File system operations (creating directories, copying files)
+- Interactive prompts
+- Complex conditional logic
+
+**Why:** Go code is inherently cross-platform, while Makefile requires platform-specific branches.
+
+### 2. Use Makefile for Docker Operations
+
+Prefer `Makefile` for:
+- Docker Compose commands (`up-infra`, `up-full`, `down`)
+- Service orchestration
+- Testing with jan-cli api-test
+- Health checks
+
+**Why:** These operations are already cross-platform via Docker CLI.
+
+### 3. Test Both Systems
+
+When adding new functionality:
+1. Test on Windows PowerShell first (most restrictive)
+2. Test on Linux/macOS
+3. Verify wrapper scripts auto-rebuild correctly
+4. Check that both `jan-cli` and `make` interfaces work
+
+### 4. Manual Testing Before Pushing
+
+**Unix/Linux/macOS:**
+```bash
+# Test basic commands
+./jan-cli.sh --help
+./jan-cli.sh config validate
+make build-llm-api
+```
+
+**Windows:**
+```powershell
+# Test basic commands
+.\jan-cli.ps1 --help
+.\jan-cli.ps1 config validate
+make build-llm-api
+```
+
+---
+
+## Summary
+
+### What Works
+
+- All core `jan-cli` commands on Windows, Linux, macOS
+- Makefile build targets (cross-platform)
+- Docker integration on Linux/macOS (and Windows local)
+- Configuration management and validation
+- Service orchestration and health checks
+
+### Platform Limitations
+
+**Windows:**
+- Requires Git Bash or WSL for Makefile
+- Binary names need `.exe` extension
+
+**macOS:**
+- Requires Docker Desktop or Colima for Docker support
+
+**Linux:**
+- Full compatibility, no known limitations
+
+### Testing Coverage
+
+- OK CLI commands: All platforms
+- OK Build targets: All platforms
+- OK Docker integration: Linux/macOS/Windows (with Docker Desktop)
+- OK Authentication: jan-cli api-test collections
+- OK API integration: jan-cli api-test collections
+
+---
+
+## Related Documentation
+
+- [Jan CLI Guide](jan-cli.md) - Complete jan-cli command reference
+- [Configuration System](../configuration/README.md) - Configuration management
+- [Development Guide](development.md) - Local development setup
+- [Architecture Overview](../architecture/README.md) - System design
+
+---
+
+**Tested Platforms:**
+- OK Windows 11 PowerShell 5.1
+- OK Ubuntu 22.04+
+- OK macOS 14+
diff --git a/docs/guides/troubleshooting.md b/docs/guides/troubleshooting.md
new file mode 100644
index 00000000..0ad38618
--- /dev/null
+++ b/docs/guides/troubleshooting.md
@@ -0,0 +1,469 @@
+# Troubleshooting Guide
+
+Common problems and how to fix them.
+
+## Quick Fixes
+
+1. [Services won't start](#service-startup-issues)
+2. [Database errors](#database-issues)
+3. [API not responding](#api-issues)
+4. [Login problems](#authentication-issues)
+5. [Docker problems](#docker-issues)
+6. [Kubernetes problems](#kubernetes-issues)
+7. [Slow performance](#performance-issues)
+
+## Service Startup Issues
+
+### Port Already in Use
+
+**Error**: `Address already in use` or port conflict
+
+**Solutions**:
+```bash
+# Find what's using the port (Linux/macOS)
+lsof -i:8080
+lsof -i:8082
+lsof -i:8285
+lsof -i:8091
+
+# Find what's using the port (Windows)
+netstat -ano | findstr :8080
+taskkill /PID <PID> /F
+
+# Or change ports in .env
+HTTP_PORT=8081
+RESPONSE_API_PORT=8083
+MEDIA_API_PORT=8286
+```
+
+### Service Crashes on Startup
+
+**Problem**: Container starts then immediately stops
+
+**How to diagnose**:
+```bash
+# Check the logs
+make logs-api
+make logs-mcp
+
+# Or use Docker directly
+docker logs <container-id>
+```
+
+**Common causes**:
+- Missing required settings in .env file
+- Database isn't ready yet
+- Wrong configuration values
+
+**Fix**:
+```bash
+# Check your .env file has all required values
+cat .env
+
+# Make sure database is running
+make health-check
+
+# Restart the service
+docker restart <container-name>
+```
+
+### Services Won't Connect
+
+**Error**: `connection refused` or `cannot reach service`
+
+**Solutions**:
+```bash
+# Verify all services are running
+docker ps
+
+# Check network connectivity
+docker network ls
+docker network inspect jan-server_default
+
+# Test connectivity between services
+docker exec llm-api curl http://media-api:8285/healthz
+
+# Verify DNS resolution
+docker exec llm-api nslookup media-api
+```
+
+## Database Issues
+
+### Database Connection Failed
+
+**Error**: `dial tcp: connect: connection refused` or `database connection error`
+
+**Solutions**:
+```bash
+# Check if PostgreSQL is running
+docker ps | grep postgres
+
+# Check database credentials in .env
+grep DATABASE .env
+
+# Test connection
+docker exec api-db psql -U jan_user -d jan_llm_api -c "SELECT 1"
+
+# Verify database exists
+docker exec api-db psql -U postgres -l | grep jan
+```
+
+### Missing Database
+
+**Error**: `database "jan_llm_api" does not exist`
+
+**Solutions**:
+```bash
+# Create database
+docker exec api-db psql -U postgres -c "CREATE DATABASE jan_llm_api"
+
+# Or run migrations
+docker exec llm-api /app/llm-api migrate
+
+# Or with make
+make db-migrate
+```
+
+### Table Migration Failed
+
+**Error**: Migration errors or schema mismatch
+
+**Solutions**:
+```bash
+# View migrations
+docker exec api-db psql -U jan_user -d jan_llm_api \
+ -c "SELECT name, version FROM schema_migrations"
+
+# Reset database (WARNING: destroys data!)
+make db-reset
+
+# Re-migrate
+make db-migrate
+```
+
+### Database Disk Full
+
+**Error**: `No space left on device`
+
+**Solutions**:
+```bash
+# Check disk usage
+df -h
+
+# Clean up Docker volumes
+docker system prune -a --volumes
+
+# Or manually remove volume
+docker volume ls
+docker volume rm <volume-name>
+```
+
+## API Issues
+
+### 401 Unauthorized
+
+**Error**: All requests return 401
+
+**Solutions**:
+```bash
+# Get a guest token
+curl -X POST http://localhost:8000/llm/auth/guest-login
+
+# Use token in requests
+curl -H "Authorization: Bearer <token>" \
+ http://localhost:8000/v1/models
+
+# Check Keycloak is running
+docker ps | grep keycloak
+
+# Verify token is valid
+jwt decode <token> # requires jwt-cli
+```
+
+### 404 Not Found
+
+**Error**: Endpoints return 404
+
+**Check**:
+```bash
+# Verify service is running and healthy
+curl http://localhost:8080/healthz # LLM API
+curl http://localhost:8082/healthz # Response API
+curl http://localhost:8285/healthz # Media API
+curl http://localhost:8091/healthz # MCP Tools
+
+# Verify Kong is routing correctly
+curl http://localhost:8000/ # Kong health
+curl http://localhost:8000/v1/models # Via Kong
+```
+
+### Timeout Errors
+
+**Error**: `408 Request Timeout` or connection hangs
+
+**Solutions**:
+```bash
+# Increase timeout in .env
+TOOL_EXECUTION_TIMEOUT=120s
+MEDIA_REMOTE_FETCH_TIMEOUT=30s
+
+# Check service performance
+docker stats llm-api media-api response-api mcp-tools
+
+# Look for stuck processes
+make logs-api | grep -i timeout
+```
+
+### 500 Internal Server Error
+
+**Error**: Unexpected server error
+
+**Debug**:
+```bash
+# View detailed logs
+docker logs <service-name> --tail=100 -f
+
+# Check if service crashed
+docker inspect <service-name>
+
+# Restart service
+docker restart <service-name>
+
+# Or full restart
+make down && make up-full
+```
+
+## Authentication Issues
+
+### Keycloak Not Responding
+
+**Error**: `Failed to connect to Keycloak` or auth endpoints fail
+
+**Solutions**:
+```bash
+# Check if Keycloak is running
+docker ps | grep keycloak
+
+# Verify it's accessible
+curl http://localhost:8085/admin
+
+# Check logs
+docker logs keycloak
+
+# Restart Keycloak
+docker restart keycloak
+```
+
+### Invalid JWT Token
+
+**Error**: Token is expired or invalid
+
+**Solutions**:
+```bash
+# Get new token
+curl -X POST http://localhost:8000/llm/auth/guest-login
+
+# Check token expiration
+jwt decode <token>
+
+# For user auth, check credentials
+curl -X POST http://localhost:8085/auth/realms/jan/protocol/openid-connect/token \
+ -d "client_id=llm-api&grant_type=password&username=admin&password=admin"
+```
+
+## Docker Issues
+
+### Out of Memory
+
+**Error**: `OOMKilled` or memory errors
+
+**Solutions**:
+```bash
+# Check memory usage
+docker stats
+
+# Increase Docker memory limit
+# In Docker Desktop: Settings > Resources > Memory
+
+# Or reduce services running
+make down
+```
+
+### Disk Space Low
+
+**Error**: `no space left on device`
+
+**Solutions**:
+```bash
+# Clean up unused images and volumes
+docker system prune -a --volumes
+
+# Remove old containers
+docker container prune
+
+# Check image sizes
+docker images --format "table {{.Repository}}\t{{.Size}}"
+```
+
+### Network Issues
+
+**Error**: Services can't communicate
+
+**Solutions**:
+```bash
+# Verify network exists
+docker network ls
+
+# Check network configuration
+docker network inspect jan-server_default
+
+# Recreate network if needed
+docker network rm jan-server_default
+docker network create jan-server_default
+```
+
+## Kubernetes Issues
+
+### Pod Stuck in Pending
+
+**Error**: Pod stays in Pending state
+
+**Debug**:
+```bash
+# Check events
+kubectl describe pod -n jan-server <pod-name>
+
+# Check node resources
+kubectl top nodes
+
+# Check available storage
+kubectl get pvc -n jan-server
+```
+
+### ImagePullBackOff
+
+**Error**: Can't pull image
+
+**Solutions**:
+```bash
+# Verify image exists
+minikube image ls | grep jan
+
+# Rebuild image
+cd services/llm-api
+docker build -t jan/llm-api:latest .
+minikube image load jan/llm-api:latest
+
+# Or update imagePullPolicy in values.yaml
+imagePullPolicy: Never # For minikube
+```
+
+### Service Not Accessible
+
+**Error**: Service endpoints not working
+
+**Debug**:
+```bash
+# Check service exists
+kubectl get svc -n jan-server
+
+# Port forward for access
+kubectl port-forward -n jan-server svc/jan-server-llm-api 8080:8080
+
+# Check service endpoints
+kubectl get endpoints -n jan-server
+```
+
+## Performance Issues
+
+### High Memory Usage
+
+**Symptoms**: Services use lots of memory
+
+**Solutions**:
+```bash
+# Monitor memory
+docker stats llm-api
+
+# Reduce batch size or concurrency
+# Check service configuration in docker compose
+
+# Look for memory leaks in logs
+docker logs llm-api | grep -i memory
+```
+
+### High CPU Usage
+
+**Symptoms**: CPU usage maxed out
+
+**Solutions**:
+```bash
+# Monitor CPU
+docker stats
+
+# Reduce concurrent requests
+# Set rate limits in configuration
+
+# Check for infinite loops or busy waits
+docker logs llm-api | grep -i error
+```
+
+### Slow Responses
+
+**Symptoms**: API requests are slow
+
+**Solutions**:
+```bash
+# Check database performance
+docker exec api-db psql -U jan_user -d jan_llm_api \
+ -c "SELECT query, calls, total_time FROM pg_stat_statements ORDER BY total_time DESC LIMIT 10"
+
+# Enable query logging
+# Set LOG_LEVEL=debug in .env
+
+# Use monitoring stack for traces
+make monitor-up
+# Visit http://localhost:16686 (Jaeger)
+```
+
+## Getting Help
+
+### Gathering Debug Information
+
+Before asking for help, collect this information:
+
+```bash
+# System info
+docker version
+docker-compose version
+go version
+kubectl version
+
+# Service status
+make health-check
+
+# Logs from all services
+make logs > debug-logs.txt
+
+# Configuration (without secrets)
+cat .env | grep -v _KEY | grep -v _PASSWORD > config.txt
+
+# Docker system status
+docker system df
+docker ps -a
+```
+
+### Requesting Support
+
+When reporting issues, include:
+1. Error messages and logs
+2. Steps to reproduce
+3. Your environment (OS, Docker version, etc.)
+4. Configuration (sanitized)
+5. What you've already tried
+
+**Resources**:
+- [GitHub Issues](https://github.com/janhq/jan-server/issues)
+- [Discussions](https://github.com/janhq/jan-server/discussions)
+- [Architecture Documentation](../architecture/)
+- [Development Guide](./development.md)
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 00000000..b6192f04
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,92 @@
+# Documentation Index & Navigation Guide
+
+**Last updated:** November 16, 2025  
+**Status:** In progress (documentation verification underway)
+
+Use this file as your jump-off point when you are not sure where a topic lives.
+
+---
+
+## Start Here
+
+1. [Quick Start (5 minutes)](getting-started/README.md) - Run everything with Docker Compose.
+2. [Architecture Overview](architecture/README.md) - Understand each service and its role.
+3. [API Overview](api/README.md) - Learn how authentication and routing work.
+4. [First API Call](api/llm-api/README.md#quick-start) - Test the LLM API end to end.
+5. [Makefile Commands](guides/development.md#makefile-commands-reference) - 100+ targets and helper commands.
+
+---
+
+## Audience Navigation
+
+### New Users
+- [Quick Start Guide](getting-started/README.md) - Installation, prerequisites, and walkthrough.
+- [System Architecture](architecture/README.md) - High-level diagram and components.
+- [LLM API Quick Start](api/llm-api/README.md#quick-start) - Sample curl call with auth flow.
+
+### Developers
+- [Development Guide](guides/development.md) - Local + hybrid workflows, Make targets.
+- [Configuration System](configuration/README.md) - Precedence rules and env var mapping.
+- [Testing Guide](guides/testing.md) - jan-cli api-test collections, coverage, and best practices.
+- [Hybrid Mode](guides/hybrid-mode.md) - Run select services natively.
+- [Service Template](guides/services-template.md) - Generate a new Go microservice.
+- [IDE Setup](guides/ide/vscode.md) - Launch configurations and debugging tips.
+
+### API Consumers
+- [API Overview](api/README.md) - Authentication, headers, and service map.
+- [LLM API](api/llm-api/README.md) - Chat, models, conversations, streaming.
+- [Response API](api/response-api/README.md) - Tool orchestration workflows.
+- [Media API](api/media-api/README.md) - Upload, jan_* IDs, presigned URLs.
+- [MCP Tools](api/mcp-tools/README.md) - JSON-RPC endpoints for tool providers.
+- [LLM Examples](api/llm-api/examples.md) - Ready-made curl samples.
+
+### Operators
+- [Deployment Guide](guides/deployment.md) - Docker profiles, Kubernetes, and CI/CD.
+- [Kubernetes Setup](../k8s/SETUP.md) - Helm installation and cluster guidance.
+- [Monitoring Guide](guides/monitoring.md) - Grafana, Prometheus, Jaeger, OTEL.
+- [Authentication & Gateway](guides/authentication.md) - Kong + Keycloak integration.
+- [Troubleshooting](guides/troubleshooting.md) - Common errors and recovery steps.
+- [Security Policy](architecture/security.md) - Responsible disclosure and hardening checklist.
+
+### Reference & Governance
+- [Documentation Quality Report](../DOCUMENTATION_QUALITY_REPORT.md) - Release-ready criteria and latest findings.
+- [Conventions](conventions/conventions.md) - Code style, patterns, and workflow.
+- [Planning Overview](planning/README.md) - Roadmaps and initiatives.
+- [Templates](templates/README.md) - API, architecture, and guide templates.
+
+---
+
+## Directory Map
+
+- `docs/getting-started/` - Installation, prerequisites, and troubleshooting.
+- `docs/api/` - Service-specific references plus shared overview.
+- `docs/architecture/` - System design, services, security, data flow, observability.
+- `docs/configuration/` - Loader behavior, precedence, env var mapping, docker/k8s examples.
+- `docs/guides/` - Development, deployment, testing, CLI, monitoring, IDE, troubleshooting.
+- `docs/conventions/` - Standards, patterns, workflow, and reviews.
+- `docs/planning/` - Initiatives, RFCs, and planning templates.
+- `docs/templates/` - Boilerplates for new documentation.
+- `docs/architecture/services.md` - Service responsibilities, ports, and dependencies at a glance.
+
+Need something inside `/services`? Each microservice has its own `README.md` with implementation details.
+
+---
+
+## External References
+
+- [OpenAI API Docs](https://platform.openai.com/docs/api-reference)
+- [Model Context Protocol](https://modelcontextprotocol.io/)
+- [JSON-RPC 2.0 Spec](https://www.jsonrpc.org/specification)
+- [Kong Gateway](https://konghq.com/), [Keycloak](https://www.keycloak.org/), [PostgreSQL](https://www.postgresql.org/)
+- [OpenTelemetry](https://opentelemetry.io/), [Prometheus](https://prometheus.io/), [Jaeger](https://www.jaegertracing.io/), [Grafana](https://grafana.com/)
+
+---
+
+## Maintenance & Metrics
+
+- Markdown files in `/docs`: 48 (tracked on November 16, 2025 via `Get-ChildItem`).
+- Primary services covered: LLM API, Response API, Media API, MCP Tools, Template API.
+- Last major audit: November 10, 2025 (see [Audit Summary](AUDIT_SUMMARY.md)).
+- Next planned review: Q1 2026 once current verification checklist is completed.
+
+Keep this page updated whenever you add a new directory or move content so contributors always have an accurate map.
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 00000000..52649990
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,298 @@
+# Jan Server Quick Start Guide
+
+Get Jan Server running in minutes with the interactive setup wizard.
+
+## Prerequisites
+
+- **Docker Desktop** (Windows/macOS) or **Docker + Docker Compose** (Linux)
+- **Make** (pre-installed on Linux/macOS, [install on Windows](https://gnuwin32.sourceforge.net/packages/make.htm))
+- At least **8GB RAM** available
+- Optional: **NVIDIA GPU** with CUDA support for local inference
+
+## One-Command Setup
+
+### Windows (PowerShell)
+
+```powershell
+git clone https://github.com/janhq/jan-server.git
+cd jan-server
+make quickstart
+```
+
+### Linux / macOS
+
+```bash
+git clone https://github.com/janhq/jan-server.git
+cd jan-server
+make quickstart
+```
+
+## Interactive Configuration Wizard
+
+The setup wizard will guide you through:
+
+### 1. LLM Provider Setup
+
+Choose your inference provider:
+
+**Option 1: Local vLLM (GPU required)**
+- Uses local GPU for inference
+- Requires HuggingFace token for model downloads
+- Models run in Docker container
+- Default model: `Qwen/Qwen2.5-0.5B-Instruct`
+
+**Option 2: Remote API Endpoint**
+- Use any OpenAI-compatible API
+- Options: OpenAI, Azure OpenAI, Anthropic, Groq, etc.
+- Provide URL and API key
+- No GPU or HuggingFace token needed
+- **vLLM service will not be started** (uses fewer resources)
+
+### 2. MCP Search Tool Configuration
+
+**Note**: MCP Tools and Vector Store always run. This choice only affects the search functionality.
+
+Choose search provider for MCP tools:
+
+**Option 1: Serper (Recommended)**
+- Google search API
+- Requires API key from [serper.dev](https://serper.dev)
+- Best search results
+
+**Option 2: SearXNG (Local)**
+- Privacy-focused meta-search engine
+- Runs locally in Docker
+- No API key required
+- Slightly slower
+
+**Option 3: None**
+- Disable search functionality only
+- MCP Tools and Vector Store still available for other features
+
+### 3. Media API Setup
+
+**Enable Media API**: For file uploads, image handling, and media management
+
+**Disable Media API**: If you don't need media functionality
+
+## Example Configuration Flows
+
+### Flow 1: Full Local Setup (GPU)
+
+```
+LLM Provider Setup
+Choose: [1] Local vLLM
+HF_TOKEN: hf_xxxxxxxxxxxxx
+
+MCP Search Tool Setup
+Choose: [2] SearXNG (no API key needed)
+
+Media API Setup
+Enable: [Y] Yes
+
+Result: Fully local, privacy-focused setup
+```
+
+### Flow 2: Cloud API + Serper
+
+```
+LLM Provider Setup
+Choose: [2] Remote API endpoint
+URL: https://api.openai.com/v1
+API Key: sk-xxxxxxxxxxxxx
+
+MCP Search Tool Setup
+Choose: [1] Serper
+SERPER_API_KEY: xxxxxxxxxxxxx
+
+Media API Setup
+Enable: [Y] Yes
+
+Result: Cloud-based inference with best search
+```
+
+### Flow 3: Minimal Setup (No Search, No Media)
+
+```
+LLM Provider Setup
+Choose: [2] Remote API endpoint
+URL: https://api.groq.com/openai/v1
+API Key: gsk_xxxxxxxxxxxxx
+
+MCP Search Tool Setup
+Choose: [3] None (MCP Tools/Vector Store still run)
+
+Media API Setup
+Enable: [N] No
+
+Result: Remote LLM + MCP Tools (no search) + No Media
+```
+
+## What Happens During Setup
+
+1. **Configuration Wizard** - Interactive prompts for your choices
+2. **Environment Setup** - Creates `.env` with your configuration
+3. **Dependency Check** - Verifies Docker is running
+4. **Network Creation** - Sets up Docker networks
+5. **Service Start** - Launches all configured services
+6. **Health Wait** - Waits 30s for services to be ready
+
+## Services Started
+
+Depending on your configuration:
+
+| Service | Port | When Active |
+|---------|------|-------------|
+| Kong API Gateway | 8000 | Always |
+| LLM API | 8080 | Always |
+| Keycloak Auth | 8085 | Always |
+| PostgreSQL | 5432 | Always |
+| **MCP Tools** | 8091 | **Always** |
+| **Vector Store** | 3015 | **Always** |
+| **vLLM Inference** | 8101 | **If Local vLLM chosen** |
+| Media API | 8285 | If Media enabled |
+
+**Note**: 
+- MCP Tools and Vector Store always run regardless of search engine choice
+- SearXNG and SandboxFusion are currently disabled in this phase
+- vLLM only starts if you choose "Local vLLM" as your provider
+
+## First API Call
+
+### 1. Get Guest Token
+
+```bash
+# Windows (PowerShell)
+$response = Invoke-RestMethod -Method Post -Uri http://localhost:8000/llm/auth/guest-login
+$token = $response.access_token
+
+# Linux / macOS
+TOKEN=$(curl -X POST http://localhost:8000/llm/auth/guest-login | jq -r '.access_token')
+```
+
+### 2. Chat Completion
+
+```bash
+# Windows (PowerShell)
+Invoke-RestMethod -Method Post -Uri http://localhost:8000/v1/chat/completions `
+ -Headers @{"Authorization"="Bearer $token"; "Content-Type"="application/json"} `
+ -Body '{"model":"qwen2.5-0.5b-instruct","messages":[{"role":"user","content":"Hello!"}]}'
+
+# Linux / macOS
+curl -X POST http://localhost:8000/v1/chat/completions \
+ -H "Authorization: Bearer $TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{"model":"qwen2.5-0.5b-instruct","messages":[{"role":"user","content":"Hello!"}]}'
+```
+
+## Common Commands
+
+```bash
+# Check service health
+make health-check
+
+# View logs
+make logs-llm-api # LLM API logs
+make logs-mcp # MCP tools logs
+make logs # All logs
+
+# Restart services
+make restart # Restart all
+make restart-llm-api # Restart specific service
+
+# Stop services
+make down # Stop and remove containers
+make stop # Stop but keep containers
+```
+
+## Updating Configuration
+
+To reconfigure after initial setup:
+
+```bash
+# Re-run interactive setup
+make quickstart
+
+# When prompted "Found existing .env, update?", choose [Y]
+```
+
+## Manual Configuration
+
+If you prefer manual setup:
+
+```bash
+# 1. Copy templates
+cp .env.template .env
+cp config/secrets.env.example config/secrets.env
+
+# 2. Edit .env / config/secrets.env with your values
+nano .env
+nano config/secrets.env
+
+# 3. Run setup without prompts
+# Windows
+powershell -ExecutionPolicy Bypass -File .\jan-cli.ps1 dev setup
+# Linux / macOS
+./jan-cli.sh dev setup
+
+# 4. Start services
+make up-full
+```
+
+## Troubleshooting
+
+### Port Conflicts
+
+If you see port binding errors:
+
+```bash
+# Windows
+netstat -ano | findstr "8000 8080 8085"
+
+# Linux/macOS
+lsof -i:8000
+lsof -i:8080
+```
+
+### Services Not Starting
+
+```bash
+# Check Docker
+docker --version
+docker compose version
+
+# View errors
+make logs-error
+
+# Full reset
+make down
+make clean
+docker system prune -a # Warning: removes all stopped containers, unused networks, unused images, and build cache
+make quickstart
+```
+
+### GPU Not Detected
+
+If vLLM can't find your GPU:
+
+```bash
+# Check NVIDIA drivers
+nvidia-smi
+
+# Use CPU inference instead
+# Choose option [2] Remote API in setup wizard
+# Or manually set VLLM_PORT=8102 for CPU mode
+```
+
+## Next Steps
+
+- [API Documentation](http://localhost:8000/v1/swagger/)
+- [Development Guide](guides/development.md)
+- [Deployment Guide](guides/deployment.md)
+- [Testing Guide](guides/testing.md)
+
+## Getting Help
+
+- **Issues**: [GitHub Issues](https://github.com/janhq/jan-server/issues)
+- **Discussions**: [GitHub Discussions](https://github.com/janhq/jan-server/discussions)
+- **Documentation**: [Documentation Hub](README.md)
diff --git a/docs/runbooks/monitoring.md b/docs/runbooks/monitoring.md
new file mode 100644
index 00000000..54b9d1c4
--- /dev/null
+++ b/docs/runbooks/monitoring.md
@@ -0,0 +1,411 @@
+# Jan Server Monitoring Runbook
+
+## Quick Reference
+
+| Alert | Severity | MTTR Target | On-Call Action |
+|-------|----------|-------------|----------------|
+| HighLLMLatency | Warning | 15min | [§1](#1-high-llm-latency) |
+| QueueBacklog | Critical | 5min | [§2](#2-queue-backlog) |
+| CollectorDown | Critical | 2min | [§3](#3-collector-outage) |
+| StorageFailure | Critical | 10min | [§4](#4-media-api-storage-failure) |
+| TraceExportFailure | Warning | 30min | [§5](#5-trace-export-failure) |
+| ClassifierErrors | Warning | 20min | [§6](#6-conversation-classifier-errors) |
+
+---
+
+## 1. High LLM Latency
+
+**Alert:** `HighLLMLatency`  
+**Triggered when:** P95 LLM API latency >2s for 5min  
+**Impact:** Degraded user experience, potential timeouts, increased abandonment rate
+
+### Investigation Steps
+
+1. **Check LLM Provider Dashboard**
+   ```bash
+   # Open Grafana
+   open https://grafana/d/llm-overview
+   ```
+   - Review latency by model (GPT-4 vs GPT-3.5)
+   - Check error rates per provider
+
+2. **Verify Upstream Provider Status**
+   - OpenAI: https://status.openai.com
+   - Anthropic: https://status.anthropic.com
+   - Azure: https://status.azure.com
+
+3. **Check Recent Deployments**
+   ```bash
+   git log --since="1 hour ago" --oneline
+   kubectl rollout history deployment/llm-api
+   ```
+
+4. **Inspect Token Queue Depth**
+   ```bash
+   curl localhost:8080/metrics | grep queue_depth
+   ```
+
+5. **Review Jaeger Traces**
+   - Find slow traces: `http://jaeger:16686/search?service=llm-api&minDuration=2s`
+   - Look for database queries, external API calls taking >1s
+
+### Remediation
+
+**If Provider Issue:**
+```bash
+# Enable fallback provider
+jan-cli config set llm.fallback_enabled=true
+jan-cli config set llm.fallback_provider=anthropic
+```
+
+**If Jan Server Issue:**
+```bash
+# Scale replicas
+kubectl scale deployment/llm-api --replicas=5
+
+# If memory exhaustion
+kubectl top pod -l app=llm-api
+kubectl set resources deployment/llm-api --limits=memory=2Gi
+```
+
+**If Database Bottleneck:**
+```bash
+# Check connection pool
+psql jan_server -c "SELECT COUNT(*), state FROM pg_stat_activity GROUP BY state;"
+
+# Check slow queries
+psql jan_server -c "SELECT query, mean_exec_time FROM pg_stat_statements ORDER BY mean_exec_time DESC LIMIT 10;"
+```
+
+### Escalation
+
+- **After 30min:** Page SRE team lead via PagerDuty
+- **After 1h:** Engage vendor support (OpenAI/Anthropic)
+- **If P0:** Notify customer success team for user communication
+
+---
+
+## 2. Queue Backlog
+
+**Alert:** `ResponseAPIQueueBacklog`  
+**Triggered when:** Response API queue depth >100 for 10min  
+**Impact:** Processing delays, webhook failures, incomplete conversations
+
+### Root Causes
+
+- Background worker pool exhausted
+- Template API latency spike
+- Media API unavailable
+- Database connection pool exhausted
+
+### Investigation
+
+```bash
+# Check worker status
+curl http://response-api:8081/metrics | grep workers_active
+curl http://response-api:8081/metrics | grep workers_idle
+
+# View queue contents
+psql jan_server -c "SELECT COUNT(*), status, error_message FROM background_jobs GROUP BY status, error_message ORDER BY COUNT(*) DESC;"
+
+# Check dependent services
+make health-check
+
+# View recent job failures
+psql jan_server -c "SELECT id, status, error_message, created_at FROM background_jobs WHERE status='failed' ORDER BY created_at DESC LIMIT 20;"
+```
+
+### Remediation
+
+1. **Increase Worker Pool**
+   ```bash
+   kubectl set env deployment/response-api WORKER_POOL_SIZE=20
+   kubectl rollout status deployment/response-api
+   ```
+
+2. **Purge Old Jobs**
+   ```bash
+   jan-cli jobs purge --older-than=1h --status=failed
+   jan-cli jobs retry --status=failed --max-retries=3
+   ```
+
+3. **Restart Service (Last Resort)**
+   ```bash
+   kubectl rollout restart deployment/response-api
+   kubectl rollout status deployment/response-api
+   ```
+
+---
+
+## 3. Collector Outage
+
+**Alert:** `OTELCollectorDown`  
+**Triggered when:** Collector unreachable for 2min  
+**Impact:** Loss of observability (no new traces/metrics), blind operations
+
+### Symptoms
+
+- Grafana dashboards flatline
+- Jaeger UI shows no recent traces
+- Services log OTLP export errors
+
+### Investigation
+
+```bash
+# Check collector health
+curl http://otel-collector:13133/
+
+# View collector logs
+kubectl logs -l app=otel-collector --tail=100
+
+# Check resource usage
+kubectl top pod -l app=otel-collector
+
+# Verify connectivity from services
+kubectl run -it --rm debug --image=curlimages/curl --restart=Never \
+  -- curl -v http://otel-collector:4318/v1/traces
+```
+
+### Remediation
+
+1. **Restart Collector**
+   ```bash
+   kubectl rollout restart deployment/otel-collector
+   kubectl rollout status deployment/otel-collector
+   ```
+
+2. **If Resource Exhaustion**
+   ```bash
+   # Increase memory
+   kubectl set resources deployment/otel-collector --limits=memory=1Gi
+   
+   # Check Jaeger backend
+   curl http://jaeger-query:16686/api/services
+   ```
+
+3. **If Configuration Error**
+   ```bash
+   # Validate config
+   kubectl get configmap otel-collector-config -o yaml | yq '.data'
+   
+   # Revert to last known good config
+   kubectl rollout undo deployment/otel-collector
+   ```
+
+### Fallback Mode
+
+Services continue operating without telemetry until collector is restored. No user impact.
+
+---
+
+## 4. Media API Storage Failure
+
+**Alert:** `MediaAPIStorageFailure`  
+**Triggered when:** S3 error rate >10% for 2min  
+**Impact:** Upload/download failures, broken media references
+
+### Investigation
+
+```bash
+# Check S3 metrics
+curl http://media-api:8080/metrics | grep s3_errors
+
+# View recent errors
+kubectl logs -l app=media-api --tail=50 | grep -i s3
+
+# Check AWS status
+open https://health.aws.amazon.com/health/status
+
+# Verify credentials
+kubectl get secret media-api-s3-credentials -o yaml
+```
+
+### Remediation
+
+1. **Verify S3 bucket exists and is accessible**
+   ```bash
+   aws s3 ls s3://jan-media-bucket/
+   ```
+
+2. **Check IAM permissions**
+   ```bash
+   aws iam simulate-principal-policy \
+     --policy-source-arn arn:aws:iam::ACCOUNT:role/media-api-role \
+     --action-names s3:PutObject s3:GetObject
+   ```
+
+3. **Enable fallback storage**
+   ```bash
+   kubectl set env deployment/media-api STORAGE_FALLBACK_ENABLED=true
+   ```
+
+---
+
+## 5. Trace Export Failure
+
+**Alert:** `TraceExportFailure`  
+**Triggered when:** Jaeger export failing >10 spans/sec for 5min  
+**Impact:** Partial trace loss, incomplete observability
+
+### Investigation
+
+```bash
+# Check collector export metrics
+curl http://otel-collector:8889/metrics | grep exporter_send_failed
+
+# Check Jaeger ingestion
+curl http://jaeger-collector:14269/metrics | grep spans_received
+
+# View collector logs
+kubectl logs -l app=otel-collector | grep -i error
+```
+
+### Remediation
+
+1. **Verify Jaeger collector is running**
+   ```bash
+   kubectl get pods -l app=jaeger
+   kubectl logs -l app=jaeger --tail=50
+   ```
+
+2. **Check network connectivity**
+   ```bash
+   kubectl run -it --rm debug --image=curlimages/curl --restart=Never \
+     -- curl -v http://jaeger-collector:14268/api/traces
+   ```
+
+3. **Increase collector retry settings**
+   - Edit `monitoring/otel-collector.yaml`
+   - Increase `max_elapsed_time` from 5m to 10m
+   - Increase `queue_size` from 5000 to 10000
+   - Apply config: `kubectl apply -f monitoring/otel-collector.yaml`
+
+4. **Temporary: Reduce sampling rate**
+   ```bash
+   kubectl set env deployment/llm-api OTEL_TRACES_SAMPLER_ARG=0.1
+   kubectl set env deployment/response-api OTEL_TRACES_SAMPLER_ARG=0.1
+   ```
+
+---
+
+## 6. Conversation Classifier Errors
+
+**Alert:** `ConversationInsightFailure`  
+**Triggered when:** Classifier error rate >5% for 5min  
+**Impact:** Missing conversation metadata, incomplete analytics
+
+### Investigation
+
+```bash
+# View classifier metrics
+curl http://response-api:8081/metrics | grep classifier_errors
+
+# Check Template API (dependency)
+curl http://template-api:8082/health
+
+# Review error logs
+kubectl logs -l app=response-api | grep classifier
+```
+
+### Remediation
+
+1. **Verify Template API is accessible**
+   ```bash
+   make health-check
+   kubectl get pods -l app=template-api
+   ```
+
+2. **Check for malformed prompt data**
+   ```bash
+   # Review recent requests
+   kubectl logs -l app=response-api --tail=100 | grep -A5 "classifier error"
+   ```
+
+3. **Review recent classifier configuration changes**
+   ```bash
+   git log --since="1 day ago" --grep="classifier" --oneline
+   kubectl describe configmap response-api-config
+   ```
+
+4. **Disable classifier temporarily (if persistent)**
+   ```bash
+   kubectl set env deployment/response-api CLASSIFIER_ENABLED=false
+   ```
+
+---
+
+## Appendix A: Common Commands
+
+### Health Checks
+
+```bash
+# All services
+make health-check
+
+# Individual service
+curl http://SERVICE:PORT/health
+
+# Monitoring stack
+make monitor-test
+```
+
+### Viewing Logs
+
+```bash
+# Recent logs
+kubectl logs -l app=SERVICE --tail=100
+
+# Follow logs
+kubectl logs -l app=SERVICE -f
+
+# Logs with timestamp
+kubectl logs -l app=SERVICE --timestamps=true
+```
+
+### Metrics Queries
+
+```bash
+# Service metrics
+curl http://SERVICE:8080/metrics
+
+# Prometheus query
+curl 'http://localhost:9090/api/v1/query?query=METRIC_NAME'
+
+# Alert status
+curl http://localhost:9090/api/v1/rules
+```
+
+### Trace Queries
+
+```bash
+# Recent traces for service
+curl 'http://localhost:16686/api/traces?service=SERVICE&limit=10'
+
+# Specific trace
+curl 'http://localhost:16686/api/traces/TRACE_ID'
+
+# Slow traces
+curl 'http://localhost:16686/api/traces?service=SERVICE&minDuration=2s'
+```
+
+---
+
+## Appendix B: Escalation Contacts
+
+| Severity | Contact | Response Time | Channel |
+|----------|---------|---------------|---------|
+| P0 (Critical) | SRE On-Call | <5min | PagerDuty |
+| P1 (High) | Team Lead | <15min | Slack #incidents |
+| P2 (Medium) | Dev Team | <1h | Slack #engineering |
+| P3 (Low) | Ticket Queue | Next business day | Jira |
+
+---
+
+## Appendix C: Useful Links
+
+- **Grafana:** http://localhost:3000
+- **Jaeger:** http://localhost:16686
+- **Prometheus:** http://localhost:9090
+- **Monitoring Guide:** [docs/guides/monitoring.md](../guides/monitoring.md)
+- **Architecture Overview:** [docs/architecture/services.md](../architecture/services.md)
+- **Security Policy:** [docs/architecture/security.md](../architecture/security.md)
diff --git a/go.mod b/go.mod
new file mode 100644
index 00000000..f1bc5ca3
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,36 @@
+module github.com/janhq/jan-server
+
+go 1.25.0
+
+require (
+	github.com/invopop/jsonschema v0.13.0
+	go.opentelemetry.io/otel v1.21.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.44.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.21.0
+	go.opentelemetry.io/otel/metric v1.21.0
+	go.opentelemetry.io/otel/sdk v1.21.0
+	go.opentelemetry.io/otel/sdk/metric v1.21.0
+	go.opentelemetry.io/otel/trace v1.21.0
+	gopkg.in/yaml.v3 v3.0.1
+)
+
+require (
+	github.com/bahlo/generic-list-go v0.2.0 // indirect
+	github.com/buger/jsonparser v1.1.1 // indirect
+	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
+	github.com/go-logr/logr v1.3.0 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.0.0 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
+	google.golang.org/grpc v1.59.0 // indirect
+	google.golang.org/protobuf v1.36.10 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 00000000..a59d8a0a
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,82 @@
+github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
+github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
+github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
+github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
+github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
+github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY=
+github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo=
+github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
+github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg=
+github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
+github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
+github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
+github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
+go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc=
+go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.44.0 h1:bflGWrfYyuulcdxf14V6n9+CoQcu5SAAdHmDPAJnlps=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.44.0/go.mod h1:qcTO4xHAxZLaLxPd60TdE88rxtItPHgHWqOhOGRr0as=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 h1:cl5P5/GIfFh4t6xyruOgJP5QiA1pw4fYYdv6nc6CBWw=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0/go.mod h1:zgBdWWAu7oEEMC06MMKc5NLbA/1YDXV1sMpSqEeLQLg=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.21.0 h1:digkEZCJWobwBqMwC0cwCq8/wkkRy/OowZg5OArWZrM=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.21.0/go.mod h1:/OpE/y70qVkndM0TrxT4KBoN3RsFZP0QaofcfYrj76I=
+go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4=
+go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM=
+go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8=
+go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E=
+go.opentelemetry.io/otel/sdk/metric v1.21.0 h1:smhI5oD714d6jHE6Tie36fPx4WDFIg+Y6RfAY4ICcR0=
+go.opentelemetry.io/otel/sdk/metric v1.21.0/go.mod h1:FJ8RAsoPGv/wYMgBdUJXOm+6pzFY3YdljnXtv1SBE8Q=
+go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc=
+go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ=
+go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I=
+go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY=
+google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4=
+google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q=
+google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M=
+google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk=
+google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE=
+google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/jan-cli.ps1 b/jan-cli.ps1
new file mode 100644
index 00000000..666a8d67
--- /dev/null
+++ b/jan-cli.ps1
@@ -0,0 +1,43 @@
+# jan-cli wrapper script for Windows PowerShell
+# Automatically builds and runs jan-cli from cmd/jan-cli/
+
+$ErrorActionPreference = "Stop"
+
+$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
+$CliDir = Join-Path $ScriptDir "cmd\jan-cli"
+$CliBinary = Join-Path $CliDir "jan-cli.exe"
+
+# Rebuild when the binary is missing or older than any top-level .go source
+$needsBuild = $false
+if (-not (Test-Path $CliBinary)) {
+    $needsBuild = $true
+} else {
+    # Compare the binary's timestamp against each .go file directly in $CliDir
+    $binaryTime = (Get-Item $CliBinary).LastWriteTime
+    $goFiles = Get-ChildItem -Path $CliDir -Filter "*.go"
+    foreach ($goFile in $goFiles) {
+        if ($goFile.LastWriteTime -gt $binaryTime) {
+            $needsBuild = $true
+            break
+        }
+    }
+}
+
+# Build if needed; Push-Location/Pop-Location keeps the caller's directory intact
+if ($needsBuild) {
+    Write-Host "Building jan-cli..." -ForegroundColor Yellow
+    Push-Location $CliDir
+    try {
+        go build -o jan-cli.exe .
+        # go reports failure through its exit code, so check it explicitly
+        if ($LASTEXITCODE -ne 0) {
+            throw "Build failed with exit code $LASTEXITCODE"
+        }
+    } finally {
+        Pop-Location
+    }
+}
+
+# Run jan-cli with all arguments and propagate its exit code
+& $CliBinary $args
+exit $LASTEXITCODE
diff --git a/jan-cli.sh b/jan-cli.sh
new file mode 100644
index 00000000..947e92f9
--- /dev/null
+++ b/jan-cli.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# jan-cli wrapper script
+# Automatically builds and runs jan-cli from cmd/jan-cli/
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+CLI_DIR="${SCRIPT_DIR}/cmd/jan-cli"
+CLI_BINARY="${CLI_DIR}/jan-cli"
+
+# Build if binary doesn't exist or any .go source is newer
+needs_build=false
+if [ ! -f "${CLI_BINARY}" ]; then
+    needs_build=true
+else
+    # Check if any .go file is newer than the binary
+    while IFS= read -r -d '' gofile; do
+        if [ "$gofile" -nt "${CLI_BINARY}" ]; then
+            needs_build=true
+            break
+        fi
+    done < <(find "${CLI_DIR}" -maxdepth 1 -name "*.go" -print0)
+fi
+
+if [ "$needs_build" = true ]; then
+    echo "Building jan-cli..." >&2
+    cd "${CLI_DIR}"
+    go build -o jan-cli .
+    cd "${SCRIPT_DIR}"
+fi
+
+# Run jan-cli with all arguments
+exec "${CLI_BINARY}" "$@"
diff --git a/k8s/README.md b/k8s/README.md
new file mode 100644
index 00000000..780d8197
--- /dev/null
+++ b/k8s/README.md
@@ -0,0 +1,609 @@
+# Jan Server Kubernetes Deployment
+
+Complete Helm chart for deploying Jan Server on Kubernetes.
+
+## Overview
+
+This directory contains Helm charts for deploying the entire Jan Server stack:
+
+```
++---------------------------------------------------------+
+|                    Kong API Gateway                      |
+|                    (LoadBalancer)                        |
++---------------------------------------------------------+
+                           |
+        +------------------+------------------+----------+
+        |                  |                  |          |
+        v                  v                  v          v
+   +---------+      +----------+      +----------+ +---------+
+   | LLM API |      |Media API |      |Response  | |MCP Tools|
+   |         |      |          |      |   API    | |         |
+   +---------+      +----------+      +----------+ +---------+
+        |                  |                  |          |
+        +------------------+------------------+----------+
+                           |
+                           v
+                   +--------------+
+                   |  PostgreSQL  |
+                   +--------------+
+                           
++--------------+   +--------------+   +--------------+
+|   Keycloak   |   |    Redis     |   |   SearXNG    |
++--------------+   +--------------+   +--------------+
+
++--------------+   +--------------+
+| Vector Store |   |SandboxFusion |
++--------------+   +--------------+
+```
+
+## Quick Start
+
+### Prerequisites
+
+```bash
+# Kubernetes cluster (1.23+)
+kubectl version
+
+# Helm 3.8+
+helm version
+
+# Add Bitnami repository
+helm repo add bitnami https://charts.bitnami.com/bitnami
+helm repo update
+```
+
+**Important:** If you don't have a Kubernetes cluster yet, see [SETUP.md](./SETUP.md) for detailed instructions on setting up Docker Desktop Kubernetes or minikube.
+
+### Install Jan Server
+
+**For Minikube Development Setup**, follow these steps:
+
+```bash
+# Step 1: Build Go services and Docker images
+cd services/llm-api && go mod tidy && docker build -t jan/llm-api:latest .
+cd ../media-api && go mod tidy && docker build -t jan/media-api:latest .
+cd ../response-api && go mod tidy && docker build -t jan/response-api:latest .
+cd ../mcp-tools && go mod tidy && docker build -t jan/mcp-tools:latest .
+cd ../.. && docker pull quay.io/keycloak/keycloak:24.0.5
+
+# Step 2: Load images into minikube
+minikube image load jan/llm-api:latest
+minikube image load jan/media-api:latest
+minikube image load jan/response-api:latest
+minikube image load jan/mcp-tools:latest
+minikube image load quay.io/keycloak/keycloak:24.0.5
+
+docker pull bitnami/postgresql:latest && docker pull bitnami/redis:latest
+minikube image load bitnami/postgresql:latest bitnami/redis:latest
+
+# Step 3: Build Helm dependencies
+cd k8s/jan-server
+helm dependency build
+
+# Step 4: Install
+cd ..
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace
+
+# Step 5: Create additional databases
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=postgresql -n jan-server --timeout=300s
+kubectl exec -n jan-server jan-server-postgresql-0 -- bash -c "PGPASSWORD=postgres psql -U postgres << 'EOF'
+CREATE USER media WITH PASSWORD 'media';
+CREATE DATABASE media_api OWNER media;
+CREATE USER keycloak WITH PASSWORD 'keycloak';
+CREATE DATABASE keycloak OWNER keycloak;
+EOF"
+```
+
+**For Production with Cloud Kubernetes**, use values-production.yaml:
+
+```bash
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --values ./jan-server/values-production.yaml \
+  --wait \
+  --timeout 15m
+```
+
+### Access Services
+
+```bash
+# Check deployment status
+kubectl get pods -n jan-server
+
+# Port forward to access services locally (in separate terminals)
+kubectl port-forward -n jan-server svc/jan-server-llm-api 8080:8080
+kubectl port-forward -n jan-server svc/jan-server-media-api 8285:8285
+kubectl port-forward -n jan-server svc/jan-server-response-api 8082:8082
+kubectl port-forward -n jan-server svc/jan-server-keycloak 8085:8085
+
+# Test health endpoints
+curl http://localhost:8080/healthz
+curl http://localhost:8285/healthz
+curl http://localhost:8082/healthz
+
+# Access Keycloak Admin Console
+# Username: admin, Password: changeme
+open http://localhost:8085
+```
+
+**Note:** Kong is available but may show restarts due to memory constraints in minikube. For production, increase Kong's memory limits or access services directly.
+
+## Components
+
+### Core Services
+
+| Service | Port | Description | Status |
+|---------|------|-------------|--------|
+| LLM API | 8080 | Core LLM orchestration service | OK Working |
+| Media API | 8285 | Media upload and management | OK Working |
+| Response API | 8082 | Response generation service | OK Working |
+| MCP Tools | 8091 | Model Context Protocol tools | OK Working |
+| Keycloak | 8085 | Authentication server | OK Working |
+| Kong | 8000 | Unified API Gateway | WARNING Optional |
+
+### Supporting Services
+
+| Service | Port | Description | Status |
+|---------|------|-------------|--------|
+| PostgreSQL | 5432 | Primary database (3 databases) | OK Working |
+| Redis | 6379 | Caching and sessions | OK Working |
+| SearXNG | 8080 | Meta search engine | OK Working |
+| SandboxFusion | 8080 | Code interpreter | OK Working |
+| Vector Store | 3015 | File search database | Offline Disabled by default |
+
+## Configuration
+
+### Values Files
+
+- `values.yaml` - Default values (for minikube/development with imagePullPolicy: Never)
+- `values-production.yaml` - Production configuration (for cloud with IfNotPresent)
+- `values-development.yaml` - Minimal resource allocation
+
+### Key Configuration Areas
+
+#### 1. Image Pull Policy (Important for Minikube)
+
+For minikube with locally built images:
+```yaml
+llmApi:
+  image:
+    pullPolicy: Never  # Use local images only
+
+postgresql:
+  image:
+    tag: "latest"
+    pullPolicy: Never  # Use local Bitnami images
+
+redis:
+  image:
+    tag: "latest"
+    pullPolicy: Never  # Use local Bitnami images
+```
+
+For production with image registries:
+```yaml
+llmApi:
+  image:
+    pullPolicy: IfNotPresent  # Pull if not present
+```
+
+#### 2. Database Configuration
+
+PostgreSQL creates the primary database automatically from the values below; additional databases must be created manually (see the note after the example):
+```yaml
+postgresql:
+  auth:
+    username: jan_user
+    password: jan_password  # Change in production!
+    database: jan_llm_api
+    postgresPassword: postgres  # Change in production!
+```
+
+**Note:** Media API and Keycloak databases must be created manually after deployment (see SETUP.md).
+
+#### 3. Environment Variables
+
+**LLM API** key settings:
+```yaml
+llmApi:
+  env:
+    JAN_DEFAULT_NODE_SETUP: "false"  # Disable if no Jan provider available
+    DATABASE_URL: "postgres://..."   # Auto-configured via secret
+    KEYCLOAK_BASE_URL: "http://..."  # Auto-configured
+    BACKEND_CLIENT_ID: "llm-api"
+    CLIENT: "jan-client"
+```
+
+**Response API** key settings:
+```yaml
+responseApi:
+  env:
+    SERVICE_NAME: "response-api"
+    HTTP_PORT: "8082"
+    LLM_API_URL: "http://jan-server-llm-api:8080"
+    MCP_TOOLS_URL: "http://jan-server-mcp-tools:8091"
+    MAX_TOOL_EXECUTION_DEPTH: "8"
+    TOOL_EXECUTION_TIMEOUT: "45s"
+    AUTO_MIGRATE: "true"
+```
+
+**Media API** key settings:
+```yaml
+mediaApi:
+  env:
+    MEDIA_API_PORT: "8285"
+    MEDIA_MAX_BYTES: "20971520"  # 20MB
+    MEDIA_PROXY_DOWNLOAD: "true"
+    MEDIA_RETENTION_DAYS: "30"
+```
+
+#### 4. S3 Storage (Media API)
+
+**Required** for media-api to function:
+```yaml
+mediaApi:
+  secrets:
+    serviceKey: "changeme-media-key"  # Required!
+    apiKey: "changeme-media-key"      # Required!
+    s3Endpoint: "https://s3.amazonaws.com"
+    s3Bucket: "your-bucket"  # Required!
+    s3AccessKey: "YOUR_KEY"   # Required!
+    s3SecretKey: "YOUR_SECRET"  # Required!
+```
+
+#### 5. Keycloak Admin
+
+```yaml
+keycloak:
+  admin:
+    username: admin
+    password: "changeme"  # Change in production!
+  database:
+    password: keycloak  # Change in production!
+```
+
+#### 6. Resource Limits
+
+Adjust based on your environment:
+```yaml
+llmApi:
+  resources:
+    requests:
+      memory: 256Mi  # Minimum for minikube
+      cpu: 250m
+    limits:
+      memory: 512Mi
+      cpu: 500m
+
+# For production, increase limits:
+# memory: 1Gi, cpu: 1000m
+```
+
+#### 7. Autoscaling (Disabled by default)
+
+```yaml
+llmApi:
+  autoscaling:
+    enabled: false  # Enable for production
+    minReplicas: 2
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 70
+```
+
+#### 8. Ingress Configuration
+
+```yaml
+llmApi:
+  ingress:
+    enabled: true
+    className: "nginx"
+    hosts:
+      - host: api.yourdomain.com
+        paths:
+          - path: /
+            pathType: Prefix
+    tls:
+      - secretName: api-tls
+        hosts:
+          - api.yourdomain.com
+```
+
+## Deployment Scenarios
+
+### Development (Minikube) - Verified Working 
+
+```bash
+# Start minikube with enough resources
+minikube start --cpus=4 --memory=8192 --driver=docker
+
+# Build and load images (see SETUP.md for complete steps)
+# ... build services and docker images ...
+minikube image load jan/llm-api:latest
+minikube image load jan/media-api:latest
+minikube image load jan/response-api:latest
+minikube image load jan/mcp-tools:latest
+minikube image load quay.io/keycloak/keycloak:24.0.5
+
+# Install
+cd k8s
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace
+
+# Create databases
+kubectl exec -n jan-server jan-server-postgresql-0 -- bash -c "PGPASSWORD=postgres psql -U postgres << 'EOF'
+CREATE USER media WITH PASSWORD 'media';
+CREATE DATABASE media_api OWNER media;
+CREATE USER keycloak WITH PASSWORD 'keycloak';
+CREATE DATABASE keycloak OWNER keycloak;
+EOF"
+
+# Access via port-forward
+kubectl port-forward -n jan-server svc/jan-server-llm-api 8080:8080
+```
+
+### Docker Desktop Kubernetes
+
+```bash
+# Build images (same as minikube)
+# Images are automatically available in Docker Desktop's Kubernetes
+
+# Install with IfNotPresent pull policy
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --set llmApi.image.pullPolicy=IfNotPresent \
+  --set mediaApi.image.pullPolicy=IfNotPresent \
+  --set mcpTools.image.pullPolicy=IfNotPresent \
+  --set keycloak.image.pullPolicy=IfNotPresent
+```
+
+### Cloud Kubernetes (AKS/EKS/GKE)
+
+```bash
+# Option 1: Use cloud-managed databases (recommended)
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --set postgresql.enabled=false \
+  --set redis.enabled=false \
+  --set global.postgresql.host=your-managed-postgres.cloud \
+  --set global.redis.host=your-managed-redis.cloud \
+  --set ingress.enabled=true \
+  --set ingress.className=nginx \
+  --set ingress.hosts[0].host=jan.yourdomain.com \
+  --set llmApi.autoscaling.enabled=true \
+  --set llmApi.replicaCount=3
+
+# Option 2: Use in-cluster databases with persistent storage
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --set postgresql.persistence.enabled=true \
+  --set postgresql.persistence.size=50Gi \
+  --set postgresql.persistence.storageClass=gp3 \
+  --set redis.master.persistence.enabled=true \
+  --set ingress.enabled=true \
+  --set llmApi.autoscaling.enabled=true
+```
+
+### Production On-Premises
+
+```bash
+# Use production values template
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --values ./jan-server/values-production.yaml \
+  --set postgresql.enabled=false \
+  --set redis.enabled=false \
+  --set global.postgresql.host=postgres.internal \
+  --set global.redis.host=redis.internal
+```
+
+## Security Best Practices
+
+### 1. Use External Secrets
+
+```bash
+# Install external-secrets operator
+helm repo add external-secrets https://charts.external-secrets.io
+helm install external-secrets external-secrets/external-secrets \
+  --namespace external-secrets-system \
+  --create-namespace
+
+# Create SecretStore for AWS Secrets Manager
+kubectl apply -f - <<EOF
+apiVersion: external-secrets.io/v1beta1
+kind: SecretStore
+metadata:
+  name: aws-secretsmanager
+  namespace: jan-server
+spec:
+  provider:
+    aws:
+      service: SecretsManager
+      region: us-west-2
+EOF
+```
+
+### 2. Enable Network Policies
+
+```bash
+# Create network policy to restrict pod communication
+kubectl apply -f - <<EOF
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: jan-server-netpol
+  namespace: jan-server
+spec:
+  podSelector: {}
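+  policyTypes:
+  - Ingress  # Egress omitted: listing it without egress rules blocks all outbound traffic (DNS, DB)
+  ingress:
+  - from:
+    - namespaceSelector:
+        matchLabels:
+          # Label set automatically on every namespace; no manual labelling needed
+          kubernetes.io/metadata.name: jan-server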
+EOF
+```
+
+### 3. Pod Security Standards
+
+```bash
+# Label namespace with pod security standard
+kubectl label namespace jan-server \
+  pod-security.kubernetes.io/enforce=baseline \
+  pod-security.kubernetes.io/audit=restricted \
+  pod-security.kubernetes.io/warn=restricted
+```
+
+### 4. Enable TLS
+
+```bash
+# Install cert-manager
+kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.yaml
+
+# Create ClusterIssuer for Let's Encrypt
+kubectl apply -f - <<EOF
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+  name: letsencrypt-prod
+spec:
+  acme:
+    server: https://acme-v02.api.letsencrypt.org/directory
+    email: your-email@example.com
+    privateKeySecretRef:
+      name: letsencrypt-prod
+    solvers:
+    - http01:
+        ingress:
+          class: nginx
+EOF
+```
+
+## Monitoring
+
+### Prometheus & Grafana
+
+```bash
+# Add Prometheus stack
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+helm install prometheus prometheus-community/kube-prometheus-stack \
+  --namespace monitoring \
+  --create-namespace
+
+# Access Grafana
+kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80
+# Default: admin/prom-operator
+```
+
+### Logging
+
+```bash
+# Install Loki stack
+helm repo add grafana https://grafana.github.io/helm-charts
+helm install loki grafana/loki-stack \
+  --namespace logging \
+  --create-namespace \
+  --set grafana.enabled=true
+```
+
+## Upgrade & Maintenance
+
+### Upgrade Helm Release
+
+```bash
+# Upgrade with new values
+helm upgrade jan-server ./jan-server \
+  --namespace jan-server \
+  --values my-values.yaml
+
+# Rollback if needed
+helm rollback jan-server -n jan-server
+```
+
+### Database Migrations
+
+```bash
+# Run migrations manually before upgrade
+kubectl run migration-job \
+  --namespace jan-server \
+  --image=jan/llm-api:latest \
+  --restart=Never \
+  --command -- /app/migrate
+```
+
+### Backup PostgreSQL
+
+```bash
+# Create backup
+kubectl exec -n jan-server jan-server-postgresql-0 -- \
+  pg_dump -U jan_user jan_llm_api > backup-$(date +%Y%m%d).sql
+
+# Restore backup
+kubectl exec -i -n jan-server jan-server-postgresql-0 -- \
+  psql -U jan_user jan_llm_api < backup-20250109.sql
+```
+
+## Troubleshooting
+
+### Common Issues
+
+#### Pods Not Starting
+
+```bash
+# Check events
+kubectl describe pod -n jan-server <pod-name>
+
+# Check logs
+kubectl logs -n jan-server <pod-name> --previous
+```
+
+#### Database Connection Errors
+
+```bash
+# Verify PostgreSQL is running
+kubectl get pods -n jan-server -l app.kubernetes.io/name=postgresql
+
+# Test connection
+kubectl run -n jan-server psql-test --rm -it \
+  --image=postgres:18 \
+  -- psql -h jan-server-postgresql -U jan_user -d jan_llm_api
+```
+
+#### Service Not Accessible
+
+```bash
+# Check service endpoints
+kubectl get endpoints -n jan-server
+
+# Test service internally
+kubectl run -n jan-server curl-test --rm -it \
+  --image=curlimages/curl \
+  -- curl http://jan-server-llm-api:8080/healthz
+```
+
+## Additional Resources
+
+- [Helm Documentation](https://helm.sh/docs/)
+- [Kubernetes Best Practices](https://kubernetes.io/docs/concepts/configuration/overview/)
+- [Jan Server Documentation](https://docs.jan.ai)
+- [Bitnami Charts](https://github.com/bitnami/charts)
+
+## Support
+
+For issues and questions:
+- GitHub Issues: https://github.com/janhq/jan-server/issues
+- Documentation: https://docs.jan.ai
+- Community: https://discord.gg/jan
+
+## License
+
+See the main project LICENSE file.
diff --git a/k8s/SETUP.md b/k8s/SETUP.md
new file mode 100644
index 00000000..a3f32e98
--- /dev/null
+++ b/k8s/SETUP.md
@@ -0,0 +1,476 @@
+# Kubernetes Setup and Deployment Guide
+
+This guide walks through setting up a Kubernetes cluster and deploying Jan Server using Helm.
+
+## Prerequisites
+
+- Docker Desktop OR minikube
+- kubectl CLI
+- Helm 3.8+
+- Go 1.23+ (for building services)
+
+## Services Overview
+
+Jan Server includes the following microservices:
+
+- **LLM API** (port 8080) - Core LLM orchestration service
+- **Media API** (port 8285) - Media upload and management
+- **Response API** (port 8082) - Response generation with tool orchestration
+- **MCP Tools** (port 8091) - Model Context Protocol tools integration
+- **Keycloak** (port 8085) - Authentication server
+- **Kong** (port 8000) - API Gateway
+- **PostgreSQL** (port 5432) - Database (shared by all services)
+- **Redis** (port 6379) - Caching
+- **SearXNG** (port 8080) - Meta search engine
+- **SandboxFusion** (port 8080) - Code interpreter
+
+## Option 1: Minikube (Recommended for Development)
+
+### Install Minikube
+
+```powershell
+# Using Chocolatey
+choco install minikube
+
+# Or download from: https://minikube.sigs.k8s.io/docs/start/
+```
+
+### Start Minikube
+
+```powershell
+# Start with sufficient resources
+minikube start --cpus=4 --memory=8192 --driver=docker
+```
+
+### Verify Installation
+
+```powershell
+kubectl cluster-info
+kubectl get nodes
+minikube status
+```
+
+## Option 2: Docker Desktop Kubernetes
+
+### Enable Kubernetes in Docker Desktop
+
+1. Open Docker Desktop
+2. Go to Settings -> Kubernetes
+3. Check "Enable Kubernetes"
+4. Click "Apply & Restart"
+5. Wait for Kubernetes to start (green indicator)
+
+### Verify Installation
+
+```powershell
+kubectl cluster-info
+kubectl get nodes
+```
+
+You should see:
+```
+Kubernetes control plane is running at https://kubernetes.docker.internal:6443
+```
+
+## Building and Deploying Jan Server
+
+### Step 1: Fix Go Modules
+
+```powershell
+cd C:\path\to\jan-server  # replace with the path to your local checkout
+
+# Fix go modules for all services
+cd services\llm-api
+go mod tidy
+cd ..\media-api
+go mod tidy
+cd ..\response-api
+go mod tidy
+cd ..\mcp-tools
+go mod tidy
+
+cd ..\..
+```
+
+### Step 2: Build Docker Images
+
+```powershell
+cd C:\path\to\jan-server  # replace with the path to your local checkout
+
+# Build LLM API
+cd services\llm-api
+docker build -t jan/llm-api:latest .
+
+# Build Media API
+cd ..\media-api
+docker build -t jan/media-api:latest .
+
+# Build Response API
+cd ..\response-api
+docker build -t jan/response-api:latest .
+
+# Build MCP Tools
+cd ..\mcp-tools
+docker build -t jan/mcp-tools:latest .
+cd ..\..
+
+# Official Keycloak image (no build required)
+docker pull quay.io/keycloak/keycloak:24.0.5
+```
+
+### Step 3: Load Images into Minikube (Minikube Only)
+
+If using minikube, load the images into the cluster:
+
+```powershell
+# Load custom images
+minikube image load jan/llm-api:latest
+minikube image load jan/media-api:latest
+minikube image load jan/response-api:latest
+minikube image load jan/mcp-tools:latest
+minikube image load quay.io/keycloak/keycloak:24.0.5
+
+# Pull and load Bitnami images
+docker pull bitnami/postgresql:latest
+docker pull bitnami/redis:latest
+minikube image load bitnami/postgresql:latest
+minikube image load bitnami/redis:latest
+
+# Verify images are loaded
+minikube image ls | Select-String "jan/|bitnami"
+```
+
+### Step 4: Add Bitnami Repository
+
+```powershell
+helm repo add bitnami https://charts.bitnami.com/bitnami
+helm repo update
+```
+
+### Step 5: Build Chart Dependencies
+
+```powershell
+cd C:\path\to\jan-server\k8s\jan-server  # replace with the path to your local checkout
+helm dependency build
+```
+
+This downloads PostgreSQL and Redis charts from Bitnami.
+
+### Step 6: Install Jan Server
+
+```powershell
+cd C:\path\to\jan-server\k8s  # replace with the path to your local checkout
+
+# Install with default values
+helm install jan-server ./jan-server `
+  --namespace jan-server `
+  --create-namespace
+```
+
+### Step 7: Create Additional Databases
+
+PostgreSQL needs additional databases for media-api and keycloak:
+
+```powershell
+# Wait for PostgreSQL to be ready
+kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=postgresql -n jan-server --timeout=300s
+
+# Create databases
+kubectl exec -n jan-server jan-server-postgresql-0 -- bash -c "PGPASSWORD=postgres psql -U postgres << 'EOF'
+CREATE USER media WITH PASSWORD 'media';
+CREATE DATABASE media_api OWNER media;
+CREATE USER keycloak WITH PASSWORD 'keycloak';
+CREATE DATABASE keycloak OWNER keycloak;
+EOF"
+```
+
+### Step 8: Verify Deployment
+
+```powershell
+# Check all resources
+kubectl get all -n jan-server
+
+# Check pods status
+kubectl get pods -n jan-server
+
+# Check services
+kubectl get svc -n jan-server
+```
+
+Wait until all pods show `Running` status (this may take 2-5 minutes):
+```
+NAME                                    READY   STATUS    RESTARTS   AGE
+jan-server-keycloak-xxx                 1/1     Running   0          3m
+jan-server-kong-xxx                     1/1     Running   0          3m
+jan-server-llm-api-xxx                  1/1     Running   0          3m
+jan-server-media-api-xxx                1/1     Running   0          3m
+jan-server-response-api-xxx             1/1     Running   0          3m
+jan-server-mcp-tools-xxx                1/1     Running   0          3m
+jan-server-postgresql-0                 1/1     Running   0          3m
+jan-server-redis-master-0               1/1     Running   0          3m
+jan-server-redis-replicas-0             1/1     Running   0          3m
+jan-server-searxng-xxx                  1/1     Running   0          3m
+jan-server-sandboxfusion-xxx            1/1     Running   0          3m
+```
+
+**Note:** `vector-store` is disabled by default. Kong may show restarts due to low memory limits (this is expected).
+
+### Step 9: Access Services via Port-Forward
+
+Open multiple PowerShell terminals and run:
+
+```powershell
+# Terminal 1: Kong API Gateway (Main entry point)
+kubectl port-forward -n jan-server svc/jan-server-kong 8000:8000
+
+# Terminal 2: Keycloak Authentication
+kubectl port-forward -n jan-server svc/jan-server-keycloak 8085:8085
+
+# Optional: Direct service access
+kubectl port-forward -n jan-server svc/jan-server-llm-api 8080:8080
+kubectl port-forward -n jan-server svc/jan-server-media-api 8285:8285
+kubectl port-forward -n jan-server svc/jan-server-response-api 8082:8082
+kubectl port-forward -n jan-server svc/jan-server-mcp-tools 8091:8091
+```
+
+### Step 10: Test API Endpoints
+
+```powershell
+# Test via Kong API Gateway
+curl http://localhost:8000/api/llm/healthz
+curl http://localhost:8000/api/media/healthz
+curl http://localhost:8000/api/responses/healthz
+curl http://localhost:8000/api/mcp/healthz
+
+# Or test direct service access
+curl http://localhost:8080/healthz  # LLM API
+curl http://localhost:8285/healthz  # Media API
+curl http://localhost:8082/healthz  # Response API
+curl http://localhost:8091/healthz  # MCP Tools
+
+# Access Keycloak Admin
+# Open browser: http://localhost:8085
+# Username: admin
+# Password: changeme
+```
+
+## Common Commands
+
+### View Logs
+
+```powershell
+# View logs for a specific pod
+kubectl logs -n jan-server <pod-name>
+
+# Follow logs
+kubectl logs -n jan-server <pod-name> -f
+
+# View logs from all containers in a pod
+kubectl logs -n jan-server <pod-name> --all-containers
+```
+
+### Describe Resources
+
+```powershell
+# Describe a pod (shows events and status)
+kubectl describe pod -n jan-server <pod-name>
+
+# Describe a service
+kubectl describe svc -n jan-server jan-server-kong
+```
+
+### Execute Commands in Pods
+
+```powershell
+# Connect to PostgreSQL
+kubectl exec -it -n jan-server jan-server-postgresql-0 -- psql -U jan_user -d jan_llm_api
+
+# Shell into a pod
+kubectl exec -it -n jan-server <pod-name> -- /bin/sh
+```
+
+### Restart a Deployment
+
+```powershell
+kubectl rollout restart deployment -n jan-server jan-server-llm-api
+```
+
+## Upgrade Deployment
+
+```powershell
+# Upgrade with new values
+helm upgrade jan-server ./jan-server `
+  --namespace jan-server `
+  --wait `
+  --timeout 10m
+
+# Upgrade with custom values
+helm upgrade jan-server ./jan-server `
+  --namespace jan-server `
+  --values ./jan-server/values-development.yaml `
+  --wait
+```
+
+## Uninstall
+
+```powershell
+# Uninstall the release
+helm uninstall jan-server -n jan-server
+
+# Delete the namespace (including PVCs)
+kubectl delete namespace jan-server
+```
+
+## Troubleshooting
+
+### Pods in CrashLoopBackOff
+
+```powershell
+# Check pod logs
+kubectl logs -n jan-server <pod-name> --previous
+
+# Check pod events
+kubectl describe pod -n jan-server <pod-name>
+```
+
+### ImagePullBackOff Error
+
+This means Docker images are not available. You need to:
+
+1. Build the Docker images locally:
+   ```powershell
+   cd d:\Working\Menlo\jan-server
+   docker compose build
+   ```
+
+2. Update values.yaml to use local images or configure image pull policy:
+   ```yaml
+   llmApi:
+     image:
+       registry: ""
+       repository: jan/llm-api
+       tag: latest
+       pullPolicy: IfNotPresent
+   ```
+
+### PostgreSQL Not Starting
+
+```powershell
+# Check PostgreSQL logs
+kubectl logs -n jan-server jan-server-postgresql-0
+
+# Check PVC
+kubectl get pvc -n jan-server
+```
+
+### Services Not Accessible
+
+```powershell
+# Check if service has endpoints
+kubectl get endpoints -n jan-server
+
+# Test internal connectivity
+kubectl run -n jan-server curl-test --rm -it --image=curlimages/curl -- curl http://jan-server-llm-api:8080/healthz
+```
+
+## Next Steps
+
+Once the infrastructure is deployed and all pods are running:
+
+1. Run the port-forward script (use the helper in `k8s/port-forward.ps1`)
+2. Configure Keycloak realm and clients
+3. Run automation tests from `tests/automation/`
+
+## For Minikube Users
+
+### Access LoadBalancer Services
+
+Minikube doesn't support LoadBalancer type services by default. Use one of these methods:
+
+```powershell
+# Method 1: Use minikube tunnel (requires admin privileges)
+minikube tunnel
+
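+# Method 2: Change the Kong service to NodePort in values.yaml.
+# (The YAML is shown as comments so this block can be pasted into PowerShell safely.)
+#   kong:
+#     service:
+#       type: NodePort
+#       nodePort: 30000
+
+# After upgrading the release with that change, access Kong via: http://$(minikube ip):30000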
+```
+
+### Enable Metrics Server
+
+```powershell
+minikube addons enable metrics-server
+```
+
+## Production Deployment
+
+For production deployment, create a custom values file:
+
+```yaml
+# my-production-values.yaml
+postgresql:
+  auth:
+    password: "STRONG_PASSWORD_HERE"
+    postgresPassword: "STRONG_POSTGRES_PASSWORD"
+
+keycloak:
+  admin:
+    password: "STRONG_ADMIN_PASSWORD"
+  database:
+    password: "STRONG_DB_PASSWORD"
+
+# NOTE: each top-level key must appear only once in a values file; repeated
+# keys are either rejected or silently override the earlier definition.
+mediaApi:
+  replicaCount: 2
+  autoscaling:
+    enabled: true
+    minReplicas: 2
+    maxReplicas: 6
+  secrets:
+    serviceKey: "YOUR_SERVICE_KEY"
+    apiKey: "YOUR_API_KEY"
+    s3Endpoint: "https://your-s3-endpoint.com"
+    s3Bucket: "your-bucket"
+    s3AccessKey: "YOUR_ACCESS_KEY"
+    s3SecretKey: "YOUR_SECRET_KEY"
+
+responseApi:
+  replicaCount: 2
+  autoscaling:
+    enabled: true
+    minReplicas: 2
+    maxReplicas: 8
+  secrets:
+    databaseUrl: "postgres://jan_user:STRONG_PASSWORD@jan-server-postgresql:5432/jan_llm_api?sslmode=disable"
+  env:
+    maxToolExecutionDepth: "5"
+    toolExecutionTimeout: "30s"
+
+kong:
+  service:
+    type: LoadBalancer
+
+llmApi:
+  replicaCount: 3
+  autoscaling:
+    enabled: true
+    minReplicas: 3
+    maxReplicas: 10
+```
+
+Deploy with:
+```powershell
+helm install jan-server ./jan-server `
+  --namespace jan-server `
+  --create-namespace `
+  --values my-production-values.yaml `
+  --wait `
+  --timeout 15m
+```
diff --git a/k8s/jan-server/.helmignore b/k8s/jan-server/.helmignore
new file mode 100644
index 00000000..1c82e47d
--- /dev/null
+++ b/k8s/jan-server/.helmignore
@@ -0,0 +1,12 @@
+# Patterns to ignore when building packages
+*.md
+.git/
+.gitignore
+.helmignore
+OWNERS
+examples/
+test/
+*.bak
+*.tmp
+*~
+.DS_Store
diff --git a/k8s/jan-server/Chart.lock b/k8s/jan-server/Chart.lock
new file mode 100644
index 00000000..ebb4e84d
--- /dev/null
+++ b/k8s/jan-server/Chart.lock
@@ -0,0 +1,9 @@
+dependencies:
+- name: postgresql
+  repository: https://charts.bitnami.com/bitnami
+  version: 15.2.13
+- name: redis
+  repository: https://charts.bitnami.com/bitnami
+  version: 19.0.2
+digest: sha256:58912648c8cec6230a15fa9c2e96a742f1417aa007ef88cbbdf408911cdb6aeb
+generated: "2025-11-09T17:48:30.6771678+07:00"
diff --git a/k8s/jan-server/Chart.yaml b/k8s/jan-server/Chart.yaml
new file mode 100644
index 00000000..dcc32423
--- /dev/null
+++ b/k8s/jan-server/Chart.yaml
@@ -0,0 +1,28 @@
+# Helm chart metadata for the Jan Server umbrella chart.
+apiVersion: v2
+name: jan-server
+description: A Helm chart for Jan Server - LLM API Platform with MCP Tools, Media API, and Response API
+type: application
+# Version of the chart itself.
+version: 1.1.0
+# Version of the application the chart deploys; surfaced by the chart's
+# helpers as the app.kubernetes.io/version label.
+appVersion: "2.0.0"
+keywords:
+  - llm
+  - api
+  - mcp
+  - keycloak
+  - media
+  - responses
+maintainers:
+  - name: Jan Team
+    email: support@jan.ai
+home: https://github.com/janhq/jan-server
+sources:
+  - https://github.com/janhq/jan-server
+# Subcharts pulled from the Bitnami repository. Each one is toggled by the
+# values key named in its `condition`; the `~` constraints are version ranges
+# rather than exact pins.
+dependencies:
+  - name: postgresql
+    version: "~15.2.0"
+    repository: "https://charts.bitnami.com/bitnami"
+    condition: postgresql.enabled
+  - name: redis
+    version: "~19.0.0"
+    repository: "https://charts.bitnami.com/bitnami"
+    condition: redis.enabled
diff --git a/k8s/jan-server/README.md b/k8s/jan-server/README.md
new file mode 100644
index 00000000..6369f011
--- /dev/null
+++ b/k8s/jan-server/README.md
@@ -0,0 +1,337 @@
+# Jan Server Helm Chart
+
+This Helm chart deploys the complete Jan Server platform on Kubernetes, including:
+
+- **LLM API** - Core LLM orchestration service
+- **Media API** - Media upload and management service
+- **MCP Tools** - Model Context Protocol tools and utilities
+- **Keycloak** - Authentication and authorization server
+- **Kong** - API Gateway for unified API endpoint
+- **PostgreSQL** - Primary database (via Bitnami chart)
+- **Redis** - Caching and session store (via Bitnami chart)
+- **SearXNG** - Meta search engine for MCP
+- **Vector Store** - Lightweight vector database for file search
+- **SandboxFusion** - Code interpreter and execution environment
+
+## Prerequisites
+
+- Kubernetes 1.23+
+- Helm 3.8+
+- PV provisioner support in the underlying infrastructure (for persistent volumes)
+- LoadBalancer support (for Kong ingress) or Ingress Controller
+
+## Installing the Chart
+
+### Add Bitnami Repository (for PostgreSQL and Redis)
+
+```bash
+helm repo add bitnami https://charts.bitnami.com/bitnami
+helm repo update
+```
+
+### Install from local directory
+
+```bash
+# From the k8s directory
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --values ./jan-server/values.yaml
+```
+
+### Install with custom values
+
+```bash
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --values ./jan-server/values-production.yaml
+```
+
+## Configuration
+
+The following table lists the configurable parameters and their default values.
+
+### Global Settings
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `global.imageRegistry` | Global Docker registry | `""` |
+| `global.imagePullSecrets` | Global image pull secrets | `[]` |
+| `global.storageClass` | Global storage class | `""` |
+
+### PostgreSQL (Bitnami)
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `postgresql.enabled` | Enable PostgreSQL | `true` |
+| `postgresql.auth.username` | PostgreSQL username | `jan_user` |
+| `postgresql.auth.password` | PostgreSQL password | `jan_password` |
+| `postgresql.auth.database` | PostgreSQL database | `jan_llm_api` |
+| `postgresql.primary.persistence.size` | PVC size | `10Gi` |
+
+### LLM API
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `llmApi.enabled` | Enable LLM API | `true` |
+| `llmApi.replicaCount` | Number of replicas | `2` |
+| `llmApi.image.repository` | Image repository | `jan/llm-api` |
+| `llmApi.image.tag` | Image tag | `latest` |
+| `llmApi.service.type` | Service type | `ClusterIP` |
+| `llmApi.service.port` | Service port | `8080` |
+| `llmApi.resources.requests.memory` | Memory request | `256Mi` |
+| `llmApi.resources.requests.cpu` | CPU request | `250m` |
+| `llmApi.autoscaling.enabled` | Enable autoscaling | `false` |
+| `llmApi.ingress.enabled` | Enable ingress | `false` |
+
+### Media API
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `mediaApi.enabled` | Enable Media API | `true` |
+| `mediaApi.replicaCount` | Number of replicas | `2` |
+| `mediaApi.image.repository` | Image repository | `jan/media-api` |
+| `mediaApi.image.tag` | Image tag | `latest` |
+| `mediaApi.service.port` | Service port | `8285` |
+| `mediaApi.ingress.enabled` | Enable Media API ingress | `false` |
+| `mediaApi.secrets.s3Endpoint` | S3 endpoint URL | `https://s3.menlo.ai` |
+| `mediaApi.secrets.s3Bucket` | S3 bucket name | `platform-dev` |
+| `mediaApi.secrets.s3AccessKey` | S3 access key | `XXXXX` |
+| `mediaApi.secrets.s3SecretKey` | S3 secret key | `YYYY` |
+
+### MCP Tools
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `mcpTools.enabled` | Enable MCP Tools | `true` |
+| `mcpTools.replicaCount` | Number of replicas | `2` |
+| `mcpTools.service.port` | Service port | `8091` |
+| `mcpTools.secrets.serperApiKey` | Serper API key | `""` |
+
+### Keycloak
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `keycloak.enabled` | Enable Keycloak | `true` |
+| `keycloak.admin.username` | Admin username | `admin` |
+| `keycloak.admin.password` | Admin password | `changeme` |
+| `keycloak.service.port` | Service port | `8085` |
+
+### Kong API Gateway
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `kong.enabled` | Enable Kong | `true` |
+| `kong.service.type` | Service type | `LoadBalancer` |
+| `kong.service.port` | Service port | `8000` |
+
+## Upgrading
+
+```bash
+helm upgrade jan-server ./jan-server \
+  --namespace jan-server \
+  --values ./jan-server/values-production.yaml
+```
+
+## Uninstalling
+
+```bash
+helm uninstall jan-server --namespace jan-server
+```
+
+To delete the PVCs as well:
+
+```bash
+kubectl delete pvc -n jan-server --all
+```
+
+## Examples
+
+### Production Deployment
+
+```bash
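+# Create a production values file.
+# The heredoc delimiter is quoted so that `$` or backticks in real passwords
+# are written literally instead of being expanded by the shell.
+cat > values-prod.yaml <<'EOF'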
+global:
+  storageClass: "gp3"
+
+postgresql:
+  primary:
+    persistence:
+      size: 50Gi
+  auth:
+    password: "STRONG_PASSWORD_HERE"
+
+llmApi:
+  replicaCount: 3
+  autoscaling:
+    enabled: true
+    minReplicas: 3
+    maxReplicas: 10
+  resources:
+    requests:
+      memory: 512Mi
+      cpu: 500m
+    limits:
+      memory: 1Gi
+      cpu: 1000m
+  ingress:
+    enabled: true
+    className: "nginx"
+    hosts:
+      - host: api.yourdomain.com
+        paths:
+          - path: /
+            pathType: Prefix
+
+mediaApi:
+  secrets:
+    s3AccessKey: "YOUR_ACCESS_KEY"
+    s3SecretKey: "YOUR_SECRET_KEY"
+    serviceKey: "YOUR_SERVICE_KEY"
+
+keycloak:
+  admin:
+    password: "STRONG_ADMIN_PASSWORD"
+  ingress:
+    enabled: true
+    hosts:
+      - host: auth.yourdomain.com
+EOF
+
+# Install with production values
+helm install jan-server ./jan-server \
+  --namespace jan-server \
+  --create-namespace \
+  --values values-prod.yaml
+```
+
+### Development Deployment (Minimal Resources)
+
+```bash
+cat > values-dev.yaml <<'EOF'
+llmApi:
+  replicaCount: 1
+  resources:
+    requests:
+      memory: 128Mi
+      cpu: 100m
+
+mediaApi:
+  replicaCount: 1
+  resources:
+    requests:
+      memory: 128Mi
+      cpu: 100m
+
+mcpTools:
+  replicaCount: 1
+
+postgresql:
+  primary:
+    persistence:
+      size: 5Gi
+    resources:
+      requests:
+        memory: 128Mi
+        cpu: 100m
+EOF
+
+helm install jan-server ./jan-server \
+  --namespace jan-server-dev \
+  --create-namespace \
+  --values values-dev.yaml
+```
+
+## Accessing Services
+
+After installation, you can access services via:
+
+### Via Kong API Gateway (Recommended)
+
+```bash
+# Get Kong external IP
+kubectl get svc -n jan-server jan-server-kong
+
+# Access services through Kong
+curl http://<KONG_IP>:8000/api/llm/healthz
+curl http://<KONG_IP>:8000/api/media/healthz
+curl http://<KONG_IP>:8000/api/mcp/healthz
+```
+
+### Direct Service Access (Port Forward)
+
+```bash
+# LLM API
+kubectl port-forward -n jan-server svc/jan-server-llm-api 8080:8080
+
+# Media API
+kubectl port-forward -n jan-server svc/jan-server-media-api 8285:8285
+
+# Keycloak Admin Console
+kubectl port-forward -n jan-server svc/jan-server-keycloak 8085:8085
+# Visit: http://localhost:8085
+```
+
+## Troubleshooting
+
+### Check Pod Status
+
+```bash
+kubectl get pods -n jan-server
+```
+
+### View Logs
+
+```bash
+# LLM API logs
+kubectl logs -n jan-server -l app.kubernetes.io/component=llm-api --tail=100
+
+# Media API logs
+kubectl logs -n jan-server -l app.kubernetes.io/component=media-api --tail=100
+
+# Keycloak logs
+kubectl logs -n jan-server -l app.kubernetes.io/component=keycloak --tail=100
+```
+
+### Check Service Connectivity
+
+```bash
+# Test internal service connectivity
+kubectl run -n jan-server test-pod --rm -it --image=curlimages/curl -- sh
+
+# Inside the pod:
+curl http://jan-server-llm-api:8080/healthz
+curl http://jan-server-media-api:8285/healthz
+curl http://jan-server-keycloak:8085
+```
+
+### Database Connection Issues
+
+```bash
+# Check PostgreSQL status
+kubectl get pods -n jan-server -l app.kubernetes.io/name=postgresql
+
+# Connect to PostgreSQL
+kubectl exec -it -n jan-server jan-server-postgresql-0 -- psql -U jan_user -d jan_llm_api
+```
+
+## Security Considerations
+
+1. **Change default passwords** in production
+2. **Enable TLS/HTTPS** for all ingresses
+3. **Use Kubernetes Secrets** for sensitive data
+4. **Enable Network Policies** to restrict pod-to-pod communication
+5. **Enforce Pod Security Standards** via Pod Security Admission (PodSecurityPolicy was removed in Kubernetes 1.25)
+6. **Regular security audits** and updates
+
+## Support
+
+For issues and questions:
+- GitHub: https://github.com/janhq/jan-server
+- Documentation: https://docs.jan.ai
+
+## License
+
+See the main project LICENSE file.
diff --git a/k8s/jan-server/templates/_helpers.tpl b/k8s/jan-server/templates/_helpers.tpl
new file mode 100644
index 00000000..4d375526
--- /dev/null
+++ b/k8s/jan-server/templates/_helpers.tpl
@@ -0,0 +1,77 @@
+{{/*
+Short chart name: .Values.nameOverride when set, otherwise the chart's own
+name. The result is cut to 63 characters and any trailing "-" is removed.
+*/}}
+{{- define "jan-server.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name, resolved in this order:
+  1. .Values.fullnameOverride, when set;
+  2. the release name alone, when it already contains the chart name
+     (or .Values.nameOverride);
+  3. "<release name>-<chart name>" otherwise.
+Every variant is cut to 63 characters and has a trailing "-" removed.
+*/}}
+{{- define "jan-server.fullname" -}}
+{{- $override := .Values.fullnameOverride -}}
+{{- $base := .Values.nameOverride | default .Chart.Name -}}
+{{- if $override -}}
+{{- $override | trunc 63 | trimSuffix "-" -}}
+{{- else if contains $base .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $base | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end }}
+
+{{/*
+Value for the helm.sh/chart label: "<chart name>-<chart version>" with every
+"+" replaced by "_", cut to 63 characters, trailing "-" removed.
+*/}}
+{{- define "jan-server.chart" -}}
+{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
+{{- $nameVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Labels applied to every resource: the chart label, the selector labels, the
+app version (only when the chart declares one), the managing service, and
+finally any extra labels supplied through .Values.commonLabels.
+*/}}
+{{- define "jan-server.labels" -}}
+helm.sh/chart: {{ include "jan-server.chart" . }}
+{{ include "jan-server.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- if .Values.commonLabels }}
+{{ toYaml .Values.commonLabels }}
+{{- end }}
+{{- end }}
+
+{{/*
+Labels used in selectors: the chart name (see "jan-server.name") and the
+release name. Templates append their own app.kubernetes.io/component label.
+*/}}
+{{- define "jan-server.selectorLabels" -}}
+{{- $appName := include "jan-server.name" . -}}
+app.kubernetes.io/name: {{ $appName }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Service account name used by the pods. .Values.serviceAccount.name wins when
+set; otherwise the fallback is the chart fullname if the chart creates the
+account (.Values.serviceAccount.create), or "default" if it does not.
+*/}}
+{{- define "jan-server.serviceAccountName" -}}
+{{- $sa := .Values.serviceAccount -}}
+{{- $fallback := "default" -}}
+{{- if $sa.create -}}
+{{- $fallback = include "jan-server.fullname" . -}}
+{{- end -}}
+{{- $sa.name | default $fallback }}
+{{- end }}
+
+{{/*
+PostgreSQL resource name: "<release name>-postgresql", cut to 63 characters
+with a trailing "-" removed.
+NOTE(review): presumably matches the name generated by the Bitnami postgresql
+subchart with default naming - confirm if that subchart's name overrides are used.
+*/}}
+{{- define "jan-server.postgresql.fullname" -}}
+{{- $pgName := print .Release.Name "-postgresql" -}}
+{{- $pgName | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Redis master resource name: "<release name>-redis-master", cut to 63
+characters with a trailing "-" removed.
+NOTE(review): presumably matches the master service of the Bitnami redis
+subchart with default naming - confirm if that subchart's name overrides are used.
+*/}}
+{{- define "jan-server.redis.fullname" -}}
+{{- $redisName := print .Release.Name "-redis-master" -}}
+{{- $redisName | trunc 63 | trimSuffix "-" }}
+{{- end }}
diff --git a/k8s/jan-server/templates/keycloak-deployment.yaml b/k8s/jan-server/templates/keycloak-deployment.yaml
new file mode 100644
index 00000000..e1a628db
--- /dev/null
+++ b/k8s/jan-server/templates/keycloak-deployment.yaml
@@ -0,0 +1,110 @@
+{{- if .Values.keycloak.enabled }}
+# Keycloak Deployment.
+# The database and admin passwords are read from the "<fullname>-keycloak-secret"
+# Secret; all other settings are passed as plain environment variables.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-keycloak
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: keycloak
+spec:
+  replicas: {{ .Values.keycloak.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: keycloak
+  template:
+    metadata:
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: keycloak
+    spec:
+      {{- with .Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "jan-server.serviceAccountName" . }}
+      containers:
+      - name: keycloak
+        # The "registry/" prefix is emitted only when a registry is configured,
+        # so an empty registry yields "repo:tag" instead of the invalid "/repo:tag".
+        image: "{{ with .Values.keycloak.image.registry }}{{ . }}/{{ end }}{{ .Values.keycloak.image.repository }}:{{ .Values.keycloak.image.tag }}"
+        imagePullPolicy: {{ .Values.keycloak.image.pullPolicy }}
+        ports:
+        - name: http
+          containerPort: {{ .Values.keycloak.service.targetPort }}
+          protocol: TCP
+        env:
+        - name: KC_DB
+          value: "postgres"
+        # Use the chart's PostgreSQL name helper: the subchart is named after the
+        # release, not after this chart's fullname, so the two differ whenever
+        # the release name does not contain the chart name or fullnameOverride is set.
+        - name: KC_DB_URL_HOST
+          value: {{ include "jan-server.postgresql.fullname" . | quote }}
+        - name: KC_DB_USERNAME
+          value: "{{ .Values.keycloak.database.username }}"
+        - name: KC_DB_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-keycloak-secret
+              key: password
+        - name: KC_DB_URL_DATABASE
+          value: "{{ .Values.keycloak.database.name }}"
+        - name: KC_FEATURES
+          value: "token-exchange"
+        # Listen on the same port that containerPort and the probes use, so
+        # changing keycloak.service.targetPort cannot desynchronise them.
+        - name: KC_HTTP_PORT
+          value: {{ .Values.keycloak.service.targetPort | quote }}
+        - name: KC_HOSTNAME_STRICT_HTTPS
+          value: "false"
+        - name: KEYCLOAK_ADMIN
+          value: "{{ .Values.keycloak.admin.username }}"
+        - name: KEYCLOAK_ADMIN_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-keycloak-secret
+              key: admin-password
+        # Both probes request "/" on the named http port; liveness waits longer
+        # before its first check and tolerates more failures than readiness.
+        livenessProbe:
+          httpGet:
+            path: /
+            port: http
+          initialDelaySeconds: 60
+          periodSeconds: 10
+          failureThreshold: 5
+        readinessProbe:
+          httpGet:
+            path: /
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 5
+          failureThreshold: 3
+        resources:
+          {{- toYaml .Values.keycloak.resources | nindent 10 }}
+---
+# Service exposing the Keycloak pods; traffic is forwarded to the container
+# port named "http".
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: {{ .Release.Namespace }}
+  name: {{ include "jan-server.fullname" . }}-keycloak
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: keycloak
+spec:
+  type: {{ .Values.keycloak.service.type }}
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: keycloak
+  ports:
+  - name: http
+    protocol: TCP
+    port: {{ .Values.keycloak.service.port }}
+    targetPort: http
+---
+# Credentials referenced by the Keycloak Deployment through secretKeyRef:
+# "password" is the database password, "admin-password" the admin password.
+apiVersion: v1
+kind: Secret
+type: Opaque
+metadata:
+  namespace: {{ .Release.Namespace }}
+  name: {{ include "jan-server.fullname" . }}-keycloak-secret
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: keycloak
+stringData:
+  admin-password: {{ .Values.keycloak.admin.password | quote }}
+  password: {{ .Values.keycloak.database.password | quote }}
+{{- end }}
diff --git a/k8s/jan-server/templates/kong-deployment.yaml b/k8s/jan-server/templates/kong-deployment.yaml
new file mode 100644
index 00000000..4d7784e1
--- /dev/null
+++ b/k8s/jan-server/templates/kong-deployment.yaml
@@ -0,0 +1,521 @@
+{{- if .Values.kong.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "jan-server.fullname" . }}-kong-config
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: kong
+data:
+  kong.yml: |
+    _format_version: "3.0"
+    _transform: true
+
+    consumers:
+      - username: kong-anon-jwt
+        custom_id: anon-jwt
+        tags: [anonymous, auth, fallback]
+      - username: keycloak-issuer
+        custom_id: keycloak-jwt
+        tags: [auth, jwt, keycloak]
+
+    jwt_secrets:
+      - consumer: keycloak-issuer
+        algorithm: RS256
+        key: {{ .Values.keycloak.issuer }}
+        rsa_public_key: |
+          -----BEGIN PUBLIC KEY-----
+          MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAks4bK7EqsKVvrW6F8gRD
+          izRuGFzhfZVdHImVbmwavyK+yGrxVR5BOfbAYZy6/LnLei3aCmYbwKNgV+BU8Lch
+          +USX/BPpHswRXqf/GcBcdwAhqxAtwoKFG8KwTORP/RZGbVxOMS9D9T6iHPQmT7Md
+          4FyvHwTx7BwPx5oMIEOnur+NNaTsECN3cGR21SAnCtNCl188D3ubTsjUwERp6B4E
+          p2sVXsTDzT0ZOYbmmZiZJ59Fvk+0UNMn2uQyAj+j7lv15g6GtNSlG1DBnRKEVbOz
+          C50TfRUcCpQTrS8FkTTS0Pc/9MCOCHy9YDDDhdEuI5dvo9y9QUTIHPx4AhSubE0C
+          bwIDAQAB
+          -----END PUBLIC KEY-----
+
+    # Note: API key authentication handled by keycloak-apikey plugin
+    # Kong in DB-less mode validates API keys via llm-api service
+
+    plugins:
+      - name: rate-limiting
+        tags: [global, security, rate]
+        config:
+          minute: 600
+          hour: 10000
+          policy: local
+          limit_by: ip
+          fault_tolerant: true
+      - name: request-transformer
+        tags: [global, security, transformer]
+        config:
+          add:
+            headers:
+              - "X-Gateway-Auth: kong"
+              - "X-Gateway-Version: 3.5"
+
+    services:
+      - name: llm-api-svc
+        url: http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}
+        connect_timeout: 60000
+        write_timeout: 60000
+        read_timeout: 60000
+        retries: 3
+        tags: [llm, api]
+        routes:
+          - name: llm-api-proxy
+            paths:
+              - /llm
+            strip_path: true
+            path_handling: v0
+            tags: [llm, api, protected]
+            plugins:
+              - name: jwt
+                tags: [llm, api, jwt]
+                config:
+                  key_claim_name: iss
+                  claims_to_verify: ["exp", "nbf"]
+                  maximum_expiration: 3600
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  anonymous: kong-anon-jwt
+              - name: keycloak-apikey
+                tags: [llm, api, apikey]
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  hide_credentials: true
+                  run_on_preflight: false
+              - name: rate-limiting
+                tags: [llm, api, rate]
+                config:
+                  minute: 120
+                  policy: local
+                  limit_by: consumer
+                  fault_tolerant: true
+              - name: cors
+                tags: [llm, api, cors]
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+                  headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+                  exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+                  credentials: true
+                  max_age: 3600
+          - name: llm-api-v1
+            paths:
+              - /v1
+            strip_path: false
+            path_handling: v0
+            tags: [llm, api, v1, protected]
+            plugins:
+              - name: jwt
+                tags: [llm, api, jwt]
+                config:
+                  key_claim_name: iss
+                  claims_to_verify: ["exp", "nbf"]
+                  maximum_expiration: 3600
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  anonymous: kong-anon-jwt
+              - name: keycloak-apikey
+                tags: [llm, api, apikey]
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  hide_credentials: true
+                  run_on_preflight: false
+              - name: rate-limiting
+                tags: [llm, api, rate]
+                config:
+                  minute: 120
+                  policy: local
+                  limit_by: ip
+                  fault_tolerant: true
+              - name: cors
+                tags: [llm, api, cors]
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+                  headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+                  exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+                  credentials: true
+                  max_age: 3600
+          - name: llm-api-health
+            paths:
+              - /healthz
+              - /readyz
+            strip_path: false
+            path_handling: v0
+            methods: [GET]
+            tags: [llm, health, public]
+            plugins:
+              - name: cors
+                tags: [llm, health, cors]
+                config:
+                  origins: ["*"]
+                  methods: ["GET", "OPTIONS"]
+                  headers: ["Content-Type"]
+                  exposed_headers: ["X-Request-Id"]
+                  credentials: false
+                  max_age: 3600
+
+      - name: media-api-svc
+        url: http://{{ include "jan-server.fullname" . }}-media-api:{{ .Values.mediaApi.service.port }}
+        connect_timeout: 60000
+        write_timeout: 60000
+        read_timeout: 60000
+        retries: 3
+        tags: [media, api]
+        routes:
+          - name: media-api-proxy
+            paths: [/media]
+            strip_path: true
+            path_handling: v0
+            tags: [media, protected]
+            plugins:
+              - name: jwt
+                tags: [media, jwt]
+                config:
+                  key_claim_name: iss
+                  claims_to_verify: ["exp", "nbf"]
+                  maximum_expiration: 3600
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  anonymous: kong-anon-jwt
+              - name: keycloak-apikey
+                tags: [media, apikey]
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  hide_credentials: true
+                  run_on_preflight: false
+              - name: rate-limiting
+                tags: [media, rate]
+                config:
+                  minute: 60
+                  policy: local
+                  limit_by: ip
+                  fault_tolerant: true
+              - name: cors
+                tags: [media, cors]
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+                  headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id"]
+                  exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+                  credentials: true
+                  max_age: 3600
+
+      # Service for the llm-api auth endpoints: one CORS-only public route and one
+      # route guarded by the jwt / keycloak-apikey / rate-limiting plugins.
+      - name: llm-auth-svc
+        url: http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}
+        tags:
+          - llm
+          - auth
+        retries: 3
+        connect_timeout: 60000
+        read_timeout: 60000
+        write_timeout: 60000
+        routes:
+          # Public endpoints: only the cors plugin is attached here.
+          - name: llm-auth-public
+            tags:
+              - llm
+              - auth
+              - public
+            methods:
+              - GET
+              - POST
+              - OPTIONS
+            strip_path: false
+            paths:
+              - /llm/auth/guest-login
+              - /llm/auth/refresh-token
+              - /llm/auth/logout
+              - /llm/auth/validate-api-key
+            plugins:
+              - name: cors
+                tags:
+                  - llm
+                  - cors
+                  - public
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  credentials: true
+                  max_age: 3600
+                  methods:
+                    - GET
+                    - POST
+                    - OPTIONS
+                  headers:
+                    - Authorization
+                    - Content-Type
+                    - X-API-Key
+                    - Idempotency-Key
+                    - X-Request-Id
+                  exposed_headers:
+                    - X-Request-Id
+                    - X-Gateway-Auth
+          # Remaining auth paths; the request path is forwarded unchanged (strip_path false).
+          - name: llm-auth-protected
+            tags:
+              - llm
+              - auth
+              - protected
+            strip_path: false
+            paths:
+              - /llm/auth
+              - /auth
+            plugins:
+              - name: jwt
+                tags:
+                  - llm
+                  - auth
+                  - jwt
+                config:
+                  anonymous: kong-anon-jwt
+                  key_claim_name: iss
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  maximum_expiration: 3600
+                  claims_to_verify:
+                    - exp
+                    - nbf
+              - name: keycloak-apikey
+                tags:
+                  - llm
+                  - auth
+                  - apikey
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  run_on_preflight: false
+                  hide_credentials: true
+              - name: rate-limiting
+                tags:
+                  - llm
+                  - auth
+                  - rate
+                config:
+                  limit_by: ip
+                  policy: local
+                  minute: 60
+                  fault_tolerant: true
+              - name: cors
+                tags:
+                  - llm
+                  - cors
+                  - protected
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  credentials: true
+                  max_age: 3600
+                  methods:
+                    - GET
+                    - POST
+                    - PUT
+                    - PATCH
+                    - DELETE
+                    - OPTIONS
+                  headers:
+                    - Authorization
+                    - Content-Type
+                    - X-API-Key
+                    - Idempotency-Key
+                    - X-Request-Id
+                    - Mcp-Session-Id
+                  exposed_headers:
+                    - X-Request-Id
+                    - X-Gateway-Auth
+
+      # Service for the response API, exposed under /responses (prefix stripped upstream).
+      - name: response-api-svc
+        url: http://{{ include "jan-server.fullname" . }}-response-api:{{ .Values.responseApi.service.port }}
+        tags:
+          - response
+          - api
+        retries: 3
+        connect_timeout: 60000
+        read_timeout: 60000
+        write_timeout: 60000
+        routes:
+          - name: response-api-proxy
+            tags:
+              - response
+              - protected
+            paths:
+              - /responses
+            path_handling: v0
+            strip_path: true
+            plugins:
+              - name: jwt
+                tags:
+                  - response
+                  - jwt
+                config:
+                  anonymous: kong-anon-jwt
+                  key_claim_name: iss
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  maximum_expiration: 3600
+                  claims_to_verify:
+                    - exp
+                    - nbf
+              - name: keycloak-apikey
+                tags:
+                  - response
+                  - apikey
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  run_on_preflight: false
+                  hide_credentials: true
+              # 100 requests per minute, keyed by client IP, counted with the local policy.
+              - name: rate-limiting
+                tags:
+                  - response
+                  - rate
+                config:
+                  limit_by: ip
+                  policy: local
+                  minute: 100
+                  fault_tolerant: true
+              - name: cors
+                tags:
+                  - response
+                  - cors
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  credentials: true
+                  max_age: 3600
+                  methods:
+                    - GET
+                    - POST
+                    - PUT
+                    - PATCH
+                    - DELETE
+                    - OPTIONS
+                  headers:
+                    - Authorization
+                    - Content-Type
+                    - X-API-Key
+                    - Idempotency-Key
+                    - X-Request-Id
+                    - Mcp-Session-Id
+                  exposed_headers:
+                    - X-Request-Id
+                    - X-Gateway-Auth
+
+      # MCP RPC endpoint: POST /mcp is proxied to /v1/mcp on the mcp-tools service
+      # (the /mcp prefix is stripped and the service URL supplies the /v1/mcp path).
+      - name: mcp-tools-rpc-svc
+        url: http://{{ include "jan-server.fullname" . }}-mcp-tools:{{ .Values.mcpTools.service.port }}/v1/mcp
+        connect_timeout: 60000
+        write_timeout: 60000
+        read_timeout: 60000
+        retries: 3
+        tags: [mcp, rpc]
+        routes:
+          - name: mcp-tools-rpc
+            paths: [/mcp]
+            strip_path: true
+            methods: ["POST", "OPTIONS"]
+            path_handling: v0
+            tags: [mcp, protected]
+            plugins:
+              - name: jwt
+                tags: [mcp, jwt]
+                config:
+                  key_claim_name: iss
+                  claims_to_verify: ["exp", "nbf"]
+                  maximum_expiration: 3600
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  anonymous: kong-anon-jwt
+              - name: keycloak-apikey
+                tags: [mcp, apikey]
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  hide_credentials: true
+                  run_on_preflight: false
+              # 200 requests per minute, keyed by client IP, counted with the local policy.
+              - name: rate-limiting
+                tags: [mcp, rate]
+                config:
+                  minute: 200
+                  policy: local
+                  limit_by: ip
+                  fault_tolerant: true
+              - name: cors
+                tags: [mcp, cors]
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  methods: ["POST", "OPTIONS"]
+                  # Mcp-Session-Id is the MCP session header: browser clients must be
+                  # allowed to send it and to read it from responses. The other routes
+                  # already allow it; this one (the actual MCP endpoint) did not.
+                  headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "mcp-protocol-version", "Mcp-Session-Id"]
+                  exposed_headers: ["X-Request-Id", "X-Gateway-Auth", "Mcp-Session-Id"]
+                  credentials: true
+                  max_age: 3600
+
+      # Health/readiness passthrough for mcp-tools.
+      # With strip_path enabled the whole matched route path is removed, so a single
+      # path-less service would receive "/" for both /mcp/healthz and /mcp/readyz.
+      # Each endpoint therefore gets its own service whose URL carries the upstream
+      # path (the same pattern mcp-tools-rpc-svc uses for /v1/mcp).
+      - name: mcp-tools-health-svc
+        url: http://{{ include "jan-server.fullname" . }}-mcp-tools:{{ .Values.mcpTools.service.port }}/healthz
+        connect_timeout: 60000
+        write_timeout: 60000
+        read_timeout: 60000
+        retries: 3
+        tags: [mcp, health]
+        routes:
+          - name: mcp-tools-health
+            paths: [/mcp/healthz]
+            strip_path: true
+            methods: ["GET"]
+            path_handling: v0
+            tags: [mcp, health, protected]
+            plugins:
+              - name: jwt
+                tags: [mcp, health, jwt]
+                config:
+                  key_claim_name: iss
+                  claims_to_verify: ["exp", "nbf"]
+                  maximum_expiration: 3600
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  anonymous: kong-anon-jwt
+              - name: keycloak-apikey
+                tags: [mcp, health, apikey]
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  hide_credentials: true
+                  run_on_preflight: false
+              - name: cors
+                tags: [mcp, health, cors]
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  methods: ["GET", "OPTIONS"]
+                  headers: ["Authorization", "Content-Type", "X-API-Key", "X-Request-Id"]
+                  exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+                  credentials: true
+                  max_age: 3600
+
+      - name: mcp-tools-ready-svc
+        url: http://{{ include "jan-server.fullname" . }}-mcp-tools:{{ .Values.mcpTools.service.port }}/readyz
+        connect_timeout: 60000
+        write_timeout: 60000
+        read_timeout: 60000
+        retries: 3
+        tags: [mcp, health]
+        routes:
+          - name: mcp-tools-ready
+            paths: [/mcp/readyz]
+            strip_path: true
+            methods: ["GET"]
+            path_handling: v0
+            tags: [mcp, health, protected]
+            plugins:
+              - name: jwt
+                tags: [mcp, health, jwt]
+                config:
+                  key_claim_name: iss
+                  claims_to_verify: ["exp", "nbf"]
+                  maximum_expiration: 3600
+                  secret_is_base64: false
+                  run_on_preflight: false
+                  anonymous: kong-anon-jwt
+              - name: keycloak-apikey
+                tags: [mcp, health, apikey]
+                config:
+                  validation_url: "http://{{ include "jan-server.fullname" . }}-llm-api:{{ .Values.llmApi.service.port }}/auth/validate-api-key"
+                  validation_timeout: 5000
+                  hide_credentials: true
+                  run_on_preflight: false
+              - name: cors
+                tags: [mcp, health, cors]
+                config:
+                  origins: {{ .Values.kong.cors.origins | toJson }}
+                  methods: ["GET", "OPTIONS"]
+                  headers: ["Authorization", "Content-Type", "X-API-Key", "X-Request-Id"]
+                  exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+                  credentials: true
+                  max_age: 3600
+---
+# Kong gateway Deployment, run DB-less with a declarative config mounted from a
+# ConfigMap. An init container copies the custom keycloak-apikey plugin sources
+# from their ConfigMap mount into a per-plugin directory on a shared emptyDir.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-kong
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: kong
+spec:
+  replicas: {{ .Values.kong.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: kong
+  template:
+    metadata:
+      annotations:
+        # The plugin files are only copied at pod start, so roll the pods whenever
+        # the plugin ConfigMap template changes.
+        checksum/plugins: {{ include (print $.Template.BasePath "/kong-plugins-configmap.yaml") . | sha256sum }}
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: kong
+    spec:
+      initContainers:
+      - name: copy-plugins
+        # Pinned tag instead of "latest" so pod restarts always run the same image.
+        image: busybox:1.36
+        # "set -e" makes the init container fail if any copy fails; without it the
+        # script's exit status was that of the final echo, hiding a failed cp.
+        command:
+        - sh
+        - -c
+        - |
+          set -e
+          mkdir -p /usr/local/kong/plugins/keycloak-apikey
+          cp /tmp/plugins/keycloak-apikey-handler.lua /usr/local/kong/plugins/keycloak-apikey/handler.lua
+          cp /tmp/plugins/keycloak-apikey-schema.lua /usr/local/kong/plugins/keycloak-apikey/schema.lua
+          echo "Plugins copied successfully"
+        volumeMounts:
+        - name: plugins-source
+          mountPath: /tmp/plugins
+          readOnly: true
+        - name: plugins-dest
+          mountPath: /usr/local/kong/plugins
+      containers:
+      - name: kong
+        image: "{{ .Values.kong.image.registry }}/{{ .Values.kong.image.repository }}:{{ .Values.kong.image.tag }}"
+        imagePullPolicy: {{ .Values.kong.image.pullPolicy }}
+        ports:
+        - name: http
+          containerPort: {{ .Values.kong.service.targetPort }}
+          protocol: TCP
+        env:
+        - name: KONG_DATABASE
+          value: "off"
+        - name: KONG_DECLARATIVE_CONFIG
+          value: /kong/kong.yml
+        - name: KONG_LOG_LEVEL
+          value: info
+        - name: KONG_PLUGINS
+          value: "bundled,keycloak-apikey"
+        # NOTE(review): Kong loads plugins as the Lua module kong.plugins.<name>.handler;
+        # confirm that this search path resolves to the directory the init container
+        # populates (/usr/local/kong/plugins/keycloak-apikey).
+        - name: KONG_LUA_PACKAGE_PATH
+          value: "/usr/local/share/lua/5.1/?.lua;/usr/local/kong/plugins/?.lua;;"
+        volumeMounts:
+        - name: config
+          mountPath: /kong
+          readOnly: true
+        - name: plugins-dest
+          mountPath: /usr/local/kong/plugins
+          readOnly: true
+        livenessProbe:
+          exec:
+            command:
+              - kong
+              - health
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          exec:
+            command:
+              - kong
+              - health
+          initialDelaySeconds: 10
+          periodSeconds: 5
+        resources:
+          {{- toYaml .Values.kong.resources | nindent 10 }}
+      volumes:
+      - name: config
+        configMap:
+          name: {{ include "jan-server.fullname" . }}-kong-config
+      - name: plugins-source
+        configMap:
+          name: {{ include "jan-server.fullname" . }}-kong-plugins
+      # Scratch volume shared between the init container (writer) and Kong (read-only).
+      - name: plugins-dest
+        emptyDir: {}
+---
+# Service in front of the Kong pods; targets the container port named "http".
+kind: Service
+apiVersion: v1
+metadata:
+  namespace: {{ .Release.Namespace }}
+  name: {{ printf "%s-kong" (include "jan-server.fullname" .) }}
+  labels:
+    {{- nindent 4 (include "jan-server.labels" .) }}
+    app.kubernetes.io/component: kong
+spec:
+  selector:
+    {{- nindent 4 (include "jan-server.selectorLabels" .) }}
+    app.kubernetes.io/component: kong
+  type: {{ .Values.kong.service.type }}
+  ports:
+  - name: http
+    protocol: TCP
+    port: {{ .Values.kong.service.port }}
+    targetPort: http
+{{- end }}
diff --git a/k8s/jan-server/templates/kong-plugins-configmap.yaml b/k8s/jan-server/templates/kong-plugins-configmap.yaml
new file mode 100644
index 00000000..3a72fa80
--- /dev/null
+++ b/k8s/jan-server/templates/kong-plugins-configmap.yaml
@@ -0,0 +1,137 @@
+# ConfigMap holding the Lua sources (handler and schema) of the custom
+# keycloak-apikey Kong plugin.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "jan-server.fullname" . }}-kong-plugins
+  # Set explicitly, like the other resources in this chart, so the ConfigMap
+  # lands in the release namespace.
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: kong-plugins
+data:
+  keycloak-apikey-handler.lua: |
+    -- keycloak-apikey: authenticates requests that carry an "sk_" API key by
+    -- POSTing the key to an HTTP validation endpoint (conf.validation_url) and,
+    -- on success, forwarding the returned identity to the upstream as headers.
+    local http = require "resty.http"
+    local cjson = require "cjson.safe"
+
+    local KeycloakAPIKeyHandler = {
+      PRIORITY = 1002, -- lower than the bundled jwt plugin, so this runs after it
+      VERSION = "1.0.0",
+    }
+
+    -- Request headers that may carry the API key. Header lookup is
+    -- case-insensitive, so "X-API-Key" also matches "X-Api-Key".
+    local API_KEY_HEADERS = { "X-API-Key", "apikey" }
+
+    -- Returns the first non-empty API key found in the request headers, or nil.
+    local function read_api_key()
+      for _, name in ipairs(API_KEY_HEADERS) do
+        local value = kong.request.get_header(name)
+        if value and value ~= "" then
+          return value
+        end
+      end
+      return nil
+    end
+
+    -- Converts a decoded JSON field into a value that is safe to use as a header:
+    -- strings pass through, numbers/booleans are stringified, anything else
+    -- (nil, JSON null, tables) yields nil.
+    local function header_value(field)
+      local kind = type(field)
+      if kind == "string" then
+        return field
+      end
+      if kind == "number" or kind == "boolean" then
+        return tostring(field)
+      end
+      return nil
+    end
+
+    -- Access-phase handler.
+    -- conf fields used: run_on_preflight, validation_url, validation_timeout,
+    -- hide_credentials.
+    -- Requests without an "sk_" API key are passed through untouched so other
+    -- auth plugins can decide; an invalid key is rejected with 401; validation
+    -- infrastructure failures are rejected with 500.
+    function KeycloakAPIKeyHandler:access(conf)
+      -- Honour run_on_preflight: CORS preflight requests carry no credentials.
+      if not conf.run_on_preflight and kong.request.get_method() == "OPTIONS" then
+        return
+      end
+
+      local api_key = read_api_key()
+
+      -- If no API key, skip (let JWT or other auth handle it)
+      if not api_key then
+        return
+      end
+
+      -- Check if it's our format (sk_xxxxx)
+      if not string.match(api_key, "^sk_") then
+        kong.log.debug("API key doesn't match sk_ format, skipping")
+        return
+      end
+
+      -- Call validation endpoint
+      local httpc = http.new()
+      httpc:set_timeout(conf.validation_timeout or 5000)
+
+      local validation_url = conf.validation_url or "http://llm-api:8080/auth/validate-api-key"
+
+      kong.log.debug("Validating API key via: ", validation_url)
+
+      local res, err = httpc:request_uri(validation_url, {
+        method = "POST",
+        body = cjson.encode({ api_key = api_key }),
+        headers = {
+          ["Content-Type"] = "application/json",
+        },
+        keepalive_timeout = 60000,
+        keepalive_pool = 10,
+      })
+
+      if not res then
+        kong.log.err("Failed to validate API key: ", err)
+        return kong.response.exit(500, {
+          message = "API key validation service unavailable"
+        })
+      end
+
+      -- A 5xx answer means the validator itself failed; do not report that to
+      -- the client as a bad key.
+      if res.status >= 500 then
+        kong.log.err("API key validation service error: ", res.status)
+        return kong.response.exit(500, {
+          message = "API key validation service unavailable"
+        })
+      end
+
+      if res.status ~= 200 then
+        kong.log.debug("API key validation failed: ", res.status)
+        return kong.response.exit(401, {
+          message = "Invalid API key"
+        })
+      end
+
+      -- Parse user info
+      local user_info, decode_err = cjson.decode(res.body)
+      if type(user_info) ~= "table" then
+        kong.log.err("Failed to decode validation response: ", decode_err)
+        return kong.response.exit(500, {
+          message = "Invalid validation response"
+        })
+      end
+
+      -- A response without a usable user id cannot identify the caller; setting
+      -- a nil header value would raise inside the PDK, so reject explicitly.
+      local user_id = header_value(user_info.user_id)
+      if not user_id or user_id == "" then
+        kong.log.err("Validation response is missing user_id")
+        return kong.response.exit(500, {
+          message = "Invalid validation response"
+        })
+      end
+      local subject = header_value(user_info.subject)
+
+      -- Set headers for downstream services (like JWT does)
+      kong.service.request.set_header("X-User-ID", user_id)
+      kong.service.request.set_header("X-User-Subject", subject or "")
+      kong.service.request.set_header("X-User-Email", header_value(user_info.email) or "")
+      kong.service.request.set_header("X-User-Username", header_value(user_info.username) or "")
+      kong.service.request.set_header("X-Auth-Method", "apikey")
+
+      -- Set authenticated credential for rate limiting
+      kong.client.authenticate(user_info, {
+        id = user_id,
+        custom_id = subject,
+      })
+
+      -- Hide the API key from downstream services
+      if conf.hide_credentials then
+        for _, name in ipairs(API_KEY_HEADERS) do
+          kong.service.request.clear_header(name)
+        end
+      end
+
+      kong.log.info("API key validated successfully for user: ", user_id)
+    end
+
+    return KeycloakAPIKeyHandler
+
+  keycloak-apikey-schema.lua: |
+    -- Configuration schema for the keycloak-apikey plugin.
+    local typedefs = require "kong.db.schema.typedefs"
+
+    return {
+      name = "keycloak-apikey",
+      fields = {
+        -- An authentication plugin cannot be scoped to a consumer.
+        { consumer = typedefs.no_consumer },
+        -- The handler only implements the HTTP access phase.
+        { protocols = typedefs.protocols_http },
+        { config = {
+            type = "record",
+            fields = {
+              -- typedefs.url rejects values that are not well-formed URLs at
+              -- config-load time instead of failing on the first request.
+              { validation_url = typedefs.url {
+                  required = true,
+                  default = "http://llm-api:8080/auth/validate-api-key",
+                  description = "URL of the API key validation endpoint"
+              }},
+              { validation_timeout = {
+                  type = "number",
+                  required = true,
+                  default = 5000,
+                  gt = 0,
+                  description = "Timeout for validation request in milliseconds"
+              }},
+              { hide_credentials = {
+                  type = "boolean",
+                  required = true,
+                  default = true,
+                  description = "Hide API key from downstream services"
+              }},
+              { run_on_preflight = {
+                  type = "boolean",
+                  required = true,
+                  default = false,
+                  description = "Run on CORS preflight requests"
+              }},
+            }
+        }},
+      },
+    }
diff --git a/k8s/jan-server/templates/llm-api-configmap.yaml b/k8s/jan-server/templates/llm-api-configmap.yaml
new file mode 100644
index 00000000..98ff91b7
--- /dev/null
+++ b/k8s/jan-server/templates/llm-api-configmap.yaml
@@ -0,0 +1,12 @@
+{{- /*
+  ConfigMap for llm-api. It currently ships a single config.yaml key whose
+  content is only a comment line.
+*/ -}}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ printf "%s-llm-api-config" (include "jan-server.fullname" .) }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- nindent 4 (include "jan-server.labels" .) }}
+    app.kubernetes.io/component: llm-api
+data:
+  # Add any config files here if needed
+  config.yaml: |
+    # LLM API configuration
diff --git a/k8s/jan-server/templates/llm-api-deployment.yaml b/k8s/jan-server/templates/llm-api-deployment.yaml
new file mode 100644
index 00000000..8bfc8f2d
--- /dev/null
+++ b/k8s/jan-server/templates/llm-api-deployment.yaml
@@ -0,0 +1,117 @@
+{{- if .Values.llmApi.enabled }}
+# Deployment and Service for the llm-api component.
+# Both are rendered only when llmApi.enabled is set.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-llm-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: llm-api
+spec:
+  # replicas is left out when llmApi.autoscaling.enabled is set.
+  {{- if not .Values.llmApi.autoscaling.enabled }}
+  replicas: {{ .Values.llmApi.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: llm-api
+  template:
+    metadata:
+      annotations:
+        # SHA-256 of the rendered ConfigMap and Secret templates; when either
+        # changes, the pod template changes with it.
+        checksum/config: {{ include (print $.Template.BasePath "/llm-api-configmap.yaml") . | sha256sum }}
+        checksum/secret: {{ include (print $.Template.BasePath "/llm-api-secret.yaml") . | sha256sum }}
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: llm-api
+    spec:
+      {{- with .Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "jan-server.serviceAccountName" . }}
+      # Pod runs as non-root UID 1000; volumes get group 1000.
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+        fsGroup: 1000
+      containers:
+      - name: llm-api
+        image: "{{ .Values.llmApi.image.registry }}/{{ .Values.llmApi.image.repository }}:{{ .Values.llmApi.image.tag }}"
+        imagePullPolicy: {{ .Values.llmApi.image.pullPolicy }}
+        ports:
+        - name: http
+          containerPort: {{ .Values.llmApi.service.targetPort }}
+          protocol: TCP
+        # Every value is passed through quote so it is emitted as a string.
+        env:
+        - name: HTTP_PORT
+          value: {{ .Values.llmApi.env.HTTP_PORT | quote }}
+        - name: LOG_LEVEL
+          value: {{ .Values.llmApi.env.LOG_LEVEL | quote }}
+        - name: LOG_FORMAT
+          value: {{ .Values.llmApi.env.LOG_FORMAT | quote }}
+        - name: AUTO_MIGRATE
+          value: {{ .Values.llmApi.env.AUTO_MIGRATE | quote }}
+        - name: OTEL_ENABLED
+          value: {{ .Values.llmApi.env.OTEL_ENABLED | quote }}
+        - name: JAN_DEFAULT_NODE_SETUP
+          value: {{ .Values.llmApi.env.JAN_DEFAULT_NODE_SETUP | quote }}
+        - name: JAN_DEFAULT_NODE_URL
+          value: {{ .Values.llmApi.env.JAN_DEFAULT_NODE_URL | quote }}
+        # The database URL is read from the llm-api Secret, not from a plain value.
+        - name: DATABASE_URL
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-llm-api-secret
+              key: database-url
+        # tpl renders template expressions embedded in the value itself
+        # (also used for JWKS_URL and ISSUER below).
+        - name: KEYCLOAK_BASE_URL
+          value: {{ tpl .Values.llmApi.env.KEYCLOAK_BASE_URL . | quote }}
+        - name: KEYCLOAK_REALM
+          value: {{ .Values.llmApi.env.KEYCLOAK_REALM | quote }}
+        - name: BACKEND_CLIENT_ID
+          value: {{ .Values.llmApi.env.BACKEND_CLIENT_ID | quote }}
+        # NOTE(review): the client secret is injected as a plain env value taken
+        # from values, unlike DATABASE_URL which comes from a Secret - confirm
+        # this is intended.
+        - name: BACKEND_CLIENT_SECRET
+          value: {{ .Values.llmApi.env.BACKEND_CLIENT_SECRET | quote }}
+        - name: CLIENT
+          value: {{ .Values.llmApi.env.CLIENT | quote }}
+        - name: ACCOUNT
+          value: {{ .Values.llmApi.env.ACCOUNT | quote }}
+        - name: JWKS_URL
+          value: {{ tpl .Values.llmApi.env.JWKS_URL . | quote }}
+        - name: ISSUER
+          value: {{ tpl .Values.llmApi.env.ISSUER . | quote }}
+        # Liveness and readiness both probe GET /healthz on the "http" port;
+        # readiness starts earlier and polls more often.
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          timeoutSeconds: 3
+          failureThreshold: 3
+        resources:
+          {{- toYaml .Values.llmApi.resources | nindent 10 }}
+---
+# Service for llm-api; targetPort refers to the container port named "http".
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "jan-server.fullname" . }}-llm-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: llm-api
+spec:
+  type: {{ .Values.llmApi.service.type }}
+  ports:
+  - port: {{ .Values.llmApi.service.port }}
+    targetPort: http
+    protocol: TCP
+    name: http
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: llm-api
+{{- end }}
diff --git a/k8s/jan-server/templates/llm-api-ingress.yaml b/k8s/jan-server/templates/llm-api-ingress.yaml
new file mode 100644
index 00000000..e75cc621
--- /dev/null
+++ b/k8s/jan-server/templates/llm-api-ingress.yaml
@@ -0,0 +1,45 @@
+{{- if .Values.llmApi.enabled }}
+{{- $ingress := .Values.llmApi.ingress }}
+{{- if $ingress.enabled }}
+{{- /* Ingress for llm-api: every listed host/path is routed to the llm-api Service. */}}
+{{- $backend := printf "%s-llm-api" (include "jan-server.fullname" .) }}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ $backend }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- nindent 4 (include "jan-server.labels" .) }}
+    app.kubernetes.io/component: llm-api
+  {{- if $ingress.annotations }}
+  annotations:
+    {{- nindent 4 (toYaml $ingress.annotations) }}
+  {{- end }}
+spec:
+  {{- with $ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- with $ingress.tls }}
+  tls:
+    {{- range $entry := . }}
+    - hosts:
+        {{- range $host := $entry.hosts }}
+        - {{ quote $host }}
+        {{- end }}
+      secretName: {{ $entry.secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range $rule := $ingress.hosts }}
+    - host: {{ quote $rule.host }}
+      http:
+        paths:
+          {{- range $item := $rule.paths }}
+          - path: {{ $item.path }}
+            pathType: {{ $item.pathType }}
+            backend:
+              service:
+                name: {{ $backend }}
+                port:
+                  number: {{ $.Values.llmApi.service.port }}
+          {{- end }}
+    {{- end }}
+{{- end }}
+{{- end }}
diff --git a/k8s/jan-server/templates/llm-api-secret.yaml b/k8s/jan-server/templates/llm-api-secret.yaml
new file mode 100644
index 00000000..757827a4
--- /dev/null
+++ b/k8s/jan-server/templates/llm-api-secret.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.llmApi.enabled }}
+{{- /*
+  Secret for llm-api. databaseUrl is rendered with tpl, so the value may itself
+  contain template expressions.
+*/}}
+{{- $dbUrl := tpl .Values.llmApi.secrets.databaseUrl . }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ printf "%s-llm-api-secret" (include "jan-server.fullname" .) }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- nindent 4 (include "jan-server.labels" .) }}
+    app.kubernetes.io/component: llm-api
+type: Opaque
+stringData:
+  database-url: {{ quote $dbUrl }}
+{{- end }}
diff --git a/k8s/jan-server/templates/mcp-tools-deployment.yaml b/k8s/jan-server/templates/mcp-tools-deployment.yaml
new file mode 100644
index 00000000..47a56869
--- /dev/null
+++ b/k8s/jan-server/templates/mcp-tools-deployment.yaml
@@ -0,0 +1,101 @@
+{{- if .Values.mcpTools.enabled }}
+# Deployment, Service and Secret for the mcp-tools component.
+# All three are rendered only when mcpTools.enabled is set.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-mcp-tools
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: mcp-tools
+spec:
+  # replicas is left out when mcpTools.autoscaling.enabled is set.
+  {{- if not .Values.mcpTools.autoscaling.enabled }}
+  replicas: {{ .Values.mcpTools.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: mcp-tools
+  template:
+    metadata:
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: mcp-tools
+    spec:
+      {{- with .Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "jan-server.serviceAccountName" . }}
+      containers:
+      - name: mcp-tools
+        image: "{{ .Values.mcpTools.image.registry }}/{{ .Values.mcpTools.image.repository }}:{{ .Values.mcpTools.image.tag }}"
+        imagePullPolicy: {{ .Values.mcpTools.image.pullPolicy }}
+        ports:
+        - name: http
+          containerPort: {{ .Values.mcpTools.service.targetPort }}
+          protocol: TCP
+        env:
+        - name: HTTP_PORT
+          value: {{ .Values.mcpTools.env.HTTP_PORT | quote }}
+        - name: LOG_LEVEL
+          value: {{ .Values.mcpTools.env.LOG_LEVEL | quote }}
+        - name: LOG_FORMAT
+          value: {{ .Values.mcpTools.env.LOG_FORMAT | quote }}
+        # Backend URLs are built from the release fullname plus the configured
+        # service port of the searxng, vector-store and sandboxfusion components.
+        - name: SEARXNG_URL
+          value: "http://{{ include "jan-server.fullname" . }}-searxng:{{ .Values.searxng.service.port }}"
+        - name: VECTOR_STORE_URL
+          value: "http://{{ include "jan-server.fullname" . }}-vector-store:{{ .Values.vectorStore.service.port }}"
+        - name: SANDBOX_FUSION_URL
+          value: "http://{{ include "jan-server.fullname" . }}-sandboxfusion:{{ .Values.sandboxfusion.service.port }}"
+        # Read from the Secret defined at the bottom of this file; the reference
+        # is marked optional.
+        - name: SERPER_API_KEY
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-mcp-tools-secret
+              key: serper-api-key
+              optional: true
+        # Liveness and readiness both probe GET /healthz on the "http" port.
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 10
+          periodSeconds: 5
+        resources:
+          {{- toYaml .Values.mcpTools.resources | nindent 10 }}
+---
+# Service for mcp-tools; targetPort refers to the container port named "http".
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "jan-server.fullname" . }}-mcp-tools
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: mcp-tools
+spec:
+  type: {{ .Values.mcpTools.service.type }}
+  ports:
+  - port: {{ .Values.mcpTools.service.port }}
+    targetPort: http
+    protocol: TCP
+    name: http
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: mcp-tools
+---
+# Secret holding the Serper API key consumed by the Deployment above.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "jan-server.fullname" . }}-mcp-tools-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: mcp-tools
+type: Opaque
+stringData:
+  serper-api-key: {{ .Values.mcpTools.secrets.serperApiKey | quote }}
+{{- end }}
diff --git a/k8s/jan-server/templates/media-api-deployment.yaml b/k8s/jan-server/templates/media-api-deployment.yaml
new file mode 100644
index 00000000..07cc7904
--- /dev/null
+++ b/k8s/jan-server/templates/media-api-deployment.yaml
@@ -0,0 +1,130 @@
+{{- if .Values.mediaApi.enabled }}
+# Deployment and Service for the media-api component.
+# Both are rendered only when mediaApi.enabled is set.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-media-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: media-api
+spec:
+  # replicas is left out when mediaApi.autoscaling.enabled is set.
+  {{- if not .Values.mediaApi.autoscaling.enabled }}
+  replicas: {{ .Values.mediaApi.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: media-api
+  template:
+    metadata:
+      annotations:
+        # SHA-256 of the rendered Secret template; when it changes, the pod
+        # template changes with it.
+        checksum/secret: {{ include (print $.Template.BasePath "/media-api-secret.yaml") . | sha256sum }}
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: media-api
+    spec:
+      {{- with .Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "jan-server.serviceAccountName" . }}
+      # Pod runs as non-root UID 1000; volumes get group 1000.
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+        fsGroup: 1000
+      containers:
+      - name: media-api
+        image: "{{ .Values.mediaApi.image.registry }}/{{ .Values.mediaApi.image.repository }}:{{ .Values.mediaApi.image.tag }}"
+        imagePullPolicy: {{ .Values.mediaApi.image.pullPolicy }}
+        ports:
+        - name: http
+          containerPort: {{ .Values.mediaApi.service.targetPort }}
+          protocol: TCP
+        # Plain settings come from mediaApi.env and are quoted into strings.
+        env:
+        - name: MEDIA_API_PORT
+          value: {{ .Values.mediaApi.env.MEDIA_API_PORT | quote }}
+        - name: MEDIA_MAX_BYTES
+          value: {{ .Values.mediaApi.env.MEDIA_MAX_BYTES | quote }}
+        - name: MEDIA_PROXY_DOWNLOAD
+          value: {{ .Values.mediaApi.env.MEDIA_PROXY_DOWNLOAD | quote }}
+        - name: MEDIA_RETENTION_DAYS
+          value: {{ .Values.mediaApi.env.MEDIA_RETENTION_DAYS | quote }}
+        - name: MEDIA_REMOTE_FETCH_TIMEOUT
+          value: {{ .Values.mediaApi.env.MEDIA_REMOTE_FETCH_TIMEOUT | quote }}
+        - name: MEDIA_S3_PRESIGN_TTL
+          value: {{ .Values.mediaApi.env.MEDIA_S3_PRESIGN_TTL | quote }}
+        - name: MEDIA_S3_USE_PATH_STYLE
+          value: {{ .Values.mediaApi.env.MEDIA_S3_USE_PATH_STYLE | quote }}
+        - name: MEDIA_S3_REGION
+          value: {{ .Values.mediaApi.env.MEDIA_S3_REGION | quote }}
+        - name: AUTH_ENABLED
+          value: {{ .Values.mediaApi.env.AUTH_ENABLED | quote }}
+        # NOTE(review): AUTH_ISSUER and AUTH_JWKS_URL are emitted verbatim, whereas
+        # the llm-api template in this chart renders its ISSUER / JWKS_URL values
+        # through tpl - confirm these values never contain template expressions.
+        - name: AUTH_ISSUER
+          value: {{ .Values.mediaApi.env.AUTH_ISSUER | quote }}
+        - name: ACCOUNT
+          value: {{ .Values.mediaApi.env.ACCOUNT | quote }}
+        - name: AUTH_JWKS_URL
+          value: {{ .Values.mediaApi.env.AUTH_JWKS_URL | quote }}
+        # Database URL and S3 connection settings are read from the media-api Secret.
+        - name: MEDIA_DATABASE_URL
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-media-api-secret
+              key: database-url
+        - name: MEDIA_S3_ENDPOINT
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-media-api-secret
+              key: s3-endpoint
+        - name: MEDIA_S3_BUCKET
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-media-api-secret
+              key: s3-bucket
+        - name: MEDIA_S3_ACCESS_KEY_ID
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-media-api-secret
+              key: s3-access-key
+        - name: MEDIA_S3_SECRET_ACCESS_KEY
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-media-api-secret
+              key: s3-secret-key
+        # Liveness and readiness both probe GET /healthz on the "http" port;
+        # readiness starts earlier and polls more often.
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          timeoutSeconds: 3
+          failureThreshold: 3
+        resources:
+          {{- toYaml .Values.mediaApi.resources | nindent 10 }}
+---
+# Service for media-api; targetPort refers to the container port named "http".
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "jan-server.fullname" . }}-media-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: media-api
+spec:
+  type: {{ .Values.mediaApi.service.type }}
+  ports:
+  - port: {{ .Values.mediaApi.service.port }}
+    targetPort: http
+    protocol: TCP
+    name: http
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: media-api
+{{- end }}
diff --git a/k8s/jan-server/templates/media-api-ingress.yaml b/k8s/jan-server/templates/media-api-ingress.yaml
new file mode 100644
index 00000000..edea7653
--- /dev/null
+++ b/k8s/jan-server/templates/media-api-ingress.yaml
@@ -0,0 +1,45 @@
+{{- if .Values.mediaApi.enabled }}
+{{- if .Values.mediaApi.ingress.enabled }}{{- /* rendered only when both the service and its ingress are enabled */}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "jan-server.fullname" . }}-media-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: media-api
+  {{- with .Values.mediaApi.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.mediaApi.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- with .Values.mediaApi.ingress.tls }}
+  tls:
+    {{- range $entry := . }}
+    - secretName: {{ $entry.secretName }}
+      hosts:
+        {{- range $tlsHost := $entry.hosts }}
+        - {{ quote $tlsHost }}
+        {{- end }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range $rule := .Values.mediaApi.ingress.hosts }}
+    - host: {{ quote $rule.host }}
+      http:
+        paths:
+          {{- range $route := $rule.paths }}
+          - path: {{ $route.path }}
+            pathType: {{ $route.pathType }}
+            backend:
+              service:
+                name: {{ include "jan-server.fullname" $ }}-media-api
+                port:
+                  number: {{ $.Values.mediaApi.service.port }}
+          {{- end }}
+    {{- end }}
+{{- end }}
+{{- end }}
diff --git a/k8s/jan-server/templates/media-api-secret.yaml b/k8s/jan-server/templates/media-api-secret.yaml
new file mode 100644
index 00000000..dba2909f
--- /dev/null
+++ b/k8s/jan-server/templates/media-api-secret.yaml
@@ -0,0 +1,17 @@
+{{- if .Values.mediaApi.enabled }}{{- /* Secret for media-api; rendered only when mediaApi.enabled is set */}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "jan-server.fullname" . }}-media-api-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: media-api
+type: Opaque
+stringData:{{- /* plain-text values; the s3-* keys are the ones the media-api container reads through secretKeyRef */}}
+  database-url: {{ tpl .Values.mediaApi.secrets.databaseUrl . | quote }}{{- /* tpl evaluates any template actions embedded in the configured URL */}}
+  s3-endpoint: {{ .Values.mediaApi.secrets.s3Endpoint | quote }}
+  s3-bucket: {{ .Values.mediaApi.secrets.s3Bucket | quote }}
+  s3-access-key: {{ .Values.mediaApi.secrets.s3AccessKey | quote }}
+  s3-secret-key: {{ .Values.mediaApi.secrets.s3SecretKey | quote }}
+{{- end }}
diff --git a/k8s/jan-server/templates/response-api-deployment.yaml b/k8s/jan-server/templates/response-api-deployment.yaml
new file mode 100644
index 00000000..977bb09d
--- /dev/null
+++ b/k8s/jan-server/templates/response-api-deployment.yaml
@@ -0,0 +1,121 @@
+{{- /*
+Response API: renders the Deployment and its Service when
+.Values.responseApi.enabled is true.
+*/ -}}
+{{- if .Values.responseApi.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-response-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: response-api
+spec:
+  {{- /* replicas is emitted only while autoscaling is disabled */}}
+  {{- if not .Values.responseApi.autoscaling.enabled }}
+  replicas: {{ .Values.responseApi.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: response-api
+  template:
+    metadata:
+      annotations:
+        {{- /* hash of the rendered Secret, so the pod template changes whenever the Secret does */}}
+        checksum/secret: {{ include (print $.Template.BasePath "/response-api-secret.yaml") . | sha256sum }}
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: response-api
+    spec:
+      {{- with .Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "jan-server.serviceAccountName" . }}
+      securityContext:
+        runAsNonRoot: true
+        runAsUser: 1000
+        fsGroup: 1000
+      containers:
+      - name: response-api
+        image: "{{ .Values.responseApi.image.registry }}/{{ .Values.responseApi.image.repository }}:{{ .Values.responseApi.image.tag }}"
+        imagePullPolicy: {{ .Values.responseApi.image.pullPolicy }}
+        ports:
+        - name: http
+          containerPort: {{ .Values.responseApi.service.targetPort }}
+          protocol: TCP
+        env:
+        {{- /* every variable is declared once; LOG_LEVEL and the two tool limits fall back to defaults when unset in values */}}
+        - name: SERVICE_NAME
+          value: {{ .Values.responseApi.env.SERVICE_NAME | quote }}
+        - name: HTTP_PORT
+          value: {{ .Values.responseApi.env.HTTP_PORT | quote }}
+        - name: LOG_LEVEL
+          value: {{ .Values.responseApi.env.LOG_LEVEL | default "info" | quote }}
+        - name: LOG_FORMAT
+          value: {{ .Values.responseApi.env.LOG_FORMAT | quote }}
+        - name: AUTO_MIGRATE
+          value: {{ .Values.responseApi.env.AUTO_MIGRATE | quote }}
+        - name: OTEL_ENABLED
+          value: {{ .Values.responseApi.env.OTEL_ENABLED | quote }}
+        - name: MAX_TOOL_EXECUTION_DEPTH
+          value: {{ .Values.responseApi.env.MAX_TOOL_EXECUTION_DEPTH | default "8" | quote }}
+        - name: TOOL_EXECUTION_TIMEOUT
+          value: {{ .Values.responseApi.env.TOOL_EXECUTION_TIMEOUT | default "45s" | quote }}
+        - name: RESPONSE_DATABASE_URL
+          valueFrom:
+            secretKeyRef:
+              name: {{ include "jan-server.fullname" . }}-response-api-secret
+              key: database-url
+        - name: LLM_API_URL
+          value: {{ tpl .Values.responseApi.env.LLM_API_URL . | quote }}
+        - name: MCP_TOOLS_URL
+          value: {{ tpl .Values.responseApi.env.MCP_TOOLS_URL . | quote }}
+        - name: AUTH_ENABLED
+          value: "true"
+        - name: AUTH_ISSUER
+          value: {{ .Values.keycloak.issuer | quote }}
+        - name: ACCOUNT
+          value: {{ .Values.keycloak.account | default "account" | quote }}
+        - name: AUTH_JWKS_URL
+          value: {{ tpl .Values.keycloak.jwksUrl . | quote }}
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /healthz
+            port: http
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          timeoutSeconds: 3
+          failureThreshold: 3
+        resources:
+          {{- toYaml .Values.responseApi.resources | nindent 10 }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "jan-server.fullname" . }}-response-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: response-api
+spec:
+  type: {{ .Values.responseApi.service.type }}
+  ports:
+  - port: {{ .Values.responseApi.service.port }}
+    targetPort: http
+    protocol: TCP
+    name: http
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: response-api
+{{- end }}
diff --git a/k8s/jan-server/templates/response-api-ingress.yaml b/k8s/jan-server/templates/response-api-ingress.yaml
new file mode 100644
index 00000000..4e4dd3dd
--- /dev/null
+++ b/k8s/jan-server/templates/response-api-ingress.yaml
@@ -0,0 +1,43 @@
+{{- if and .Values.responseApi.enabled .Values.responseApi.ingress.enabled }}{{- /* needs both the service and its ingress enabled */}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "jan-server.fullname" . }}-response-api
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: response-api
+  {{- with .Values.responseApi.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.responseApi.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- with .Values.responseApi.ingress.tls }}
+  tls:
+    {{- range $entry := . }}
+    - secretName: {{ $entry.secretName }}
+      hosts:
+        {{- range $tlsHost := $entry.hosts }}
+        - {{ quote $tlsHost }}
+        {{- end }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range $rule := .Values.responseApi.ingress.hosts }}
+    - host: {{ quote $rule.host }}
+      http:
+        paths:
+          {{- range $route := $rule.paths }}
+          - path: {{ $route.path }}
+            pathType: {{ $route.pathType }}
+            backend:
+              service:
+                name: {{ include "jan-server.fullname" $ }}-response-api
+                port:
+                  number: {{ $.Values.responseApi.service.port }}
+          {{- end }}
+    {{- end }}
+{{- end }}
diff --git a/k8s/jan-server/templates/response-api-secret.yaml b/k8s/jan-server/templates/response-api-secret.yaml
new file mode 100644
index 00000000..04dff9dd
--- /dev/null
+++ b/k8s/jan-server/templates/response-api-secret.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.responseApi.enabled }}{{- /* Secret for response-api; its rendered text is also hashed into the Deployment's checksum/secret annotation */}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "jan-server.fullname" . }}-response-api-secret
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: response-api
+type: Opaque
+stringData:
+  database-url: {{ tpl .Values.responseApi.secrets.databaseUrl . | quote }}{{- /* read by the container as RESPONSE_DATABASE_URL; tpl evaluates template actions inside the value */}}
+{{- end }}
diff --git a/charts/jan-server/templates/serviceaccount.yaml b/k8s/jan-server/templates/serviceaccount.yaml
similarity index 53%
rename from charts/jan-server/templates/serviceaccount.yaml
rename to k8s/jan-server/templates/serviceaccount.yaml
index 2801059f..768c5ca3 100644
--- a/charts/jan-server/templates/serviceaccount.yaml
+++ b/k8s/jan-server/templates/serviceaccount.yaml
@@ -1,13 +1,13 @@
-{{- if .Values.gateway.serviceAccount.create -}}
+{{- if .Values.serviceAccount.create -}}{{- /* ServiceAccount is rendered only when serviceAccount.create is set */ -}}
 apiVersion: v1
 kind: ServiceAccount
 metadata:
   name: {{ include "jan-server.serviceAccountName" . }}
+  namespace: {{ .Release.Namespace }}
   labels:
     {{- include "jan-server.labels" . | nindent 4 }}
-  {{- with .Values.gateway.serviceAccount.annotations }}
+  {{- with .Values.serviceAccount.annotations }}{{- /* the annotations block is omitted when the value is empty */}}
   annotations:
     {{- toYaml . | nindent 4 }}
   {{- end }}
-automountServiceAccountToken: {{ .Values.gateway.serviceAccount.automount }}
-{{- end }}
\ No newline at end of file
+{{- end }}
diff --git a/k8s/jan-server/templates/support-services.yaml b/k8s/jan-server/templates/support-services.yaml
new file mode 100644
index 00000000..4c64db2b
--- /dev/null
+++ b/k8s/jan-server/templates/support-services.yaml
@@ -0,0 +1,189 @@
+{{- if .Values.searxng.enabled }}{{- /* SearXNG Deployment and Service; skipped unless searxng.enabled is set */}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-searxng
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: searxng
+spec:
+  replicas: {{ .Values.searxng.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: searxng
+  template:
+    metadata:
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: searxng
+    spec:
+      containers:
+      - name: searxng
+        image: "{{ .Values.searxng.image.registry }}/{{ .Values.searxng.image.repository }}:{{ .Values.searxng.image.tag }}"
+        imagePullPolicy: {{ .Values.searxng.image.pullPolicy }}
+        resources:
+          {{- toYaml .Values.searxng.resources | nindent 10 }}
+        ports:
+        - containerPort: {{ .Values.searxng.service.targetPort }}
+          name: http
+          protocol: TCP
+        env:
+        - name: SEARXNG_BASE_URL
+          value: {{ quote .Values.searxng.env.SEARXNG_BASE_URL }}
+        - name: SEARXNG_REDIS_URL{{- /* NOTE(review): no credentials in this URL; confirm behaviour when redis.auth.enabled is true */}}
+          value: "redis://{{ include "jan-server.redis.fullname" . }}:6379/0"
+        readinessProbe:
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          httpGet:
+            path: /
+            port: http
+        livenessProbe:
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          httpGet:
+            path: /
+            port: http
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "jan-server.fullname" . }}-searxng
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: searxng
+spec:
+  type: {{ .Values.searxng.service.type }}
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: searxng
+  ports:
+  - name: http
+    protocol: TCP
+    port: {{ .Values.searxng.service.port }}
+    targetPort: http
+{{- end }}
+
+---
+{{- if .Values.vectorStore.enabled }}{{- /* Vector store Deployment and Service; skipped unless vectorStore.enabled is set */}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-vector-store
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: vector-store
+spec:
+  replicas: {{ .Values.vectorStore.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: vector-store
+  template:
+    metadata:
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: vector-store
+    spec:
+      containers:
+      - name: vector-store{{- /* NOTE(review): unlike the sibling services, no liveness/readiness probes are declared here; confirm that is intended */}}
+        image: "{{ .Values.vectorStore.image.registry }}/{{ .Values.vectorStore.image.repository }}:{{ .Values.vectorStore.image.tag }}"
+        imagePullPolicy: {{ .Values.vectorStore.image.pullPolicy }}
+        resources:
+          {{- toYaml .Values.vectorStore.resources | nindent 10 }}
+        ports:
+        - containerPort: {{ .Values.vectorStore.service.targetPort }}
+          name: http
+          protocol: TCP
+        env:
+        - name: VECTOR_STORE_PORT
+          value: {{ quote .Values.vectorStore.env.VECTOR_STORE_PORT }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "jan-server.fullname" . }}-vector-store
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: vector-store
+spec:
+  type: {{ .Values.vectorStore.service.type }}
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: vector-store
+  ports:
+  - name: http
+    protocol: TCP
+    port: {{ .Values.vectorStore.service.port }}
+    targetPort: http
+{{- end }}
+
+---
+{{- if .Values.sandboxfusion.enabled }}{{- /* SandboxFusion Deployment and Service; skipped unless sandboxfusion.enabled is set */}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "jan-server.fullname" . }}-sandboxfusion
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: sandboxfusion
+spec:
+  replicas: {{ .Values.sandboxfusion.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "jan-server.selectorLabels" . | nindent 6 }}
+      app.kubernetes.io/component: sandboxfusion
+  template:
+    metadata:
+      labels:
+        {{- include "jan-server.selectorLabels" . | nindent 8 }}
+        app.kubernetes.io/component: sandboxfusion
+    spec:
+      containers:
+      - name: sandboxfusion
+        image: "{{ .Values.sandboxfusion.image.registry }}/{{ .Values.sandboxfusion.image.repository }}:{{ .Values.sandboxfusion.image.tag }}"
+        imagePullPolicy: {{ .Values.sandboxfusion.image.pullPolicy }}
+        resources:
+          {{- toYaml .Values.sandboxfusion.resources | nindent 10 }}
+        ports:
+        - containerPort: {{ .Values.sandboxfusion.service.targetPort }}
+          name: http
+          protocol: TCP
+        readinessProbe:
+          initialDelaySeconds: 10
+          periodSeconds: 5
+          httpGet:
+            path: /
+            port: http
+        livenessProbe:
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          httpGet:
+            path: /
+            port: http
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "jan-server.fullname" . }}-sandboxfusion
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "jan-server.labels" . | nindent 4 }}
+    app.kubernetes.io/component: sandboxfusion
+spec:
+  type: {{ .Values.sandboxfusion.service.type }}
+  selector:
+    {{- include "jan-server.selectorLabels" . | nindent 4 }}
+    app.kubernetes.io/component: sandboxfusion
+  ports:
+  - name: http
+    protocol: TCP
+    port: {{ .Values.sandboxfusion.service.port }}
+    targetPort: http
+{{- end }}
diff --git a/k8s/jan-server/values-development.yaml b/k8s/jan-server/values-development.yaml
new file mode 100644
index 00000000..97bed5eb
--- /dev/null
+++ b/k8s/jan-server/values-development.yaml
@@ -0,0 +1,196 @@
+# Jan Server - development values
+# Keeps resource requests and limits small for local development
+
+global:
+  imagePullSecrets: []
+  storageClass: standard
+
+## PostgreSQL - Development Configuration
+postgresql:
+  enabled: true
+  auth:
+    username: jan_user
+    password: "jan_password"  # must equal the jan_user password embedded in the default llmApi.secrets.databaseUrl, which this file does not override
+    database: jan_llm_api
+  primary:
+    persistence:
+      enabled: true
+      size: 5Gi
+    resources:
+      requests:
+        memory: 128Mi
+        cpu: 100m
+      limits:
+        memory: 256Mi
+        cpu: 200m
+
+## Redis - Development Configuration
+redis:
+  enabled: true
+  auth:
+    enabled: false
+  master:
+    persistence:
+      enabled: false
+    resources:
+      requests:
+        memory: 64Mi
+        cpu: 50m
+      limits:
+        memory: 128Mi
+        cpu: 100m
+
+## Keycloak - Development Configuration
+keycloak:
+  enabled: true
+  replicaCount: 1
+  admin:
+    username: admin
+    password: "admin"
+  database:
+    password: "jan_password"  # Keycloak logs in as jan_user (chart default), so this is postgresql.auth.password
+  resources:
+    requests:
+      memory: 256Mi
+      cpu: 100m
+    limits:
+      memory: 512Mi
+      cpu: 250m
+
+## Kong API gateway (development)
+kong:
+  enabled: true
+  replicaCount: 1
+  resources:
+    limits:
+      cpu: 100m
+      memory: 128Mi
+    requests:
+      cpu: 50m
+      memory: 64Mi
+  service:
+    type: ClusterIP  # cluster-internal only; use port-forward for local access
+
+## LLM API (development)
+llmApi:
+  enabled: true
+  replicaCount: 1
+  autoscaling:
+    enabled: false
+  image:
+    pullPolicy: Always  # the tag below is mutable, so re-pull on every pod start
+    tag: latest
+  env:
+    AUTO_MIGRATE: "true"
+    LOG_FORMAT: console
+    LOG_LEVEL: debug
+    OTEL_ENABLED: "false"
+  resources:
+    limits:
+      cpu: 250m
+      memory: 256Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+
+## Media API (development)
+mediaApi:
+  enabled: true
+  replicaCount: 1
+  autoscaling:
+    enabled: false
+  image:
+    pullPolicy: Always
+    tag: latest
+  env:
+    MEDIA_MAX_BYTES: "10485760"  # 10 * 1024 * 1024 bytes
+    MEDIA_RETENTION_DAYS: "7"
+  secrets:
+    apiKey: dev-media-key
+    serviceKey: dev-media-key
+  resources:
+    limits:
+      cpu: 250m
+      memory: 256Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+
+## Response API (development)
+responseApi:
+  enabled: true
+  replicaCount: 1
+  autoscaling:
+    enabled: false
+  image:
+    pullPolicy: Always
+    tag: latest
+  env:
+    AUTO_MIGRATE: "true"
+    LOG_FORMAT: console
+    LOG_LEVEL: debug
+    MAX_TOOL_EXECUTION_DEPTH: "8"
+  resources:
+    limits:
+      cpu: 250m
+      memory: 256Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+
+## MCP Tools (development)
+mcpTools:
+  enabled: true
+  replicaCount: 1
+  autoscaling:
+    enabled: false
+  image:
+    pullPolicy: Always
+    tag: latest
+  secrets:
+    serperApiKey: ""  # left empty in development
+  resources:
+    limits:
+      cpu: 250m
+      memory: 256Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+
+## Supporting services (development): SearXNG, vector store, SandboxFusion
+searxng:
+  enabled: true
+  replicaCount: 1
+  resources:
+    limits:
+      cpu: 100m
+      memory: 128Mi
+    requests:
+      cpu: 50m
+      memory: 64Mi
+
+vectorStore:
+  enabled: true
+  replicaCount: 1
+  resources:
+    limits:
+      cpu: 100m
+      memory: 128Mi
+    requests:
+      cpu: 50m
+      memory: 64Mi
+
+sandboxfusion:
+  enabled: true
+  replicaCount: 1
+  resources:
+    limits:
+      cpu: 200m
+      memory: 256Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+
+## Monitoring is switched off for development
+monitoring:
+  enabled: false
diff --git a/k8s/jan-server/values-production.yaml b/k8s/jan-server/values-production.yaml
new file mode 100644
index 00000000..55e61d87
--- /dev/null
+++ b/k8s/jan-server/values-production.yaml
@@ -0,0 +1,293 @@
+# Production values for Jan Server
+# Override default values for production deployment
+
+global:
+  storageClass: "gp3"  # AWS EBS gp3, change for your cloud provider
+  imagePullSecrets:
+    - name: docker-registry-secret
+
+## PostgreSQL - Production Configuration
+postgresql:
+  enabled: true
+  auth:
+    username: jan_user
+    password: "CHANGE_ME_STRONG_PASSWORD"  # Use external secret in production
+    database: jan_llm_api
+    postgresPassword: "CHANGE_ME_POSTGRES_PASSWORD"
+  primary:
+    persistence:
+      enabled: true
+      size: 50Gi
+      storageClass: "gp3"
+    resources:
+      requests:
+        memory: 1Gi
+        cpu: 500m
+      limits:
+        memory: 2Gi
+        cpu: 1000m
+  metrics:
+    enabled: true
+
+## Redis - Production Configuration
+redis:
+  enabled: true
+  auth:
+    enabled: true
+    password: "CHANGE_ME_REDIS_PASSWORD"
+  master:
+    persistence:
+      enabled: true
+      size: 8Gi
+    resources:
+      requests:
+        memory: 256Mi
+        cpu: 200m
+      limits:
+        memory: 512Mi
+        cpu: 500m
+
+## Keycloak - Production Configuration
+keycloak:
+  enabled: true
+  replicaCount: 2
+  admin:
+    username: admin
+    password: "CHANGE_ME_ADMIN_PASSWORD"
+  database:
+    username: keycloak
+    password: "CHANGE_ME_KC_DB_PASSWORD"
+    name: keycloak
+  resources:
+    requests:
+      memory: 1Gi
+      cpu: 500m
+    limits:
+      memory: 2Gi
+      cpu: 1000m
+  ingress:
+    enabled: true
+    className: "nginx"
+    annotations:
+      cert-manager.io/cluster-issuer: "letsencrypt-prod"
+      nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    hosts:
+      - host: auth.yourdomain.com
+        paths:
+          - path: /
+            pathType: Prefix
+    tls:
+      - secretName: keycloak-tls
+        hosts:
+          - auth.yourdomain.com
+
+## Kong - Production Configuration
+kong:
+  enabled: true
+  replicaCount: 2
+  service:
+    type: LoadBalancer
+    annotations:
+      service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
+  resources:
+    requests:
+      memory: 256Mi
+      cpu: 200m
+    limits:
+      memory: 512Mi
+      cpu: 500m
+  ingress:
+    enabled: true
+    className: "nginx"
+    annotations:
+      cert-manager.io/cluster-issuer: "letsencrypt-prod"
+    hosts:
+      - host: api.yourdomain.com
+        paths:
+          - path: /
+            pathType: Prefix
+    tls:
+      - secretName: kong-tls
+        hosts:
+          - api.yourdomain.com
+
+## LLM API - Production Configuration
+llmApi:
+  enabled: true
+  replicaCount: 3
+  image:
+    registry: your-registry.io
+    repository: jan/llm-api
+    tag: "v2.0.0"
+  resources:
+    requests:
+      memory: 512Mi
+      cpu: 500m
+    limits:
+      memory: 1Gi
+      cpu: 1000m
+  autoscaling:
+    enabled: true
+    minReplicas: 3
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 70
+    targetMemoryUtilizationPercentage: 80
+  env:
+    LOG_LEVEL: info
+    LOG_FORMAT: json
+    AUTO_MIGRATE: "false"  # Run migrations separately in prod
+    OTEL_ENABLED: "true"
+  secrets:
+    # Replaces the chart default URL, whose embedded jan_user password differs from postgresql.auth.password above.
+    databaseUrl: "postgres://jan_user:CHANGE_ME_STRONG_PASSWORD@{{ .Release.Name }}-postgresql:5432/jan_llm_api?sslmode=require"
+  ingress:
+    enabled: true
+    className: "nginx"
+    hosts:
+      - host: llm-api.yourdomain.com
+        paths:
+          - path: /
+            pathType: Prefix
+
+## Media API - Production Configuration
+mediaApi:
+  enabled: true
+  replicaCount: 3
+  image:
+    registry: your-registry.io
+    repository: jan/media-api
+    tag: "v2.0.0"
+  resources:
+    requests:
+      memory: 512Mi
+      cpu: 500m
+    limits:
+      memory: 1Gi
+      cpu: 1000m
+  autoscaling:
+    enabled: true
+    minReplicas: 3
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 70
+  env:
+    MEDIA_MAX_BYTES: "52428800"  # 50MB
+    MEDIA_RETENTION_DAYS: "90"
+  secrets:
+    databaseUrl: "postgres://media:CHANGE_ME_MEDIA_DB_PASSWORD@{{ .Release.Name }}-postgresql:5432/media_api?sslmode=require"  # rendered through tpl, so the host follows the release name
+    serviceKey: "CHANGE_ME_STRONG_SERVICE_KEY"
+    apiKey: "CHANGE_ME_STRONG_API_KEY"
+    s3Endpoint: "https://s3.your-region.amazonaws.com"
+    s3Bucket: "your-production-bucket"
+    s3AccessKey: "YOUR_AWS_ACCESS_KEY"
+    s3SecretKey: "YOUR_AWS_SECRET_KEY"
+  ingress:
+    enabled: true
+    className: "nginx"
+    hosts:
+      - host: media.yourdomain.com
+        paths:
+          - path: /
+            pathType: Prefix
+
+## Response API - Production Configuration
+responseApi:
+  enabled: true
+  replicaCount: 3
+  image:
+    registry: your-registry.io
+    repository: jan/response-api
+    tag: "v2.0.0"
+  resources:
+    requests:
+      memory: 512Mi
+      cpu: 500m
+    limits:
+      memory: 1Gi
+      cpu: 1000m
+  autoscaling:
+    enabled: true
+    minReplicas: 3
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 70
+  env:
+    LOG_LEVEL: info
+    LOG_FORMAT: json
+    AUTO_MIGRATE: "false"
+    MAX_TOOL_EXECUTION_DEPTH: "10"
+    TOOL_EXECUTION_TIMEOUT: "60s"
+  secrets:
+    databaseUrl: "postgres://jan_user:CHANGE_ME_STRONG_PASSWORD@{{ .Release.Name }}-postgresql:5432/jan_llm_api?sslmode=require"  # rendered through tpl; the password must equal postgresql.auth.password
+  ingress:
+    enabled: true
+    className: "nginx"
+    hosts:
+      - host: responses.yourdomain.com
+        paths:
+          - path: /
+            pathType: Prefix
+
+## MCP Tools (production)
+mcpTools:
+  enabled: true
+  replicaCount: 3
+  autoscaling:
+    enabled: true
+    maxReplicas: 10
+    minReplicas: 3
+  image:
+    registry: your-registry.io
+    repository: jan/mcp-tools
+    tag: v2.0.0
+  secrets:
+    serperApiKey: YOUR_SERPER_API_KEY  # placeholder; replace before deploying
+  resources:
+    limits:
+      cpu: 1000m
+      memory: 1Gi
+    requests:
+      cpu: 500m
+      memory: 512Mi
+
+## Supporting services (production): SearXNG, vector store, SandboxFusion
+searxng:
+  enabled: true
+  replicaCount: 2
+  resources:
+    limits:
+      cpu: 500m
+      memory: 512Mi
+    requests:
+      cpu: 200m
+      memory: 256Mi
+
+vectorStore:
+  enabled: true
+  replicaCount: 2
+  resources:
+    limits:
+      cpu: 500m
+      memory: 512Mi
+    requests:
+      cpu: 200m
+      memory: 256Mi
+
+sandboxfusion:
+  enabled: true
+  replicaCount: 2
+  resources:
+    limits:
+      cpu: 800m
+      memory: 1Gi
+    requests:
+      cpu: 400m
+      memory: 512Mi
+
+## Monitoring (production): every component switched on
+monitoring:
+  enabled: true
+  grafana:
+    enabled: true
+  jaeger:
+    enabled: true
+  prometheus:
+    enabled: true
diff --git a/k8s/jan-server/values.yaml b/k8s/jan-server/values.yaml
new file mode 100644
index 00000000..e3db3726
--- /dev/null
+++ b/k8s/jan-server/values.yaml
@@ -0,0 +1,551 @@
+# Default values for jan-server
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+## Settings shared by every component
+global:
+  ## Secrets used to pull images (list of {name: ...} entries)
+  imagePullSecrets: []
+  ## Image registry for all images
+  imageRegistry: ""
+  storageClass: ""
+
+## Extra labels applied to all resources
+commonLabels: {}
+
+## Extra annotations applied to all resources
+commonAnnotations: {}
+
+## ServiceAccount (rendered only when create is true)
+serviceAccount:
+  annotations: {}
+  create: true
+  name: ""
+
+## ============================================================================
+## PostgreSQL Database (using Bitnami chart)
+## ============================================================================
+postgresql:
+  enabled: true
+  image:
+    registry: docker.io
+    repository: bitnami/postgresql
+    tag: "latest"  # NOTE(review): floating tag; consider pinning a version for reproducible installs
+    pullPolicy: IfNotPresent  # still uses a node-local image when present, but can pull when it is missing (Never cannot)
+  auth:
+    username: jan_user
+    password: jan_password
+    database: jan_llm_api
+    postgresPassword: postgres
+  primary:
+    persistence:
+      enabled: true
+      size: 10Gi
+    resources:
+      requests:
+        memory: 256Mi
+        cpu: 250m
+      limits:
+        memory: 512Mi
+        cpu: 500m
+    livenessProbe:
+      enabled: true
+      initialDelaySeconds: 60
+      periodSeconds: 10
+      timeoutSeconds: 5
+      failureThreshold: 6
+    readinessProbe:
+      enabled: true
+      initialDelaySeconds: 10
+      periodSeconds: 10
+      timeoutSeconds: 5
+      failureThreshold: 6
+
+## ============================================================================
+## Redis (for SearXNG and caching)
+## ============================================================================
+redis:
+  enabled: true
+  image:
+    registry: docker.io
+    repository: bitnami/redis
+    tag: "latest"  # NOTE(review): floating tag; consider pinning a version for reproducible installs
+    pullPolicy: IfNotPresent  # still uses a node-local image when present, but can pull when it is missing (Never cannot)
+  auth:
+    enabled: false
+  master:
+    persistence:
+      enabled: false
+    resources:
+      requests:
+        memory: 128Mi
+        cpu: 100m
+      limits:
+        memory: 256Mi
+        cpu: 200m
+
+## ============================================================================
+## Keycloak Authentication Server
+## ============================================================================
+keycloak:
+  enabled: true
+  replicaCount: 1
+
+  image:
+    pullPolicy: IfNotPresent
+    registry: quay.io
+    repository: keycloak/keycloak
+    tag: "24.0.5"
+
+  service:
+    port: 8085
+    targetPort: 8085
+    type: ClusterIP
+
+  ingress:
+    enabled: false
+    annotations: {}
+    className: ""
+    tls: []
+    hosts:
+      - host: auth.example.com
+        paths:
+          - pathType: Prefix
+            path: /
+
+  resources:
+    limits:
+      cpu: 500m
+      memory: 1Gi
+    requests:
+      cpu: 250m
+      memory: 512Mi
+
+  env:
+    - name: KC_DB
+      value: postgres
+    - name: KC_DB_URL_HOST
+      value: "{{ .Release.Name }}-postgresql"
+    - name: KC_DB_USERNAME
+      value: jan_user
+    - name: KC_DB_PASSWORD
+      valueFrom:
+        secretKeyRef:
+          name: "{{ .Release.Name }}-keycloak-secret"
+          key: password
+    - name: KC_DB_URL_DATABASE
+      value: jan_llm_api
+    - name: KC_FEATURES
+      value: token-exchange
+    - name: KC_HTTP_PORT
+      value: "8085"
+    - name: KC_HOSTNAME_STRICT_HTTPS
+      value: "false"
+    - name: KEYCLOAK_ADMIN
+      value: admin
+    - name: KEYCLOAK_ADMIN_PASSWORD
+      valueFrom:
+        secretKeyRef:
+          name: "{{ .Release.Name }}-keycloak-secret"
+          key: admin-password
+
+  admin:
+    password: changeme  # Override in production
+    username: admin
+
+  database:
+    name: jan_llm_api
+    password: jan_password  # jan_user's password
+    username: jan_user
+
+  # Issuer URL used for JWT validation. NOTE(review): host "keycloak" differs from the "{{ .Release.Name }}-keycloak" host in llmApi.env; confirm
+  issuer: "http://keycloak:8085/realms/jan"
+
+  # JWKS endpoint used to verify JWT signatures
+  jwksUrl: "http://keycloak:8085/realms/jan/protocol/openid-connect/certs"
+
+  # Account/audience value expected in JWT claims
+  account: account
+
+## ============================================================================
+## Kong API Gateway
+## ============================================================================
+kong:
+  enabled: true
+  replicaCount: 1
+
+  image:
+    pullPolicy: IfNotPresent
+    registry: docker.io
+    repository: kong
+    tag: "3.5"
+
+  service:
+    port: 8000
+    targetPort: 8000
+    type: LoadBalancer
+
+  ingress:
+    enabled: false
+    annotations: {}
+    className: ""
+    tls: []
+    hosts:
+      - host: api.example.com
+        paths:
+          - pathType: Prefix
+            path: /
+
+  cors:
+    origins:
+      - "http://localhost"
+      - "http://localhost:3000"
+      - "http://127.0.0.1"
+
+  resources:
+    limits:
+      cpu: 200m
+      memory: 256Mi
+    requests:
+      cpu: 100m
+      memory: 128Mi
+
+  config:
+    # Intentionally empty: Kong configuration will be mounted from ConfigMap
+
+## ============================================================================
+## LLM API Service
+## ============================================================================
+llmApi:  # LLM API service: image, networking, resources, env and secrets
+  enabled: true
+  replicaCount: 2
+  
+  image:
+    registry: docker.io
+    repository: jan/llm-api
+    tag: "latest"
+    pullPolicy: Never  # never pulled: the image must already exist on the node. NOTE(review): confirm this is meant for local clusters only
+  
+  service:
+    type: ClusterIP
+    port: 8080  # same value as env.HTTP_PORT below
+    targetPort: 8080
+  
+  ingress:
+    enabled: false  # off by default; the host below is an example.com placeholder
+    className: ""
+    annotations: {}
+    hosts:
+      - host: llm-api.example.com
+        paths:
+          - path: /
+            pathType: Prefix
+    tls: []
+  
+  resources:
+    requests:
+      memory: 256Mi
+      cpu: 250m
+    limits:
+      memory: 512Mi
+      cpu: 500m
+  
+  autoscaling:
+    enabled: false  # while false, replicaCount above is the only replica setting in effect here
+    minReplicas: 2
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 80
+    targetMemoryUtilizationPercentage: 80
+  
+  env:  # flat name -> value map; booleans and numbers are quoted so they stay strings
+    HTTP_PORT: "8080"
+    LOG_LEVEL: info
+    LOG_FORMAT: json
+    AUTO_MIGRATE: "true"
+    OTEL_ENABLED: "false"
+    JAN_DEFAULT_NODE_SETUP: "false"
+    JAN_DEFAULT_NODE_URL: "http://localhost:8101/v1"  # NOTE(review): inside a pod, localhost is the pod itself - confirm
+    KEYCLOAK_BASE_URL: "http://{{ .Release.Name }}-keycloak:8085"  # NOTE(review): Helm does not render templates in values files; this resolves only if the chart passes it through tpl - confirm
+    KEYCLOAK_REALM: "jan"
+    BACKEND_CLIENT_ID: "llm-api"
+    BACKEND_CLIENT_SECRET: "llm-api-secret"  # hard-coded default secret; override outside development
+    CLIENT: "jan-client"
+    ACCOUNT: "account"
+    JWKS_URL: "http://{{ .Release.Name }}-keycloak:8085/realms/jan/protocol/openid-connect/certs"
+    ISSUER: "http://{{ .Release.Name }}-keycloak:8085/realms/jan"
+  
+  secrets:
+    # Database connection
+    databaseUrl: "postgres://jan_user:jan_password@{{ .Release.Name }}-postgresql:5432/jan_llm_api?sslmode=disable"  # embeds a default password and disables TLS to PostgreSQL
+
+## ============================================================================
+## Media API Service
+## ============================================================================
+mediaApi:  # Media API service: image, networking, resources, env and secrets
+  enabled: true
+  replicaCount: 2
+  
+  image:
+    registry: docker.io
+    repository: jan/media-api
+    tag: "latest"
+    pullPolicy: Never  # never pulled: the image must already exist on the node. NOTE(review): confirm this is meant for local clusters only
+  
+  service:
+    type: ClusterIP
+    port: 8285  # same value as env.MEDIA_API_PORT below
+    targetPort: 8285
+  
+  ingress:
+    enabled: false  # off by default; the host below is an example.com placeholder
+    className: ""
+    annotations: {}
+    hosts:
+      - host: media-api.example.com
+        paths:
+          - path: /
+            pathType: Prefix
+    tls: []
+  
+  resources:
+    requests:
+      memory: 256Mi
+      cpu: 250m
+    limits:
+      memory: 512Mi
+      cpu: 500m
+  
+  autoscaling:
+    enabled: false
+    minReplicas: 2
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 80
+  
+  env:  # flat name -> value map; booleans and numbers are quoted so they stay strings
+    MEDIA_API_PORT: "8285"
+    MEDIA_MAX_BYTES: "20971520"  # 20 * 1024 * 1024 bytes (20 MiB)
+    MEDIA_PROXY_DOWNLOAD: "true"
+    MEDIA_RETENTION_DAYS: "30"
+    MEDIA_REMOTE_FETCH_TIMEOUT: "15s"
+    MEDIA_S3_PRESIGN_TTL: "5m"
+    MEDIA_S3_USE_PATH_STYLE: "true"
+    MEDIA_S3_REGION: "us-west-2"
+    AUTH_ENABLED: "true"
+    AUTH_ISSUER: "http://localhost:8085/realms/jan"  # NOTE(review): llmApi.env.ISSUER uses the release-prefixed keycloak host instead - confirm which issuer tokens carry
+    ACCOUNT: "account"
+    AUTH_JWKS_URL: "http://keycloak:8085/realms/jan/protocol/openid-connect/certs"  # NOTE(review): bare "keycloak" host, unlike llmApi.env.JWKS_URL - confirm the service name
+  
+  secrets:
+    # Database connection - using jan_user and jan_llm_api database
+    databaseUrl: "postgres://jan_user:jan_password@{{ .Release.Name }}-postgresql:5432/jan_llm_api?sslmode=disable"  # embeds a default password and disables TLS to PostgreSQL
+    # S3 credentials
+    s3Endpoint: https://s3.menlo.ai
+    s3Bucket: platform-dev
+    s3AccessKey: XXXXX  # placeholder; must be overridden with a real key
+    s3SecretKey: YYYY  # placeholder; must be overridden with a real key
+
+## ============================================================================
+## Response API Service
+## ============================================================================
+responseApi:  # Response API service: image, networking, resources, env and secrets
+  enabled: true
+  replicaCount: 2
+  
+  image:
+    registry: docker.io
+    repository: jan/response-api
+    tag: "latest"
+    pullPolicy: Never  # never pulled: the image must already exist on the node. NOTE(review): confirm this is meant for local clusters only
+  
+  service:
+    type: ClusterIP
+    port: 8082  # same value as env.HTTP_PORT below
+    targetPort: 8082
+  
+  ingress:
+    enabled: false  # off by default; the host below is an example.com placeholder
+    className: ""
+    annotations: {}
+    hosts:
+      - host: response-api.example.com
+        paths:
+          - path: /
+            pathType: Prefix
+    tls: []
+  
+  resources:
+    requests:
+      memory: 256Mi
+      cpu: 250m
+    limits:
+      memory: 512Mi
+      cpu: 500m
+  
+  autoscaling:
+    enabled: false
+    minReplicas: 2
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 80
+  
+  env:  # flat name -> value map; booleans and numbers are quoted so they stay strings
+    SERVICE_NAME: "response-api"
+    HTTP_PORT: "8082"
+    LOG_LEVEL: info
+    LOG_FORMAT: json
+    AUTO_MIGRATE: "true"
+    OTEL_ENABLED: "false"
+    MAX_TOOL_EXECUTION_DEPTH: "8"
+    TOOL_EXECUTION_TIMEOUT: "45s"
+    LLM_API_URL: "http://{{ .Release.Name }}-llm-api:8080"  # port equals llmApi.service.port. NOTE(review): placeholder resolves only if the chart renders it with tpl - confirm
+    MCP_TOOLS_URL: "http://{{ .Release.Name }}-mcp-tools:8091"  # port equals mcpTools.service.port
+  
+  secrets:
+    # Database connection - using jan_user and jan_llm_api database
+    databaseUrl: "postgres://jan_user:jan_password@{{ .Release.Name }}-postgresql:5432/jan_llm_api?sslmode=disable"  # embeds a default password and disables TLS to PostgreSQL
+
+## ============================================================================
+## MCP Tools Service
+## ============================================================================
+mcpTools:  # MCP Tools service: image, networking, resources, env and secrets
+  enabled: true
+  replicaCount: 2
+  
+  image:
+    registry: docker.io
+    repository: jan/mcp-tools
+    tag: "latest"
+    pullPolicy: Never  # never pulled: the image must already exist on the node. NOTE(review): confirm this is meant for local clusters only
+  
+  service:
+    type: ClusterIP
+    port: 8091  # same value as env.HTTP_PORT below
+    targetPort: 8091
+  
+  ingress:
+    enabled: false  # off by default; the host below is an example.com placeholder
+    className: ""
+    annotations: {}
+    hosts:
+      - host: mcp-tools.example.com
+        paths:
+          - path: /
+            pathType: Prefix
+    tls: []
+  
+  resources:
+    requests:
+      memory: 256Mi
+      cpu: 250m
+    limits:
+      memory: 512Mi
+      cpu: 500m
+  
+  autoscaling:
+    enabled: false
+    minReplicas: 2
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 80
+  
+  env:
+    HTTP_PORT: "8091"
+    LOG_LEVEL: info
+    LOG_FORMAT: json
+  
+  secrets:
+    serperApiKey: ""  # empty by default; supply a key through an override
+
+## ============================================================================
+## SearXNG Meta Search Engine
+## ============================================================================
+searxng:  # SearXNG meta search engine component
+  enabled: true
+  replicaCount: 1
+  
+  image:
+    registry: docker.io
+    repository: searxng/searxng
+    tag: "latest"  # floating tag; not pinned to a release
+    pullPolicy: IfNotPresent
+  
+  service:
+    type: ClusterIP
+    port: 8080
+    targetPort: 8080
+  
+  resources:
+    requests:
+      memory: 128Mi
+      cpu: 100m
+    limits:
+      memory: 256Mi
+      cpu: 200m
+  
+  env:
+    SEARXNG_BASE_URL: "http://localhost:8086/"  # NOTE(review): port 8086 differs from service.port 8080 above - confirm
+
+## ============================================================================
+## Vector Store Service
+## ============================================================================
+vectorStore:  # Vector Store service component
+  enabled: false  # disabled by default
+  replicaCount: 1
+  
+  image:
+    registry: docker.io
+    repository: jan/vector-store
+    tag: "latest"
+    pullPolicy: IfNotPresent
+  
+  service:
+    type: ClusterIP
+    port: 3015  # same value as env.VECTOR_STORE_PORT below
+    targetPort: 3015
+  
+  resources:
+    requests:
+      memory: 128Mi
+      cpu: 100m
+    limits:
+      memory: 256Mi
+      cpu: 200m
+  
+  env:
+    VECTOR_STORE_PORT: "3015"
+
+## ============================================================================
+## SandboxFusion Code Interpreter
+## ============================================================================
+sandboxfusion:  # SandboxFusion code interpreter component
+  enabled: true
+  replicaCount: 1
+  
+  image:
+    registry: docker.io
+    repository: volcengine/sandbox-fusion
+    tag: "server-20250609"  # pinned to a dated tag, unlike the "latest" tags used elsewhere in this file
+    pullPolicy: IfNotPresent
+  
+  service:
+    type: ClusterIP
+    port: 8080
+    targetPort: 8080
+  
+  resources:
+    requests:
+      memory: 256Mi
+      cpu: 200m
+    limits:
+      memory: 512Mi
+      cpu: 400m
+
+## ============================================================================
+## Monitoring (Optional)
+## ============================================================================
+monitoring:  # optional monitoring stack; every switch below defaults to off
+  enabled: false  # top-level switch for the monitoring section
+  prometheus:
+    enabled: false
+  grafana:
+    enabled: false
+  jaeger:
+    enabled: false
diff --git a/keycloak/import/realm-jan.json b/keycloak/import/realm-jan.json
new file mode 100644
index 00000000..8b89e88c
--- /dev/null
+++ b/keycloak/import/realm-jan.json
@@ -0,0 +1,138 @@
+{
+  "realm": "jan",
+  "enabled": true,
+  "registrationAllowed": true,
+  "rememberMe": true,
+  "loginWithEmailAllowed": true,
+  "duplicateEmailsAllowed": false,
+  "resetPasswordAllowed": false,
+  "verifyEmail": false,
+  "registrationEmailAsUsername": true,
+  "requiredActions": [],
+  "attributes": {
+    "tokenExchangeEnabled": "true"
+  },
+  "clients": [
+    {
+      "clientId": "backend",
+      "name": "Backend Automation",
+      "protocol": "openid-connect",
+      "publicClient": false,
+      "bearerOnly": false,
+      "secret": "backend-secret",
+      "serviceAccountsEnabled": true,
+      "directAccessGrantsEnabled": true,
+      "standardFlowEnabled": false,
+      "fullScopeAllowed": true,
+      "authorizationServicesEnabled": true,
+      "attributes": {
+        "access.token.lifespan": "300",
+        "oauth2.device.authorization.grant.enabled": "true",
+        "token.exchange.grant.enabled": "true"
+      },
+      "defaultClientScopes": ["web-origins", "acr", "profile", "roles", "email"],
+      "optionalClientScopes": ["address", "phone", "offline_access", "microprofile-jwt"]
+    },
+    {
+      "clientId": "jan-client",
+      "name": "Jan Client",
+      "protocol": "openid-connect",
+      "publicClient": true,
+      "bearerOnly": false,
+      "directAccessGrantsEnabled": true,
+      "standardFlowEnabled": true,
+      "serviceAccountsEnabled": false,
+      "implicitFlowEnabled": false,
+      "authorizationServicesEnabled": false,
+      "redirectUris": [
+        "http://localhost:8000/auth/callback",
+        "http://localhost:8000/auth/google/callback",
+        "http://localhost:8080/auth/callback",
+        "http://localhost:8080/auth/google/callback",
+        "http://localhost:3000/*",
+        "http://localhost:3001/*",
+        "http://127.0.0.1:8000/auth/callback",
+        "http://127.0.0.1:8000/auth/google/callback",
+        "http://127.0.0.1:8080/auth/callback",
+        "http://127.0.0.1:8080/auth/google/callback",
+        "http://127.0.0.1:3000/*",
+        "http://127.0.0.1:3001/*",
+        "https://yourdomain.com/auth/callback",
+        "https://yourdomain.com/auth/google/callback"
+      ],
+      "webOrigins": [
+        "http://localhost:3000",
+        "http://localhost:3001",
+        "http://localhost:8000",
+        "http://localhost:8080",
+        "http://127.0.0.1:3000",
+        "http://127.0.0.1:3001",
+        "http://127.0.0.1:8000",
+        "http://127.0.0.1:8080",
+        "https://yourdomain.com"
+      ],
+      "attributes": {
+        "oauth2.device.authorization.grant.enabled": "true",
+        "access.token.lifespan": "3600",
+        "refresh.token.lifespan": "2592000",
+        "token.exchange.grant.enabled": "true",
+        "pkce.code.challenge.method": "S256"
+      },
+      "protocolMappers": [
+        {
+          "name": "preferred_username",
+          "protocol": "openid-connect",
+          "protocolMapper": "oidc-usermodel-property-mapper",
+          "consentRequired": false,
+          "config": {
+            "user.attribute": "username",
+            "id.token.claim": "true",
+            "access.token.claim": "true",
+            "claim.name": "preferred_username",
+            "jsonType.label": "String"
+          }
+        },
+        {
+          "name": "guest",
+          "protocol": "openid-connect",
+          "protocolMapper": "oidc-usermodel-attribute-mapper",
+          "consentRequired": false,
+          "config": {
+            "user.attribute": "guest",
+            "id.token.claim": "true",
+            "access.token.claim": "true",
+            "claim.name": "guest",
+            "jsonType.label": "String"
+          }
+        },
+        {
+          "name": "pid",
+          "protocol": "openid-connect",
+          "protocolMapper": "oidc-usermodel-attribute-mapper",
+          "consentRequired": false,
+          "config": {
+            "user.attribute": "pid",
+            "id.token.claim": "true",
+            "access.token.claim": "true",
+            "claim.name": "pid",
+            "jsonType.label": "String"
+          }
+        }
+      ]
+    }
+  ],
+  "roles": {
+    "realm": [
+      {
+        "name": "guest",
+        "description": "Ephemeral guest user"
+      },
+      {
+        "name": "user",
+        "description": "Registered user"
+      }
+    ]
+  },
+  "internationalizationEnabled": false,
+  "eventsEnabled": false
+}
\ No newline at end of file
diff --git a/kong/kong-dev-full.yml b/kong/kong-dev-full.yml
new file mode 100644
index 00000000..3c759d99
--- /dev/null
+++ b/kong/kong-dev-full.yml
@@ -0,0 +1,438 @@
+_format_version: "3.0"
+_transform: true
+
+# Kong configuration for dev-full mode
+# Routes to services in Docker or on host.docker.internal
+# Allows stopping Docker services and running them manually on host
+
+consumers:  # the two consumers declared for this gateway config
+  - username: kong-anon-jwt  # named by the `anonymous` setting of every jwt plugin in this file
+    custom_id: anon-jwt
+    tags: [anonymous, auth, fallback]
+  - username: keycloak-issuer  # owns the RS256 credential declared under jwt_secrets
+    custom_id: keycloak-jwt
+    tags: [auth, jwt, keycloak]
+
+jwt_secrets:  # JWT credential used by the jwt plugins to verify token signatures
+  - consumer: keycloak-issuer
+    algorithm: RS256  # NOTE(review): the public key below is pinned statically; confirm it matches the realm's current signing key
+    key: http://localhost:8085/realms/jan  # compared with the token's `iss` claim (the jwt plugins set key_claim_name: iss)
+    rsa_public_key: |
+      -----BEGIN PUBLIC KEY-----
+      MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAks4bK7EqsKVvrW6F8gRD
+      izRuGFzhfZVdHImVbmwavyK+yGrxVR5BOfbAYZy6/LnLei3aCmYbwKNgV+BU8Lch
+      +USX/BPpHswRXqf/GcBcdwAhqxAtwoKFG8KwTORP/RZGbVxOMS9D9T6iHPQmT7Md
+      4FyvHwTx7BwPx5oMIEOnur+NNaTsECN3cGR21SAnCtNCl188D3ubTsjUwERp6B4E
+      p2sVXsTDzT0ZOYbmmZiZJ59Fvk+0UNMn2uQyAj+j7lv15g6GtNSlG1DBnRKEVbOz
+      C50TfRUcCpQTrS8FkTTS0Pc/9MCOCHy9YDDDhdEuI5dvo9y9QUTIHPx4AhSubE0C
+      bwIDAQAB
+      -----END PUBLIC KEY-----
+
+plugins:  # top-level plugins, tagged "global"; not attached to a specific service or route
+  - name: rate-limiting
+    tags: [global, security, rate]
+    config:
+      minute: 600  # requests allowed per minute
+      hour: 10000  # requests allowed per hour
+      policy: local  # counters are kept locally on the Kong node
+      limit_by: ip
+      fault_tolerant: true
+  - name: request-transformer
+    tags: [global, security, transformer]
+    config:
+      add:
+        headers:  # static headers added to proxied requests
+          - "X-Gateway-Auth: kong"
+          - "X-Gateway-Version: 3.5"
+          - "X-Dev-Mode: full"
+
+# Service upstreams - every target below points at host.docker.internal:<port>
+# To run a service manually on the host (llm-api shown as the example):
+# 1. Stop the Docker service: docker compose stop llm-api
+# 2. Run it on the host: cd services/llm-api && go run ./cmd/server
+# 3. Kong keeps routing to host.docker.internal:8080, now served by the host process
+
+upstreams:  # one single-target upstream per backend; services reference these names via `host`
+  - name: llm-api-upstream
+    algorithm: round-robin
+    targets:
+      - target: host.docker.internal:8080
+        weight: 100
+    healthchecks:
+      active:  # Kong probes http_path on the target at the intervals below
+        type: http
+        http_path: /healthz
+        healthy:
+          interval: 5
+          successes: 2
+        unhealthy:
+          interval: 5
+          http_failures: 3
+      passive:  # only this upstream also configures passive checks
+        healthy:
+          successes: 3
+        unhealthy:
+          http_failures: 3
+
+  - name: media-api-upstream
+    algorithm: round-robin
+    targets:
+      - target: host.docker.internal:8285
+        weight: 100
+    healthchecks:
+      active:
+        type: http
+        http_path: /healthz
+        healthy:
+          interval: 5
+          successes: 2
+        unhealthy:
+          interval: 5
+          http_failures: 3
+
+  - name: response-api-upstream
+    algorithm: round-robin
+    targets:
+      - target: host.docker.internal:8082
+        weight: 100
+    healthchecks:
+      active:
+        type: http
+        http_path: /healthz
+        healthy:
+          interval: 5
+          successes: 2
+        unhealthy:
+          interval: 5
+          http_failures: 3
+
+  - name: mcp-tools-upstream
+    algorithm: round-robin
+    targets:
+      - target: host.docker.internal:8091
+        weight: 100
+    healthchecks:
+      active:
+        type: http
+        http_path: /healthz
+        healthy:
+          interval: 5
+          successes: 2
+        unhealthy:
+          interval: 5
+          http_failures: 3
+
+services:
+  - name: llm-api-svc  # proxies /llm, /v1 and the public health probes to llm-api-upstream
+    host: llm-api-upstream
+    connect_timeout: 60000  # Kong timeouts are in milliseconds (60 s)
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [llm, api]
+    routes:
+      - name: llm-api-proxy
+        paths:
+          - /llm
+        strip_path: true  # the /llm prefix is removed before the request is forwarded
+        path_handling: v0
+        tags: [llm, api, protected]
+        plugins:
+          - name: jwt
+            tags: [llm, api, jwt]
+            config:
+              key_claim_name: iss  # the token's issuer selects the credential in jwt_secrets
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # consumer used when JWT authentication does not succeed
+          - name: keycloak-apikey  # NOTE(review): not a plugin bundled with Kong; presumably custom to this deployment - confirm
+            tags: [llm, api, apikey]
+            config:
+              validation_url: "http://host.docker.internal:8080/auth/validate-api-key"  # literal host:port, not the llm-api-upstream name
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [llm, api, rate]
+            config:
+              minute: 120
+              policy: local
+              limit_by: consumer  # NOTE(review): the other route-level limits in this file use limit_by: ip - confirm this difference is intended
+              fault_tolerant: true
+          - name: cors
+            tags: [llm, api, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+      - name: llm-api-v1
+        paths:
+          - /v1
+        strip_path: false  # /v1 is kept, so the upstream sees the original path
+        path_handling: v0
+        tags: [llm, api, v1, protected]
+        plugins:
+          - name: jwt
+            tags: [llm, api, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt
+          - name: keycloak-apikey
+            tags: [llm, api, apikey]
+            config:
+              validation_url: "http://host.docker.internal:8080/auth/validate-api-key"
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [llm, api, rate]
+            config:
+              minute: 120
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [llm, api, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+      - name: llm-api-health  # health probes: no jwt or keycloak-apikey plugin is attached to this route
+        paths:
+          - /healthz
+          - /readyz
+        strip_path: false
+        path_handling: v0
+        methods: [GET]
+        tags: [llm, health, public]
+        plugins:
+          - name: cors
+            tags: [llm, health, cors]
+            config:
+              origins: ["*"]  # any origin, paired with credentials: false below
+              methods: ["GET", "OPTIONS"]
+              headers: ["Content-Type"]
+              exposed_headers: ["X-Request-Id"]
+              credentials: false
+              max_age: 3600
+
+  - name: media-api-svc  # proxies /media to media-api-upstream
+    host: media-api-upstream
+    connect_timeout: 60000  # Kong timeouts are in milliseconds (60 s)
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [media, api]
+    routes:
+      - name: media-api-proxy
+        paths: [/media]
+        strip_path: true  # the /media prefix is removed before the request is forwarded
+        path_handling: v0
+        tags: [media, protected]
+        plugins:
+          - name: jwt
+            tags: [media, jwt]
+            config:
+              key_claim_name: iss  # the token's issuer selects the credential in jwt_secrets
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # consumer used when JWT authentication does not succeed
+          - name: keycloak-apikey  # NOTE(review): not a plugin bundled with Kong; presumably custom to this deployment - confirm
+            tags: [media, apikey]
+            config:
+              validation_url: "http://host.docker.internal:8080/auth/validate-api-key"  # port 8080 is the llm-api target, not media-api (8285)
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [media, rate]
+            config:
+              minute: 60
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [media, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "X-Media-Service-Key", "Idempotency-Key", "X-Request-Id"]  # adds X-Media-Service-Key compared with the llm routes
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+
+  - name: llm-auth-svc  # exposes /auth on llm-api-upstream as a public route
+    host: llm-api-upstream
+    connect_timeout: 60000  # Kong timeouts are in milliseconds (60 s)
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [llm, auth]
+    routes:
+      - name: llm-auth-public  # no jwt or keycloak-apikey plugin is attached; cors is the only route plugin
+        paths:
+          - /auth
+        strip_path: false  # /auth is kept, so the upstream sees the original path
+        methods: [GET, POST, DELETE, OPTIONS]
+        tags: [llm, auth, public]
+        plugins:
+          - name: cors
+            tags: [llm, cors, public]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+              
+  - name: response-api-svc  # proxies /responses to response-api-upstream
+    host: response-api-upstream
+    connect_timeout: 900000  # Kong timeouts are in milliseconds: 900000 ms = 15 min, versus 60 s on the other services
+    write_timeout: 900000
+    read_timeout: 900000
+    retries: 3
+    tags: [response, api]
+    routes:
+      - name: response-api-proxy
+        paths: [/responses]
+        strip_path: true  # the /responses prefix is removed before the request is forwarded
+        path_handling: v0
+        tags: [response, protected]
+        plugins:
+          - name: jwt
+            tags: [response, jwt]
+            config:
+              key_claim_name: iss  # the token's issuer selects the credential in jwt_secrets
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # consumer used when JWT authentication does not succeed
+          - name: keycloak-apikey  # NOTE(review): not a plugin bundled with Kong; presumably custom to this deployment - confirm
+            tags: [response, apikey]
+            config:
+              validation_url: "http://host.docker.internal:8080/auth/validate-api-key"  # port 8080 is the llm-api target, not response-api (8082)
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [response, rate]
+            config:
+              minute: 100
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [response, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+
+  - name: mcp-tools-rpc-svc  # proxies POST /mcp to mcp-tools-upstream under /v1/mcp
+    host: mcp-tools-upstream
+    path: /v1/mcp  # service path prepended to whatever remains after the route prefix is stripped
+    connect_timeout: 60000  # Kong timeouts are in milliseconds (60 s)
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [mcp, rpc]
+    routes:
+      - name: mcp-tools-rpc
+        paths: [/mcp]
+        strip_path: true  # the /mcp prefix is removed before the service path is applied
+        methods: ["POST", "OPTIONS"]
+        path_handling: v0
+        tags: [mcp, protected]
+        plugins:
+          - name: jwt
+            tags: [mcp, jwt]
+            config:
+              key_claim_name: iss  # the token's issuer selects the credential in jwt_secrets
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # consumer used when JWT authentication does not succeed
+          - name: keycloak-apikey  # NOTE(review): not a plugin bundled with Kong; presumably custom to this deployment - confirm
+            tags: [mcp, apikey]
+            config:
+              validation_url: "http://host.docker.internal:8080/auth/validate-api-key"  # port 8080 is the llm-api target, not mcp-tools (8091)
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [mcp, rate]
+            config:
+              minute: 200
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [mcp, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["POST", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "mcp-protocol-version"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+
+  - name: mcp-tools-health-svc  # exposes the mcp-tools health probes under /mcp/healthz and /mcp/readyz
+    host: mcp-tools-upstream
+    connect_timeout: 60000  # Kong timeouts are in milliseconds (60 s)
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [mcp, health]
+    routes:
+      - name: mcp-tools-health
+        paths: [/mcp/healthz, /mcp/readyz]
+        strip_path: true  # NOTE(review): the whole matched path is stripped, so both probes appear to reach the upstream as "/" rather than /healthz or /readyz - confirm
+        methods: ["GET"]
+        path_handling: v0
+        tags: [mcp, health, protected]
+        plugins:
+          - name: jwt  # unlike llm-api-health, these probes carry the jwt and keycloak-apikey plugins
+            tags: [mcp, health, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # consumer used when JWT authentication does not succeed
+          - name: keycloak-apikey  # NOTE(review): not a plugin bundled with Kong; presumably custom to this deployment - confirm
+            tags: [mcp, health, apikey]
+            config:
+              validation_url: "http://host.docker.internal:8080/auth/validate-api-key"
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: cors
+            tags: [mcp, health, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "X-Request-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
diff --git a/kong/kong.yml b/kong/kong.yml
new file mode 100644
index 00000000..988175b3
--- /dev/null
+++ b/kong/kong.yml
@@ -0,0 +1,377 @@
+_format_version: "3.0"
+_transform: true
+
+consumers:
+  - username: kong-anon-jwt  # fallback consumer named by the jwt plugins' `anonymous` setting in this file
+    custom_id: anon-jwt
+    tags: [anonymous, auth, fallback]
+  - username: keycloak-issuer  # owns the RS256 credential declared under jwt_secrets
+    custom_id: keycloak-jwt
+    tags: [auth, jwt, keycloak]
+
+jwt_secrets:
+  - consumer: keycloak-issuer  # NOTE(review): the embedded public key presumably has to match the realm's signing key — confirm the rotation procedure
+    algorithm: RS256
+    key: http://localhost:8085/realms/jan  # compared with the token's `iss` claim (the jwt plugins set key_claim_name: iss)
+    rsa_public_key: |
+      -----BEGIN PUBLIC KEY-----
+      MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAks4bK7EqsKVvrW6F8gRD
+      izRuGFzhfZVdHImVbmwavyK+yGrxVR5BOfbAYZy6/LnLei3aCmYbwKNgV+BU8Lch
+      +USX/BPpHswRXqf/GcBcdwAhqxAtwoKFG8KwTORP/RZGbVxOMS9D9T6iHPQmT7Md
+      4FyvHwTx7BwPx5oMIEOnur+NNaTsECN3cGR21SAnCtNCl188D3ubTsjUwERp6B4E
+      p2sVXsTDzT0ZOYbmmZiZJ59Fvk+0UNMn2uQyAj+j7lv15g6GtNSlG1DBnRKEVbOz
+      C50TfRUcCpQTrS8FkTTS0Pc/9MCOCHy9YDDDhdEuI5dvo9y9QUTIHPx4AhSubE0C
+      bwIDAQAB
+      -----END PUBLIC KEY-----
+
+# Note: API keys are not stored as Kong credentials, because Kong in DB-less
+# mode cannot manage consumers/credentials dynamically. Instead, the keycloak-apikey
+# plugin on each route sends the key to llm-api (validation_url), which validates it via Keycloak.
+
+plugins:
+  - name: rate-limiting  # global default; a route that declares its own rate-limiting plugin uses that configuration instead
+    tags: [global, security, rate]
+    config:
+      minute: 600
+      hour: 10000
+      policy: local  # counters are kept in memory on each Kong node
+      limit_by: ip
+      fault_tolerant: true
+  - name: request-transformer  # adds the headers below to every request sent upstream
+    tags: [global, security, transformer]
+    config:
+      add:
+        headers:
+          - "X-Gateway-Auth: kong"
+          - "X-Gateway-Version: 3.5"
+
+services:
+  - name: llm-api-svc
+    url: http://llm-api:8080
+    connect_timeout: 60000  # Kong service timeouts are expressed in milliseconds
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [llm, api]
+    routes:
+      - name: llm-api-proxy
+        paths:
+          - /llm
+        strip_path: true  # the matched /llm prefix is removed before the request is proxied
+        path_handling: v0
+        tags: [llm, api, protected]
+        plugins:
+          - name: jwt
+            tags: [llm, api, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # requests without a valid JWT continue as this consumer instead of being rejected
+          - name: keycloak-apikey
+            tags: [llm, api, apikey]
+            config:
+              validation_url: "http://llm-api:8080/auth/validate-api-key"
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [llm, api, rate]
+            config:
+              minute: 120
+              policy: local
+              limit_by: consumer  # NOTE(review): every other route-level rate limit in this file uses limit_by: ip — confirm the difference is intended
+              fault_tolerant: true
+          - name: cors
+            tags: [llm, api, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+      - name: llm-api-v1
+        paths:
+          - /v1
+        strip_path: false  # /v1/... is forwarded to the upstream with its path unchanged
+        path_handling: v0
+        tags: [llm, api, v1, protected]
+        plugins:
+          - name: jwt
+            tags: [llm, api, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt
+          - name: keycloak-apikey
+            tags: [llm, api, apikey]
+            config:
+              validation_url: "http://llm-api:8080/auth/validate-api-key"
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [llm, api, rate]
+            config:
+              minute: 120
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [llm, api, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+      - name: llm-api-health
+        paths:
+          - /healthz
+          - /readyz
+        strip_path: false
+        path_handling: v0
+        methods: [GET]
+        tags: [llm, health, public]
+        plugins:  # public route: only cors is attached, no jwt or keycloak-apikey
+          - name: cors
+            tags: [llm, health, cors]
+            config:
+              origins: ["*"]
+              methods: ["GET", "OPTIONS"]
+              headers: ["Content-Type"]
+              exposed_headers: ["X-Request-Id"]
+              credentials: false
+              max_age: 3600
+      - name: llm-api-swagger
+        paths:
+          - ~/api/swagger.*  # the leading "~" marks this path as a regular expression
+        strip_path: false
+        path_handling: v0
+        tags: [llm, swagger, public]
+        plugins:  # public route: only cors is attached, no jwt or keycloak-apikey
+          - name: cors
+            tags: [llm, swagger, cors]
+            config:
+              origins: ["*"]
+              methods: ["GET", "OPTIONS"]
+              headers: ["Content-Type", "Accept"]
+              exposed_headers: ["X-Request-Id"]
+              credentials: false
+              max_age: 3600
+
+  - name: media-api-svc
+    url: http://media-api:8285
+    connect_timeout: 60000  # Kong service timeouts are expressed in milliseconds
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [media, api]
+    routes:
+      - name: media-api-proxy
+        paths: [/media]
+        strip_path: true  # the matched /media prefix is removed before the request is proxied
+        path_handling: v0
+        tags: [media, protected]
+        plugins:
+          - name: jwt
+            tags: [media, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # requests without a valid JWT continue as this consumer instead of being rejected
+          - name: keycloak-apikey
+            tags: [media, apikey]
+            config:
+              validation_url: "http://llm-api:8080/auth/validate-api-key"  # API keys for this service are validated by llm-api
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [media, rate]
+            config:
+              minute: 60
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [media, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "X-Media-Service-Key", "x-media-service-key", "Idempotency-Key", "X-Request-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+
+  - name: llm-auth-svc
+    url: http://llm-api:8080  # same upstream as llm-api-svc
+    connect_timeout: 60000
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [llm, auth]
+    routes:
+      - name: llm-auth-public
+        paths:
+          - /auth  # NOTE(review): as a prefix this also matches /auth/validate-api-key, the endpoint the keycloak-apikey plugins call — confirm it should be reachable through the gateway
+        strip_path: false
+        methods: [GET, POST, DELETE, OPTIONS]
+        tags: [llm, auth, public]
+        plugins:  # public route: only cors is attached, no jwt or keycloak-apikey
+          - name: cors
+            tags: [llm, cors, public]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+  - name: response-api-svc
+    url: http://response-api:8082
+    connect_timeout: 900000  # 900000 ms = 15 minutes; the other services in this file use 60000
+    write_timeout: 900000
+    read_timeout: 900000
+    retries: 3
+    tags: [response, api]
+    routes:
+      - name: response-api-proxy
+        paths: [/responses]
+        strip_path: true  # the matched /responses prefix is removed before the request is proxied
+        path_handling: v0
+        tags: [response, protected]
+        plugins:
+          - name: jwt
+            tags: [response, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # requests without a valid JWT continue as this consumer instead of being rejected
+          - name: keycloak-apikey
+            tags: [response, apikey]
+            config:
+              validation_url: "http://llm-api:8080/auth/validate-api-key"  # API keys for this service are validated by llm-api
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [response, rate]
+            config:
+              minute: 100
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [response, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "Mcp-Session-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+
+  - name: mcp-tools-rpc-svc
+    url: http://mcp-tools:8091/v1/mcp  # the upstream URL carries a path; what remains of the request path after stripping is appended to it
+    connect_timeout: 60000
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [mcp, rpc]
+    routes:
+      - name: mcp-tools-rpc
+        paths: [/mcp]
+        strip_path: true  # the matched /mcp prefix is removed before the request is proxied
+        methods: ["POST", "OPTIONS"]
+        path_handling: v0
+        tags: [mcp, protected]
+        plugins:
+          - name: jwt
+            tags: [mcp, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # requests without a valid JWT continue as this consumer instead of being rejected
+          - name: keycloak-apikey
+            tags: [mcp, apikey]
+            config:
+              validation_url: "http://llm-api:8080/auth/validate-api-key"  # API keys for this service are validated by llm-api
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: rate-limiting
+            tags: [mcp, rate]
+            config:
+              minute: 200
+              policy: local
+              limit_by: ip
+              fault_tolerant: true
+          - name: cors
+            tags: [mcp, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["POST", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "Idempotency-Key", "X-Request-Id", "mcp-protocol-version"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+
+  - name: mcp-tools-health-svc
+    url: http://mcp-tools:8091
+    connect_timeout: 60000
+    write_timeout: 60000
+    read_timeout: 60000
+    retries: 3
+    tags: [mcp, health]
+    routes:
+      - name: mcp-tools-health
+        paths: [/mcp/healthz, /mcp/readyz]
+        strip_path: true  # NOTE(review): stripping the whole matched path means both endpoints reach the upstream as "/" — confirm that is what mcp-tools expects
+        methods: ["GET"]
+        path_handling: v0
+        tags: [mcp, health, protected]
+        plugins:
+          - name: jwt
+            tags: [mcp, health, jwt]
+            config:
+              key_claim_name: iss
+              claims_to_verify: ["exp", "nbf"]
+              maximum_expiration: 3600
+              secret_is_base64: false
+              run_on_preflight: false
+              anonymous: kong-anon-jwt  # requests without a valid JWT continue as this consumer instead of being rejected
+          - name: keycloak-apikey
+            tags: [mcp, health, apikey]
+            config:
+              validation_url: "http://llm-api:8080/auth/validate-api-key"  # API keys for this service are validated by llm-api
+              validation_timeout: 5000
+              hide_credentials: true
+              run_on_preflight: false
+          - name: cors
+            tags: [mcp, health, cors]
+            config:
+              origins: ["http://localhost", "http://localhost:3000", "http://localhost:3001", "http://127.0.0.1"]
+              methods: ["GET", "OPTIONS"]
+              headers: ["Authorization", "Content-Type", "X-API-Key", "X-Request-Id"]
+              exposed_headers: ["X-Request-Id", "X-Gateway-Auth"]
+              credentials: true
+              max_age: 3600
+
+
diff --git a/kong/plugins/keycloak-apikey/README.md b/kong/plugins/keycloak-apikey/README.md
new file mode 100644
index 00000000..6ee5b0e2
--- /dev/null
+++ b/kong/plugins/keycloak-apikey/README.md
@@ -0,0 +1,200 @@
+# Keycloak API Key Authentication Plugin for Kong
+
+This custom Kong plugin validates API keys stored in Keycloak user attributes.
+
+## Overview
+
+The plugin:
+1. Extracts the API key from the `X-API-Key` header (the `apikey` header is also accepted)
+2. Validates it starts with `sk_` prefix
+3. Calls LLM-API validation endpoint
+4. Injects user headers for downstream services
+5. Enables authenticated consumer for rate limiting
+
+## Architecture
+
+```
+Client Request (X-API-Key: sk_xxx)
+       v
+Kong Gateway (keycloak-apikey plugin)
+       v
+LLM-API (/auth/validate-api-key)
+       v
+Keycloak (validate hash in user attributes)
+       v
+Kong injects headers -> Downstream Service
+```
+
+## Configuration
+
+### Plugin Schema
+
+```yaml
+- name: keycloak-apikey
+  config:
+    validation_url: "http://llm-api:8080/auth/validate-api-key"  # Validation endpoint
+    validation_timeout: 5000                                      # Timeout in ms
+    hide_credentials: true                                        # Hide API key from services
+    run_on_preflight: false                                       # Skip CORS preflight
+```
+
+### Injected Headers
+
+When API key is valid, Kong injects:
+
+- `X-User-ID` - User's internal database ID
+- `X-User-Subject` - Keycloak user subject/ID
+- `X-User-Email` - User's email address
+- `X-User-Username` - Username
+- `X-Auth-Method: apikey` - Authentication method used
+
+### Plugin Priority
+
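+**Priority: 1002** - Kong executes plugins in descending priority order, so this plugin runs after the JWT plugin (priority 1005 in Kong 2.x, 1450 in Kong 3.x) and before lower-priority plugins such as rate-limiting.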
+
+This allows:
+- JWT to authenticate first if present
+- API key as fallback authentication
+- Both methods work independently
+
+## Authentication Flow
+
+### 1. JWT Only
+```
+Authorization: Bearer <jwt_token>
+-> JWT plugin validates
+-> keycloak-apikey plugin skips (no API key)
+-> Request authorized
+```
+
+### 2. API Key Only
+```
+X-API-Key: sk_abc123...
+-> JWT plugin skips (no JWT)
+-> keycloak-apikey plugin validates
+-> Request authorized
+```
+
+### 3. Both JWT + API Key
+```
+Authorization: Bearer <jwt_token>
+X-API-Key: sk_abc123...
+-> JWT plugin validates first
+-> keycloak-apikey plugin validates API key
+-> Request authorized (both must be valid)
+```
+
+### 4. Neither
+```
+(no auth headers)
+-> Both plugins skip
+-> request-termination plugin returns 401
+```
+
+## Local Development
+
+### 1. Load Plugin in Kong
+
+```bash
+# Docker Compose (automatic)
+docker-compose up -d kong
+
+# Verify plugin loaded
+curl http://localhost:8001/plugins/enabled
+```
+
+### 2. Test Plugin
+
+```bash
+# Create API key
+curl -X POST http://localhost:8000/auth/api-keys \
+  -H "Authorization: Bearer <jwt>" \
+  -H "Content-Type: application/json" \
+  -d '{"name": "test-key"}'
+
+# Use API key
+curl http://localhost:8000/v1/models \
+  -H "X-API-Key: sk_abc123..."
+```
+
+## Validation Endpoint
+
+The plugin calls `POST /auth/validate-api-key`:
+
+**Request:**
+```json
+{
+  "api_key": "sk_abc123..."
+}
+```
+
+**Response (200 OK):**
+```json
+{
+  "user_id": "123",
+  "subject": "uuid",
+  "username": "john",
+  "email": "john@example.com",
+  "first_name": "John",
+  "last_name": "Doe",
+  "roles": ["user"]
+}
+```
+
+**Response (401 Unauthorized):**
+```json
+{
+  "message": "Invalid API key"
+}
+```
+
+## Security Features
+
+- **SHA-256 Hashed** - Keys are stored as a hash in Keycloak
+- **Show Once** - The plain key is shown only at creation
+- **Hidden from Services** - `hide_credentials: true` removes the API key header
+- **Centralized** - All services are protected by a single plugin
+- **Rate Limited** - The authenticated consumer enables per-user limits
+
+## Troubleshooting
+
+### Plugin Not Loaded
+
+```bash
+# Check Kong logs
+docker logs kong
+
+# Verify plugin in environment
+docker exec kong env | grep KONG_PLUGINS
+```
+
+### Validation Fails
+
+```bash
+# Test validation endpoint directly
+curl -X POST http://llm-api:8080/auth/validate-api-key \
+  -H "Content-Type: application/json" \
+  -d '{"api_key": "sk_test123"}'
+
+# Check Kong logs for errors
+docker logs kong --tail 100 -f
+```
+
+### Headers Not Injected
+
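+The `X-User-*` headers are injected only after a key validates successfully, so first confirm that validation succeeds (see "Validation Fails"). Separately, check that `hide_credentials` is set correctly; it only controls whether the API key header itself is forwarded: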
+- `true` - Removes API key header (recommended)
+- `false` - Keeps API key header (for debugging)
+
+## Performance
+
+- **Validation Cache**: Consider adding Redis cache for validated keys
+- **Timeout**: Default 5s, adjust based on network latency
+- **Connection Pool**: Plugin reuses HTTP connections (`keepalive_pool: 10`)
+
+## Future Enhancements
+
+- [ ] Add Redis cache for validated keys (reduce latency)
+- [ ] Support multiple validation endpoints (failover)
+- [ ] Add metrics for validation success/failure rates
+- [ ] Implement key rotation detection
diff --git a/kong/plugins/keycloak-apikey/handler.lua b/kong/plugins/keycloak-apikey/handler.lua
new file mode 100644
index 00000000..698ca98b
--- /dev/null
+++ b/kong/plugins/keycloak-apikey/handler.lua
@@ -0,0 +1,133 @@
+-- Kong plugin handler: authenticates requests that carry an `sk_`-prefixed API
+-- key by POSTing the key to conf.validation_url and, on success, forwarding the
+-- returned user identity to the upstream service as X-User-* headers.
+local http = require "resty.http"
+local cjson = require "cjson.safe"
+
+local KeycloakAPIKeyHandler = {
+  -- Kong executes higher priorities first, so this plugin runs after the
+  -- bundled JWT plugin and before lower-priority plugins such as rate-limiting.
+  PRIORITY = 1002,
+  VERSION = "1.0.0",
+}
+
+-- Converts a decoded JSON scalar into a header-safe string.
+-- Returns nil for missing values, JSON null (decoded as cjson.null) and other
+-- non-scalars, which kong.service.request.set_header() does not accept.
+local function header_value(value)
+  local kind = type(value)
+  if kind == "string" or kind == "number" then
+    return tostring(value)
+  end
+  return nil
+end
+
+-- Access-phase entry point.
+-- conf: plugin configuration (validation_url, validation_timeout,
+--       hide_credentials, run_on_preflight).
+-- Returns without a response when the request may continue: skipped preflight,
+-- no API key, a key in a foreign format, or a successfully validated key.
+-- Otherwise ends the request with 401 (key rejected) or 500 (validation
+-- service unreachable, failing, or returning an unusable body).
+function KeycloakAPIKeyHandler:access(conf)
+  -- Honour run_on_preflight: by default CORS preflight requests are not checked.
+  if not conf.run_on_preflight and kong.request.get_method() == "OPTIONS" then
+    return
+  end
+
+  -- Header lookup is case-insensitive, so "X-API-Key" also covers "X-Api-Key".
+  local api_key = kong.request.get_header("X-API-Key") or
+                  kong.request.get_header("apikey")
+
+  -- If no API key, skip (let JWT or other auth handle it)
+  if not api_key or api_key == "" then
+    return
+  end
+
+  -- Only keys in our own format (sk_xxxxx) are validated here.
+  if not string.match(api_key, "^sk_") then
+    kong.log.debug("API key doesn't match sk_ format, skipping")
+    return
+  end
+
+  -- Call validation endpoint
+  local httpc = http.new()
+  httpc:set_timeout(conf.validation_timeout or 5000)
+
+  local validation_url = conf.validation_url or "http://llm-api:8080/auth/validate-api-key"
+
+  kong.log.debug("Validating API key via: ", validation_url)
+
+  local res, err = httpc:request_uri(validation_url, {
+    method = "POST",
+    body = cjson.encode({ api_key = api_key }),
+    headers = {
+      ["Content-Type"] = "application/json",
+    },
+    keepalive_timeout = 60000,
+    keepalive_pool = 10,
+  })
+
+  if not res then
+    kong.log.err("Failed to validate API key: ", err)
+    return kong.response.exit(500, {
+      message = "API key validation service unavailable"
+    })
+  end
+
+  -- A 5xx from the validation service is an outage, not proof of a bad key.
+  if res.status >= 500 then
+    kong.log.err("API key validation service error: ", res.status)
+    return kong.response.exit(500, {
+      message = "API key validation service unavailable"
+    })
+  end
+
+  if res.status ~= 200 then
+    kong.log.debug("API key validation failed: ", res.status)
+    return kong.response.exit(401, {
+      message = "Invalid API key"
+    })
+  end
+
+  -- Parse user info; user_id and subject must be usable as header values.
+  local user_info, decode_err = cjson.decode(res.body)
+  local is_table = type(user_info) == "table"
+  local user_id = is_table and header_value(user_info.user_id) or nil
+  local subject = is_table and header_value(user_info.subject) or nil
+  if not user_id or not subject then
+    kong.log.err("Failed to decode validation response: ",
+                 decode_err or "missing user_id or subject")
+    return kong.response.exit(500, {
+      message = "Invalid validation response"
+    })
+  end
+  local username = header_value(user_info.username)
+
+  -- Set headers for downstream services
+  kong.service.request.set_header("X-User-ID", user_id)
+  kong.service.request.set_header("X-User-Subject", subject)
+  kong.service.request.set_header("X-User-Email", header_value(user_info.email) or "")
+  kong.service.request.set_header("X-User-Username", username or "")
+  kong.service.request.set_header("X-Auth-Method", "apikey")
+
+  -- Record the caller as the authenticated consumer/credential. The consumer
+  -- table carries an `id` so plugins that key on the consumer id (presumably
+  -- rate-limiting with limit_by = consumer) can tell users apart.
+  -- NOTE(review): this consumer is synthetic, not an entity stored in Kong;
+  -- confirm no plugin on these routes needs to look it up.
+  kong.client.authenticate(
+    { id = user_id, custom_id = subject, username = username },
+    { id = user_id, custom_id = subject }
+  )
+
+  -- Hide the API key from downstream services
+  if conf.hide_credentials then
+    kong.service.request.clear_header("X-API-Key")
+    kong.service.request.clear_header("apikey")
+  end
+
+  kong.log.info("API key validated successfully for user: ", user_id)
+end
+
+return KeycloakAPIKeyHandler
diff --git a/kong/plugins/keycloak-apikey/schema.lua b/kong/plugins/keycloak-apikey/schema.lua
new file mode 100644
index 00000000..dbd41361
--- /dev/null
+++ b/kong/plugins/keycloak-apikey/schema.lua
@@ -0,0 +1,43 @@
+-- Configuration schema for the keycloak-apikey plugin.
+local typedefs = require "kong.db.schema.typedefs"
+
+return {
+  name = "keycloak-apikey",
+  fields = {
+    -- Authentication plugin: it must not itself be scoped to a consumer.
+    { consumer = typedefs.no_consumer },
+    -- Restrict the plugin to HTTP-family protocols.
+    { protocols = typedefs.protocols_http },
+    { config = {
+        type = "record",
+        fields = {
+          -- typedefs.url rejects malformed URLs when the configuration is
+          -- loaded instead of failing on the first proxied request.
+          { validation_url = typedefs.url {
+              required = true,
+              default = "http://llm-api:8080/auth/validate-api-key",
+              description = "URL of the API key validation endpoint"
+          }},
+          { validation_timeout = {
+              type = "number",
+              required = true,
+              default = 5000,
+              gt = 0, -- a zero or negative timeout is never meaningful
+              description = "Timeout for validation request in milliseconds"
+          }},
+          { hide_credentials = {
+              type = "boolean",
+              required = true,
+              default = true,
+              description = "Hide API key from downstream services"
+          }},
+          { run_on_preflight = {
+              type = "boolean",
+              required = true,
+              default = false,
+              description = "Run on CORS preflight requests"
+          }},
+        }
+    }},
+  },
+}
diff --git a/tests/grafana/grafana-provisioning/dashboards/dashboard.yml b/monitoring/grafana/provisioning/dashboards/dashboards.yml
similarity index 59%
rename from tests/grafana/grafana-provisioning/dashboards/dashboard.yml
rename to monitoring/grafana/provisioning/dashboards/dashboards.yml
index 7435f09d..6e60197d 100644
--- a/tests/grafana/grafana-provisioning/dashboards/dashboard.yml
+++ b/monitoring/grafana/provisioning/dashboards/dashboards.yml
@@ -1,12 +1,12 @@
 apiVersion: 1
 
 providers:
-  - name: 'default'
+  - name: 'Jan Server'
     orgId: 1
-    folder: ''
+    folder: 'Jan Server'
     type: file
     disableDeletion: false
     updateIntervalSeconds: 10
     allowUiUpdates: true
     options:
-      path: /var/lib/grafana/dashboards
+      path: /etc/grafana/provisioning/dashboards/json
diff --git a/monitoring/grafana/provisioning/dashboards/json/chat-completion-dashboard.json b/monitoring/grafana/provisioning/dashboards/json/chat-completion-dashboard.json
new file mode 100644
index 00000000..beddceb7
--- /dev/null
+++ b/monitoring/grafana/provisioning/dashboards/json/chat-completion-dashboard.json
@@ -0,0 +1,700 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [
+    {
+      "title": "View in Jaeger",
+      "url": "http://localhost:16686/search?service=llm-api&operation=ChatHandler.CreateChatCompletion",
+      "type": "link",
+      "icon": "external link",
+      "tags": [],
+      "targetBlank": true
+    }
+  ],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 20,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Chat Completion Duration",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 1000
+              },
+              {
+                "color": "red",
+                "value": 3000
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "orientation": "auto",
+        "reduceOptions": {
+          "values": false,
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": ""
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "text": {}
+      },
+      "pluginVersion": "10.2.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Total Requests (24h)",
+      "type": "gauge"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 18,
+        "y": 0
+      },
+      "id": 3,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "values": false,
+          "calcs": [
+            "mean"
+          ],
+          "fields": ""
+        },
+        "text": {},
+        "textMode": "auto"
+      },
+      "pluginVersion": "10.2.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Average Response Time",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            }
+          },
+          "mappings": []
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 0,
+        "y": 8
+      },
+      "id": 4,
+      "options": {
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value", "percent"]
+        },
+        "pieType": "pie",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Requests by Model",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            }
+          },
+          "mappings": []
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 8,
+        "y": 8
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value", "percent"]
+        },
+        "pieType": "donut",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Streaming vs Non-Streaming",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            }
+          },
+          "mappings": []
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 8,
+        "x": 16,
+        "y": 8
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true,
+          "values": ["value", "percent"]
+        },
+        "pieType": "pie",
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Requests by Provider",
+      "type": "piechart"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "bars",
+            "fillOpacity": 100,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "normal"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "none"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 7,
+      "options": {
+        "legend": {
+          "calcs": ["sum"],
+          "displayMode": "table",
+          "placement": "right",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Token Usage (Prompt + Completion)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "lineInterpolation": "smooth",
+            "lineWidth": 2,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "ms"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "id": 8,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "LLM Inference Duration vs Total Duration",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "jaeger",
+        "uid": "jaeger"
+      },
+      "gridPos": {
+        "h": 10,
+        "w": 24,
+        "x": 0,
+        "y": 24
+      },
+      "id": 9,
+      "options": {
+        "showHeader": true
+      },
+      "pluginVersion": "10.2.2",
+      "targets": [
+        {
+          "datasource": {
+            "type": "jaeger",
+            "uid": "jaeger"
+          },
+          "query": "ChatHandler.CreateChatCompletion",
+          "queryType": "search",
+          "refId": "A",
+          "service": "llm-api"
+        }
+      ],
+      "title": "Recent Chat Completion Traces",
+      "type": "table",
+      "transformations": [
+        {
+          "id": "organize",
+          "options": {
+            "excludeByName": {},
+            "indexByName": {},
+            "renameByName": {
+              "traceID": "Trace ID",
+              "spanID": "Span ID",
+              "operationName": "Operation",
+              "startTime": "Start Time",
+              "duration": "Duration (ms)"
+            }
+          }
+        }
+      ]
+    }
+  ],
+  "refresh": "10s",
+  "schemaVersion": 38,
+  "style": "dark",
+  "tags": ["jan-server", "llm-api", "chat", "opentelemetry"],
+  "templating": {
+    "list": [
+      {
+        "current": {
+          "selected": false,
+          "text": "Last 1 hour",
+          "value": "now-1h"
+        },
+        "hide": 0,
+        "includeAll": false,
+        "label": "Time Range",
+        "multi": false,
+        "name": "time_range",
+        "options": [
+          {
+            "selected": true,
+            "text": "Last 1 hour",
+            "value": "now-1h"
+          },
+          {
+            "selected": false,
+            "text": "Last 6 hours",
+            "value": "now-6h"
+          },
+          {
+            "selected": false,
+            "text": "Last 24 hours",
+            "value": "now-24h"
+          },
+          {
+            "selected": false,
+            "text": "Last 7 days",
+            "value": "now-7d"
+          }
+        ],
+        "query": "now-1h,now-6h,now-24h,now-7d",
+        "queryValue": "",
+        "skipUrlSync": false,
+        "type": "custom"
+      }
+    ]
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Chat Completion Monitoring",
+  "uid": "chat-completion-monitoring",
+  "version": 1,
+  "weekStart": ""
+}
diff --git a/monitoring/grafana/provisioning/datasources/datasources.yml b/monitoring/grafana/provisioning/datasources/datasources.yml
new file mode 100644
index 00000000..d4244405
--- /dev/null
+++ b/monitoring/grafana/provisioning/datasources/datasources.yml
@@ -0,0 +1,20 @@
+# Grafana datasource provisioning.
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: true
+    jsonData:
+      timeInterval: 15s
+
+  - name: Jaeger
+    type: jaeger
+    # Pin the uid: the provisioned dashboards reference this datasource as
+    # {"type": "jaeger", "uid": "jaeger"}. Without an explicit uid Grafana
+    # generates one and those panels cannot resolve their datasource.
+    uid: jaeger
+    access: proxy
+    url: http://jaeger:16686
+    editable: true
+    jsonData:
+      # NOTE(review): no datasource with uid 'loki' is provisioned in this
+      # file; trace-to-logs links stay inactive until one is added.
+      tracesToLogsV2:
+        datasourceUid: 'loki'
diff --git a/monitoring/otel-collector.yaml b/monitoring/otel-collector.yaml
new file mode 100644
index 00000000..0e3b7c85
--- /dev/null
+++ b/monitoring/otel-collector.yaml
@@ -0,0 +1,62 @@
+# OpenTelemetry Collector pipeline: receives OTLP, exposes metrics for
+# Prometheus to scrape, and forwards traces to Jaeger over OTLP/gRPC.
+receivers:
+  # OTLP ingest on all interfaces: HTTP on 4318, gRPC on 4317.
+  otlp:
+    protocols:
+      http:
+        endpoint: 0.0.0.0:4318
+      grpc:
+        endpoint: 0.0.0.0:4317
+
+processors:
+  batch:
+    timeout: 10s
+    send_batch_size: 1024
+  memory_limiter:
+    check_interval: 1s
+    limit_mib: 512
+    spike_limit_mib: 128
+  # NOTE(review): resourcedetection is assumed to be available in the
+  # collector image in use (it is not part of every distribution) - confirm.
+  resourcedetection:
+    detectors: [env, system]
+    timeout: 5s
+
+exporters:
+  # NOTE(review): `loglevel` (and the `logging` exporter itself) are deprecated
+  # in newer collector releases in favour of `verbosity` / the `debug`
+  # exporter - confirm against the pinned collector version.
+  logging:
+    loglevel: info
+    sampling_initial: 5
+    sampling_thereafter: 200
+  # Scrape endpoint for Prometheus (see the 'otel-collector' job targeting
+  # otel-collector:8889). The namespace presumably yields the `jan_` metric
+  # prefix used by the alert rules - verify.
+  prometheus:
+    endpoint: 0.0.0.0:8889
+    namespace: jan
+    const_labels:
+      # Taken from the ENVIRONMENT variable of the collector process.
+      environment: ${env:ENVIRONMENT}
+  otlp/jaeger:
+    endpoint: jaeger:4317
+    tls:
+      # Plaintext gRPC to Jaeger (no TLS).
+      insecure: true
+    retry_on_failure:
+      enabled: true
+      initial_interval: 5s
+      max_interval: 30s
+      max_elapsed_time: 5m
+    sending_queue:
+      enabled: true
+      num_consumers: 10
+      queue_size: 5000
+
+extensions:
+  health_check:
+    endpoint: :13133
+  pprof:
+    endpoint: :1777
+
+service:
+  extensions: [health_check, pprof]
+  pipelines:
+    # Processors run in the order listed; memory_limiter is first in both
+    # pipelines.
+    metrics:
+      receivers: [otlp]
+      processors: [memory_limiter, batch]
+      exporters: [prometheus, logging]
+    # NOTE(review): resourcedetection runs after batch here, and is absent
+    # from the metrics pipeline - confirm both are intentional.
+    traces:
+      receivers: [otlp]
+      processors: [memory_limiter, batch, resourcedetection]
+      exporters: [otlp/jaeger, logging]
+
diff --git a/monitoring/prometheus-alerts.yml b/monitoring/prometheus-alerts.yml
new file mode 100644
index 00000000..6f984662
--- /dev/null
+++ b/monitoring/prometheus-alerts.yml
@@ -0,0 +1,125 @@
+groups:
+  # Availability / data-loss alerts, evaluated every 30s.
+  - name: jan_server_critical
+    interval: 30s
+    rules:
+      - alert: HighLLMLatency
+        expr: histogram_quantile(0.95, rate(jan_llm_api_request_duration_seconds_bucket[5m])) > 2
+        for: 5m
+        labels:
+          severity: warning
+          service: llm-api
+        annotations:
+          summary: "LLM API P95 latency {{ $value }}s exceeds 2s"
+          description: "95th percentile latency for {{ $labels.model }} above threshold for 5m"
+          runbook: "docs/runbooks/monitoring.md#high-llm-latency"
+          dashboard: "https://grafana/d/llm-overview"
+
+      - alert: ResponseAPIQueueBacklog
+        expr: jan_response_api_queue_depth > 100
+        for: 10m
+        labels:
+          severity: critical
+          service: response-api
+        annotations:
+          summary: "Response API queue depth {{ $value }}"
+          description: "Background job queue has {{ $value }} pending items for 10+ minutes"
+          runbook: "docs/runbooks/monitoring.md#queue-backlog"
+
+      # rate() of a counter is errors per second, not a ratio, so the value is
+      # rendered with `humanize` and an explicit unit rather than as a percentage.
+      - alert: MediaAPIStorageFailure
+        expr: rate(jan_media_api_s3_errors_total[5m]) > 0.1
+        for: 2m
+        labels:
+          severity: critical
+          service: media-api
+        annotations:
+          summary: "Media API S3 errors at {{ $value | humanize }}/s"
+          description: "S3 operations failing at {{ $value | humanize }} errors/second for 2+ minutes"
+          runbook: "docs/runbooks/monitoring.md#storage-failure"
+
+      - alert: OTELCollectorDown
+        expr: up{job="otel-collector"} == 0
+        for: 2m
+        labels:
+          severity: critical
+          service: observability
+        annotations:
+          summary: "OTEL Collector unavailable"
+          description: "OTEL Collector has been down for 2+ minutes. Observability data loss occurring."
+          runbook: "docs/runbooks/monitoring.md#collector-outage"
+
+      # NOTE(review): otelcol_* metrics are the collector's own telemetry; the
+      # visible scrape config only targets the exporter port (8889) - confirm
+      # this series is actually scraped.
+      - alert: TraceExportFailure
+        expr: rate(otelcol_exporter_send_failed_spans[5m]) > 10
+        for: 5m
+        labels:
+          severity: warning
+          service: observability
+        annotations:
+          summary: "Jaeger export failing at {{ $value }} spans/s"
+          description: "OTEL Collector unable to export spans to Jaeger at {{ $value }} spans/second"
+          runbook: "docs/runbooks/monitoring.md#trace-export-failure"
+
+      # Threshold is an absolute rate (0.05 errors/second), hence no percentage
+      # formatting in the annotations.
+      - alert: ConversationInsightFailure
+        expr: rate(jan_response_api_classifier_errors_total[5m]) > 0.05
+        for: 5m
+        labels:
+          severity: warning
+          service: response-api
+        annotations:
+          summary: "Conversation classifier errors at {{ $value | humanize }}/s"
+          description: "Prompt classification failing at {{ $value | humanize }} errors/second for 5+ minutes"
+          runbook: "docs/runbooks/monitoring.md#classifier-errors"
+
+  # Degradation alerts, evaluated every minute.
+  - name: jan_server_performance
+    interval: 1m
+    rules:
+      # Both sides are aggregated before dividing. Without sum() the division
+      # matches series one-to-one on identical label sets, so only the 5xx
+      # series pair up with themselves and the ratio is always 1.
+      - alert: HighErrorRate
+        expr: sum(rate(jan_llm_api_requests_total{status=~"5.."}[5m])) / sum(rate(jan_llm_api_requests_total[5m])) > 0.05
+        for: 5m
+        labels:
+          severity: warning
+          service: llm-api
+        annotations:
+          summary: "High error rate {{ $value | humanizePercentage }}"
+          description: "LLM API error rate above 5% for 5 minutes"
+
+      - alert: SlowDatabaseQueries
+        expr: histogram_quantile(0.95, rate(jan_response_api_db_query_duration_seconds_bucket[5m])) > 1
+        for: 10m
+        labels:
+          severity: warning
+          service: response-api
+        annotations:
+          summary: "Database P95 latency {{ $value }}s exceeds 1s"
+          description: "Database queries are slow, may impact response times"
+
+      # Containers without a memory limit report a limit of 0; dividing by it
+      # yields +Inf and a permanently firing alert, so those series are
+      # filtered out of the denominator.
+      - alert: HighMemoryUsage
+        expr: container_memory_usage_bytes{container=~"llm-api|response-api|media-api"} / (container_spec_memory_limit_bytes{container=~"llm-api|response-api|media-api"} > 0) > 0.9
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "Container {{ $labels.container }} memory usage at {{ $value | humanizePercentage }}"
+          description: "Memory usage approaching limit for {{ $labels.container }}"
+
+  # Capacity-planning alerts, evaluated every 2 minutes.
+  - name: jan_server_capacity
+    interval: 2m
+    rules:
+      # Share of workers that are busy: active / (active + idle).
+      - alert: WorkerPoolExhaustion
+        expr: jan_response_api_workers_active / (jan_response_api_workers_active + jan_response_api_workers_idle) > 0.9
+        for: 15m
+        labels:
+          severity: warning
+          service: response-api
+        annotations:
+          summary: "Worker pool {{ $value | humanizePercentage }} utilized"
+          description: "Background worker pool near capacity for 15+ minutes"
+
+      # Summed across all label combinations so the threshold applies to the
+      # API's total request rate, as the annotation states, rather than to
+      # each individual series.
+      - alert: HighRequestRate
+        expr: sum(rate(jan_llm_api_requests_total[5m])) > 1000
+        for: 10m
+        labels:
+          severity: info
+          service: llm-api
+        annotations:
+          summary: "LLM API receiving {{ $value }} requests/second"
+          description: "Unusually high request rate detected. Monitor for capacity issues."
diff --git a/monitoring/prometheus.yml b/monitoring/prometheus.yml
new file mode 100644
index 00000000..f3a1f711
--- /dev/null
+++ b/monitoring/prometheus.yml
@@ -0,0 +1,33 @@
+# Prometheus server configuration for the Jan Server monitoring stack.
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+  external_labels:
+    cluster: 'jan-server'
+    environment: 'development'
+
+# Load the alert rules shipped next to this file (monitoring/prometheus-alerts.yml).
+# Without a rule_files entry Prometheus never evaluates them. Relative paths
+# are resolved against the directory containing this config file, so the rules
+# file must be mounted alongside it under the same name.
+rule_files:
+  - 'prometheus-alerts.yml'
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+
+  # Metrics re-exported by the collector's prometheus exporter.
+  - job_name: 'otel-collector'
+    static_configs:
+      - targets: ['otel-collector:8889']
+        labels:
+          service: 'otel-collector'
+
+  # NOTE(review): the generated defaults set the LLM API MetricsPort to 9091
+  # while this job scrapes 8080 - confirm which port serves /metrics.
+  - job_name: 'llm-api'
+    scrape_interval: 10s
+    static_configs:
+      - targets: ['llm-api:8080']
+        labels:
+          service: 'llm-api'
+    metrics_path: '/metrics'
+
+  - job_name: 'mcp-tools'
+    scrape_interval: 10s
+    static_configs:
+      - targets: ['mcp-tools:8091']
+        labels:
+          service: 'mcp-tools'
+    metrics_path: '/metrics'
diff --git a/pkg/config/README.md b/pkg/config/README.md
new file mode 100644
index 00000000..2ca0f877
--- /dev/null
+++ b/pkg/config/README.md
@@ -0,0 +1,154 @@
+# Configuration Management System
+
+This directory contains the unified configuration management system for Jan Server.
+
+## Overview
+
+All configuration is defined canonically in Go structs (`pkg/config/types.go`). From these structs, we automatically generate:
+
+- **JSON Schema** (`config/schema/*.schema.json`) - For validation and IDE autocomplete
+- **YAML Defaults** (`config/defaults.yaml`) - Default values for all settings
+- **Documentation** (future) - Auto-generated configuration reference
+
+## Structure
+
+```
+pkg/config/
++-- types.go              # Canonical source of truth (Go structs)
++-- codegen/
+|   +-- schema.go         # JSON Schema generator
+|   +-- yaml.go           # YAML defaults generator
++-- loader.go             # Configuration loader (Sprint 2)
+
+cmd/config-generate/
++-- main.go               # Code generation CLI tool
+
+config/
++-- schema/               # Generated JSON schemas
+|   +-- config.schema.json
+|   +-- infrastructure.schema.json
+|   +-- services.schema.json
+|   +-- inference.schema.json
+|   +-- monitoring.schema.json
++-- defaults.yaml         # Generated default configuration
++-- environments/         # Environment-specific overrides
+    +-- development.yaml
+    +-- staging.yaml
+    +-- production.yaml
+```
+
+## Usage
+
+### Generate Configuration Artifacts
+
+```bash
+# Generate all artifacts (JSON Schema + YAML defaults)
+make config-generate
+
+# Test configuration
+make config-test
+
+# Check for drift (CI check)
+make config-drift-check
+```
+
+### Adding New Configuration
+
+1. **Edit Go structs** in `pkg/config/types.go`:
+```go
+type MyNewConfig struct {
+    // Port for the new service
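+    // Keep all tags on ONE line: reflect.StructTag only parses space-separated
+    // key:"value" pairs, so tags placed after a line break are silently ignored.
+    Port int `yaml:"port" json:"port" env:"MY_SERVICE_PORT" envDefault:"8080" jsonschema:"required,minimum=1,maximum=65535" description:"My service HTTP port"`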
+}
+```
+
+2. **Regenerate artifacts**:
+```bash
+make config-generate
+```
+
+3. **Commit both** `types.go` and generated files:
+```bash
+git add pkg/config/types.go config/schema/ config/defaults.yaml
+git commit -m "config: add MyNewConfig"
+```
+
+## Struct Tags Reference
+
+Each field should have these tags:
+
+- `yaml:"field_name"` - YAML field name
+- `json:"field_name"` - JSON field name
+- `env:"ENV_VAR_NAME"` - Environment variable name
+- `envDefault:"value"` - Default value
+- `jsonschema:"..."` - JSON Schema constraints (required, minimum, maximum, enum, etc.)
+- `description:"..."` - Human-readable description
+
+### Example:
+```go
+// Database port
+Port int `yaml:"port" json:"port" env:"POSTGRES_PORT" envDefault:"5432" jsonschema:"required,minimum=1,maximum=65535" description:"PostgreSQL port"`
+```
+
+## Configuration Hierarchy
+
+### Root `/config` - Infrastructure & Environment
+- Database connections, ports, auth settings
+- Environment-specific overrides
+- Managed through YAML + env vars
+
+### Service `/config` or `/configs` - Pluggable Configs (CI/CD Managed)
+- `services/llm-api/config/providers.yml` - Model providers
+- `services/mcp-tools/configs/mcp-providers.yml` - MCP tools
+- These files are **replaced by CI/CD**, not loaded from root config
+
+## Design Principles
+
+1. **Go structs are the source of truth** - Everything generates from them
+2. **No manual editing of generated files** - CI enforces this
+3. **Service configs stay in service dirs** - CI/CD can replace them independently
+4. **Environment-specific overrides** - Only define what changes
+5. **Secrets externalized** - Never in config files
+
+## CI/CD Integration
+
+### Pre-commit Hook
+```bash
+# Regenerate and check for drift
+make config-drift-check
+```
+
+### CI Pipeline
+```yaml
+- name: Check config drift
+  run: |
+    make config-generate
+    git diff --exit-code config/
+```
+
+## Roadmap
+
+### Sprint 1 (Current)
+- [x] Define canonical Go structs
+- [x] JSON Schema generator
+- [x] YAML defaults generator
+- [ ] CI drift detection test
+
+### Sprint 2 (Next)
+- [ ] Configuration loader with precedence
+- [ ] Environment override support
+- [ ] Secret provider integration
+
+### Future
+- [ ] Documentation generator
+- [ ] CLI tool (`jan-config`)
+- [ ] Docker Compose generator
+- [ ] Kubernetes values generator
+
+## Questions?
+
+See `config-improve-todo.md` for the complete implementation plan.
diff --git a/pkg/config/codegen/schema.go b/pkg/config/codegen/schema.go
new file mode 100644
index 00000000..eb917aaf
--- /dev/null
+++ b/pkg/config/codegen/schema.go
@@ -0,0 +1,69 @@
+package codegen
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/invopop/jsonschema"
+	"github.com/janhq/jan-server/pkg/config"
+)
+
+// GenerateJSONSchema reflects the configuration structs into JSON Schema
+// documents and writes them into outputDir, creating the directory if needed.
+//
+// It writes config.schema.json for the root config.Config, followed by one
+// <section>.schema.json for each of the infrastructure, services, inference
+// and monitoring sections, always in that order. A line is printed to stdout
+// for every file written.
+//
+// It returns a wrapped error as soon as the directory cannot be created or a
+// schema cannot be marshalled or written.
+func GenerateJSONSchema(outputDir string) error {
+	if err := os.MkdirAll(outputDir, 0755); err != nil {
+		return fmt.Errorf("create output directory: %w", err)
+	}
+
+	// Generate main config schema
+	reflector := &jsonschema.Reflector{
+		AllowAdditionalProperties: false,
+		DoNotReference:            false,
+		ExpandedStruct:            true,
+	}
+
+	schema := reflector.Reflect(&config.Config{})
+	schema.Title = "Jan Server Configuration"
+	schema.Description = "Complete configuration schema for Jan Server infrastructure and services"
+	// schema.Version is deliberately left as set by the reflector: that field
+	// is serialized as the "$schema" keyword (the meta-schema URI), so writing
+	// a release number such as "1.0.0" into it yields a document that
+	// validators and IDEs cannot resolve.
+
+	// Write main schema
+	mainSchemaPath := filepath.Join(outputDir, "config.schema.json")
+	if err := writeSchemaFile(mainSchemaPath, schema); err != nil {
+		return fmt.Errorf("write main schema: %w", err)
+	}
+
+	fmt.Printf("✓ Generated %s\n", mainSchemaPath)
+
+	// Generate per-section schemas for better modularity. A slice (not a map)
+	// keeps generation order, log output and the first reported error stable
+	// from run to run.
+	sections := []struct {
+		name string
+		typ  interface{}
+	}{
+		{"infrastructure", config.InfrastructureConfig{}},
+		{"services", config.ServicesConfig{}},
+		{"inference", config.InferenceConfig{}},
+		{"monitoring", config.MonitoringConfig{}},
+	}
+
+	for _, section := range sections {
+		sectionSchema := reflector.Reflect(section.typ)
+		sectionPath := filepath.Join(outputDir, fmt.Sprintf("%s.schema.json", section.name))
+		if err := writeSchemaFile(sectionPath, sectionSchema); err != nil {
+			return fmt.Errorf("write %s schema: %w", section.name, err)
+		}
+		fmt.Printf("✓ Generated %s\n", sectionPath)
+	}
+
+	return nil
+}
+
+// writeSchemaFile serializes schema with its MarshalJSON method and stores the
+// resulting bytes at path using file mode 0644. Each failure is wrapped with
+// the step that produced it ("marshal schema" or "write file").
+func writeSchemaFile(path string, schema *jsonschema.Schema) error {
+	encoded, marshalErr := schema.MarshalJSON()
+	if marshalErr != nil {
+		return fmt.Errorf("marshal schema: %w", marshalErr)
+	}
+
+	writeErr := os.WriteFile(path, encoded, 0644)
+	if writeErr != nil {
+		return fmt.Errorf("write file: %w", writeErr)
+	}
+	return nil
+}
diff --git a/pkg/config/codegen/yaml.go b/pkg/config/codegen/yaml.go
new file mode 100644
index 00000000..44ba868f
--- /dev/null
+++ b/pkg/config/codegen/yaml.go
@@ -0,0 +1,204 @@
+package codegen
+
+import (
+	"fmt"
+	"os"
+	"reflect"
+	"time"
+
+	"github.com/janhq/jan-server/pkg/config"
+	"gopkg.in/yaml.v3"
+)
+
+// GenerateDefaultsYAML writes the default configuration returned by
+// buildDefaultConfig to outputPath as YAML (2-space indent), preceded by a
+// "DO NOT EDIT" header comment. An existing file at outputPath is truncated.
+//
+// It returns a wrapped error if the file cannot be created, written, flushed
+// or closed, so a partially written defaults file is never reported as a
+// success.
+func GenerateDefaultsYAML(outputPath string) error {
+	cfg := buildDefaultConfig()
+
+	// Create YAML encoder
+	f, err := os.Create(outputPath)
+	if err != nil {
+		return fmt.Errorf("create output file: %w", err)
+	}
+	// Safety net for the early returns below. The success path closes the
+	// file explicitly so that a failing Close is reported; the second Close
+	// performed by this defer then only returns an (ignored) "already closed"
+	// error.
+	defer f.Close()
+
+	// Write header comments
+	header := `# Jan Server Default Configuration
+# Generated from pkg/config/types.go
+# DO NOT EDIT MANUALLY - this file is auto-generated
+#
+# To customize, create environment-specific overrides in:
+#   - config/environments/development.yaml
+#   - config/environments/staging.yaml
+#   - config/environments/production.yaml
+
+`
+	if _, err := f.WriteString(header); err != nil {
+		return fmt.Errorf("write header: %w", err)
+	}
+
+	// Encode YAML
+	encoder := yaml.NewEncoder(f)
+	encoder.SetIndent(2)
+
+	if err := encoder.Encode(cfg); err != nil {
+		return fmt.Errorf("encode yaml: %w", err)
+	}
+	// Close flushes any data the encoder still holds.
+	if err := encoder.Close(); err != nil {
+		return fmt.Errorf("flush yaml encoder: %w", err)
+	}
+	// Buffered write errors can surface only at close time.
+	if err := f.Close(); err != nil {
+		return fmt.Errorf("close output file: %w", err)
+	}
+
+	fmt.Printf("✓ Generated %s\n", outputPath)
+	return nil
+}
+
+// buildDefaultConfig returns the default Config used to generate
+// config/defaults.yaml. Every value is spelled out literally in this function
+// (nothing is read from struct tags here), so it has to be kept in sync with
+// the tag defaults by hand. Fields commented "From secrets" are left empty.
+func buildDefaultConfig() *config.Config {
+	meta := config.MetaConfig{
+		Version:     "1.0.0",
+		Environment: "development",
+	}
+
+	// --- Infrastructure ---
+	postgres := config.PostgresConfig{
+		Host:            "api-db",
+		Port:            5432,
+		User:            "jan_user",
+		Database:        "jan_llm_api",
+		Password:        "", // From secrets
+		SSLMode:         "disable",
+		MaxConnections:  100,
+		MaxIdleConns:    5,
+		MaxOpenConns:    15,
+		ConnMaxLifetime: 30 * time.Minute,
+	}
+	keycloak := config.KeycloakConfig{
+		BaseURL:             "http://keycloak:8085",
+		PublicURL:           "",
+		Realm:               "jan",
+		HTTPPort:            8085,
+		AdminUser:           "admin",
+		AdminPassword:       "", // From secrets
+		AdminRealm:          "master",
+		AdminClientID:       "admin-cli",
+		BackendClientID:     "backend",
+		BackendClientSecret: "", // From secrets
+		Client:              "jan-client",
+		OAuthRedirectURI:    "http://localhost:8000/auth/callback",
+		JWKSURL:             "",
+		OIDCDiscoveryURL:    "",
+		Issuer:              "http://localhost:8085/realms/jan",
+		Account:             "account",
+		RefreshJWKSInterval: 5 * time.Minute,
+		AuthClockSkew:       60 * time.Second,
+		GuestRole:           "guest",
+		Features:            []string{"token-exchange", "preview"},
+	}
+	kong := config.KongConfig{
+		HTTPPort:  8000,
+		AdminPort: 8001,
+		AdminURL:  "http://kong:8001",
+		LogLevel:  "info",
+	}
+
+	// --- Services ---
+	llmAPI := config.LLMAPIConfig{
+		HTTPPort:               8080,
+		MetricsPort:            9091,
+		LogLevel:               "info",
+		LogFormat:              "json",
+		AutoMigrate:            true,
+		ProviderConfigFile:     "config/providers.yml",
+		ProviderConfigSet:      "default",
+		ProviderConfigsEnabled: true,
+		APIKey: config.APIKeyConfig{
+			Prefix:     "sk_live",
+			DefaultTTL: 2160 * time.Hour,
+			MaxTTL:     2160 * time.Hour,
+			MaxPerUser: 5,
+		},
+		// NOTE(review): unlike the other secret fields, which default to "",
+		// this one carries a literal value that ends up in the generated
+		// defaults file - confirm that is intended.
+		ModelProviderSecret:      "jan-model-provider-secret-2024",
+		ModelSyncEnabled:         true,
+		ModelSyncIntervalMinutes: 60,
+		MediaResolveURL:          "http://kong:8000/media/v1/media/resolve",
+		MediaResolveTimeout:      5 * time.Second,
+	}
+	mcpTools := config.MCPToolsConfig{
+		HTTPPort:               8091,
+		LogLevel:               "info",
+		LogFormat:              "json",
+		SearchEngine:           "serper",
+		SerperAPIKey:           "", // From secrets
+		SearxngURL:             "http://searxng:8080",
+		VectorStoreURL:         "http://vector-store:3015",
+		SandboxFusionURL:       "http://sandboxfusion:8080",
+		SandboxRequireApproval: true,
+		MCPConfigFile:          "configs/mcp-providers.yml",
+	}
+	mediaAPI := config.MediaAPIConfig{
+		HTTPPort:           8285,
+		LogLevel:           "info",
+		MaxUploadBytes:     20971520, // 20MB
+		RetentionDays:      30,
+		ProxyDownload:      true,
+		RemoteFetchTimeout: 15 * time.Second,
+		S3: config.S3Config{
+			Endpoint:       "https://s3.menlo.ai",
+			PublicEndpoint: "",
+			Region:         "us-west-2",
+			Bucket:         "platform-dev",
+			AccessKey:      "", // From secrets
+			SecretKey:      "", // From secrets
+			UsePathStyle:   true,
+			PresignTTL:     5 * time.Minute,
+		},
+	}
+	responseAPI := config.ResponseAPIConfig{
+		HTTPPort:     8082,
+		LogLevel:     "info",
+		LLMAPIURL:    "http://llm-api:8080",
+		MCPToolsURL:  "http://mcp-tools:8091",
+		MaxToolDepth: 8,
+		ToolTimeout:  45 * time.Second,
+	}
+
+	// --- Inference ---
+	vllm := config.VLLMConfig{
+		Enabled:        true,
+		Port:           8101,
+		Model:          "Qwen/Qwen2.5-0.5B-Instruct",
+		ServedName:     "qwen2.5-0.5b-instruct",
+		GPUUtilization: 0.66,
+		InternalKey:    "", // From secrets
+		HFToken:        "", // From secrets
+	}
+
+	// --- Monitoring ---
+	monitoring := config.MonitoringConfig{
+		OTEL: config.OTELConfig{
+			Enabled:     false,
+			ServiceName: "llm-api",
+			Endpoint:    "http://otel-collector:4318",
+			HTTPPort:    4318,
+			GRPCPort:    4317,
+		},
+		Prometheus: config.PrometheusConfig{
+			Port: 9090,
+		},
+		Grafana: config.GrafanaConfig{
+			Port:          3001,
+			AdminUser:     "admin",
+			AdminPassword: "", // From secrets
+		},
+		Jaeger: config.JaegerConfig{
+			UIPort: 16686,
+		},
+	}
+
+	// Assemble the sections into the root config.
+	return &config.Config{
+		Meta: meta,
+		Infrastructure: config.InfrastructureConfig{
+			Database: config.DatabaseConfig{Postgres: postgres},
+			Auth:     config.AuthConfig{Keycloak: keycloak},
+			Gateway:  config.GatewayConfig{Kong: kong},
+		},
+		Services: config.ServicesConfig{
+			LLMAPI:      llmAPI,
+			MCPTools:    mcpTools,
+			MediaAPI:    mediaAPI,
+			ResponseAPI: responseAPI,
+		},
+		Inference:  config.InferenceConfig{VLLM: vllm},
+		Monitoring: monitoring,
+	}
+}
+
+// getStructTag looks up tagName in the struct tag of field and returns the
+// associated value, or "" when the tag has no such key.
+func getStructTag(field reflect.StructField, tagName string) string {
+	tag := field.Tag
+	return tag.Get(tagName)
+}
diff --git a/pkg/config/compose/generator.go b/pkg/config/compose/generator.go
new file mode 100644
index 00000000..cd18afe6
--- /dev/null
+++ b/pkg/config/compose/generator.go
@@ -0,0 +1,481 @@
+package compose
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"path/filepath"
+	"text/template"
+
+	"gopkg.in/yaml.v3"
+)
+
+// ConfigData is the root data for the compose templates, which read it via paths such as .Database.Postgres.User.
+type ConfigData map[string]interface{}
+
+// Generator renders the docker-compose templates embedded in its Generate* methods against a ConfigData map.
+type Generator struct {
+	config    ConfigData                    // root data passed to every template execution
+	templates map[string]*template.Template // allocated by NewGenerator; no method in this file reads or writes it
+}
+
+// NewGenerator returns a Generator that renders from cfg and starts with an empty template map.
+func NewGenerator(cfg ConfigData) *Generator {
+	gen := new(Generator)
+	gen.config = cfg
+	gen.templates = map[string]*template.Template{}
+	return gen
+}
+
+// GenerateInfrastructure renders the infrastructure compose file (api-db, keycloak-db, keycloak, kong) from the
+// generator's config and writes it to outputPath, creating parent directories as needed. It returns an error if the
+// template references a missing config key or if writing fails. The output contains the configured passwords in plain text.
+func (g *Generator) GenerateInfrastructure(outputPath string) error {
+	tmpl := `# Generated from config/defaults.yaml
+# DO NOT EDIT - Changes will be overwritten
+# To modify, edit config YAML and run: make compose-generate
+
+services:
+  # PostgreSQL Database
+  api-db:
+    image: postgres:16-alpine
+    restart: unless-stopped
+    environment:
+      POSTGRES_USER: {{ .Database.Postgres.User }}
+      POSTGRES_PASSWORD: {{ .Database.Postgres.Password }}
+      POSTGRES_DB: {{ .Database.Postgres.Database }}
+    ports:
+      - "{{ .Database.Postgres.Port }}:5432"
+    volumes:
+      - api-db-data:/var/lib/postgresql/data
+      - ./docker/postgres/init:/docker-entrypoint-initdb.d
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U {{ .Database.Postgres.User }} -d {{ .Database.Postgres.Database }}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - jan-network
+    profiles:
+      - infra
+      - full
+
+  # Keycloak Database
+  keycloak-db:
+    image: postgres:16-alpine
+    restart: unless-stopped
+    environment:
+      POSTGRES_USER: {{ .Auth.Keycloak.DbUser }}
+      POSTGRES_PASSWORD: {{ .Auth.Keycloak.DbPassword }}
+      POSTGRES_DB: {{ .Auth.Keycloak.DbDatabase }}
+    ports:
+      - "{{ .Auth.Keycloak.DbPort }}:5432"
+    volumes:
+      - keycloak-db-data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U {{ .Auth.Keycloak.DbUser }}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - jan-network
+    profiles:
+      - infra
+      - full
+
+  # Keycloak
+  keycloak:
+    image: quay.io/keycloak/keycloak:24.0.5
+    command: start-dev --import-realm
+    restart: unless-stopped
+    depends_on:
+      keycloak-db:
+        condition: service_healthy
+    environment:
+      KC_DB: postgres
+      KC_DB_URL_HOST: keycloak-db
+      KC_DB_URL_PORT: 5432
+      KC_DB_URL_DATABASE: {{ .Auth.Keycloak.DbDatabase }}
+      KC_DB_USERNAME: {{ .Auth.Keycloak.DbUser }}
+      KC_DB_PASSWORD: {{ .Auth.Keycloak.DbPassword }}
+      KC_HTTP_PORT: {{ .Auth.Keycloak.HttpPort }}
+      KEYCLOAK_ADMIN: {{ .Auth.Keycloak.AdminUser }}
+      KEYCLOAK_ADMIN_PASSWORD: {{ .Auth.Keycloak.AdminPassword }}
+    ports:
+      - "{{ .Auth.Keycloak.HttpPort }}:{{ .Auth.Keycloak.HttpPort }}"
+    volumes:
+      - ./keycloak/import:/opt/keycloak/data/import:ro
+    healthcheck:
+      test: ["CMD-SHELL", "exec 3<>/dev/tcp/localhost/{{ .Auth.Keycloak.HttpPort }} || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 30
+    networks:
+      - jan-network
+    profiles:
+      - infra
+      - full
+
+  # Kong Gateway
+  kong:
+    image: kong:3.7.1-ubuntu
+    restart: unless-stopped
+    environment:
+      KONG_DATABASE: "off"
+      KONG_DECLARATIVE_CONFIG: /kong/kong.yml
+      KONG_PROXY_ACCESS_LOG: /dev/stdout
+      KONG_ADMIN_ACCESS_LOG: /dev/stdout
+      KONG_PROXY_ERROR_LOG: /dev/stderr
+      KONG_ADMIN_ERROR_LOG: /dev/stderr
+      KONG_ADMIN_LISTEN: "0.0.0.0:{{ .Gateway.Kong.AdminPort }}"
+      KONG_LOG_LEVEL: {{ .Gateway.Kong.LogLevel }}
+    ports:
+      - "{{ .Gateway.Kong.HttpPort }}:8000"
+      - "{{ .Gateway.Kong.AdminPort }}:{{ .Gateway.Kong.AdminPort }}"
+    volumes:
+      - ./kong/kong.yml:/kong/kong.yml:ro
+      - ./kong/plugins:/usr/local/share/lua/5.1/kong/plugins:ro
+    healthcheck:
+      test: ["CMD", "kong", "health"]
+      interval: 10s
+      timeout: 10s
+      retries: 10
+    networks:
+      - jan-network
+    profiles:
+      - infra
+      - full
+
+volumes:
+  api-db-data:
+  keycloak-db-data:
+
+networks:
+  jan-network:
+    driver: bridge
+`
+
+	// missingkey=error makes an absent config key a render error instead of the literal "<no value>".
+	t, err := template.New("infrastructure").Option("missingkey=error").Parse(tmpl)
+	if err != nil {
+		return fmt.Errorf("parse template: %w", err)
+	}
+	var buf bytes.Buffer
+	if err := t.Execute(&buf, g.config); err != nil {
+		return fmt.Errorf("execute template: %w", err)
+	}
+
+	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
+		return fmt.Errorf("create output directory: %w", err)
+	}
+	if err := os.WriteFile(outputPath, buf.Bytes(), 0644); err != nil {
+		return fmt.Errorf("write file: %w", err)
+	}
+	return nil
+}
+
+// GenerateServices renders the API services compose file (llm-api, media-api, response-api) from the generator's
+// config and writes it to outputPath, creating parent directories as needed. It returns an error if the template
+// references a missing config key or if writing fails. The services depend on api-db, which this template does not define.
+func (g *Generator) GenerateServices(outputPath string) error {
+	tmpl := `# Generated from config/defaults.yaml
+# DO NOT EDIT - Changes will be overwritten
+# To modify, edit config YAML and run: make compose-generate
+
+services:
+  # LLM API Service
+  llm-api:
+    build: ../services/llm-api
+    restart: unless-stopped
+    depends_on:
+      api-db:
+        condition: service_healthy
+    environment:
+      # HTTP Server
+      HTTP_PORT: {{ .Services.LLMApi.HttpPort }}
+      METRICS_PORT: {{ .Services.LLMApi.MetricsPort }}
+      
+      # Database (constructed DSN)
+      DB_POSTGRESQL_WRITE_DSN: "postgres://{{ .Database.Postgres.User }}:{{ .Database.Postgres.Password }}@api-db:5432/{{ .Database.Postgres.Database }}?sslmode=disable"
+      DB_POSTGRESQL_READ1_DSN: ""
+      
+      # Auth
+      KEYCLOAK_BASE_URL: {{ .Auth.Keycloak.BaseUrl }}
+      KEYCLOAK_REALM: {{ .Auth.Keycloak.Realm }}
+      KEYCLOAK_ADMIN: {{ .Auth.Keycloak.AdminUser }}
+      KEYCLOAK_ADMIN_PASSWORD: {{ .Auth.Keycloak.AdminPassword }}
+      BACKEND_CLIENT_ID: {{ .Auth.Keycloak.BackendClientId }}
+      BACKEND_CLIENT_SECRET: {{ .Auth.Keycloak.BackendClientSecret }}
+      CLIENT: {{ .Auth.Keycloak.Client }}
+      OAUTH_REDIRECT_URI: {{ .Auth.Keycloak.OAuthRedirectUri }}
+      JWKS_URL: {{ .Auth.Keycloak.JwksUrl }}
+      ISSUER: {{ .Auth.Keycloak.Issuer }}
+      ACCOUNT: {{ .Auth.Keycloak.Account }}
+      
+      # API Keys
+      API_KEY_PREFIX: {{ .Services.LLMApi.ApiKeys.Prefix }}
+      API_KEY_DEFAULT_TTL: {{ .Services.LLMApi.ApiKeys.DefaultTtl }}
+      API_KEY_MAX_TTL: {{ .Services.LLMApi.ApiKeys.MaxTtl }}
+      API_KEY_MAX_PER_USER: {{ .Services.LLMApi.ApiKeys.MaxPerUser }}
+      
+      # Gateway
+      KONG_ADMIN_URL: {{ .Gateway.Kong.AdminUrl }}
+      
+      # Model Provider
+      MODEL_PROVIDER_SECRET: {{ .Services.LLMApi.ModelProvider.Secret }}
+      JAN_PROVIDER_CONFIGS: {{ .Services.LLMApi.ModelProvider.Enabled }}
+      JAN_PROVIDER_CONFIG_SET: {{ .Services.LLMApi.ModelProvider.ConfigSet }}
+      JAN_PROVIDER_CONFIGS_FILE: {{ .Services.LLMApi.ModelProvider.ConfigFile }}
+      
+      # Model Sync
+      MODEL_SYNC_ENABLED: {{ .Services.LLMApi.ModelSync.Enabled }}
+      MODEL_SYNC_INTERVAL_MINUTES: {{ .Services.LLMApi.ModelSync.IntervalMinutes }}
+      
+      # Logging
+      LOG_LEVEL: {{ .Services.LLMApi.LogLevel }}
+      LOG_FORMAT: {{ .Services.LLMApi.LogFormat }}
+      
+      # Features
+      AUTO_MIGRATE: {{ .Services.LLMApi.AutoMigrate }}
+      OTEL_ENABLED: {{ .Monitoring.Otel.Enabled }}
+    ports:
+      - "{{ .Services.LLMApi.HttpPort }}:{{ .Services.LLMApi.HttpPort }}"
+      - "{{ .Services.LLMApi.MetricsPort }}:{{ .Services.LLMApi.MetricsPort }}"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:{{ .Services.LLMApi.HttpPort }}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - jan-network
+    profiles:
+      - api
+      - full
+
+  # Media API Service
+  media-api:
+    build: ../services/media-api
+    restart: unless-stopped
+    depends_on:
+      api-db:
+        condition: service_healthy
+    environment:
+      # HTTP Server
+      MEDIA_HTTP_PORT: {{ .Services.MediaApi.HttpPort }}
+      
+      # Database
+      DB_POSTGRESQL_WRITE_DSN: "postgres://{{ .Database.Postgres.User }}:{{ .Database.Postgres.Password }}@api-db:5432/{{ .Database.Postgres.Database }}?sslmode=disable"
+      DB_POSTGRESQL_READ1_DSN: ""
+      
+      # S3 Storage
+      MEDIA_S3_BUCKET_NAME: {{ .Services.MediaApi.S3.BucketName }}
+      MEDIA_S3_REGION: {{ .Services.MediaApi.S3.Region }}
+      MEDIA_S3_ENDPOINT: {{ .Services.MediaApi.S3.Endpoint }}
+      MEDIA_S3_ACCESS_KEY_ID: {{ .Services.MediaApi.S3.AccessKeyId }}
+      MEDIA_S3_SECRET_ACCESS_KEY: {{ .Services.MediaApi.S3.SecretAccessKey }}
+      MEDIA_S3_USE_SSL: {{ .Services.MediaApi.S3.UseSsl }}
+      MEDIA_S3_USE_PATH_STYLE: {{ .Services.MediaApi.S3.UsePathStyle }}
+      
+      # Features
+      MEDIA_MAX_UPLOAD_BYTES: {{ .Services.MediaApi.MaxUploadBytes }}
+      MEDIA_RETENTION_DAYS: {{ .Services.MediaApi.RetentionDays }}
+      
+      # Auth
+      MEDIA_JWKS_URL: {{ .Auth.Keycloak.JwksUrl }}
+      MEDIA_ISSUER: {{ .Auth.Keycloak.Issuer }}
+      MEDIA_AUDIENCE: {{ .Auth.Keycloak.Account }}
+      
+      # Logging
+      MEDIA_LOG_LEVEL: {{ .Services.MediaApi.LogLevel }}
+      MEDIA_LOG_FORMAT: {{ .Services.MediaApi.LogFormat }}
+      MEDIA_OTEL_ENABLED: {{ .Monitoring.Otel.Enabled }}
+    ports:
+      - "{{ .Services.MediaApi.HttpPort }}:{{ .Services.MediaApi.HttpPort }}"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:{{ .Services.MediaApi.HttpPort }}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - jan-network
+    profiles:
+      - api
+      - full
+
+  # Response API Service
+  response-api:
+    build: ../services/response-api
+    restart: unless-stopped
+    depends_on:
+      api-db:
+        condition: service_healthy
+    environment:
+      # HTTP Server
+      RESPONSE_HTTP_PORT: {{ .Services.ResponseApi.HttpPort }}
+      
+      # Database
+      DB_POSTGRESQL_WRITE_DSN: "postgres://{{ .Database.Postgres.User }}:{{ .Database.Postgres.Password }}@api-db:5432/{{ .Database.Postgres.Database }}?sslmode=disable"
+      DB_POSTGRESQL_READ1_DSN: ""
+      
+      # Service Integration
+      RESPONSE_LLM_API_URL: {{ .Services.ResponseApi.LlmApiUrl }}
+      RESPONSE_MCP_TOOLS_URL: {{ .Services.ResponseApi.McpToolsUrl }}
+      
+      # Features
+      RESPONSE_MAX_TOOL_DEPTH: {{ .Services.ResponseApi.MaxToolDepth }}
+      
+      # Auth
+      RESPONSE_JWKS_URL: {{ .Auth.Keycloak.JwksUrl }}
+      RESPONSE_ISSUER: {{ .Auth.Keycloak.Issuer }}
+      RESPONSE_AUDIENCE: {{ .Auth.Keycloak.Account }}
+      
+      # Logging
+      RESPONSE_LOG_LEVEL: {{ .Services.ResponseApi.LogLevel }}
+      RESPONSE_LOG_FORMAT: {{ .Services.ResponseApi.LogFormat }}
+      RESPONSE_OTEL_ENABLED: {{ .Monitoring.Otel.Enabled }}
+    ports:
+      - "{{ .Services.ResponseApi.HttpPort }}:{{ .Services.ResponseApi.HttpPort }}"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:{{ .Services.ResponseApi.HttpPort }}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - jan-network
+    profiles:
+      - api
+      - full
+
+networks:
+  jan-network:
+    external: true
+`
+
+	// missingkey=error makes an absent config key a render error instead of the literal "<no value>".
+	t, err := template.New("services").Option("missingkey=error").Parse(tmpl)
+	if err != nil {
+		return fmt.Errorf("parse template: %w", err)
+	}
+	var buf bytes.Buffer
+	if err := t.Execute(&buf, g.config); err != nil {
+		return fmt.Errorf("execute template: %w", err)
+	}
+
+	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
+		return fmt.Errorf("create output directory: %w", err)
+	}
+	if err := os.WriteFile(outputPath, buf.Bytes(), 0644); err != nil {
+		return fmt.Errorf("write file: %w", err)
+	}
+	return nil
+}
+
+// GenerateMCP renders the MCP compose file (mcp-tools, vector-store) from the generator's config and writes it to
+// outputPath, creating parent directories as needed. It returns an error if the template references a missing
+// config key or if writing fails. The rendered file declares jan-network as an external network.
+func (g *Generator) GenerateMCP(outputPath string) error {
+	tmpl := `# Generated from config/defaults.yaml
+# DO NOT EDIT - Changes will be overwritten
+# To modify, edit config YAML and run: make compose-generate
+
+services:
+  # MCP Tools Service
+  mcp-tools:
+    build: ../services/mcp-tools
+    restart: unless-stopped
+    environment:
+      MCP_TOOLS_HTTP_PORT: {{ .Services.McpTools.HttpPort }}
+      MCP_TOOLS_LOG_LEVEL: {{ .Services.McpTools.LogLevel }}
+      MCP_TOOLS_LOG_FORMAT: {{ .Services.McpTools.LogFormat }}
+      MCP_SEARCH_ENGINE: {{ .Services.McpTools.SearchEngine }}
+      MCP_TOOLS_OTEL_ENABLED: {{ .Monitoring.Otel.Enabled }}
+      
+      # External Services
+      VECTOR_STORE_URL: {{ .Services.McpTools.VectorStoreUrl }}
+      SEARXNG_URL: {{ .Services.McpTools.SearxngUrl }}
+      SANDBOXFUSION_URL: {{ .Services.McpTools.SandboxFusionUrl }}
+    ports:
+      - "{{ .Services.McpTools.HttpPort }}:{{ .Services.McpTools.HttpPort }}"
+    healthcheck:
+      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:{{ .Services.McpTools.HttpPort }}/healthz"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - jan-network
+    profiles:
+      - mcp
+      - full
+
+  # Vector Store
+  vector-store:
+    image: qdrant/qdrant:v1.7.4
+    restart: unless-stopped
+    ports:
+      - "{{ .Services.McpTools.VectorStore.Port }}:6333"
+    volumes:
+      - vector-store-data:/qdrant/storage
+    networks:
+      - jan-network
+    profiles:
+      - mcp
+      - full
+
+volumes:
+  vector-store-data:
+
+networks:
+  jan-network:
+    external: true
+`
+
+	// missingkey=error makes an absent config key a render error instead of the literal "<no value>".
+	t, err := template.New("mcp").Option("missingkey=error").Parse(tmpl)
+	if err != nil {
+		return fmt.Errorf("parse template: %w", err)
+	}
+	var buf bytes.Buffer
+	if err := t.Execute(&buf, g.config); err != nil {
+		return fmt.Errorf("execute template: %w", err)
+	}
+
+	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
+		return fmt.Errorf("create output directory: %w", err)
+	}
+	if err := os.WriteFile(outputPath, buf.Bytes(), 0644); err != nil {
+		return fmt.Errorf("write file: %w", err)
+	}
+	return nil
+}
+
+// GenerateAll writes the infrastructure, services and MCP compose files into outputDir, stopping at the first failure.
+func (g *Generator) GenerateAll(outputDir string) error {
+	infraPath := filepath.Join(outputDir, "docker-compose.infrastructure.generated.yml")
+	servicesPath := filepath.Join(outputDir, "docker-compose.services.generated.yml")
+	mcpPath := filepath.Join(outputDir, "docker-compose.mcp.generated.yml")
+	if err := g.GenerateInfrastructure(infraPath); err != nil {
+		return fmt.Errorf("generate infrastructure: %w", err)
+	}
+	if err := g.GenerateServices(servicesPath); err != nil {
+		return fmt.Errorf("generate services: %w", err)
+	}
+	if err := g.GenerateMCP(mcpPath); err != nil {
+		return fmt.Errorf("generate mcp: %w", err)
+	}
+	return nil
+}
+
+// ValidateGenerated reads filePath and returns an error unless its contents decode as YAML into a string-keyed map.
+func (g *Generator) ValidateGenerated(filePath string) error {
+	raw, readErr := os.ReadFile(filePath)
+	if readErr != nil {
+		return fmt.Errorf("read file: %w", readErr)
+	}
+	// The decoded document is discarded; only whether it parses matters.
+	var decoded map[string]interface{}
+	parseErr := yaml.Unmarshal(raw, &decoded)
+	if parseErr == nil {
+		return nil
+	}
+	return fmt.Errorf("invalid YAML: %w", parseErr)
+}
diff --git a/pkg/config/k8s/examples/generate_values.go b/pkg/config/k8s/examples/generate_values.go
new file mode 100644
index 00000000..e2f54a62
--- /dev/null
+++ b/pkg/config/k8s/examples/generate_values.go
@@ -0,0 +1,55 @@
+// Package main demonstrates K8s values generation
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"github.com/janhq/jan-server/pkg/config"
+	"github.com/janhq/jan-server/pkg/config/k8s"
+	"gopkg.in/yaml.v3"
+)
+
+func main() {
+	// Load the "development" configuration; it is the base for both output files.
+	loader := config.NewConfigLoader("development", "../../config/defaults.yaml")
+	cfg, err := loader.Load(context.Background())
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error loading config: %v\n", err)
+		os.Exit(1)
+	}
+
+	generator := k8s.NewValuesGenerator(cfg)
+	fmt.Println("=== Generating Helm Values ===")
+
+	// Development values are written directly by the generator.
+	outputPath := "values-dev.yaml"
+	if err := generator.GenerateToFile(outputPath); err != nil {
+		fmt.Fprintf(os.Stderr, "Error writing file: %v\n", err)
+		os.Exit(1)
+	}
+	fmt.Printf("✓ Development values written to: %s\n", outputPath)
+
+	// Production values: apply the "production" overrides, then marshal and write them here.
+	prodPath := "values-prod.yaml"
+	values, err := generator.GenerateWithOverrides("production")
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error generating production values: %v\n", err)
+		os.Exit(1)
+	}
+
+	// A marshal failure must abort; otherwise a header-only file would be written and reported as success.
+	data, err := yaml.Marshal(values)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error marshaling production values: %v\n", err)
+		os.Exit(1)
+	}
+	header := fmt.Sprintf("# Helm values generated from configuration\n# Environment: production\n# Version: %s\n\n", cfg.Meta.Version)
+	if err := os.WriteFile(prodPath, []byte(header+string(data)), 0644); err != nil {
+		fmt.Fprintf(os.Stderr, "Error writing production file: %v\n", err)
+		os.Exit(1)
+	}
+	fmt.Printf("✓ Production values written to: %s\n", prodPath)
+	fmt.Println("\nDone!")
+}
diff --git a/pkg/config/k8s/examples/values-dev.yaml b/pkg/config/k8s/examples/values-dev.yaml
new file mode 100644
index 00000000..2e9af234
--- /dev/null
+++ b/pkg/config/k8s/examples/values-dev.yaml
@@ -0,0 +1,158 @@
+# Helm values generated from configuration
+# Environment: development
+# Version: 1.0.0
+
+global:
+    environment: development
+    imagePullPolicy: IfNotPresent
+    labels:
+        app.kubernetes.io/environment: development
+        app.kubernetes.io/name: jan-server
+        app.kubernetes.io/version: 1.0.0
+services:
+    llm-api:
+        enabled: true
+        replicaCount: 2
+        image:
+            repository: jan-llm-api
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8080
+            targetPort: 8080
+        resources:
+            limits:
+                cpu: 1000m
+                memory: 1Gi
+            requests:
+                cpu: 500m
+                memory: 512Mi
+        configMap:
+            LOG_FORMAT: json
+            LOG_LEVEL: info
+        secrets:
+            - database-credentials
+            - keycloak-credentials
+        healthChecks:
+            livenessProbe:
+                httpGet:
+                    path: /health
+                    port: 8080
+                initialDelaySeconds: 30
+                periodSeconds: 10
+                timeoutSeconds: 5
+                failureThreshold: 3
+            readinessProbe:
+                httpGet:
+                    path: /health
+                    port: 8080
+                initialDelaySeconds: 10
+                periodSeconds: 5
+                timeoutSeconds: 3
+                failureThreshold: 3
+    mcp-tools:
+        enabled: true
+        replicaCount: 2
+        image:
+            repository: jan-mcp-tools
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8091
+            targetPort: 8091
+        resources:
+            limits:
+                cpu: 500m
+                memory: 512Mi
+            requests:
+                cpu: 250m
+                memory: 256Mi
+        configMap:
+            LOG_FORMAT: json
+            LOG_LEVEL: info
+        healthChecks:
+            livenessProbe:
+                httpGet:
+                    path: /health
+                    port: 8091
+                initialDelaySeconds: 20
+                periodSeconds: 10
+                timeoutSeconds: 5
+                failureThreshold: 3
+            readinessProbe:
+                httpGet:
+                    path: /health
+                    port: 8091
+                initialDelaySeconds: 10
+                periodSeconds: 5
+                timeoutSeconds: 3
+                failureThreshold: 3
+    media-api:
+        enabled: true
+        replicaCount: 2
+        image:
+            repository: jan-media-api
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8285
+            targetPort: 8285
+        resources:
+            limits:
+                cpu: 500m
+                memory: 512Mi
+            requests:
+                cpu: 250m
+                memory: 256Mi
+    response-api:
+        enabled: true
+        replicaCount: 2
+        image:
+            repository: jan-response-api
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8082
+            targetPort: 8082
+        resources:
+            limits:
+                cpu: 500m
+                memory: 512Mi
+            requests:
+                cpu: 250m
+                memory: 256Mi
+infrastructure:
+    database:
+        postgres:
+            enabled: true
+            host: api-db
+            port: 5432
+            database: jan_llm_api
+            user: jan_user
+            passwordSecret: postgres-password
+            sslMode: disable
+            maxConnections: 100
+            resources:
+                limits:
+                    cpu: 2000m
+                    memory: 2Gi
+                requests:
+                    cpu: 1000m
+                    memory: 1Gi
+            persistence:
+                enabled: true
+                size: 10Gi
+    auth:
+        keycloak:
+            enabled: true
+            baseURL: http://localhost:8085
+            adminUser: admin
+            adminRealm: master
+            passwordSecret: keycloak-admin-password
+            resources:
+                limits:
+                    cpu: 1000m
+                    memory: 1Gi
+                requests:
+                    cpu: 500m
+                    memory: 512Mi
diff --git a/pkg/config/k8s/examples/values-prod.yaml b/pkg/config/k8s/examples/values-prod.yaml
new file mode 100644
index 00000000..c1226a2a
--- /dev/null
+++ b/pkg/config/k8s/examples/values-prod.yaml
@@ -0,0 +1,158 @@
+# Helm values generated from configuration
+# Environment: production
+# Version: 1.0.0
+
+global:
+    environment: development
+    imagePullPolicy: Always
+    labels:
+        app.kubernetes.io/environment: development
+        app.kubernetes.io/name: jan-server
+        app.kubernetes.io/version: 1.0.0
+services:
+    llm-api:
+        enabled: true
+        replicaCount: 3
+        image:
+            repository: jan-llm-api
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8080
+            targetPort: 8080
+        resources:
+            limits:
+                cpu: 1000m
+                memory: 1Gi
+            requests:
+                cpu: 500m
+                memory: 512Mi
+        configMap:
+            LOG_FORMAT: json
+            LOG_LEVEL: info
+        secrets:
+            - database-credentials
+            - keycloak-credentials
+        healthChecks:
+            livenessProbe:
+                httpGet:
+                    path: /health
+                    port: 8080
+                initialDelaySeconds: 30
+                periodSeconds: 10
+                timeoutSeconds: 5
+                failureThreshold: 3
+            readinessProbe:
+                httpGet:
+                    path: /health
+                    port: 8080
+                initialDelaySeconds: 10
+                periodSeconds: 5
+                timeoutSeconds: 3
+                failureThreshold: 3
+    mcp-tools:
+        enabled: true
+        replicaCount: 3
+        image:
+            repository: jan-mcp-tools
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8091
+            targetPort: 8091
+        resources:
+            limits:
+                cpu: 500m
+                memory: 512Mi
+            requests:
+                cpu: 250m
+                memory: 256Mi
+        configMap:
+            LOG_FORMAT: json
+            LOG_LEVEL: info
+        healthChecks:
+            livenessProbe:
+                httpGet:
+                    path: /health
+                    port: 8091
+                initialDelaySeconds: 20
+                periodSeconds: 10
+                timeoutSeconds: 5
+                failureThreshold: 3
+            readinessProbe:
+                httpGet:
+                    path: /health
+                    port: 8091
+                initialDelaySeconds: 10
+                periodSeconds: 5
+                timeoutSeconds: 3
+                failureThreshold: 3
+    media-api:
+        enabled: true
+        replicaCount: 3
+        image:
+            repository: jan-media-api
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8285
+            targetPort: 8285
+        resources:
+            limits:
+                cpu: 500m
+                memory: 512Mi
+            requests:
+                cpu: 250m
+                memory: 256Mi
+    response-api:
+        enabled: true
+        replicaCount: 3
+        image:
+            repository: jan-response-api
+            tag: 1.0.0
+        service:
+            type: ClusterIP
+            port: 8082
+            targetPort: 8082
+        resources:
+            limits:
+                cpu: 500m
+                memory: 512Mi
+            requests:
+                cpu: 250m
+                memory: 256Mi
+infrastructure:
+    database:
+        postgres:
+            enabled: true
+            host: api-db
+            port: 5432
+            database: jan_llm_api
+            user: jan_user
+            passwordSecret: postgres-password
+            sslMode: disable
+            maxConnections: 100
+            resources:
+                limits:
+                    cpu: 2000m
+                    memory: 2Gi
+                requests:
+                    cpu: 1000m
+                    memory: 1Gi
+            persistence:
+                enabled: true
+                size: 50Gi
+    auth:
+        keycloak:
+            enabled: true
+            baseURL: http://localhost:8085
+            adminUser: admin
+            adminRealm: master
+            passwordSecret: keycloak-admin-password
+            resources:
+                limits:
+                    cpu: 1000m
+                    memory: 1Gi
+                requests:
+                    cpu: 500m
+                    memory: 512Mi
diff --git a/pkg/config/k8s/values_generator.go b/pkg/config/k8s/values_generator.go
new file mode 100644
index 00000000..0f4e870b
--- /dev/null
+++ b/pkg/config/k8s/values_generator.go
@@ -0,0 +1,493 @@
+// Package k8s provides Kubernetes Helm values generation from configuration
+package k8s
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/janhq/jan-server/pkg/config"
+	"gopkg.in/yaml.v3"
+)
+
+// ValuesGenerator generates Helm values.yaml from Config.
+// It wraps the configuration that all Generate* methods read from.
+type ValuesGenerator struct {
+	// config is the source configuration; set once by NewValuesGenerator.
+	config *config.Config
+}
+
+// NewValuesGenerator returns a Helm values generator bound to cfg.
+// The pointer is stored as given; no copy of the configuration is made.
+func NewValuesGenerator(cfg *config.Config) *ValuesGenerator {
+	generator := new(ValuesGenerator)
+	generator.config = cfg
+	return generator
+}
+
+// HelmValues represents the Helm chart values structure.
+// It is the root document that GenerateToFile and GenerateToString marshal to YAML.
+type HelmValues struct {
+	// Global holds chart-wide settings.
+	Global         GlobalValues             `yaml:"global"`
+	// Services is keyed by service name (e.g. "llm-api").
+	Services       map[string]ServiceValues `yaml:"services"`
+	// Infrastructure holds the database and auth component values.
+	Infrastructure InfrastructureValues     `yaml:"infrastructure"`
+}
+
+// GlobalValues contains global Helm chart settings.
+// Generate fills Environment, ImagePullPolicy and Labels; ImageRegistry and
+// Annotations are left empty by the generator in this file and are therefore
+// omitted from the YAML unless a caller sets them.
+type GlobalValues struct {
+	Environment     string            `yaml:"environment"`
+	ImageRegistry   string            `yaml:"imageRegistry,omitempty"`
+	ImagePullPolicy string            `yaml:"imagePullPolicy,omitempty"`
+	Labels          map[string]string `yaml:"labels,omitempty"`
+	Annotations     map[string]string `yaml:"annotations,omitempty"`
+}
+
+// ServiceValues contains service-specific Helm values.
+// One instance is stored per entry of HelmValues.Services.
+type ServiceValues struct {
+	Enabled      bool              `yaml:"enabled"`
+	ReplicaCount int               `yaml:"replicaCount,omitempty"`
+	Image        ImageConfig       `yaml:"image,omitempty"`
+	Service      ServiceConfig     `yaml:"service,omitempty"`
+	Resources    ResourceConfig    `yaml:"resources,omitempty"`
+	// Env is not populated by generateServices in this file.
+	Env          map[string]string `yaml:"env,omitempty"`
+	// ConfigMap carries plain key/value settings (generateServices puts the
+	// log level and format here).
+	ConfigMap    map[string]string `yaml:"configMap,omitempty"`
+	// Secrets lists secret names, not secret values.
+	Secrets      []string          `yaml:"secrets,omitempty"`
+	HealthChecks HealthCheckConfig `yaml:"healthChecks,omitempty"`
+}
+
+// ImageConfig contains Docker image configuration.
+// generateServices sets Repository to a fixed name per service and Tag to the
+// configuration's meta version; PullPolicy is left empty there.
+type ImageConfig struct {
+	Repository string `yaml:"repository"`
+	Tag        string `yaml:"tag"`
+	PullPolicy string `yaml:"pullPolicy,omitempty"`
+}
+
+// ServiceConfig contains Kubernetes service configuration.
+// generateServices always emits Type "ClusterIP" with Port equal to
+// TargetPort; NodePort is never set by the generator in this file.
+type ServiceConfig struct {
+	Type       string `yaml:"type"`
+	Port       int    `yaml:"port"`
+	TargetPort int    `yaml:"targetPort,omitempty"`
+	NodePort   int    `yaml:"nodePort,omitempty"`
+}
+
+// ResourceConfig contains resource limits and requests.
+// Both halves use the same ResourceSpec shape (CPU and memory strings).
+type ResourceConfig struct {
+	Limits   ResourceSpec `yaml:"limits,omitempty"`
+	Requests ResourceSpec `yaml:"requests,omitempty"`
+}
+
+// ResourceSpec contains CPU and memory specs.
+// Values are passed through as strings; the generator uses forms such as
+// "500m" for CPU and "512Mi" for memory.
+type ResourceSpec struct {
+	CPU    string `yaml:"cpu,omitempty"`
+	Memory string `yaml:"memory,omitempty"`
+}
+
+// HealthCheckConfig contains liveness and readiness probes.
+// generateServices fills it only for llm-api and mcp-tools.
+type HealthCheckConfig struct {
+	LivenessProbe  ProbeConfig `yaml:"livenessProbe,omitempty"`
+	ReadinessProbe ProbeConfig `yaml:"readinessProbe,omitempty"`
+}
+
+// ProbeConfig contains probe configuration.
+// All timing fields are whole seconds. SuccessThreshold is never set by the
+// generator in this file, so it is omitted from the generated YAML.
+type ProbeConfig struct {
+	HTTPGet             HTTPGetAction `yaml:"httpGet,omitempty"`
+	InitialDelaySeconds int           `yaml:"initialDelaySeconds,omitempty"`
+	PeriodSeconds       int           `yaml:"periodSeconds,omitempty"`
+	TimeoutSeconds      int           `yaml:"timeoutSeconds,omitempty"`
+	SuccessThreshold    int           `yaml:"successThreshold,omitempty"`
+	FailureThreshold    int           `yaml:"failureThreshold,omitempty"`
+}
+
+// HTTPGetAction contains HTTP probe configuration.
+// The generator in this file probes "/health" on the service's HTTP port and
+// leaves Scheme empty.
+type HTTPGetAction struct {
+	Path   string `yaml:"path"`
+	Port   int    `yaml:"port"`
+	Scheme string `yaml:"scheme,omitempty"`
+}
+
+// InfrastructureValues contains infrastructure component values.
+// It groups the database and authentication sections produced by
+// generateInfrastructure.
+type InfrastructureValues struct {
+	Database DatabaseValues `yaml:"database"`
+	Auth     AuthValues     `yaml:"auth"`
+}
+
+// DatabaseValues contains database configuration for Helm.
+// PostgreSQL is the only database section defined here.
+type DatabaseValues struct {
+	Postgres PostgresValues `yaml:"postgres"`
+}
+
+// PostgresValues contains PostgreSQL Helm values.
+// Connection fields are copied from the loaded configuration by
+// generateInfrastructure; resources and persistence are fixed there.
+type PostgresValues struct {
+	Enabled        bool              `yaml:"enabled"`
+	Host           string            `yaml:"host"`
+	Port           int               `yaml:"port"`
+	Database       string            `yaml:"database"`
+	User           string            `yaml:"user"`
+	// PasswordSecret is the name of a secret, not the password itself.
+	PasswordSecret string            `yaml:"passwordSecret"`
+	SSLMode        string            `yaml:"sslMode"`
+	MaxConnections int               `yaml:"maxConnections"`
+	Resources      ResourceConfig    `yaml:"resources,omitempty"`
+	Persistence    PersistenceConfig `yaml:"persistence,omitempty"`
+}
+
+// PersistenceConfig contains persistence configuration.
+// Size is a quantity string such as "10Gi"; StorageClass is never set by the
+// generator in this file.
+type PersistenceConfig struct {
+	Enabled      bool   `yaml:"enabled"`
+	StorageClass string `yaml:"storageClass,omitempty"`
+	Size         string `yaml:"size"`
+}
+
+// AuthValues contains authentication configuration for Helm.
+// Keycloak is the only auth section defined here.
+type AuthValues struct {
+	Keycloak KeycloakValues `yaml:"keycloak"`
+}
+
+// KeycloakValues contains Keycloak Helm values.
+// URL and admin fields are copied from the loaded configuration by
+// generateInfrastructure; resources are fixed there.
+type KeycloakValues struct {
+	Enabled        bool           `yaml:"enabled"`
+	BaseURL        string         `yaml:"baseURL"`
+	PublicURL      string         `yaml:"publicURL,omitempty"`
+	AdminUser      string         `yaml:"adminUser"`
+	AdminRealm     string         `yaml:"adminRealm"`
+	// PasswordSecret is the name of a secret, not the password itself.
+	PasswordSecret string         `yaml:"passwordSecret"`
+	Resources      ResourceConfig `yaml:"resources,omitempty"`
+}
+
+// Generate creates Helm values from Config.
+//
+// The result contains the global section (environment, pull policy
+// "IfNotPresent" and the standard app.kubernetes.io labels), the
+// infrastructure section and one entry per service.
+//
+// It returns an error when the generator has no configuration or when
+// service generation fails.
+func (g *ValuesGenerator) Generate() (*HelmValues, error) {
+	// Report a missing configuration as an error instead of letting the
+	// field accesses below panic on a nil pointer.
+	if g == nil || g.config == nil {
+		return nil, fmt.Errorf("generate helm values: config is nil")
+	}
+
+	values := &HelmValues{
+		Global: GlobalValues{
+			Environment:     g.config.Meta.Environment,
+			ImagePullPolicy: "IfNotPresent",
+			Labels: map[string]string{
+				"app.kubernetes.io/name":        "jan-server",
+				"app.kubernetes.io/version":     g.config.Meta.Version,
+				"app.kubernetes.io/environment": g.config.Meta.Environment,
+			},
+		},
+		Services:       make(map[string]ServiceValues),
+		Infrastructure: g.generateInfrastructure(),
+	}
+
+	// Generate service values
+	if err := g.generateServices(values); err != nil {
+		return nil, err
+	}
+
+	return values, nil
+}
+
+// generateServices fills values.Services with one entry per Jan service:
+// "llm-api", "mcp-tools", "media-api" and "response-api".
+//
+// Ports and log settings are read from the loaded configuration and the image
+// tag is the configuration's meta version. Replica counts, resource sizes and
+// probe timings are fixed here. Only llm-api and mcp-tools get health checks
+// and a ConfigMap, and only llm-api lists secrets. The returned error is
+// always nil.
+func (g *ValuesGenerator) generateServices(values *HelmValues) error {
+	services := &g.config.Services
+	tag := g.config.Meta.Version
+
+	// image pins a repository to the configured version.
+	image := func(repository string) ImageConfig {
+		return ImageConfig{Repository: repository, Tag: tag}
+	}
+	// clusterIP exposes port unchanged through a ClusterIP service.
+	clusterIP := func(port int) ServiceConfig {
+		return ServiceConfig{Type: "ClusterIP", Port: port, TargetPort: port}
+	}
+	// sized builds a limits/requests pair.
+	sized := func(limitCPU, limitMemory, requestCPU, requestMemory string) ResourceConfig {
+		return ResourceConfig{
+			Limits:   ResourceSpec{CPU: limitCPU, Memory: limitMemory},
+			Requests: ResourceSpec{CPU: requestCPU, Memory: requestMemory},
+		}
+	}
+	// healthProbe builds an HTTP GET probe on /health that fails after 3 misses.
+	healthProbe := func(port, initialDelay, period, timeout int) ProbeConfig {
+		return ProbeConfig{
+			HTTPGet:             HTTPGetAction{Path: "/health", Port: port},
+			InitialDelaySeconds: initialDelay,
+			PeriodSeconds:       period,
+			TimeoutSeconds:      timeout,
+			FailureThreshold:    3,
+		}
+	}
+	// logSettings builds the ConfigMap entries shared by the logging services.
+	logSettings := func(level, format string) map[string]string {
+		return map[string]string{"LOG_LEVEL": level, "LOG_FORMAT": format}
+	}
+
+	// LLM API: the largest footprint and the slowest liveness start (30s).
+	llmPort := services.LLMAPI.HTTPPort
+	values.Services["llm-api"] = ServiceValues{
+		Enabled:      true,
+		ReplicaCount: 2,
+		Image:        image("jan-llm-api"),
+		Service:      clusterIP(llmPort),
+		Resources:    sized("1000m", "1Gi", "500m", "512Mi"),
+		HealthChecks: HealthCheckConfig{
+			LivenessProbe:  healthProbe(llmPort, 30, 10, 5),
+			ReadinessProbe: healthProbe(llmPort, 10, 5, 3),
+		},
+		ConfigMap: logSettings(services.LLMAPI.LogLevel, services.LLMAPI.LogFormat),
+		Secrets:   []string{"database-credentials", "keycloak-credentials"},
+	}
+
+	// MCP Tools
+	mcpPort := services.MCPTools.HTTPPort
+	values.Services["mcp-tools"] = ServiceValues{
+		Enabled:      true,
+		ReplicaCount: 2,
+		Image:        image("jan-mcp-tools"),
+		Service:      clusterIP(mcpPort),
+		Resources:    sized("500m", "512Mi", "250m", "256Mi"),
+		HealthChecks: HealthCheckConfig{
+			LivenessProbe:  healthProbe(mcpPort, 20, 10, 5),
+			ReadinessProbe: healthProbe(mcpPort, 10, 5, 3),
+		},
+		ConfigMap: logSettings(services.MCPTools.LogLevel, services.MCPTools.LogFormat),
+	}
+
+	// Media API
+	values.Services["media-api"] = ServiceValues{
+		Enabled:      true,
+		ReplicaCount: 2,
+		Image:        image("jan-media-api"),
+		Service:      clusterIP(services.MediaAPI.HTTPPort),
+		Resources:    sized("500m", "512Mi", "250m", "256Mi"),
+	}
+
+	// Response API
+	values.Services["response-api"] = ServiceValues{
+		Enabled:      true,
+		ReplicaCount: 2,
+		Image:        image("jan-response-api"),
+		Service:      clusterIP(services.ResponseAPI.HTTPPort),
+		Resources:    sized("500m", "512Mi", "250m", "256Mi"),
+	}
+
+	return nil
+}
+
+// generateInfrastructure creates infrastructure Helm values
+func (g *ValuesGenerator) generateInfrastructure() InfrastructureValues {
+	return InfrastructureValues{
+		Database: DatabaseValues{
+			Postgres: PostgresValues{
+				Enabled:        true,
+				Host:           g.config.Infrastructure.Database.Postgres.Host,
+				Port:           g.config.Infrastructure.Database.Postgres.Port,
+				Database:       g.config.Infrastructure.Database.Postgres.Database,
+				User:           g.config.Infrastructure.Database.Postgres.User,
+				PasswordSecret: "postgres-password",
+				SSLMode:        g.config.Infrastructure.Database.Postgres.SSLMode,
+				MaxConnections: g.config.Infrastructure.Database.Postgres.MaxConnections,
+				Resources: ResourceConfig{
+					Limits: ResourceSpec{
+						CPU:    "2000m",
+						Memory: "2Gi",
+					},
+					Requests: ResourceSpec{
+						CPU:    "1000m",
+						Memory: "1Gi",
+					},
+				},
+				Persistence: PersistenceConfig{
+					Enabled: true,
+					Size:    "10Gi",
+				},
+			},
+		},
+		Auth: AuthValues{
+			Keycloak: KeycloakValues{
+				Enabled:        true,
+				BaseURL:        g.config.Infrastructure.Auth.Keycloak.BaseURL,
+				PublicURL:      g.config.Infrastructure.Auth.Keycloak.PublicURL,
+				AdminUser:      g.config.Infrastructure.Auth.Keycloak.AdminUser,
+				AdminRealm:     g.config.Infrastructure.Auth.Keycloak.AdminRealm,
+				PasswordSecret: "keycloak-admin-password",
+				Resources: ResourceConfig{
+					Limits: ResourceSpec{
+						CPU:    "1000m",
+						Memory: "1Gi",
+					},
+					Requests: ResourceSpec{
+						CPU:    "500m",
+						Memory: "512Mi",
+					},
+				},
+			},
+		},
+	}
+}
+
+// GenerateToFile writes Helm values to a file.
+//
+// The content is exactly what GenerateToString returns (header comment plus
+// YAML document), so the two outputs cannot drift apart. The file is created
+// or truncated at outputPath with mode 0644.
+//
+// Errors from generation and marshalling are returned as produced by
+// GenerateToString ("generate values: ..." / "marshal YAML: ..."); a failed
+// write is wrapped as "write file: ...".
+func (g *ValuesGenerator) GenerateToFile(outputPath string) error {
+	output, err := g.GenerateToString()
+	if err != nil {
+		// Already wrapped with the stage name by GenerateToString.
+		return err
+	}
+
+	if err := os.WriteFile(outputPath, []byte(output), 0644); err != nil {
+		return fmt.Errorf("write file: %w", err)
+	}
+
+	return nil
+}
+
+// GenerateToString returns Helm values as a YAML string
+func (g *ValuesGenerator) GenerateToString() (string, error) {
+	values, err := g.Generate()
+	if err != nil {
+		return "", fmt.Errorf("generate values: %w", err)
+	}
+
+	data, err := yaml.Marshal(values)
+	if err != nil {
+		return "", fmt.Errorf("marshal YAML: %w", err)
+	}
+
+	header := fmt.Sprintf("# Helm values generated from configuration\n# Environment: %s\n# Version: %s\n\n",
+		g.config.Meta.Environment, g.config.Meta.Version)
+
+	return header + string(data), nil
+}
+
+// GenerateWithOverrides generates the base Helm values and then applies the
+// override set for the named environment.
+//
+// The name is matched case-insensitively against "production", "staging" and
+// "development"; any other name returns the base values unchanged.
+func (g *ValuesGenerator) GenerateWithOverrides(environment string) (*HelmValues, error) {
+	values, err := g.Generate()
+	if err != nil {
+		return nil, err
+	}
+
+	// Dispatch table from normalized environment name to its override routine.
+	overrides := map[string]func(*HelmValues){
+		"production":  g.applyProductionOverrides,
+		"staging":     g.applyStagingOverrides,
+		"development": g.applyDevelopmentOverrides,
+	}
+	if apply, known := overrides[strings.ToLower(environment)]; known {
+		apply(values)
+	}
+
+	return values, nil
+}
+
+// applyProductionOverrides applies production-specific settings
+func (g *ValuesGenerator) applyProductionOverrides(values *HelmValues) {
+	values.Global.ImagePullPolicy = "Always"
+
+	// Increase replica counts
+	for name, svc := range values.Services {
+		svc.ReplicaCount = 3
+		values.Services[name] = svc
+	}
+
+	// Enable persistence
+	values.Infrastructure.Database.Postgres.Persistence.Enabled = true
+	values.Infrastructure.Database.Postgres.Persistence.Size = "50Gi"
+}
+
+// applyStagingOverrides applies staging-specific settings
+func (g *ValuesGenerator) applyStagingOverrides(values *HelmValues) {
+	values.Global.ImagePullPolicy = "IfNotPresent"
+
+	// Moderate replica counts
+	for name, svc := range values.Services {
+		svc.ReplicaCount = 2
+		values.Services[name] = svc
+	}
+
+	values.Infrastructure.Database.Postgres.Persistence.Enabled = true
+	values.Infrastructure.Database.Postgres.Persistence.Size = "20Gi"
+}
+
+// applyDevelopmentOverrides mutates values in place for development: images
+// are never pulled, every service runs a single replica with a reduced
+// resource footprint, and PostgreSQL persistence is switched off (its
+// configured size is left untouched).
+func (g *ValuesGenerator) applyDevelopmentOverrides(values *HelmValues) {
+	// Footprint shared by every service in development.
+	devResources := ResourceConfig{
+		Limits:   ResourceSpec{CPU: "500m", Memory: "512Mi"},
+		Requests: ResourceSpec{CPU: "100m", Memory: "128Mi"},
+	}
+
+	values.Global.ImagePullPolicy = "Never"
+
+	// Map values are copies, so each entry is read, changed and stored back.
+	for key := range values.Services {
+		entry := values.Services[key]
+		entry.ReplicaCount = 1
+		entry.Resources = devResources
+		values.Services[key] = entry
+	}
+
+	values.Infrastructure.Database.Postgres.Persistence.Enabled = false
+}
diff --git a/pkg/config/loader.go b/pkg/config/loader.go
new file mode 100644
index 00000000..1414f5bf
--- /dev/null
+++ b/pkg/config/loader.go
@@ -0,0 +1,254 @@
+package config
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+	"sort"
+	"strings"
+)
+
+// ConfigLoader loads configuration from multiple sources with explicit precedence.
+// All sources write into the same Config, so a source applied later overrides
+// the fields it sets.
+type ConfigLoader struct {
+	config     *Config                   // configuration being assembled; returned by Get and Load
+	sources    []ConfigSource            // sources applied when loading
+	provenance map[string]ProvenanceInfo // filled by trackProvenance, currently one entry per source name
+}
+
+// ConfigSource represents a source of configuration values.
+// The loader hands the same *Config to every source in turn.
+type ConfigSource interface {
+	// Load applies configuration from this source to the config
+	Load(ctx context.Context, cfg *Config) error
+
+	// Priority returns the precedence priority (higher = takes precedence)
+	// 100 = Struct defaults (lowest)
+	// 200 = YAML defaults
+	// 300 = Environment YAML
+	// 400 = Secrets
+	// 500 = Environment variables
+	// 600 = CLI flags (highest)
+	//
+	// The default source stack in this package uses 100, 200, 300 and 500;
+	// no source with priority 400 or 600 is part of that stack.
+	Priority() int
+
+	// Name returns the human-readable name of this source
+	// (used in error messages and as the provenance key).
+	Name() string
+}
+
+// ProvenanceInfo tracks where a configuration value came from.
+// trackProvenance currently records one entry per source and fills Source,
+// Priority and Path only; Value is left unset there.
+type ProvenanceInfo struct {
+	Source   string      // Name of the ConfigSource
+	Priority int         // Priority level
+	Value    interface{} // The actual value
+	Path     string      // Config path (e.g., "infrastructure.database.postgres.port")
+}
+
+// LoaderOption configures the ConfigLoader.
+// Options run inside New before any source is loaded; a non-nil error aborts New.
+type LoaderOption func(*ConfigLoader) error
+
+// New builds a ConfigLoader, applies opts, and immediately loads and
+// validates the configuration.
+//
+// When no option installs sources, the default stack is used: struct
+// defaults, config/defaults.yaml, the environment YAML for the given
+// environment, and environment variables. Loading and validation are done by
+// Load, so errors carry the same "load from <source>: ..." and
+// "validation failed: ..." prefixes.
+func New(ctx context.Context, environment string, opts ...LoaderOption) (*ConfigLoader, error) {
+	loader := &ConfigLoader{
+		config:     &Config{},
+		provenance: make(map[string]ProvenanceInfo),
+	}
+
+	for _, configure := range opts {
+		if err := configure(loader); err != nil {
+			return nil, fmt.Errorf("apply option: %w", err)
+		}
+	}
+
+	// Fall back to the default source stack when the options supplied none.
+	if len(loader.sources) == 0 {
+		loader.sources = []ConfigSource{
+			&StructDefaultSource{},
+			&YAMLDefaultSource{path: "config/defaults.yaml"},
+			&YAMLEnvSource{environment: environment},
+			&EnvVarSource{},
+		}
+	}
+
+	// Load runs every source, records provenance and validates the result.
+	if _, err := loader.Load(ctx); err != nil {
+		return nil, err
+	}
+
+	return loader, nil
+}
+
+// NewConfigLoader returns a loader wired with the default source stack but
+// does not load anything; call Load to populate the configuration.
+// An empty defaultsPath falls back to "config/defaults.yaml".
+func NewConfigLoader(environment, defaultsPath string) *ConfigLoader {
+	yamlDefaults := defaultsPath
+	if yamlDefaults == "" {
+		yamlDefaults = "config/defaults.yaml"
+	}
+
+	// Listed from lowest to highest priority (100, 200, 300, 500).
+	stack := []ConfigSource{
+		&StructDefaultSource{},
+		&YAMLDefaultSource{path: yamlDefaults},
+		&YAMLEnvSource{environment: environment},
+		&EnvVarSource{},
+	}
+
+	loader := new(ConfigLoader)
+	loader.config = &Config{}
+	loader.provenance = make(map[string]ProvenanceInfo)
+	loader.sources = stack
+	return loader
+}
+
+// Load executes the configuration loading process.
+//
+// Sources are applied from lowest to highest Priority so that a
+// higher-priority source always overrides a lower one, regardless of the
+// order in which the sources were registered. Sources with equal priority
+// keep their registration order. After every source has been applied the
+// result is validated.
+//
+// Errors are wrapped as "load from <source name>: ..." or
+// "validation failed: ...".
+func (l *ConfigLoader) Load(ctx context.Context) (*Config, error) {
+	// Sort a copy: the slice may be shared with the caller of WithSources.
+	ordered := make([]ConfigSource, len(l.sources))
+	copy(ordered, l.sources)
+	sort.SliceStable(ordered, func(i, j int) bool {
+		return ordered[i].Priority() < ordered[j].Priority()
+	})
+
+	for _, source := range ordered {
+		if err := source.Load(ctx, l.config); err != nil {
+			return nil, fmt.Errorf("load from %s: %w", source.Name(), err)
+		}
+
+		// Track provenance
+		l.trackProvenance(source)
+	}
+
+	// Validate final configuration
+	if err := l.Validate(l.config); err != nil {
+		return nil, fmt.Errorf("validation failed: %w", err)
+	}
+
+	return l.config, nil
+}
+
+// WithSources sets custom configuration sources.
+// The loader keeps the given slice as-is (no copy), replacing any sources set
+// before. Passing no sources leaves the loader empty, in which case New falls
+// back to its default stack.
+func WithSources(sources ...ConfigSource) LoaderOption {
+	return func(l *ConfigLoader) error {
+		l.sources = sources
+		return nil
+	}
+}
+
+// Get returns the loaded configuration.
+// The returned pointer is the loader's own Config, not a copy.
+func (l *ConfigLoader) Get() *Config {
+	return l.config
+}
+
+// AllProvenance returns all provenance information.
+// The returned map is the loader's internal map, not a copy, so changes made
+// by the caller are visible to the loader.
+func (l *ConfigLoader) AllProvenance() map[string]ProvenanceInfo {
+	return l.provenance
+}
+
+// Provenance returns a human-readable string of all configuration sources.
+//
+// The first section lists the sources from lowest to highest priority
+// (sources with equal priority keep their registration order). The second
+// section lists the recorded provenance entries sorted by key, so the output
+// is identical across calls instead of following Go's randomized map
+// iteration order.
+func (l *ConfigLoader) Provenance() string {
+	var result strings.Builder
+	result.WriteString("Configuration Sources (priority order):\n")
+
+	// Sort a copy so the loader's own source order is not disturbed.
+	sortedSources := make([]ConfigSource, len(l.sources))
+	copy(sortedSources, l.sources)
+	sort.SliceStable(sortedSources, func(i, j int) bool {
+		return sortedSources[i].Priority() < sortedSources[j].Priority()
+	})
+
+	for _, source := range sortedSources {
+		fmt.Fprintf(&result, "  [%d] %s\n", source.Priority(), source.Name())
+	}
+
+	result.WriteString("\nConfiguration Values by Source:\n")
+
+	paths := make([]string, 0, len(l.provenance))
+	for path := range l.provenance {
+		paths = append(paths, path)
+	}
+	sort.Strings(paths)
+
+	for _, path := range paths {
+		info := l.provenance[path]
+		fmt.Fprintf(&result, "  %s: %s (priority %d)\n", path, info.Source, info.Priority)
+	}
+
+	return result.String()
+}
+
+// Validate checks the minimum set of required settings and returns an error
+// describing the first violation found, or nil when all checks pass.
+//
+// Checked, in order: meta version and environment are non-empty, the
+// PostgreSQL host is non-empty and its port is within 1-65535, the Keycloak
+// base URL is non-empty, and the LLM API HTTP port is within 1-65535.
+func (l *ConfigLoader) Validate(cfg *Config) error {
+	inPortRange := func(port int) bool { return port >= 1 && port <= 65535 }
+	postgres := &cfg.Infrastructure.Database.Postgres
+
+	// Cases are evaluated top to bottom; the first failing check wins.
+	switch {
+	case cfg.Meta.Version == "":
+		return fmt.Errorf("meta.version is required")
+	case cfg.Meta.Environment == "":
+		return fmt.Errorf("meta.environment is required")
+	case postgres.Host == "":
+		return fmt.Errorf("infrastructure.database.postgres.host is required")
+	case !inPortRange(postgres.Port):
+		return fmt.Errorf("infrastructure.database.postgres.port must be between 1 and 65535")
+	case cfg.Infrastructure.Auth.Keycloak.BaseURL == "":
+		return fmt.Errorf("infrastructure.auth.keycloak.base_url is required")
+	case !inPortRange(cfg.Services.LLMAPI.HTTPPort):
+		return fmt.Errorf("services.llm_api.http_port must be between 1 and 65535")
+	}
+
+	return nil
+}
+
+// trackProvenance records where configuration values came from
+// This is a simplified implementation - full implementation will track all fields
+func (l *ConfigLoader) trackProvenance(source ConfigSource) {
+	// This will be expanded to track individual fields in the next iteration
+	// For now, we just record that this source was applied
+	l.provenance[source.Name()] = ProvenanceInfo{
+		Source:   source.Name(),
+		Priority: source.Priority(),
+		Path:     source.Name(),
+	}
+}
+
+// MergeStrategy defines how to merge configuration values.
+// It selects the behaviour of the merge helper below.
+type MergeStrategy int
+
+const (
+	// Replace strategy: higher priority completely replaces lower priority
+	Replace MergeStrategy = iota
+	// Merge strategy: merge maps and slices (for complex objects)
+	// NOTE(review): the merge helper in this file only merges maps; slices
+	// fall back to replacement.
+	Merge
+)
+
+// merge applies a value from a source to the target based on strategy.
+//
+// Invalid or zero source values never override the target. With Replace the
+// target is overwritten when it is settable. With Merge, map sources are
+// merged key by key into the target map (allocating it when nil); any other
+// kind falls back to replacement. An unknown strategy does nothing.
+func merge(target, source reflect.Value, strategy MergeStrategy) {
+	if !source.IsValid() || source.IsZero() {
+		return // Don't override with zero values
+	}
+
+	switch strategy {
+	case Replace:
+		if target.CanSet() {
+			target.Set(source)
+		}
+	case Merge:
+		// For maps, merge instead of replace
+		if target.Kind() == reflect.Map && source.Kind() == reflect.Map {
+			if target.IsNil() {
+				// A nil map that cannot be set has nowhere to store a fresh
+				// map; bail out instead of panicking inside reflect.
+				if !target.CanSet() {
+					return
+				}
+				target.Set(reflect.MakeMap(target.Type()))
+			}
+			iter := source.MapRange()
+			for iter.Next() {
+				target.SetMapIndex(iter.Key(), iter.Value())
+			}
+		} else {
+			// Fall back to replace for other types
+			if target.CanSet() {
+				target.Set(source)
+			}
+		}
+	}
+}
diff --git a/pkg/config/sources.go b/pkg/config/sources.go
new file mode 100644
index 00000000..ca7eb2b1
--- /dev/null
+++ b/pkg/config/sources.go
@@ -0,0 +1,390 @@
+package config
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strconv"
+	"strings"
+	"time"
+
+	"gopkg.in/yaml.v3"
+)
+
+// StructDefaultSource provides the baseline configuration.
+// Its Load copies the literal built by buildDefaultConfigForLoader; it does
+// not read the envDefault struct tags at runtime.
+type StructDefaultSource struct{}
+
+// Load overwrites *cfg with the built-in default configuration.
+// Everything already present in cfg is replaced, so this source has to be
+// applied before any other. It never fails.
+func (s *StructDefaultSource) Load(ctx context.Context, cfg *Config) error {
+	*cfg = *buildDefaultConfigForLoader()
+	return nil
+}
+
+// Priority returns 100, the lowest priority of the built-in sources.
+func (s *StructDefaultSource) Priority() int {
+	return 100 // Lowest priority
+}
+
+// Name returns the fixed identifier "struct-defaults".
+func (s *StructDefaultSource) Name() string {
+	return "struct-defaults"
+}
+
+// buildDefaultConfigForLoader creates a Config with all default values.
+// A fresh Config is allocated on every call, so callers may modify the result.
+// NOTE(review): these literals appear to mirror the envDefault struct tags in
+// types.go; nothing visible here keeps the two in sync - confirm when
+// changing either side.
+func buildDefaultConfigForLoader() *Config {
+	return &Config{
+		Meta: MetaConfig{
+			Version:     "1.0.0",
+			Environment: "development",
+		},
+		Infrastructure: InfrastructureConfig{
+			Database: DatabaseConfig{
+				Postgres: PostgresConfig{
+					Host:            "api-db",
+					Port:            5432,
+					User:            "jan_user",
+					Database:        "jan_llm_api",
+					SSLMode:         "disable",
+					MaxConnections:  100,
+					MaxIdleConns:    5,
+					MaxOpenConns:    15,
+					ConnMaxLifetime: 30 * time.Minute,
+				},
+			},
+			Auth: AuthConfig{
+				Keycloak: KeycloakConfig{
+					BaseURL:             "http://keycloak:8085",
+					Realm:               "jan",
+					HTTPPort:            8085,
+					AdminUser:           "admin",
+					AdminRealm:          "master",
+					AdminClientID:       "admin-cli",
+					BackendClientID:     "backend",
+					Client:              "jan-client",
+					OAuthRedirectURI:    "http://localhost:8000/auth/callback",
+					Issuer:              "http://localhost:8085/realms/jan",
+					Account:             "account",
+					RefreshJWKSInterval: 5 * time.Minute,
+					AuthClockSkew:       60 * time.Second,
+					GuestRole:           "guest",
+					Features:            []string{"token-exchange", "preview"},
+				},
+			},
+			Gateway: GatewayConfig{
+				Kong: KongConfig{
+					HTTPPort:  8000,
+					AdminPort: 8001,
+					AdminURL:  "http://kong:8001",
+					LogLevel:  "info",
+				},
+			},
+		},
+		Services: ServicesConfig{
+			LLMAPI: LLMAPIConfig{
+				HTTPPort:               8080,
+				MetricsPort:            9091,
+				LogLevel:               "info",
+				LogFormat:              "json",
+				AutoMigrate:            true,
+				ProviderConfigFile:     "config/providers.yml",
+				ProviderConfigSet:      "default",
+				ProviderConfigsEnabled: true,
+				APIKey: APIKeyConfig{
+					Prefix:     "sk_live",
+					DefaultTTL: 2160 * time.Hour, // 90 days
+					MaxTTL:     2160 * time.Hour, // 90 days
+					MaxPerUser: 5,
+				},
+				// NOTE(review): hard-coded default secret committed to source;
+				// deployments should override it - confirm this is enforced.
+				ModelProviderSecret:      "jan-model-provider-secret-2024",
+				ModelSyncEnabled:         true,
+				ModelSyncIntervalMinutes: 60,
+				MediaResolveURL:          "http://kong:8000/media/v1/media/resolve",
+				MediaResolveTimeout:      5 * time.Second,
+			},
+			MCPTools: MCPToolsConfig{
+				HTTPPort:               8091,
+				LogLevel:               "info",
+				LogFormat:              "json",
+				SearchEngine:           "serper",
+				SearxngURL:             "http://searxng:8080",
+				VectorStoreURL:         "http://vector-store:3015",
+				SandboxFusionURL:       "http://sandboxfusion:8080",
+				SandboxRequireApproval: true,
+				MCPConfigFile:          "configs/mcp-providers.yml",
+			},
+			MediaAPI: MediaAPIConfig{
+				HTTPPort:           8285,
+				LogLevel:           "info",
+				MaxUploadBytes:     20971520, // 20 * 1024 * 1024
+				RetentionDays:      30,
+				ProxyDownload:      true,
+				RemoteFetchTimeout: 15 * time.Second,
+				S3: S3Config{
+					Endpoint:     "https://s3.menlo.ai",
+					Region:       "us-west-2",
+					Bucket:       "platform-dev",
+					UsePathStyle: true,
+					PresignTTL:   5 * time.Minute,
+				},
+			},
+			ResponseAPI: ResponseAPIConfig{
+				HTTPPort:     8082,
+				LogLevel:     "info",
+				LLMAPIURL:    "http://llm-api:8080",
+				MCPToolsURL:  "http://mcp-tools:8091",
+				MaxToolDepth: 8,
+				ToolTimeout:  45 * time.Second,
+			},
+		},
+		Inference: InferenceConfig{
+			VLLM: VLLMConfig{
+				Enabled:        true,
+				Port:           8101,
+				Model:          "Qwen/Qwen2.5-0.5B-Instruct",
+				ServedName:     "qwen2.5-0.5b-instruct",
+				GPUUtilization: 0.66,
+			},
+		},
+		Monitoring: MonitoringConfig{
+			OTEL: OTELConfig{
+				Enabled:     false,
+				ServiceName: "llm-api",
+				Endpoint:    "http://otel-collector:4318",
+				HTTPPort:    4318,
+				GRPCPort:    4317,
+			},
+			Prometheus: PrometheusConfig{
+				Port: 9090,
+			},
+			Grafana: GrafanaConfig{
+				Port:      3001,
+				AdminUser: "admin",
+			},
+			Jaeger: JaegerConfig{
+				UIPort: 16686,
+			},
+		},
+	}
+}
+
+// YAMLDefaultSource loads defaults from a YAML file
+// (config/defaults.yaml in the default source stack).
+type YAMLDefaultSource struct {
+	path string // file to read; a missing file is not an error
+}
+
+func (s *YAMLDefaultSource) Load(ctx context.Context, cfg *Config) error {
+	data, err := os.ReadFile(s.path)
+	if err != nil {
+		// It's OK if defaults.yaml doesn't exist yet, struct defaults will be used
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return fmt.Errorf("read yaml file: %w", err)
+	}
+
+	var yamlCfg Config
+	if err := yaml.Unmarshal(data, &yamlCfg); err != nil {
+		return fmt.Errorf("unmarshal yaml: %w", err)
+	}
+
+	// Merge YAML config into cfg (non-zero values override)
+	mergeConfigs(cfg, &yamlCfg)
+	return nil
+}
+
+// Priority returns 200, one step above the struct defaults.
+func (s *YAMLDefaultSource) Priority() int {
+	return 200
+}
+
+// Name returns the fixed identifier "yaml-defaults".
+func (s *YAMLDefaultSource) Name() string {
+	return "yaml-defaults"
+}
+
+// YAMLEnvSource loads environment-specific overrides from config/environments/{env}.yaml
+type YAMLEnvSource struct {
+	// environment is used verbatim as the file name stem and in Name().
+	environment string
+}
+
+func (s *YAMLEnvSource) Load(ctx context.Context, cfg *Config) error {
+	path := filepath.Join("config", "environments", fmt.Sprintf("%s.yaml", s.environment))
+
+	data, err := os.ReadFile(path)
+	if err != nil {
+		// It's OK if environment file doesn't exist
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return fmt.Errorf("read environment yaml: %w", err)
+	}
+
+	var envCfg Config
+	if err := yaml.Unmarshal(data, &envCfg); err != nil {
+		return fmt.Errorf("unmarshal environment yaml: %w", err)
+	}
+
+	mergeConfigs(cfg, &envCfg)
+	return nil
+}
+
+// Priority returns 300, above the YAML defaults.
+func (s *YAMLEnvSource) Priority() int {
+	return 300
+}
+
+// Name returns "yaml-env-" followed by the environment name.
+func (s *YAMLEnvSource) Name() string {
+	return fmt.Sprintf("yaml-env-%s", s.environment)
+}
+
+// EnvVarSource loads configuration from environment variables.
+// Variable names are taken from the `env` struct tags of Config.
+type EnvVarSource struct{}
+
+// Load walks cfg and overwrites every field whose `env` tag names a
+// non-empty environment variable. It always returns nil; values that fail to
+// parse are skipped by the field setter.
+func (s *EnvVarSource) Load(ctx context.Context, cfg *Config) error {
+	root := reflect.ValueOf(cfg).Elem()
+	applyEnvVars(root)
+	return nil
+}
+
+// Priority returns 500, the highest of the built-in sources.
+func (s *EnvVarSource) Priority() int {
+	return 500
+}
+
+// Name returns the fixed identifier "env-vars".
+func (s *EnvVarSource) Name() string {
+	return "env-vars"
+}
+
+// applyEnvVars walks v and, for every struct field carrying an `env` tag,
+// assigns the value of that environment variable when it is set and
+// non-empty. Nested struct fields are visited recursively; a non-nil pointer
+// passed as v is dereferenced. Any other kind is ignored.
+func applyEnvVars(v reflect.Value) {
+	if !v.IsValid() {
+		return
+	}
+
+	if v.Kind() == reflect.Ptr {
+		if !v.IsNil() {
+			applyEnvVars(v.Elem())
+		}
+		return
+	}
+
+	if v.Kind() != reflect.Struct {
+		return
+	}
+
+	structType := v.Type()
+	for idx := 0; idx < v.NumField(); idx++ {
+		member := v.Field(idx)
+
+		// An empty variable is treated the same as an unset one.
+		if name := structType.Field(idx).Tag.Get("env"); name != "" {
+			if raw := os.Getenv(name); raw != "" {
+				setFieldFromString(member, raw)
+			}
+		}
+
+		if member.Kind() == reflect.Struct {
+			applyEnvVars(member)
+		}
+	}
+}
+
+// setFieldFromString sets a field value from a string representation.
+//
+// Supported kinds: string, signed and unsigned integers, time.Duration
+// (parsed with time.ParseDuration), floats, bool and slices of strings
+// (comma-separated, each element trimmed). Fields that cannot be set and
+// unsupported kinds are ignored.
+//
+// A value that does not parse, or that does not fit the field's width (for
+// example "300" for an int8), leaves the field unchanged rather than storing
+// a wrapped or infinite value.
+func setFieldFromString(field reflect.Value, value string) {
+	if !field.CanSet() {
+		return
+	}
+
+	switch field.Kind() {
+	case reflect.String:
+		field.SetString(value)
+
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		if field.Type() == reflect.TypeOf(time.Duration(0)) {
+			// Handle time.Duration
+			if d, err := time.ParseDuration(value); err == nil {
+				field.Set(reflect.ValueOf(d))
+			}
+			return
+		}
+		// Parsing at the field's own bit size makes out-of-range input an
+		// error instead of letting SetInt truncate it silently.
+		if i, err := strconv.ParseInt(value, 10, field.Type().Bits()); err == nil {
+			field.SetInt(i)
+		}
+
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		if u, err := strconv.ParseUint(value, 10, field.Type().Bits()); err == nil {
+			field.SetUint(u)
+		}
+
+	case reflect.Float32, reflect.Float64:
+		// Same idea as for integers: reject values outside the field's range.
+		if f, err := strconv.ParseFloat(value, field.Type().Bits()); err == nil {
+			field.SetFloat(f)
+		}
+
+	case reflect.Bool:
+		if b, err := strconv.ParseBool(value); err == nil {
+			field.SetBool(b)
+		}
+
+	case reflect.Slice:
+		// Handle string slices
+		if field.Type().Elem().Kind() == reflect.String {
+			// Split by comma for slice values
+			parts := strings.Split(value, ",")
+			slice := reflect.MakeSlice(field.Type(), len(parts), len(parts))
+			for i, part := range parts {
+				slice.Index(i).SetString(strings.TrimSpace(part))
+			}
+			field.Set(slice)
+		}
+	}
+}
+
+// mergeConfigs copies every non-zero value of source over the corresponding
+// field of target; zero values in source leave target untouched.
+func mergeConfigs(target, source *Config) {
+	dst := reflect.ValueOf(target).Elem()
+	src := reflect.ValueOf(source).Elem()
+	mergeStruct(dst, src)
+}
+
+// mergeStruct overlays source onto target field by field; both must be
+// struct values of the same type.
+//
+// Fields that cannot be set and source fields holding their zero value are
+// skipped. Nested structs are merged recursively, non-empty slices replace
+// the target slice, non-empty maps are merged key by key (allocating the
+// target map when nil), and every other kind is copied as-is.
+func mergeStruct(target, source reflect.Value) {
+	if !(target.IsValid() && source.IsValid()) {
+		return
+	}
+
+	for idx, count := 0, source.NumField(); idx < count; idx++ {
+		src := source.Field(idx)
+		dst := target.Field(idx)
+
+		// Never override with empty/zero values, and skip unsettable fields.
+		if !dst.CanSet() || src.IsZero() {
+			continue
+		}
+
+		switch kind := src.Kind(); {
+		case kind == reflect.Struct:
+			mergeStruct(dst, src)
+
+		case kind == reflect.Slice && src.Len() > 0:
+			dst.Set(src)
+
+		case kind == reflect.Map && src.Len() > 0:
+			if dst.IsNil() {
+				dst.Set(reflect.MakeMap(dst.Type()))
+			}
+			for entries := src.MapRange(); entries.Next(); {
+				dst.SetMapIndex(entries.Key(), entries.Value())
+			}
+
+		case kind == reflect.Slice || kind == reflect.Map:
+			// Allocated but empty: nothing to contribute, keep the target.
+
+		default:
+			dst.Set(src)
+		}
+	}
+}
diff --git a/pkg/config/types.go b/pkg/config/types.go
new file mode 100644
index 00000000..42b4e6dc
--- /dev/null
+++ b/pkg/config/types.go
@@ -0,0 +1,533 @@
+package config
+
+import "time"
+
+// Config is the root configuration structure for Jan Server and the canonical source of
+// truth: schemas, defaults, and documentation are generated from these struct definitions.
+// Meta, Infrastructure, and Services are tagged required; Inference and Monitoring are not.
+type Config struct {
+	Meta           MetaConfig           `yaml:"meta" json:"meta" jsonschema:"required"`
+	Infrastructure InfrastructureConfig `yaml:"infrastructure" json:"infrastructure" jsonschema:"required"`
+	Services       ServicesConfig       `yaml:"services" json:"services" jsonschema:"required"`
+	Inference      InferenceConfig      `yaml:"inference" json:"inference"`
+	Monitoring     MonitoringConfig     `yaml:"monitoring" json:"monitoring"`
+}
+
+// MetaConfig describes the configuration document itself: its schema version and target environment.
+type MetaConfig struct {
+	// Version is the configuration schema version (env CONFIG_VERSION).
+	Version string `yaml:"version" json:"version" env:"CONFIG_VERSION" envDefault:"1.0.0" jsonschema:"required" description:"Configuration schema version"`
+
+	// Environment is the deployment environment name, e.g. development, staging, or production (env ENVIRONMENT).
+	Environment string `yaml:"environment" json:"environment" env:"ENVIRONMENT" envDefault:"development" jsonschema:"required" description:"Deployment environment name"`
+}
+
+// InfrastructureConfig groups the core infrastructure settings: database, auth, and gateway (all tagged required).
+type InfrastructureConfig struct {
+	Database DatabaseConfig `yaml:"database" json:"database" jsonschema:"required"`
+	Auth     AuthConfig     `yaml:"auth" json:"auth" jsonschema:"required"`
+	Gateway  GatewayConfig  `yaml:"gateway" json:"gateway" jsonschema:"required"`
+}
+
+// DatabaseConfig wraps the database settings; PostgreSQL is the only backend declared and is tagged required.
+type DatabaseConfig struct {
+	Postgres PostgresConfig `yaml:"postgres" json:"postgres" jsonschema:"required"`
+}
+
+// PostgresConfig holds the PostgreSQL connection parameters and connection-pool limits.
+type PostgresConfig struct {
+	// Host is the database host (Docker internal DNS name or FQDN).
+	Host string `yaml:"host" json:"host" env:"POSTGRES_HOST" envDefault:"api-db" jsonschema:"required" description:"PostgreSQL host (Docker service name or FQDN)"`
+
+	// Port is the database TCP port; the schema restricts it to 1-65535.
+	Port int `yaml:"port" json:"port" env:"POSTGRES_PORT" envDefault:"5432" jsonschema:"required,minimum=1,maximum=65535" description:"PostgreSQL port"`
+
+	// User is the database user name.
+	User string `yaml:"user" json:"user" env:"POSTGRES_USER" envDefault:"jan_user" jsonschema:"required" description:"PostgreSQL username"`
+
+	// Database is the name of the database to connect to.
+	Database string `yaml:"database" json:"database" env:"POSTGRES_DB" envDefault:"jan_llm_api" jsonschema:"required" description:"PostgreSQL database name"`
+
+	// Password is the database password; it declares no envDefault and is omitted from YAML/JSON output when empty.
+	Password string `yaml:"password,omitempty" json:"password,omitempty" env:"POSTGRES_PASSWORD" jsonschema:"required" description:"PostgreSQL password (from secret provider)"`
+
+	// SSLMode is the PostgreSQL SSL mode; the schema allows disable, require, verify-ca, and verify-full.
+	SSLMode string `yaml:"ssl_mode" json:"ssl_mode" env:"POSTGRES_SSL_MODE" envDefault:"disable" jsonschema:"enum=disable,enum=require,enum=verify-ca,enum=verify-full" description:"PostgreSQL SSL mode"`
+
+	// MaxConnections caps the number of database connections. NOTE(review): overlaps with MaxOpenConns below — confirm which one consumers honor.
+	MaxConnections int `yaml:"max_connections" json:"max_connections" env:"POSTGRES_MAX_CONNECTIONS" envDefault:"100" jsonschema:"minimum=1,maximum=1000" description:"Maximum number of database connections"`
+
+	// MaxIdleConns is the maximum number of idle connections kept in the pool.
+	MaxIdleConns int `yaml:"max_idle_conns" json:"max_idle_conns" env:"DB_MAX_IDLE_CONNS" envDefault:"5" jsonschema:"minimum=1" description:"Maximum idle connections in pool"`
+
+	// MaxOpenConns is the maximum number of open connections in the pool.
+	MaxOpenConns int `yaml:"max_open_conns" json:"max_open_conns" env:"DB_MAX_OPEN_CONNS" envDefault:"15" jsonschema:"minimum=1" description:"Maximum open connections in pool"`
+
+	// ConnMaxLifetime is the maximum lifetime of a connection.
+	ConnMaxLifetime time.Duration `yaml:"conn_max_lifetime" json:"conn_max_lifetime" env:"DB_CONN_MAX_LIFETIME" envDefault:"30m" description:"Maximum connection lifetime"`
+}
+
+// AuthConfig wraps the authentication and authorization settings; Keycloak is the only provider declared and is tagged required.
+type AuthConfig struct {
+	Keycloak KeycloakConfig `yaml:"keycloak" json:"keycloak" jsonschema:"required"`
+}
+
+// KeycloakConfig holds the Keycloak server location, admin and backend credentials, and JWT verification settings.
+type KeycloakConfig struct {
+	// BaseURL is the Keycloak base URL used for internal service-to-service calls.
+	BaseURL string `yaml:"base_url" json:"base_url" env:"KEYCLOAK_BASE_URL" envDefault:"http://keycloak:8085" jsonschema:"required,format=uri" description:"Keycloak base URL (internal)"`
+
+	// PublicURL is the browser-accessible Keycloak URL. NOTE(review): no envDefault is declared; any fallback to BaseURL must happen in consumers — confirm.
+	PublicURL string `yaml:"public_url" json:"public_url" env:"KEYCLOAK_PUBLIC_URL" jsonschema:"format=uri" description:"Keycloak public URL (browser-accessible)"`
+
+	// Realm is the Keycloak realm name.
+	Realm string `yaml:"realm" json:"realm" env:"KEYCLOAK_REALM" envDefault:"jan" jsonschema:"required" description:"Keycloak realm name"`
+
+	// HTTPPort is the Keycloak HTTP port; its envDefault (8085) matches the port in BaseURL's envDefault.
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"KEYCLOAK_HTTP_PORT" envDefault:"8085" jsonschema:"minimum=1,maximum=65535" description:"Keycloak HTTP port"`
+
+	// AdminUser is the Keycloak admin username.
+	AdminUser string `yaml:"admin_user" json:"admin_user" env:"KEYCLOAK_ADMIN" envDefault:"admin" jsonschema:"required" description:"Keycloak admin username"`
+
+	// AdminPassword is the Keycloak admin password; no envDefault, omitted from YAML/JSON output when empty.
+	AdminPassword string `yaml:"admin_password,omitempty" json:"admin_password,omitempty" env:"KEYCLOAK_ADMIN_PASSWORD" jsonschema:"required" description:"Keycloak admin password (from secret provider)"`
+
+	// AdminRealm is the realm used for admin operations.
+	AdminRealm string `yaml:"admin_realm" json:"admin_realm" env:"KEYCLOAK_ADMIN_REALM" envDefault:"master" description:"Keycloak admin realm"`
+
+	// AdminClientID is the client ID used for admin operations.
+	AdminClientID string `yaml:"admin_client_id" json:"admin_client_id" env:"KEYCLOAK_ADMIN_CLIENT_ID" envDefault:"admin-cli" description:"Keycloak admin client ID"`
+
+	// BackendClientID is the client ID used for service-to-service auth.
+	BackendClientID string `yaml:"backend_client_id" json:"backend_client_id" env:"BACKEND_CLIENT_ID" envDefault:"backend" jsonschema:"required" description:"Backend service client ID"`
+
+	// BackendClientSecret is the backend client's secret; no envDefault, omitted from YAML/JSON output when empty.
+	BackendClientSecret string `yaml:"backend_client_secret,omitempty" json:"backend_client_secret,omitempty" env:"BACKEND_CLIENT_SECRET" jsonschema:"required" description:"Backend client secret (from secret provider)"`
+
+	// Client is the client ID used for token exchange; note the unprefixed env name CLIENT.
+	Client string `yaml:"client" json:"client" env:"CLIENT" envDefault:"jan-client" jsonschema:"required" description:"Client ID for token exchange"`
+
+	// OAuthRedirectURI is the OAuth redirect (callback) URI.
+	OAuthRedirectURI string `yaml:"oauth_redirect_uri" json:"oauth_redirect_uri" env:"OAUTH_REDIRECT_URI" envDefault:"http://localhost:8000/auth/callback" jsonschema:"required,format=uri" description:"OAuth redirect URI"`
+
+	// JWKSURL is the JWKS endpoint used for JWT verification; it declares no envDefault.
+	JWKSURL string `yaml:"jwks_url" json:"jwks_url" env:"JWKS_URL" jsonschema:"format=uri" description:"JWKS URL for JWT verification"`
+
+	// OIDCDiscoveryURL is the OIDC discovery endpoint, an alternative to JWKSURL; it declares no envDefault.
+	OIDCDiscoveryURL string `yaml:"oidc_discovery_url" json:"oidc_discovery_url" env:"OIDC_DISCOVERY_URL" jsonschema:"format=uri" description:"OIDC discovery URL"`
+
+	// Issuer is the JWT issuer URL; its envDefault uses localhost while BaseURL's uses the keycloak host name.
+	Issuer string `yaml:"issuer" json:"issuer" env:"ISSUER" envDefault:"http://localhost:8085/realms/jan" jsonschema:"required,format=uri" description:"JWT issuer URL"`
+
+	// Account is the account identifier used as the audience claim; note the unprefixed env name ACCOUNT.
+	Account string `yaml:"account" json:"account" env:"ACCOUNT" envDefault:"account" jsonschema:"required" description:"Account/audience claim"`
+
+	// RefreshJWKSInterval is how often the JWKS is refreshed.
+	RefreshJWKSInterval time.Duration `yaml:"refresh_jwks_interval" json:"refresh_jwks_interval" env:"JWKS_REFRESH_INTERVAL" envDefault:"5m" description:"JWKS refresh interval"`
+
+	// AuthClockSkew is the clock skew tolerated during auth checks.
+	AuthClockSkew time.Duration `yaml:"auth_clock_skew" json:"auth_clock_skew" env:"AUTH_CLOCK_SKEW" envDefault:"60s" description:"Clock skew tolerance for auth"`
+
+	// GuestRole is the name of the guest role.
+	GuestRole string `yaml:"guest_role" json:"guest_role" env:"GUEST_ROLE" envDefault:"guest" description:"Guest role name"`
+
+	// Features lists the Keycloak features to enable; the env value is comma-separated (envSeparator).
+	Features []string `yaml:"features" json:"features" env:"KEYCLOAK_FEATURES" envSeparator:"," envDefault:"token-exchange,preview" description:"Keycloak features to enable"`
+}
+
+// GatewayConfig wraps the API gateway settings; Kong is the only gateway declared and is tagged required.
+type GatewayConfig struct {
+	Kong KongConfig `yaml:"kong" json:"kong" jsonschema:"required"`
+}
+
+// KongConfig holds the Kong API Gateway ports, admin URL, and log level.
+type KongConfig struct {
+	// HTTPPort is the Kong HTTP port (schema range 1-65535).
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"KONG_HTTP_PORT" envDefault:"8000" jsonschema:"minimum=1,maximum=65535" description:"Kong HTTP port"`
+
+	// AdminPort is the Kong admin API port (schema range 1-65535).
+	AdminPort int `yaml:"admin_port" json:"admin_port" env:"KONG_ADMIN_PORT" envDefault:"8001" jsonschema:"minimum=1,maximum=65535" description:"Kong admin API port"`
+
+	// AdminURL is the internal Kong admin API URL; its envDefault port (8001) matches AdminPort's envDefault.
+	AdminURL string `yaml:"admin_url" json:"admin_url" env:"KONG_ADMIN_URL" envDefault:"http://kong:8001" jsonschema:"format=uri" description:"Kong admin API URL"`
+
+	// LogLevel is the Kong log level; the schema allows debug, info, warn, and error.
+	LogLevel string `yaml:"log_level" json:"log_level" env:"KONG_LOG_LEVEL" envDefault:"info" jsonschema:"enum=debug,enum=info,enum=warn,enum=error" description:"Kong log level"`
+}
+
+// ServicesConfig groups the per-service settings for every Jan Server service; each entry is tagged required.
+type ServicesConfig struct {
+	LLMAPI      LLMAPIConfig      `yaml:"llm_api" json:"llm_api" jsonschema:"required"`
+	MCPTools    MCPToolsConfig    `yaml:"mcp_tools" json:"mcp_tools" jsonschema:"required"`
+	MediaAPI    MediaAPIConfig    `yaml:"media_api" json:"media_api" jsonschema:"required"`
+	ResponseAPI ResponseAPIConfig `yaml:"response_api" json:"response_api" jsonschema:"required"`
+	MemoryTools MemoryToolsConfig `yaml:"memory_tools" json:"memory_tools" jsonschema:"required"`
+}
+
+// LLMAPIConfig holds the LLM API service settings: ports, logging, provider config, API keys, model sync, and media resolution.
+type LLMAPIConfig struct {
+	// HTTPPort is the service HTTP port; note the unprefixed env name HTTP_PORT.
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"HTTP_PORT" envDefault:"8080" jsonschema:"minimum=1,maximum=65535" description:"LLM API HTTP port"`
+
+	// MetricsPort is the metrics port; note the unprefixed env name METRICS_PORT.
+	MetricsPort int `yaml:"metrics_port" json:"metrics_port" env:"METRICS_PORT" envDefault:"9091" jsonschema:"minimum=1,maximum=65535" description:"Metrics port"`
+
+	// LogLevel is the log verbosity; LOG_LEVEL is also the env name used by the MCP Tools, Media API, and Response API configs.
+	LogLevel string `yaml:"log_level" json:"log_level" env:"LOG_LEVEL" envDefault:"info" jsonschema:"enum=debug,enum=info,enum=warn,enum=error" description:"Log level"`
+
+	// LogFormat is the log output format; the schema allows json and console.
+	LogFormat string `yaml:"log_format" json:"log_format" env:"LOG_FORMAT" envDefault:"json" jsonschema:"enum=json,enum=console" description:"Log format"`
+
+	// AutoMigrate enables automatic database migration on startup.
+	AutoMigrate bool `yaml:"auto_migrate" json:"auto_migrate" env:"AUTO_MIGRATE" envDefault:"true" description:"Auto-migrate database on startup"`
+
+	// ProviderConfigFile is the provider config file path, relative to the service root.
+	ProviderConfigFile string `yaml:"provider_config_file" json:"provider_config_file" env:"JAN_PROVIDER_CONFIGS_FILE" envDefault:"config/providers.yml" description:"Provider config file path (CI/CD managed)"`
+
+	// ProviderConfigSet names the provider config set to use.
+	ProviderConfigSet string `yaml:"provider_config_set" json:"provider_config_set" env:"JAN_PROVIDER_CONFIG_SET" envDefault:"default" description:"Provider config set name"`
+
+	// ProviderConfigsEnabled toggles use of the provider config file.
+	ProviderConfigsEnabled bool `yaml:"provider_configs_enabled" json:"provider_configs_enabled" env:"JAN_PROVIDER_CONFIGS" envDefault:"true" description:"Enable provider config file"`
+
+	// APIKey holds the API key management settings.
+	APIKey APIKeyConfig `yaml:"api_key" json:"api_key"`
+
+	// ModelProviderSecret is the model provider secret. NOTE(review): the envDefault is a hard-coded secret value — confirm it is overridden outside development.
+	ModelProviderSecret string `yaml:"model_provider_secret,omitempty" json:"model_provider_secret,omitempty" env:"MODEL_PROVIDER_SECRET" envDefault:"jan-model-provider-secret-2024" description:"Model provider secret"`
+
+	// Model synchronization: on/off switch and interval in minutes (schema minimum 1).
+	ModelSyncEnabled         bool `yaml:"model_sync_enabled" json:"model_sync_enabled" env:"MODEL_SYNC_ENABLED" envDefault:"true" description:"Enable model synchronization"`
+	ModelSyncIntervalMinutes int  `yaml:"model_sync_interval_minutes" json:"model_sync_interval_minutes" env:"MODEL_SYNC_INTERVAL_MINUTES" envDefault:"60" jsonschema:"minimum=1" description:"Model sync interval in minutes"`
+
+	// PromptOrchestration holds the prompt orchestration processor settings.
+	PromptOrchestration PromptOrchestrationConfig `yaml:"prompt_orchestration" json:"prompt_orchestration"`
+
+	// Media integration: URL used to resolve media references and the timeout for that call.
+	MediaResolveURL     string        `yaml:"media_resolve_url" json:"media_resolve_url" env:"MEDIA_RESOLVE_URL" envDefault:"http://kong:8000/media/v1/media/resolve" jsonschema:"format=uri" description:"Media resolve URL"`
+	MediaResolveTimeout time.Duration `yaml:"media_resolve_timeout" json:"media_resolve_timeout" env:"MEDIA_RESOLVE_TIMEOUT" envDefault:"5s" description:"Media resolve timeout"`
+}
+
+// PromptOrchestrationConfig holds the prompt orchestration processor's master switch, per-module toggles, and default persona.
+type PromptOrchestrationConfig struct {
+	// Enabled turns the prompt orchestration processor on or off.
+	Enabled bool `yaml:"enabled" json:"enabled" env:"PROMPT_ORCHESTRATION_ENABLED" envDefault:"true" description:"Enable prompt orchestration processor"`
+
+	// EnableMemory toggles memory injection in prompts.
+	EnableMemory bool `yaml:"enable_memory" json:"enable_memory" env:"PROMPT_ORCHESTRATION_MEMORY" envDefault:"false" description:"Enable memory injection in prompts"`
+
+	// EnableTemplates toggles template-based prompts.
+	EnableTemplates bool `yaml:"enable_templates" json:"enable_templates" env:"PROMPT_ORCHESTRATION_TEMPLATES" envDefault:"true" description:"Enable template-based prompts"`
+
+	// EnableTools toggles tool usage instructions.
+	EnableTools bool `yaml:"enable_tools" json:"enable_tools" env:"PROMPT_ORCHESTRATION_TOOLS" envDefault:"false" description:"Enable tool usage instructions"`
+
+	// DefaultPersona is the default assistant persona.
+	DefaultPersona string `yaml:"default_persona" json:"default_persona" env:"PROMPT_ORCHESTRATION_PERSONA" envDefault:"helpful assistant" description:"Default assistant persona"`
+}
+
+// APIKeyConfig holds the API key management settings: key prefix, lifetimes, and per-user quota.
+type APIKeyConfig struct {
+	// Prefix is the string API keys start with.
+	Prefix string `yaml:"prefix" json:"prefix" env:"API_KEY_PREFIX" envDefault:"sk_live" description:"API key prefix"`
+
+	// DefaultTTL is the lifetime given to new API keys; the envDefault 2160h equals 90 days.
+	DefaultTTL time.Duration `yaml:"default_ttl" json:"default_ttl" env:"API_KEY_DEFAULT_TTL" envDefault:"2160h" description:"Default API key TTL (90 days)"`
+
+	// MaxTTL is the longest lifetime an API key may have; its envDefault equals DefaultTTL's.
+	MaxTTL time.Duration `yaml:"max_ttl" json:"max_ttl" env:"API_KEY_MAX_TTL" envDefault:"2160h" description:"Maximum API key TTL"`
+
+	// MaxPerUser is the maximum number of API keys per user (schema minimum 1).
+	MaxPerUser int `yaml:"max_per_user" json:"max_per_user" env:"API_KEY_MAX_PER_USER" envDefault:"5" jsonschema:"minimum=1" description:"Maximum API keys per user"`
+}
+
+// MCPToolsConfig holds the MCP Tools service settings: port, logging, search backend, and downstream service URLs.
+type MCPToolsConfig struct {
+	// HTTPPort is the service HTTP port (schema range 1-65535).
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"MCP_TOOLS_HTTP_PORT" envDefault:"8091" jsonschema:"minimum=1,maximum=65535" description:"MCP Tools HTTP port"`
+
+	// LogLevel is the log verbosity; LOG_LEVEL is also the env name used by the LLM API, Media API, and Response API configs.
+	LogLevel string `yaml:"log_level" json:"log_level" env:"LOG_LEVEL" envDefault:"info" jsonschema:"enum=debug,enum=info,enum=warn,enum=error" description:"Log level"`
+
+	// LogFormat is the log output format; LOG_FORMAT is also the env name used by the LLM API config.
+	LogFormat string `yaml:"log_format" json:"log_format" env:"LOG_FORMAT" envDefault:"json" jsonschema:"enum=json,enum=console" description:"Log format"`
+
+	// SearchEngine selects the search backend; the schema allows serper and searxng.
+	SearchEngine string `yaml:"search_engine" json:"search_engine" env:"SEARCH_ENGINE" envDefault:"serper" jsonschema:"enum=serper,enum=searxng" description:"Search engine to use"`
+
+	// SerperAPIKey is the Serper API key; no envDefault, omitted from YAML/JSON output when empty.
+	SerperAPIKey string `yaml:"serper_api_key,omitempty" json:"serper_api_key,omitempty" env:"SERPER_API_KEY" description:"Serper API key (from secret provider)"`
+
+	// SearxngURL is the SearXNG service URL.
+	SearxngURL string `yaml:"searxng_url" json:"searxng_url" env:"SEARXNG_URL" envDefault:"http://searxng:8080" jsonschema:"format=uri" description:"SearXNG service URL"`
+
+	// VectorStoreURL is the vector store service URL.
+	VectorStoreURL string `yaml:"vector_store_url" json:"vector_store_url" env:"VECTOR_STORE_URL" envDefault:"http://vector-store:3015" jsonschema:"format=uri" description:"Vector store service URL"`
+
+	// SandboxFusionURL is the SandboxFusion service URL.
+	SandboxFusionURL string `yaml:"sandbox_fusion_url" json:"sandbox_fusion_url" env:"SANDBOX_FUSION_URL" envDefault:"http://sandboxfusion:8080" jsonschema:"format=uri" description:"SandboxFusion service URL"`
+
+	// SandboxRequireApproval requires approval before sandbox execution.
+	SandboxRequireApproval bool `yaml:"sandbox_require_approval" json:"sandbox_require_approval" env:"SANDBOX_FUSION_REQUIRE_APPROVAL" envDefault:"true" description:"Require approval for sandbox execution"`
+
+	// MCPConfigFile is the MCP provider config file path, relative to the service root.
+	MCPConfigFile string `yaml:"mcp_config_file" json:"mcp_config_file" env:"MCP_CONFIG_FILE" envDefault:"configs/mcp-providers.yml" description:"MCP provider config file path (CI/CD managed)"`
+}
+
+// MediaAPIConfig holds the Media API service settings: port, logging, upload and retention limits, and S3 storage.
+type MediaAPIConfig struct {
+	// HTTPPort is the service HTTP port (schema range 1-65535).
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"MEDIA_API_PORT" envDefault:"8285" jsonschema:"minimum=1,maximum=65535" description:"Media API HTTP port"`
+
+	// LogLevel is the log verbosity; LOG_LEVEL is also the env name used by the LLM API, MCP Tools, and Response API configs.
+	LogLevel string `yaml:"log_level" json:"log_level" env:"LOG_LEVEL" envDefault:"info" jsonschema:"enum=debug,enum=info,enum=warn,enum=error" description:"Log level"`
+
+	// MaxUploadBytes is the maximum upload size in bytes; the envDefault 20971520 is 20 * 1024 * 1024.
+	MaxUploadBytes int64 `yaml:"max_upload_bytes" json:"max_upload_bytes" env:"MEDIA_MAX_BYTES" envDefault:"20971520" jsonschema:"minimum=1" description:"Maximum upload size in bytes (20MB)"`
+
+	// RetentionDays is how many days media files are retained (schema minimum 1).
+	RetentionDays int `yaml:"retention_days" json:"retention_days" env:"MEDIA_RETENTION_DAYS" envDefault:"30" jsonschema:"minimum=1" description:"Media retention in days"`
+
+	// ProxyDownload routes downloads through the API.
+	ProxyDownload bool `yaml:"proxy_download" json:"proxy_download" env:"MEDIA_PROXY_DOWNLOAD" envDefault:"true" description:"Proxy downloads through API"`
+
+	// RemoteFetchTimeout is the timeout for remote fetches.
+	RemoteFetchTimeout time.Duration `yaml:"remote_fetch_timeout" json:"remote_fetch_timeout" env:"MEDIA_REMOTE_FETCH_TIMEOUT" envDefault:"15s" description:"Remote fetch timeout"`
+
+	// S3 holds the S3/object storage settings.
+	S3 S3Config `yaml:"s3" json:"s3"`
+}
+
+// S3Config holds the S3/object storage endpoint, bucket, credentials, and presigned-URL settings.
+type S3Config struct {
+	// Endpoint is the S3 endpoint URL. NOTE(review): the envDefault points at a specific external host — confirm that is intended as a default.
+	Endpoint string `yaml:"endpoint" json:"endpoint" env:"MEDIA_S3_ENDPOINT" envDefault:"https://s3.menlo.ai" jsonschema:"format=uri" description:"S3 endpoint URL"`
+
+	// PublicEndpoint is the S3 endpoint used for presigned URLs; it declares no envDefault.
+	PublicEndpoint string `yaml:"public_endpoint" json:"public_endpoint" env:"MEDIA_S3_PUBLIC_ENDPOINT" jsonschema:"format=uri" description:"S3 public endpoint URL"`
+
+	// Region is the S3 region.
+	Region string `yaml:"region" json:"region" env:"MEDIA_S3_REGION" envDefault:"us-west-2" description:"S3 region"`
+
+	// Bucket is the S3 bucket name.
+	Bucket string `yaml:"bucket" json:"bucket" env:"MEDIA_S3_BUCKET" envDefault:"platform-dev" description:"S3 bucket name"`
+
+	// AccessKey is the S3 access key ID; no envDefault, omitted from YAML/JSON output when empty.
+	AccessKey string `yaml:"access_key,omitempty" json:"access_key,omitempty" env:"MEDIA_S3_ACCESS_KEY_ID" description:"S3 access key ID (AWS standard naming)"`
+
+	// SecretKey is the S3 secret access key; no envDefault, omitted from YAML/JSON output when empty.
+	SecretKey string `yaml:"secret_key,omitempty" json:"secret_key,omitempty" env:"MEDIA_S3_SECRET_ACCESS_KEY" description:"S3 secret access key (AWS standard naming)"`
+
+	// UsePathStyle selects S3 path-style addressing.
+	UsePathStyle bool `yaml:"use_path_style" json:"use_path_style" env:"MEDIA_S3_USE_PATH_STYLE" envDefault:"true" description:"Use S3 path-style addressing"`
+
+	// PresignTTL is the lifetime of presigned URLs.
+	PresignTTL time.Duration `yaml:"presign_ttl" json:"presign_ttl" env:"MEDIA_S3_PRESIGN_TTL" envDefault:"5m" description:"Presigned URL TTL"`
+}
+
+// ResponseAPIConfig holds the Response API service settings: port, logging, upstream service URLs, and tool execution limits.
+type ResponseAPIConfig struct {
+	// HTTPPort is the service HTTP port (schema range 1-65535).
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"RESPONSE_API_PORT" envDefault:"8082" jsonschema:"minimum=1,maximum=65535" description:"Response API HTTP port"`
+
+	// LogLevel is the log verbosity; LOG_LEVEL is also the env name used by the LLM API, MCP Tools, and Media API configs.
+	LogLevel string `yaml:"log_level" json:"log_level" env:"LOG_LEVEL" envDefault:"info" jsonschema:"enum=debug,enum=info,enum=warn,enum=error" description:"Log level"`
+
+	// LLMAPIURL is the LLM API service URL; its envDefault port (8080) matches LLMAPIConfig.HTTPPort's envDefault.
+	LLMAPIURL string `yaml:"llm_api_url" json:"llm_api_url" env:"LLM_API_URL" envDefault:"http://llm-api:8080" jsonschema:"format=uri" description:"LLM API service URL"`
+
+	// MCPToolsURL is the MCP Tools service URL; its envDefault port (8091) matches MCPToolsConfig.HTTPPort's envDefault.
+	MCPToolsURL string `yaml:"mcp_tools_url" json:"mcp_tools_url" env:"MCP_TOOLS_URL" envDefault:"http://mcp-tools:8091" jsonschema:"format=uri" description:"MCP Tools service URL"`
+
+	// MaxToolDepth is the maximum tool execution depth (schema range 1-20).
+	MaxToolDepth int `yaml:"max_tool_depth" json:"max_tool_depth" env:"MAX_TOOL_EXECUTION_DEPTH" envDefault:"8" jsonschema:"minimum=1,maximum=20" description:"Maximum tool execution depth"`
+
+	// ToolTimeout is the timeout for a tool execution.
+	ToolTimeout time.Duration `yaml:"tool_timeout" json:"tool_timeout" env:"TOOL_EXECUTION_TIMEOUT" envDefault:"45s" description:"Tool execution timeout"`
+}
+
+// InferenceConfig wraps the inference service settings; vLLM is the only backend declared.
+type InferenceConfig struct {
+	VLLM VLLMConfig `yaml:"vllm" json:"vllm"`
+}
+
+// VLLMConfig holds the vLLM inference server settings: enable switch, port, model selection, GPU budget, and credentials.
+type VLLMConfig struct {
+	// Enabled turns vLLM inference on or off.
+	Enabled bool `yaml:"enabled" json:"enabled" env:"VLLM_ENABLED" envDefault:"true" description:"Enable vLLM inference"`
+
+	// Port is the vLLM HTTP port (schema range 1-65535).
+	Port int `yaml:"port" json:"port" env:"VLLM_PORT" envDefault:"8101" jsonschema:"minimum=1,maximum=65535" description:"vLLM HTTP port"`
+
+	// Model is the name of the model to load.
+	Model string `yaml:"model" json:"model" env:"VLLM_MODEL" envDefault:"Qwen/Qwen2.5-0.5B-Instruct" description:"vLLM model name"`
+
+	// ServedName is the name the model is served under.
+	ServedName string `yaml:"served_name" json:"served_name" env:"VLLM_SERVED_NAME" envDefault:"qwen2.5-0.5b-instruct" description:"vLLM served model name"`
+
+	// GPUUtilization is the GPU utilization ratio; the schema restricts it to 0-1.
+	GPUUtilization float64 `yaml:"gpu_utilization" json:"gpu_utilization" env:"VLLM_GPU_UTIL" envDefault:"0.66" jsonschema:"minimum=0,maximum=1" description:"GPU utilization ratio"`
+
+	// InternalKey is the vLLM internal API key; no envDefault, omitted from YAML/JSON output when empty.
+	InternalKey string `yaml:"internal_key,omitempty" json:"internal_key,omitempty" env:"VLLM_INTERNAL_KEY" description:"vLLM internal API key (from secret provider)"`
+
+	// HFToken is the HuggingFace token; no envDefault, omitted from YAML/JSON output when empty.
+	HFToken string `yaml:"hf_token,omitempty" json:"hf_token,omitempty" env:"HF_TOKEN" description:"HuggingFace token (from secret provider)"`
+}
+
+// MonitoringConfig groups the observability settings: OpenTelemetry, Prometheus, Grafana, and Jaeger.
+type MonitoringConfig struct {
+	OTEL       OTELConfig       `yaml:"otel" json:"otel"`
+	Prometheus PrometheusConfig `yaml:"prometheus" json:"prometheus"`
+	Grafana    GrafanaConfig    `yaml:"grafana" json:"grafana"`
+	Jaeger     JaegerConfig     `yaml:"jaeger" json:"jaeger"`
+}
+
+// OTELConfig holds the OpenTelemetry settings: enable switches, service identity, exporter endpoint, sampling, and PII handling.
+type OTELConfig struct {
+	// Enabled turns OpenTelemetry on or off.
+	Enabled bool `yaml:"enabled" json:"enabled" env:"OTEL_ENABLED" envDefault:"true" description:"Enable OpenTelemetry tracing"`
+
+	// TracingEnabled turns distributed tracing on or off; it is a separate switch from Enabled (env ENABLE_TRACING).
+	TracingEnabled bool `yaml:"tracing_enabled" json:"tracing_enabled" env:"ENABLE_TRACING" envDefault:"true" description:"Enable distributed tracing"`
+
+	// ServiceName is the OpenTelemetry service name. NOTE(review): the envDefault is "llm-api" although this config is not LLM-API specific — confirm.
+	ServiceName string `yaml:"service_name" json:"service_name" env:"OTEL_SERVICE_NAME" envDefault:"llm-api" description:"OpenTelemetry service name"`
+
+	// ServiceVersion is the service version reported in telemetry.
+	ServiceVersion string `yaml:"service_version" json:"service_version" env:"OTEL_SERVICE_VERSION" envDefault:"unknown" description:"Service version for telemetry"`
+
+	// Endpoint is the OTLP exporter endpoint; its envDefault port (4318) matches HTTPPort's envDefault.
+	Endpoint string `yaml:"endpoint" json:"endpoint" env:"OTEL_EXPORTER_OTLP_ENDPOINT" envDefault:"http://otel-collector:4318" jsonschema:"format=uri" description:"OTLP exporter endpoint"`
+
+	// Headers are the OTLP exporter headers; the field has no env tag and is omitted from YAML/JSON output when empty.
+	Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty" description:"OTLP exporter headers"`
+
+	// SamplingRate is the trace sampling rate; the schema restricts it to 0-1.
+	SamplingRate float64 `yaml:"sampling_rate" json:"sampling_rate" env:"OTEL_TRACES_SAMPLER_ARG" envDefault:"1.0" jsonschema:"minimum=0,maximum=1" description:"Trace sampling rate (0.0 to 1.0)"`
+
+	// PIILevel is the PII sanitization level: none (redact all), hashed (hash PII), or full (no sanitization).
+	PIILevel string `yaml:"pii_level" json:"pii_level" env:"TELEMETRY_PII_LEVEL" envDefault:"hashed" jsonschema:"enum=none,enum=hashed,enum=full" description:"PII sanitization level: none (redact all), hashed (hash PII), full (no sanitization)"`
+
+	// MetricInterval is the metric export interval, e.g. 15s or 1m; unlike other intervals in this file it is a string, not a time.Duration.
+	MetricInterval string `yaml:"metric_interval" json:"metric_interval" env:"OTEL_METRIC_EXPORT_INTERVAL" envDefault:"15s" description:"Metric export interval (e.g., 15s, 1m)"`
+
+	// HTTPPort is the OTLP HTTP port (schema range 1-65535).
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"OTEL_HTTP_PORT" envDefault:"4318" jsonschema:"minimum=1,maximum=65535" description:"OTLP HTTP port"`
+
+	// GRPCPort is the OTLP gRPC port (schema range 1-65535).
+	GRPCPort int `yaml:"grpc_port" json:"grpc_port" env:"OTEL_GRPC_PORT" envDefault:"4317" jsonschema:"minimum=1,maximum=65535" description:"OTLP gRPC port"`
+}
+
+// PrometheusConfig holds the Prometheus settings; only the HTTP port is configurable here.
+type PrometheusConfig struct {
+	// Port is the Prometheus HTTP port (schema range 1-65535).
+	Port int `yaml:"port" json:"port" env:"PROMETHEUS_PORT" envDefault:"9090" jsonschema:"minimum=1,maximum=65535" description:"Prometheus HTTP port"`
+}
+
+// GrafanaConfig holds the Grafana port and admin credentials.
+type GrafanaConfig struct {
+	// Port is the Grafana HTTP port (schema range 1-65535).
+	Port int `yaml:"port" json:"port" env:"GRAFANA_PORT" envDefault:"3001" jsonschema:"minimum=1,maximum=65535" description:"Grafana HTTP port"`
+
+	// AdminUser is the Grafana admin username.
+	AdminUser string `yaml:"admin_user" json:"admin_user" env:"GRAFANA_ADMIN_USER" envDefault:"admin" description:"Grafana admin username"`
+
+	// AdminPassword is the Grafana admin password; no envDefault, omitted from YAML/JSON output when empty.
+	AdminPassword string `yaml:"admin_password,omitempty" json:"admin_password,omitempty" env:"GRAFANA_ADMIN_PASSWORD" description:"Grafana admin password (from secret provider)"`
+}
+
+// JaegerConfig holds the Jaeger settings; only the UI port is configurable here.
+type JaegerConfig struct {
+	// UIPort is the Jaeger UI port (schema range 1-65535).
+	UIPort int `yaml:"ui_port" json:"ui_port" env:"JAEGER_UI_PORT" envDefault:"16686" jsonschema:"minimum=1,maximum=65535" description:"Jaeger UI port"`
+}
+
+// MemoryToolsConfig holds the Memory Tools service settings: enable switch, port, and embedding backend.
+type MemoryToolsConfig struct {
+	// Enabled turns the memory tools service on or off; its envDefault is false.
+	Enabled bool `yaml:"enabled" json:"enabled" env:"MEMORY_TOOLS_ENABLED" envDefault:"false" description:"Enable memory tools service"`
+
+	// HTTPPort is the service HTTP port (schema range 1-65535).
+	HTTPPort int `yaml:"http_port" json:"http_port" env:"MEMORY_TOOLS_PORT" envDefault:"8090" jsonschema:"minimum=1,maximum=65535" description:"Memory Tools HTTP port"`
+
+	// Embedding holds the embedding service configuration.
+	Embedding EmbeddingConfig `yaml:"embedding" json:"embedding"`
+}
+
+// EmbeddingConfig holds the embedding service client settings: endpoint, credentials, startup validation, and resilience options.
+type EmbeddingConfig struct {
+	// BaseURL is the embedding service base URL; it declares no envDefault.
+	BaseURL string `yaml:"base_url" json:"base_url" env:"EMBEDDING_SERVICE_URL" jsonschema:"format=uri" description:"Embedding service base URL"`
+
+	// APIKey is the embedding service API key; no envDefault, omitted from YAML/JSON output when empty.
+	APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty" env:"EMBEDDING_SERVICE_API_KEY" description:"Embedding service API key"`
+
+	// Timeout is the timeout for embedding requests.
+	Timeout time.Duration `yaml:"timeout" json:"timeout" env:"EMBEDDING_SERVICE_TIMEOUT" envDefault:"30s" description:"Embedding service timeout"`
+
+	// ValidateOnStartup enables validation of the embedding server at startup.
+	ValidateOnStartup bool `yaml:"validate_on_startup" json:"validate_on_startup" env:"EMBEDDING_VALIDATE_ON_STARTUP" envDefault:"true" description:"Validate embedding server on startup"`
+
+	// ExpectedModel is the model ID the embedding server is expected to serve.
+	ExpectedModel string `yaml:"expected_model" json:"expected_model" env:"EMBEDDING_EXPECTED_MODEL" envDefault:"BAAI/bge-m3" description:"Expected embedding model ID"`
+
+	// ExpectedDimension is the expected embedding vector dimension (schema minimum 1).
+	ExpectedDimension int `yaml:"expected_dimension" json:"expected_dimension" env:"EMBEDDING_EXPECTED_DIMENSION" envDefault:"1024" jsonschema:"minimum=1" description:"Expected embedding dimension"`
+
+	// Retry holds the retry settings.
+	Retry RetryConfig `yaml:"retry" json:"retry"`
+
+	// Cache holds the embedding cache settings.
+	Cache CacheConfig `yaml:"cache" json:"cache"`
+
+	// Batch holds the batch processing settings.
+	Batch BatchConfig `yaml:"batch" json:"batch"`
+
+	// CircuitBreaker holds the circuit breaker settings.
+	CircuitBreaker CircuitBreakerConfig `yaml:"circuit_breaker" json:"circuit_breaker"`
+}
+
+// RetryConfig holds the embedding retry settings: enable switch, attempt limit (schema minimum 1), and initial/maximum backoff.
+type RetryConfig struct {
+	Enabled        bool          `yaml:"enabled" json:"enabled" env:"EMBEDDING_RETRY_ENABLED" envDefault:"true" description:"Enable retries"`
+	MaxAttempts    int           `yaml:"max_attempts" json:"max_attempts" env:"EMBEDDING_RETRY_MAX_ATTEMPTS" envDefault:"3" jsonschema:"minimum=1" description:"Maximum retry attempts"`
+	InitialBackoff time.Duration `yaml:"initial_backoff" json:"initial_backoff" env:"EMBEDDING_RETRY_INITIAL_BACKOFF" envDefault:"1s" description:"Initial retry backoff"`
+	MaxBackoff     time.Duration `yaml:"max_backoff" json:"max_backoff" env:"EMBEDDING_RETRY_MAX_BACKOFF" envDefault:"10s" description:"Maximum retry backoff"`
+}
+
+// CacheConfig holds the embedding cache settings; Type selects redis, memory, or noop, and Redis/Memory carry the backend-specific options.
+type CacheConfig struct {
+	Enabled bool              `yaml:"enabled" json:"enabled" env:"EMBEDDING_CACHE_ENABLED" envDefault:"true" description:"Enable embedding cache"`
+	Type    string            `yaml:"type" json:"type" env:"EMBEDDING_CACHE_TYPE" envDefault:"redis" jsonschema:"enum=redis,enum=memory,enum=noop" description:"Cache type (redis, memory, noop)"`
+	Redis   RedisCacheConfig  `yaml:"redis" json:"redis"`
+	Memory  MemoryCacheConfig `yaml:"memory" json:"memory"`
+}
+
+// RedisCacheConfig holds the Redis cache connection URL, key prefix, and TTL; TTL uses the same env name (EMBEDDING_CACHE_TTL) as MemoryCacheConfig.TTL.
+type RedisCacheConfig struct {
+	URL       string        `yaml:"url" json:"url" env:"EMBEDDING_CACHE_REDIS_URL" envDefault:"redis://redis:6379/3" jsonschema:"format=uri" description:"Redis connection URL"`
+	KeyPrefix string        `yaml:"key_prefix" json:"key_prefix" env:"EMBEDDING_CACHE_REDIS_PREFIX" envDefault:"emb:" description:"Redis key prefix"`
+	TTL       time.Duration `yaml:"ttl" json:"ttl" env:"EMBEDDING_CACHE_TTL" envDefault:"1h" description:"Cache TTL"`
+}
+
+// MemoryCacheConfig holds the in-memory cache size limit and TTL; TTL uses the same env name (EMBEDDING_CACHE_TTL) as RedisCacheConfig.TTL.
+type MemoryCacheConfig struct {
+	MaxSize int           `yaml:"max_size" json:"max_size" env:"EMBEDDING_CACHE_MAX_SIZE" envDefault:"10000" jsonschema:"minimum=1" description:"Maximum cache size"`
+	TTL     time.Duration `yaml:"ttl" json:"ttl" env:"EMBEDDING_CACHE_TTL" envDefault:"1h" description:"Cache TTL"`
+}
+
+// BatchConfig holds the embedding batch processing settings: enable switch, maximum batch size (schema minimum 1), and batch timeout.
+type BatchConfig struct {
+	Enabled bool          `yaml:"enabled" json:"enabled" env:"EMBEDDING_BATCH_ENABLED" envDefault:"true" description:"Enable batch processing"`
+	MaxSize int           `yaml:"max_size" json:"max_size" env:"EMBEDDING_BATCH_MAX_SIZE" envDefault:"32" jsonschema:"minimum=1" description:"Maximum batch size"`
+	Timeout time.Duration `yaml:"timeout" json:"timeout" env:"EMBEDDING_BATCH_TIMEOUT" envDefault:"5s" description:"Batch timeout"`
+}
+
+// CircuitBreakerConfig holds the embedding circuit breaker settings: enable switch, failure threshold, timeout, and concurrency cap.
+type CircuitBreakerConfig struct {
+	Enabled       bool          `yaml:"enabled" json:"enabled" env:"EMBEDDING_CB_ENABLED" envDefault:"true" description:"Enable circuit breaker"`
+	Threshold     int           `yaml:"threshold" json:"threshold" env:"EMBEDDING_CB_THRESHOLD" envDefault:"5" jsonschema:"minimum=1" description:"Failure threshold"`
+	Timeout       time.Duration `yaml:"timeout" json:"timeout" env:"EMBEDDING_CB_TIMEOUT" envDefault:"30s" description:"Circuit breaker timeout"`
+	MaxConcurrent int           `yaml:"max_concurrent" json:"max_concurrent" env:"EMBEDDING_CB_MAX_CONCURRENT" envDefault:"100" jsonschema:"minimum=1" description:"Maximum concurrent requests"`
+}
diff --git a/pkg/observability/README.md b/pkg/observability/README.md
new file mode 100644
index 00000000..256b4393
--- /dev/null
+++ b/pkg/observability/README.md
@@ -0,0 +1,267 @@
+# Observability Library
+
+This package provides a shared observability library for Jan Server services, encapsulating OpenTelemetry (OTEL) setup and providing consistent instrumentation patterns.
+
+## Features
+
+- **Unified Configuration**: Single config structure for all OTEL settings
+- **Automatic Instrumentation**: HTTP middleware, background worker tracking
+- **PII Sanitization**: Built-in privacy controls with tenant-specific hashing
+- **Standard Attributes**: Consistent span/metric attributes across services
+- **Easy Integration**: Drop-in initialization for any Go service
+
+## Quick Start
+
+```go
+package main
+
+import (
+    "context"
+    "log"
+    "net/http"
+
+    "github.com/janhq/jan-server/pkg/config"
+    "github.com/janhq/jan-server/pkg/observability"
+    "github.com/janhq/jan-server/pkg/observability/middleware"
+)
+
+func main() {
+    ctx := context.Background()
+
+    // Load config
+    cfg := config.Load()
+
+    // Initialize observability
+    obsCfg := observability.DefaultConfig("my-service")
+    obsCfg.Environment = cfg.Environment
+    obsCfg.TracingEnabled = cfg.Monitoring.OTEL.TracingEnabled
+    obsCfg.PIILevel = cfg.Monitoring.OTEL.PIILevel
+
+    provider, err := observability.Init(ctx, obsCfg)
+    if err != nil {
+        log.Fatalf("Failed to initialize observability: %v", err)
+    }
+    defer provider.Shutdown(ctx)
+
+    // Setup HTTP server with middleware
+    mux := http.NewServeMux()
+    mux.HandleFunc("/health", handleHealth)
+
+    handler := middleware.HTTPMiddleware(provider.Tracer, provider.Meter, "my-service")(mux)
+
+    log.Fatal(http.ListenAndServe(":8080", handler))
+}
+```
+
+## Configuration
+
+### Environment Variables
+
+```bash
+# Tracing
+ENABLE_TRACING=true
+OTEL_ENABLED=true
+OTEL_SERVICE_NAME=my-service
+OTEL_SERVICE_VERSION=1.0.0
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
+OTEL_TRACES_SAMPLER_ARG=1.0
+
+# Privacy
+TELEMETRY_PII_LEVEL=hashed  # none|hashed|full
+
+# Metrics
+OTEL_METRIC_EXPORT_INTERVAL=15s
+```
+
+### Programmatic Configuration
+
+```go
+cfg := observability.Config{
+    ServiceName:    "my-service",
+    ServiceVersion: "1.0.0",
+    Environment:    "production",
+    TracingEnabled: true,
+    MetricsEnabled: true,
+    OTLPEndpoint:   "http://otel-collector:4318",
+    SamplingRate:   0.1,  // Sample 10% of traces
+    PIILevel:       "hashed",
+}
+```
+
+## Usage Patterns
+
+### Adding Correlation Attributes
+
+```go
+import (
+    "github.com/janhq/jan-server/pkg/observability"
+    "go.opentelemetry.io/otel/trace"
+)
+
+func handleRequest(w http.ResponseWriter, r *http.Request) {
+    ctx := r.Context()
+    span := trace.SpanFromContext(ctx)
+
+    // Add standard correlation attributes
+    observability.AddConversationAttrsToSpan(
+        span,
+        conversationID,
+        tenantID,
+        userID,
+        provider.Sanitizer,
+    )
+
+    // Add LLM-specific attributes
+    span.SetAttributes(observability.WithLLMAttrs(
+        "gpt-4",
+        1500, // prompt tokens
+        300,  // completion tokens
+    )...)
+}
+```
+
+### Instrumenting Background Workers
+
+```go
+import (
+    "github.com/janhq/jan-server/pkg/observability/worker"
+)
+
+func main() {
+    // ... init provider ...
+
+    instrumenter, err := worker.NewWorkerInstrumenter(
+        provider.Tracer,
+        provider.Meter,
+        "my-service",
+    )
+
+    // Use in worker pool
+    err = instrumenter.InstrumentJob(ctx, "webhook", jobID, func(ctx context.Context) error {
+        // Your job logic here
+        return sendWebhook(ctx, payload)
+    })
+}
+```
+
+### PII Sanitization
+
+```go
+// Sanitize user prompts
+sanitizedPrompt := provider.Sanitizer.SanitizePrompt(userPrompt)
+
+// Sanitize user IDs
+hashedUserID := provider.Sanitizer.SanitizeUserID(userID)
+
+// Sanitize metadata
+sanitizedMetadata := provider.Sanitizer.SanitizeMetadata(metadata)
+```
+
+## Standard Attributes
+
+All services should use these standard attributes for correlation:
+
+| Attribute | Type | Description |
+|-----------|------|-------------|
+| `conversation_id` | string | Unique conversation identifier |
+| `tenant_id` | string | Tenant identifier |
+| `user_id` | string | Sanitized user identifier |
+| `request_id` | string | Unique request identifier |
+| `llm.model` | string | LLM model name |
+| `llm.tokens.prompt` | int64 | Prompt token count |
+| `llm.tokens.completion` | int64 | Completion token count |
+| `mcp.tool.name` | string | MCP tool name |
+| `prompt.category` | string | Prompt category |
+| `prompt.persona` | string | Prompt persona |
+| `prompt.language` | string | Prompt language |
+
+## Metrics Naming Convention
+
+Follow the pattern: `jan_<service>_<metric>_<unit>`
+
+Examples:
+- `jan_llm_api_request_duration_seconds`
+- `jan_response_api_queue_depth`
+- `jan_media_api_s3_errors_total`
+
+## Privacy Levels
+
+### None (`PIILevel = "none"`)
+- All user content redacted as `[REDACTED]`
+- Maximum privacy, minimal debugging utility
+
+### Hashed (`PIILevel = "hashed"`) - Default
+- PII detected and replaced with tenant-specific hashes
+- Emails: `[EMAIL:a1b2c3d4]`
+- Phones: `[PHONE:e5f6a7b8]`
+- SSNs/Credit Cards: `[SSN:REDACTED]`, `[CC:REDACTED]`
+- User IDs: 8-character hash
+- Balances privacy and debugging
+
+### Full (`PIILevel = "full"`)
+- No sanitization
+- Use only in development/testing
+- Never use in production
+
+## Testing
+
+```bash
+# Run observability tests
+cd pkg/observability
+go test -v ./...
+```
+
+## Architecture
+
+```
+pkg/observability/
+├── config.go           # Configuration structures
+├── provider.go         # OTEL provider initialization
+├── attributes.go       # Standard attribute helpers
+├── middleware/
+│   └── http.go        # HTTP instrumentation
+└── worker/
+    └── worker.go      # Background job instrumentation
+
+pkg/telemetry/
+├── sanitizer.go        # PII detection and hashing
+└── sanitizer_test.go   # Comprehensive test suite
+```
+
+## Best Practices
+
+1. **Always sanitize user content** before adding to spans/metrics
+2. **Use standard attributes** for correlation across services
+3. **Sample in production** - Set `SamplingRate < 1.0` for high-traffic services
+4. **Include request IDs** for cross-service trace correlation
+5. **Test with PII** - Verify sanitization catches real-world patterns
+6. **Monitor overhead** - Keep instrumentation latency <100ms P95
+
+## Troubleshooting
+
+### Spans not appearing in Jaeger
+
+1. Check OTEL Collector health: `curl http://otel-collector:13133/`
+2. Verify sampling rate: `OTEL_TRACES_SAMPLER_ARG=1.0`
+3. Check service logs for export errors
+4. Verify trace propagation headers: `traceparent`, `tracestate`
+
+### High memory usage
+
+1. Reduce batch size in provider.go
+2. Lower sampling rate
+3. Increase metric export interval
+
+### PII leaking to telemetry
+
+1. Verify `PIILevel` is set to `hashed` or `none`
+2. Check sanitizer is initialized correctly
+3. Review custom span attributes for unsanitized data
+4. Run the sanitizer tests (`go test ./pkg/telemetry/...`) to validate patterns
+
+## Related Documentation
+
+- [Monitoring Guide](../../docs/guides/monitoring.md)
+- [Monitoring Runbook](../../docs/runbooks/monitoring.md)
+- [Observability Conventions](../../docs/conventions/observability.md)
+- [Security Policy](../../docs/architecture/security.md)
diff --git a/pkg/observability/attributes.go b/pkg/observability/attributes.go
new file mode 100644
index 00000000..c9fb86b4
--- /dev/null
+++ b/pkg/observability/attributes.go
@@ -0,0 +1,96 @@
+package observability
+
+import (
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/trace"
+
+	"github.com/janhq/jan-server/pkg/telemetry"
+)
+
+// Standard attribute keys shared by the helpers in this file so that every
+// caller emits the same attribute names.
+const (
+	// Correlation identifiers.
+	AttrConversationID   = "conversation_id"
+	AttrTenantID         = "tenant_id"
+	AttrUserID           = "user_id"
+	AttrRequestID        = "request_id"
+	// LLM call details.
+	AttrModel            = "llm.model"
+	AttrTokensPrompt     = "llm.tokens.prompt"
+	AttrTokensCompletion = "llm.tokens.completion"
+	// MCP tool invocation.
+	AttrToolName         = "mcp.tool.name"
+	// Prompt classification.
+	AttrPromptCategory   = "prompt.category"
+	AttrPromptPersona    = "prompt.persona"
+	AttrPromptLanguage   = "prompt.language"
+)
+
+// WithConversationAttrs builds the correlation attributes for a conversation.
+//
+// The conversation ID is always included. The tenant ID is included only when
+// non-empty. The user ID is included only when it is non-empty and a sanitizer
+// is supplied, in which case the sanitized value is emitted.
+func WithConversationAttrs(conversationID, tenantID, userID string, sanitizer *telemetry.Sanitizer) []attribute.KeyValue {
+	result := make([]attribute.KeyValue, 0, 3)
+	result = append(result, attribute.String(AttrConversationID, conversationID))
+
+	if len(tenantID) > 0 {
+		result = append(result, attribute.String(AttrTenantID, tenantID))
+	}
+
+	// Without a sanitizer the user ID is dropped rather than emitted raw.
+	if sanitizer == nil || userID == "" {
+		return result
+	}
+
+	return append(result, attribute.String(AttrUserID, sanitizer.SanitizeUserID(userID)))
+}
+
+// AddConversationAttrsToSpan attaches the attributes produced by
+// WithConversationAttrs to span. A nil span is ignored.
+func AddConversationAttrsToSpan(span trace.Span, conversationID, tenantID, userID string, sanitizer *telemetry.Sanitizer) {
+	if span != nil {
+		correlation := WithConversationAttrs(conversationID, tenantID, userID, sanitizer)
+		span.SetAttributes(correlation...)
+	}
+}
+
+// WithLLMAttrs builds the LLM attributes for a model call.
+//
+// The model name is included when non-empty; each token count is included only
+// when it is greater than zero. The result is never nil.
+func WithLLMAttrs(model string, promptTokens, completionTokens int64) []attribute.KeyValue {
+	out := make([]attribute.KeyValue, 0, 3)
+
+	if len(model) != 0 {
+		out = append(out, attribute.String(AttrModel, model))
+	}
+	if promptTokens >= 1 {
+		out = append(out, attribute.Int64(AttrTokensPrompt, promptTokens))
+	}
+	if completionTokens >= 1 {
+		out = append(out, attribute.Int64(AttrTokensCompletion, completionTokens))
+	}
+
+	return out
+}
+
+// WithPromptMetadata builds the prompt classification attributes.
+// Empty values are skipped; the result is never nil.
+func WithPromptMetadata(category, persona, language string) []attribute.KeyValue {
+	// Order matters: category, persona, language.
+	candidates := [...]struct {
+		key   string
+		value string
+	}{
+		{AttrPromptCategory, category},
+		{AttrPromptPersona, persona},
+		{AttrPromptLanguage, language},
+	}
+
+	out := []attribute.KeyValue{}
+	for _, c := range candidates {
+		if c.value == "" {
+			continue
+		}
+		out = append(out, attribute.String(c.key, c.value))
+	}
+	return out
+}
+
+// WithRequestID wraps requestID in the standard request-ID attribute.
+func WithRequestID(requestID string) attribute.KeyValue {
+	return attribute.Key(AttrRequestID).String(requestID)
+}
+
+// WithToolName wraps toolName in the standard MCP tool-name attribute.
+func WithToolName(toolName string) attribute.KeyValue {
+	return attribute.Key(AttrToolName).String(toolName)
+}
diff --git a/pkg/observability/config.go b/pkg/observability/config.go
new file mode 100644
index 00000000..2656be29
--- /dev/null
+++ b/pkg/observability/config.go
@@ -0,0 +1,43 @@
+package observability
+
+import (
+	"time"
+
+	"go.opentelemetry.io/otel/attribute"
+)
+
+// Config wraps monitoring settings from pkg/config.
+// Use DefaultConfig to obtain a value with every default filled in; a Config
+// built as a plain literal leaves omitted fields at their zero value.
+type Config struct {
+	// ServiceName, ServiceVersion and Environment become resource attributes.
+	ServiceName    string
+	ServiceVersion string
+	Environment    string // dev, staging, production
+	// TracingEnabled and MetricsEnabled select which providers Init creates.
+	TracingEnabled bool
+	MetricsEnabled bool
+	// OTLPEndpoint and OTLPHeaders are handed to the OTLP HTTP exporters.
+	OTLPEndpoint   string
+	OTLPHeaders    map[string]string
+	SamplingRate   float64 // 0.0 - 1.0
+	PIILevel       string  // none|hashed|full
+	// NOTE(review): MetricsPort is not read anywhere in this package's visible
+	// code - confirm who consumes it.
+	MetricsPort    int
+
+	// Advanced settings
+	TraceBatchTimeout time.Duration        // batch timeout for the trace batcher
+	MetricInterval    time.Duration        // interval of the periodic metric reader
+	ResourceAttrs     []attribute.KeyValue // extra resource attributes
+}
+
+// DefaultConfig returns sensible defaults
+func DefaultConfig(serviceName string) Config {
+	return Config{
+		ServiceName:       serviceName,
+		ServiceVersion:    "unknown",
+		Environment:       "development",
+		TracingEnabled:    true,
+		MetricsEnabled:    true,
+		OTLPEndpoint:      "http://otel-collector:4318",
+		SamplingRate:      1.0,
+		PIILevel:          "hashed",
+		MetricsPort:       8080,
+		TraceBatchTimeout: 5 * time.Second,
+		MetricInterval:    15 * time.Second,
+	}
+}
diff --git a/pkg/observability/middleware/http.go b/pkg/observability/middleware/http.go
new file mode 100644
index 00000000..2e31f793
--- /dev/null
+++ b/pkg/observability/middleware/http.go
@@ -0,0 +1,77 @@
+package middleware
+
+import (
+	"fmt"
+	"net/http"
+	"time"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+	semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
+	"go.opentelemetry.io/otel/trace"
+)
+
+// HTTPMiddleware instruments HTTP handlers.
+//
+// The returned middleware starts a server span for every request, records the
+// request duration and a request counter (both tagged with method, route and
+// status), and attaches the response status code to the span. Responses with a
+// status of 400 or above additionally record an error event on the span.
+//
+// tracer and meter must be non-nil. serviceName is interpolated into the metric
+// names jan_<serviceName>_request_duration_seconds and
+// jan_<serviceName>_requests_total.
+func HTTPMiddleware(tracer trace.Tracer, meter metric.Meter, serviceName string) func(http.Handler) http.Handler {
+	// Create metrics. The signature has no error return, so creation errors
+	// cannot be surfaced; a nil instrument is skipped when recording below.
+	requestDuration, _ := meter.Float64Histogram(
+		fmt.Sprintf("jan_%s_request_duration_seconds", serviceName),
+		metric.WithDescription("HTTP request duration in seconds"),
+		metric.WithUnit("s"),
+	)
+
+	requestsTotal, _ := meter.Int64Counter(
+		fmt.Sprintf("jan_%s_requests_total", serviceName),
+		metric.WithDescription("Total HTTP requests"),
+	)
+
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			start := time.Now()
+
+			// For server requests r.URL normally carries only the path and
+			// query, so the scheme is derived from the connection instead.
+			scheme := r.URL.Scheme
+			if scheme == "" {
+				scheme = "http"
+				if r.TLS != nil {
+					scheme = "https"
+				}
+			}
+
+			// Start span.
+			// NOTE(review): the raw URL path is used as span name and route;
+			// paths that embed IDs produce one series per distinct path.
+			ctx, span := tracer.Start(r.Context(), r.URL.Path,
+				trace.WithSpanKind(trace.SpanKindServer),
+				trace.WithAttributes(
+					semconv.HTTPMethod(r.Method),
+					semconv.HTTPRoute(r.URL.Path),
+					semconv.HTTPScheme(scheme),
+				),
+			)
+			defer span.End()
+
+			// Wrap response writer to capture status; 200 is the status the
+			// server sends when the handler never calls WriteHeader.
+			rw := &responseWriter{ResponseWriter: w, statusCode: http.StatusOK}
+
+			// Process request
+			next.ServeHTTP(rw, r.WithContext(ctx))
+
+			// Record metrics
+			duration := time.Since(start).Seconds()
+			attrs := metric.WithAttributes(
+				attribute.String("method", r.Method),
+				attribute.String("route", r.URL.Path),
+				attribute.Int("status", rw.statusCode),
+			)
+
+			if requestDuration != nil {
+				requestDuration.Record(ctx, duration, attrs)
+			}
+			if requestsTotal != nil {
+				requestsTotal.Add(ctx, 1, attrs)
+			}
+
+			// Add status to span
+			span.SetAttributes(semconv.HTTPStatusCode(rw.statusCode))
+			if rw.statusCode >= 400 {
+				span.RecordError(fmt.Errorf("HTTP %d", rw.statusCode))
+			}
+		})
+	}
+}
+
+// responseWriter wraps an http.ResponseWriter to remember the status code that
+// was sent, while still exposing streaming support to the wrapped handler.
+//
+// statusCode must be initialised by the creator to the implicit default
+// (http.StatusOK). Hijacking is not implemented directly; callers that need it
+// can reach the underlying writer through Unwrap / http.ResponseController.
+type responseWriter struct {
+	http.ResponseWriter
+	statusCode  int
+	wroteHeader bool // true once a final (non-informational) status was sent
+}
+
+// WriteHeader records the first final status code and forwards the call.
+// Later calls are still forwarded (the server ignores them) but do not
+// overwrite the recorded status, so metrics reflect what the client received.
+func (rw *responseWriter) WriteHeader(code int) {
+	if !rw.wroteHeader {
+		rw.statusCode = code
+		// Informational 1xx responses (other than 101) may be followed by the
+		// final status, so they do not latch the recorded value.
+		rw.wroteHeader = code < 100 || code >= 200 || code == http.StatusSwitchingProtocols
+	}
+	rw.ResponseWriter.WriteHeader(code)
+}
+
+// Write forwards the body bytes. A Write without a prior WriteHeader sends an
+// implicit 200, which is what gets recorded.
+func (rw *responseWriter) Write(b []byte) (int, error) {
+	if !rw.wroteHeader {
+		rw.statusCode = http.StatusOK
+		rw.wroteHeader = true
+	}
+	return rw.ResponseWriter.Write(b)
+}
+
+// Flush forwards to the underlying writer when it supports flushing, so
+// streaming handlers keep working behind the middleware.
+func (rw *responseWriter) Flush() {
+	flusher, ok := rw.ResponseWriter.(http.Flusher)
+	if !ok {
+		return
+	}
+	if !rw.wroteHeader {
+		// Flushing before any header sends the implicit 200.
+		rw.statusCode = http.StatusOK
+		rw.wroteHeader = true
+	}
+	flusher.Flush()
+}
+
+// Unwrap returns the wrapped writer so http.ResponseController can reach
+// optional interfaces implemented by it.
+func (rw *responseWriter) Unwrap() http.ResponseWriter {
+	return rw.ResponseWriter
+}
diff --git a/pkg/observability/provider.go b/pkg/observability/provider.go
new file mode 100644
index 00000000..8a2f866d
--- /dev/null
+++ b/pkg/observability/provider.go
@@ -0,0 +1,146 @@
+package observability
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	"go.opentelemetry.io/otel/metric"
+	"go.opentelemetry.io/otel/propagation"
+	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
+	"go.opentelemetry.io/otel/trace"
+
+	"github.com/janhq/jan-server/pkg/telemetry"
+)
+
+// Provider holds initialized OTEL components.
+// It is created by Init and must be released with Shutdown.
+type Provider struct {
+	// Tracer and Meter are named after the service and are what callers use to
+	// create spans and instruments.
+	Tracer         trace.Tracer
+	Meter          metric.Meter
+	// TracerProvider and MeterProvider are populated by Init only when tracing
+	// or metrics, respectively, are enabled in the Config.
+	TracerProvider *sdktrace.TracerProvider
+	MeterProvider  *sdkmetric.MeterProvider
+	// Sanitizer is built from the configured PII level.
+	Sanitizer      *telemetry.Sanitizer
+
+	// shutdownFuncs collects the Shutdown method of every provider Init created.
+	shutdownFuncs []func(context.Context) error
+}
+
+// Init initializes OTEL for a service.
+//
+// It builds the resource from the service metadata in cfg, then creates a
+// tracer provider and/or meter provider depending on cfg.TracingEnabled and
+// cfg.MetricsEnabled. Providers are registered globally (together with the
+// W3C trace-context and baggage propagators when tracing is on) only after
+// every enabled component was created successfully.
+//
+// The returned Provider always has a usable Tracer and Meter: when a signal is
+// disabled they come from the global OTEL API, which is a no-op unless another
+// provider is registered globally. On error nothing is registered and any
+// component already created is shut down.
+func Init(ctx context.Context, cfg Config) (*Provider, error) {
+	provider := &Provider{
+		Sanitizer: telemetry.NewSanitizer(
+			telemetry.PIILevel(cfg.PIILevel),
+			cfg.ServiceName,
+		),
+	}
+
+	// Create resource with service metadata
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceName(cfg.ServiceName),
+			semconv.ServiceVersion(cfg.ServiceVersion),
+			semconv.DeploymentEnvironment(cfg.Environment),
+		),
+		resource.WithAttributes(cfg.ResourceAttrs...),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create resource: %w", err)
+	}
+
+	// Initialize tracing if enabled
+	if cfg.TracingEnabled {
+		tp, err := initTracerProvider(ctx, cfg, res)
+		if err != nil {
+			return nil, fmt.Errorf("failed to init tracer: %w", err)
+		}
+		provider.TracerProvider = tp
+		provider.Tracer = tp.Tracer(cfg.ServiceName)
+		provider.shutdownFuncs = append(provider.shutdownFuncs, tp.Shutdown)
+	} else {
+		// Keep Tracer non-nil so callers can pass it around unconditionally.
+		provider.Tracer = otel.Tracer(cfg.ServiceName)
+	}
+
+	// Initialize metrics if enabled
+	if cfg.MetricsEnabled {
+		mp, err := initMeterProvider(ctx, cfg, res)
+		if err != nil {
+			// Do not leak the tracer provider (and its exporter) created above.
+			for _, shutdown := range provider.shutdownFuncs {
+				_ = shutdown(ctx)
+			}
+			return nil, fmt.Errorf("failed to init meter: %w", err)
+		}
+		provider.MeterProvider = mp
+		provider.Meter = mp.Meter(cfg.ServiceName)
+		provider.shutdownFuncs = append(provider.shutdownFuncs, mp.Shutdown)
+	} else {
+		// Keep Meter non-nil so callers can pass it around unconditionally.
+		provider.Meter = otel.Meter(cfg.ServiceName)
+	}
+
+	// Everything succeeded: publish the providers globally.
+	if provider.TracerProvider != nil {
+		otel.SetTracerProvider(provider.TracerProvider)
+
+		// Set propagator for trace context
+		otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
+			propagation.TraceContext{},
+			propagation.Baggage{},
+		))
+	}
+	if provider.MeterProvider != nil {
+		otel.SetMeterProvider(provider.MeterProvider)
+	}
+
+	return provider, nil
+}
+
+// Shutdown gracefully shuts down all providers
+func (p *Provider) Shutdown(ctx context.Context) error {
+	for _, shutdown := range p.shutdownFuncs {
+		if err := shutdown(ctx); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func initTracerProvider(ctx context.Context, cfg Config, res *resource.Resource) (*sdktrace.TracerProvider, error) {
+	exporter, err := otlptracehttp.New(ctx,
+		otlptracehttp.WithEndpoint(cfg.OTLPEndpoint),
+		otlptracehttp.WithHeaders(cfg.OTLPHeaders),
+		otlptracehttp.WithInsecure(), // TODO: Use TLS in production
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	sampler := sdktrace.ParentBased(
+		sdktrace.TraceIDRatioBased(cfg.SamplingRate),
+	)
+
+	tp := sdktrace.NewTracerProvider(
+		sdktrace.WithBatcher(exporter,
+			sdktrace.WithBatchTimeout(cfg.TraceBatchTimeout),
+		),
+		sdktrace.WithResource(res),
+		sdktrace.WithSampler(sampler),
+	)
+
+	return tp, nil
+}
+
+func initMeterProvider(ctx context.Context, cfg Config, res *resource.Resource) (*sdkmetric.MeterProvider, error) {
+	exporter, err := otlpmetrichttp.New(ctx,
+		otlpmetrichttp.WithEndpoint(cfg.OTLPEndpoint),
+		otlpmetrichttp.WithHeaders(cfg.OTLPHeaders),
+		otlpmetrichttp.WithInsecure(), // TODO: Use TLS in production
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	mp := sdkmetric.NewMeterProvider(
+		sdkmetric.WithReader(
+			sdkmetric.NewPeriodicReader(exporter,
+				sdkmetric.WithInterval(cfg.MetricInterval),
+			),
+		),
+		sdkmetric.WithResource(res),
+	)
+
+	return mp, nil
+}
diff --git a/pkg/observability/worker/worker.go b/pkg/observability/worker/worker.go
new file mode 100644
index 00000000..510d9c19
--- /dev/null
+++ b/pkg/observability/worker/worker.go
@@ -0,0 +1,106 @@
+package worker
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+	"go.opentelemetry.io/otel/trace"
+)
+
+// WorkerInstrumenter instruments background workers.
+// It bundles the tracer and the metric instruments that InstrumentJob updates;
+// create it with NewWorkerInstrumenter.
+type WorkerInstrumenter struct {
+	tracer        trace.Tracer
+	workersActive metric.Int64UpDownCounter // +1 while a job runs
+	workersIdle   metric.Int64UpDownCounter // -1 while a job runs
+	jobDuration   metric.Float64Histogram   // job run time in seconds
+	jobsTotal     metric.Int64Counter       // jobs processed, by type and status
+}
+
+// NewWorkerInstrumenter creates a new worker instrumenter
+func NewWorkerInstrumenter(tracer trace.Tracer, meter metric.Meter, serviceName string) (*WorkerInstrumenter, error) {
+	workersActive, err := meter.Int64UpDownCounter(
+		fmt.Sprintf("jan_%s_workers_active", serviceName),
+		metric.WithDescription("Number of active workers"),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	workersIdle, err := meter.Int64UpDownCounter(
+		fmt.Sprintf("jan_%s_workers_idle", serviceName),
+		metric.WithDescription("Number of idle workers"),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	jobDuration, err := meter.Float64Histogram(
+		fmt.Sprintf("jan_%s_job_duration_seconds", serviceName),
+		metric.WithDescription("Background job duration"),
+		metric.WithUnit("s"),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	jobsTotal, err := meter.Int64Counter(
+		fmt.Sprintf("jan_%s_jobs_total", serviceName),
+		metric.WithDescription("Total background jobs processed"),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	return &WorkerInstrumenter{
+		tracer:        tracer,
+		workersActive: workersActive,
+		workersIdle:   workersIdle,
+		jobDuration:   jobDuration,
+		jobsTotal:     jobsTotal,
+	}, nil
+}
+
+// InstrumentJob wraps a job execution with observability.
+//
+// It moves one worker from idle to active for the duration of the call, runs
+// fn inside a span named "worker.<jobType>", records the elapsed time and a
+// job counter tagged with the job type and a "success"/"error" status, and
+// returns fn's error unchanged. An error from fn is also recorded on the span.
+func (w *WorkerInstrumenter) InstrumentJob(ctx context.Context, jobType string, jobID string, fn func(context.Context) error) error {
+	// Update worker status
+	// NOTE(review): nothing in this file ever raises the idle counter above
+	// zero first, so it reads negative while jobs run - confirm whether a
+	// pool is expected to seed it.
+	w.workersIdle.Add(ctx, -1)
+	w.workersActive.Add(ctx, 1)
+	defer func() {
+		// Runs after the span's deferred End (defers run last-in, first-out).
+		// ctx is read here at exit, by which time it refers to the
+		// span-carrying context assigned below.
+		w.workersActive.Add(ctx, -1)
+		w.workersIdle.Add(ctx, 1)
+	}()
+
+	// Start span
+	ctx, span := w.tracer.Start(ctx, fmt.Sprintf("worker.%s", jobType),
+		trace.WithAttributes(
+			attribute.String("job.type", jobType),
+			attribute.String("job.id", jobID),
+		),
+	)
+	defer span.End()
+
+	// Execute job; only fn itself is timed.
+	start := time.Now()
+	err := fn(ctx)
+	duration := time.Since(start).Seconds()
+
+	// Record metrics
+	status := "success"
+	if err != nil {
+		status = "error"
+		span.RecordError(err)
+	}
+
+	// The job ID is deliberately kept off the metric attributes here; only
+	// the span carries it.
+	attrs := metric.WithAttributes(
+		attribute.String("job.type", jobType),
+		attribute.String("status", status),
+	)
+
+	w.jobDuration.Record(ctx, duration, attrs)
+	w.jobsTotal.Add(ctx, 1, attrs)
+
+	return err
+}
diff --git a/pkg/telemetry/sanitizer.go b/pkg/telemetry/sanitizer.go
new file mode 100644
index 00000000..c5eedd64
--- /dev/null
+++ b/pkg/telemetry/sanitizer.go
@@ -0,0 +1,156 @@
+package telemetry
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"regexp"
+)
+
+// PIILevel defines the level of PII sanitization.
+// Values other than the three constants below are handled like PIILevelHashed
+// by the Sanitizer methods that switch on the level.
+type PIILevel string
+
+const (
+	// PIILevelNone redacts all user content
+	PIILevelNone PIILevel = "none"
+	// PIILevelHashed hashes PII with tenant salt
+	PIILevelHashed PIILevel = "hashed"
+	// PIILevelFull performs no sanitization
+	PIILevelFull PIILevel = "full"
+)
+
+// Sanitizer handles PII detection and sanitization for telemetry.
+// Create it with NewSanitizer, which compiles the detection patterns.
+type Sanitizer struct {
+	level      PIILevel // selects redact / hash / pass-through behaviour
+	tenantSalt string   // appended to every value before hashing
+
+	// Regex patterns for PII detection
+	emailPattern      *regexp.Regexp
+	phonePattern      *regexp.Regexp
+	ssnPattern        *regexp.Regexp
+	creditCardPattern *regexp.Regexp
+	ipv4Pattern       *regexp.Regexp
+	ipv6Pattern       *regexp.Regexp
+}
+
+// NewSanitizer returns a Sanitizer operating at the given level. tenantID is
+// stored as the salt mixed into every hash the sanitizer produces. All PII
+// detection patterns are compiled here.
+func NewSanitizer(level PIILevel, tenantID string) *Sanitizer {
+	s := new(Sanitizer)
+	s.level = level
+	s.tenantSalt = tenantID
+
+	// Contact details
+	s.emailPattern = regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)
+	s.phonePattern = regexp.MustCompile(`\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b`)
+
+	// Government and payment identifiers
+	s.ssnPattern = regexp.MustCompile(`\b\d{3}-\d{2}-\d{4}\b`)
+	s.creditCardPattern = regexp.MustCompile(`\b\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}\b`)
+
+	// Network addresses (IPv6 is matched in its full eight-group form only)
+	s.ipv4Pattern = regexp.MustCompile(`\b(?:\d{1,3}\.){3}\d{1,3}\b`)
+	s.ipv6Pattern = regexp.MustCompile(`\b(?:[A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}\b`)
+
+	return s
+}
+
+// SanitizePrompt applies the configured PII level to a user prompt:
+// "none" replaces the whole text with "[REDACTED]", "full" returns it
+// untouched, and "hashed" - as well as any unrecognised level, for safety -
+// replaces detected PII with hashed or redacted placeholders.
+func (s *Sanitizer) SanitizePrompt(input string) string {
+	if s.level == PIILevelNone {
+		return "[REDACTED]"
+	}
+	if s.level == PIILevelFull {
+		return input
+	}
+	return s.hashPII(input)
+}
+
+// SanitizeResponse applies the configured PII level to an LLM response.
+// Responses are treated exactly like prompts.
+func (s *Sanitizer) SanitizeResponse(response string) string {
+	sanitized := s.SanitizePrompt(response)
+	return sanitized
+}
+
+// hashPII scans input for PII and substitutes a placeholder for every match.
+// Emails, phone numbers and IP addresses become a tag carrying the salted hash
+// of the match; SSNs and credit card numbers are replaced by a fixed tag.
+func (s *Sanitizer) hashPII(input string) string {
+	// hashedTag builds a replacer that formats the salted hash of the match.
+	hashedTag := func(format string) func(string) string {
+		return func(match string) string {
+			return fmt.Sprintf(format, s.hash(match))
+		}
+	}
+	// fixedTag builds a replacer that ignores the match entirely.
+	fixedTag := func(tag string) func(string) string {
+		return func(string) string { return tag }
+	}
+
+	// The passes run in this order; each pass sees the previous pass's output.
+	passes := []struct {
+		pattern *regexp.Regexp
+		replace func(string) string
+	}{
+		{s.emailPattern, hashedTag("[EMAIL:%s]")},
+		{s.phonePattern, hashedTag("[PHONE:%s]")},
+		{s.ssnPattern, fixedTag("[SSN:REDACTED]")},
+		{s.creditCardPattern, fixedTag("[CC:REDACTED]")},
+		{s.ipv4Pattern, hashedTag("[IP:%s]")},
+		{s.ipv6Pattern, hashedTag("[IP:%s]")},
+	}
+
+	out := input
+	for _, pass := range passes {
+		out = pass.pattern.ReplaceAllStringFunc(out, pass.replace)
+	}
+	return out
+}
+
+// hash returns the first 8 hex characters of the SHA-256 digest of data with
+// the tenant salt appended. The short form is kept for readability.
+func (s *Sanitizer) hash(data string) string {
+	digest := sha256.Sum256([]byte(data + s.tenantSalt))
+	// 4 bytes encode to exactly 8 hex characters.
+	return hex.EncodeToString(digest[:4])
+}
+
+// SanitizeUserID applies the configured PII level to a user ID. An empty ID is
+// returned as is; "none" yields "[REDACTED]", "full" yields the ID unchanged,
+// and "hashed" or any unrecognised level yields the salted hash.
+func (s *Sanitizer) SanitizeUserID(userID string) string {
+	switch {
+	case userID == "":
+		return userID
+	case s.level == PIILevelNone:
+		return "[REDACTED]"
+	case s.level == PIILevelFull:
+		return userID
+	}
+	return s.hash(userID)
+}
+
+// SanitizeTenantID returns the salted hash of a tenant ID regardless of the
+// configured PII level. An empty ID is returned unchanged.
+func (s *Sanitizer) SanitizeTenantID(tenantID string) string {
+	if tenantID != "" {
+		// Tenant IDs are less sensitive, always hash
+		return s.hash(tenantID)
+	}
+	return tenantID
+}
+
+// SanitizeMetadata returns a copy of metadata in which every value has been
+// passed through SanitizePrompt. Keys are kept as they are; a nil map yields
+// nil.
+func (s *Sanitizer) SanitizeMetadata(metadata map[string]string) map[string]string {
+	if metadata == nil {
+		return nil
+	}
+
+	sanitized := make(map[string]string, len(metadata))
+	for key := range metadata {
+		sanitized[key] = s.SanitizePrompt(metadata[key])
+	}
+	return sanitized
+}
diff --git a/scripts/run.sh b/scripts/run.sh
deleted file mode 100644
index 950d10c8..00000000
--- a/scripts/run.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-set -e
-minikube start
-eval $(minikube docker-env)
-
-docker build -t menloltd/jan-server:latest ./apps/jan-api-gateway
-
-helm dependency update ./charts/jan-server
-helm install jan-server ./charts/jan-server --set gateway.image.tag=latest
-
-kubectl port-forward svc/jan-server-jan-api-gateway 8080:8080
-# helm uninstall jan-server
-# check http://localhost:8080/api/swagger/index.html#/
\ No newline at end of file
diff --git a/services/llm-api/Dockerfile b/services/llm-api/Dockerfile
new file mode 100644
index 00000000..bbd06d28
--- /dev/null
+++ b/services/llm-api/Dockerfile
@@ -0,0 +1,23 @@
+# Two-stage build for the llm-api service: compile in the golang image, then
+# copy the binary into a slim Debian runtime image.
+ARG GO_VERSION=1.25
+
+# --- Build stage ---
+FROM golang:${GO_VERSION} AS builder
+WORKDIR /src
+# Copy the module files first so the dependency download layer is cached
+# until go.mod/go.sum change.
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . ./
+# NOTE(review): `go mod tidy` may rewrite go.mod/go.sum inside the image, so
+# the build is not guaranteed to use exactly the committed dependency set.
+RUN go mod tidy
+# CGO is disabled and the target OS is pinned to linux for the runtime image.
+RUN CGO_ENABLED=0 GOOS=linux go build -o /out/llm-api ./cmd/server
+
+# --- Runtime stage ---
+FROM debian:bookworm-slim
+
+# Install CA certificates and curl, then drop the apt lists to keep the layer
+# small.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ca-certificates curl && \
+    rm -rf /var/lib/apt/lists/*
+# Dedicated system user with a fixed UID; the service does not run as root.
+RUN useradd --system --home /app --no-create-home --uid 10001 appuser
+WORKDIR /app
+COPY --from=builder /out/llm-api /app/llm-api
+# Directories shipped next to the binary.
+COPY config /app/config
+COPY docs /app/docs
+COPY migrations /app/migrations
+USER appuser
+ENTRYPOINT ["/app/llm-api"]
diff --git a/services/llm-api/Makefile b/services/llm-api/Makefile
new file mode 100644
index 00000000..1143b695
--- /dev/null
+++ b/services/llm-api/Makefile
@@ -0,0 +1,22 @@
+# Developer tasks for the llm-api service.
+
+# install: run the hook installer script.
+.PHONY: install
+install:
+	@echo "🔧 Installing tools and setting up hooks..."
+	@go run scripts/install_hooks.go
+
+# doc: generate Swagger output into ./swagger, starting from
+# cmd/server/server.go.
+.PHONY: doc
+doc:
+	@swag init --parseDependency -g cmd/server/server.go -o swagger
+
+# wire: run the wire generator on ./cmd/server.
+.PHONY: wire
+wire:
+	@wire ./cmd/server
+
+# gormgen: run the GORM gen code generator.
+.PHONY: gormgen
+gormgen:
+	@echo "🔄 Generating GORM gen code..."
+	@go run cmd/gormgen/gormgen.go
+
+# setup: regenerate docs, then wire code. $(MAKE) is used instead of a literal
+# `make` so the sub-makes use the same make program and inherit its flags
+# (-n, -j, ...).
+.PHONY: setup
+setup:
+	@$(MAKE) doc
+	@$(MAKE) wire
diff --git a/services/llm-api/cmd/gormgen/gormgen.go b/services/llm-api/cmd/gormgen/gormgen.go
new file mode 100644
index 00000000..e1f8f8c8
--- /dev/null
+++ b/services/llm-api/cmd/gormgen/gormgen.go
@@ -0,0 +1,50 @@
+package main
+
+import (
+	"os"
+
+	"gorm.io/driver/postgres"
+	"gorm.io/gen"
+	"gorm.io/gorm"
+	"gorm.io/gorm/schema"
+
+	"jan-server/services/llm-api/internal/infrastructure/database"
+	_ "jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+)
+
+var GormGenerator *gen.Generator
+
+// init opens the PostgreSQL connection and prepares the package-level
+// GormGenerator before main runs. It panics when DB_POSTGRESQL_WRITE_DSN is
+// unset or the connection cannot be opened.
+func init() {
+	// Get database DSN from environment - fail fast if not set
+	databaseDSN := os.Getenv("DB_POSTGRESQL_WRITE_DSN")
+	if databaseDSN == "" {
+		panic("DB_POSTGRESQL_WRITE_DSN environment variable is required")
+	}
+
+	// Connect directly without table prefix for schema inspection
+	db, err := gorm.Open(postgres.Open(databaseDSN), &gorm.Config{
+		NamingStrategy: schema.NamingStrategy{
+			SingularTable: false,
+		},
+	})
+	if err != nil {
+		panic(err)
+	}
+
+	// Generated code is written to OutPath, relative to the working
+	// directory the generator is run from.
+	GormGenerator = gen.NewGenerator(gen.Config{
+		OutPath:       "./internal/infrastructure/database/gormgen",
+		Mode:          gen.WithDefaultQuery | gen.WithQueryInterface | gen.WithoutContext,
+		FieldNullable: true,
+	})
+	GormGenerator.UseDB(db)
+}
+
+// main registers every model in database.SchemaRegistry with the generator and
+// then runs code generation.
+func main() {
+	for _, model := range database.SchemaRegistry {
+		GormGenerator.ApplyBasic(model)
+		// Querier is an empty interface: it is passed to ApplyInterface with
+		// no custom query methods declared.
+		type Querier interface {
+		}
+		GormGenerator.ApplyInterface(func(Querier) {}, model)
+	}
+	GormGenerator.Execute()
+}
diff --git a/services/llm-api/cmd/server/dataInitializer.go b/services/llm-api/cmd/server/dataInitializer.go
new file mode 100644
index 00000000..df5a1403
--- /dev/null
+++ b/services/llm-api/cmd/server/dataInitializer.go
@@ -0,0 +1,146 @@
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/infrastructure/inference"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// DataInitializer bootstraps provider records (and optionally their models)
+// from configuration when Install is called.
+type DataInitializer struct {
+	// provider registers, updates and syncs provider records.
+	provider            *model.ProviderService
+	// NOTE(review): modelCatalogService is not used by any method in this
+	// file - confirm it is still needed.
+	modelCatalogService *model.ModelCatalogService
+	// inferenceProvider lists the models a provider exposes.
+	inferenceProvider   *inference.InferenceProvider
+}
+
+// Install bootstraps providers from the global configuration. Explicitly
+// configured bootstrap entries take precedence; the default Jan provider is
+// set up only when there are no entries and JanDefaultNodeSetup is enabled.
+func (d *DataInitializer) Install(ctx context.Context) error {
+	cfg := config.GetGlobal()
+	entries := cfg.ProviderBootstrapEntries()
+
+	switch {
+	case len(entries) > 0:
+		return d.setupConfiguredProviders(ctx, entries)
+	case cfg.JanDefaultNodeSetup:
+		return d.setupJanDefaultProvider(ctx)
+	default:
+		return nil
+	}
+}
+
+// setupConfiguredProviders bootstraps each entry in order and stops at the
+// first failure, returning it wrapped with the failing provider's name.
+func (d *DataInitializer) setupConfiguredProviders(ctx context.Context, entries []config.ProviderBootstrapEntry) error {
+	for _, entry := range entries {
+		err := d.bootstrapProvider(ctx, entry)
+		if err == nil {
+			continue
+		}
+		return platformerrors.AsError(ctx, platformerrors.LayerDomain, err, fmt.Sprintf("failed to bootstrap provider %q", entry.Name))
+	}
+	return nil
+}
+
+func (d *DataInitializer) setupJanDefaultProvider(ctx context.Context) error {
+	entry := config.ProviderBootstrapEntry{
+		Name:    "vLLM Provider",
+		Vendor:  string(model.ProviderJan),
+		BaseURL: config.GetGlobal().JanDefaultNodeURL,
+		APIKey:  config.GetGlobal().JanDefaultNodeAPIKey,
+		Active:  true,
+		Metadata: map[string]string{
+			"description":            "Default access to vLLM Provider",
+			"auto_enable_new_models": "true",
+		},
+		AutoEnableNewModels: true,
+		SyncModels:          true,
+	}
+
+	if err := d.bootstrapProvider(ctx, entry); err != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to setup Jan provider")
+	}
+	return nil
+}
+
+// bootstrapProvider makes sure the provider described by entry exists and,
+// when entry.SyncModels is set, lists the provider's models and syncs them.
+func (d *DataInitializer) bootstrapProvider(ctx context.Context, entry config.ProviderBootstrapEntry) error {
+	registered, err := d.ensureProvider(ctx, entry)
+	if err != nil {
+		return err
+	}
+
+	if entry.SyncModels {
+		available, listErr := d.inferenceProvider.ListModels(ctx, registered)
+		if listErr != nil {
+			return listErr
+		}
+		_, err = d.provider.SyncProviderModelsWithOptions(ctx, registered, available, entry.AutoEnableNewModels)
+	}
+
+	return err
+}
+
+// ensureProvider creates or updates the provider record described by entry.
+//
+// Custom-kind vendors are upserted directly. For any other vendor the record
+// is looked up by vendor: it is registered when missing, otherwise updated
+// with the entry's URL, key, metadata and active flag (and its name, when the
+// entry carries a non-empty name that differs from the stored display name).
+func (d *DataInitializer) ensureProvider(ctx context.Context, entry config.ProviderBootstrapEntry) (*model.Provider, error) {
+	isCustom := model.ProviderKindFromVendor(entry.Vendor) == model.ProviderCustom
+	// Work on a private copy so the entry's map is never shared.
+	meta := cloneMetadata(entry.Metadata)
+
+	if isCustom {
+		upsert := model.UpsertProviderInput{
+			Name:     entry.Name,
+			Vendor:   entry.Vendor,
+			BaseURL:  entry.BaseURL,
+			APIKey:   entry.APIKey,
+			Metadata: meta,
+			Active:   entry.Active,
+		}
+		return d.provider.UpsertProvider(ctx, upsert)
+	}
+
+	current, lookupErr := d.provider.FindProviderByVendor(ctx, entry.Vendor)
+	switch {
+	case lookupErr != nil:
+		return nil, lookupErr
+	case current == nil:
+		registration := model.RegisterProviderInput{
+			Name:     entry.Name,
+			Vendor:   entry.Vendor,
+			BaseURL:  entry.BaseURL,
+			APIKey:   entry.APIKey,
+			Metadata: meta,
+			Active:   entry.Active,
+		}
+		return d.provider.RegisterProvider(ctx, registration)
+	}
+
+	patch := model.UpdateProviderInput{
+		BaseURL:  &entry.BaseURL,
+		APIKey:   &entry.APIKey,
+		Metadata: &meta,
+		Active:   &entry.Active,
+	}
+	// Rename only when a different, non-empty name is configured.
+	if renamed := entry.Name != "" && entry.Name != current.DisplayName; renamed {
+		patch.Name = &entry.Name
+	}
+
+	result, updateErr := d.provider.UpdateProvider(ctx, current, patch)
+	if updateErr != nil {
+		return nil, updateErr
+	}
+	return result, nil
+}
+
+// cloneMetadata returns an independent copy of src. A nil or empty map yields
+// nil.
+func cloneMetadata(src map[string]string) map[string]string {
+	size := len(src)
+	if size == 0 {
+		return nil
+	}
+
+	copied := make(map[string]string, size)
+	for key := range src {
+		copied[key] = src[key]
+	}
+	return copied
+}
diff --git a/services/llm-api/cmd/server/server.go b/services/llm-api/cmd/server/server.go
new file mode 100644
index 00000000..71ddd457
--- /dev/null
+++ b/services/llm-api/cmd/server/server.go
@@ -0,0 +1,120 @@
+package main
+
+import (
+	"context"
+	"net/http"
+	"time"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/infrastructure/crontab"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/internal/infrastructure/observability"
+	"jan-server/services/llm-api/internal/interfaces/httpserver"
+
+	"golang.org/x/sync/errgroup"
+
+	_ "net/http/pprof"
+)
+
+// Application groups the long-running components that Start launches. All
+// fields are populated by the Wire injector CreateApplication.
+type Application struct {
+	// httpServer is the HTTP server started by Start via Run().
+	httpServer *httpserver.HTTPServer
+	// crontab is started by Start via Run(ctx) with a cancellable context.
+	crontab    *crontab.Crontab
+}
+
+// init obtains the logger and then loads the service configuration before main
+// runs. A load failure is reported through the logger's Fatal event.
+func init() {
+	// Fetch the logger once, before loading config, and reuse it on failure
+	// instead of discarding the first lookup and fetching it again.
+	log := logger.GetLogger()
+	if _, err := config.Load(); err != nil {
+		log.Fatal().Err(err).Msg("failed to load config")
+	}
+}
+
+// @title Jan Server LLM API
+// @version 2.0
+// @description OpenAI-compatible LLM API platform with enterprise authentication, conversation management, and streaming support.
+// @contact.name Jan Server Team
+// @contact.url https://github.com/janhq/jan-server
+// @BasePath /
+
+// @securityDefinitions.apikey BearerAuth
+// @in header
+// @name Authorization
+// @description Type "Bearer" followed by a space and JWT token.
+func (application *Application) Start() {
+	background := context.Background()
+	ctx, cancel := context.WithCancel(background)
+	defer cancel()
+
+	var eg errgroup.Group
+	eg.Go(func() error {
+		err := http.ListenAndServe("0.0.0.0:6060", nil)
+		if err != nil {
+			cancel()
+		}
+		return err
+	})
+	eg.Go(func() error {
+		err := application.crontab.Run(ctx)
+		if err != nil {
+			cancel()
+		}
+		return err
+	})
+	eg.Go(func() error {
+		err := application.httpServer.Run()
+		if err != nil {
+			cancel()
+		}
+		return err
+	})
+
+	if err := eg.Wait(); err != nil {
+		panic(err)
+	}
+}
+
+// main wires the service together and runs it: it checks that the global
+// config was loaded, builds the application and the data initializer, resolves
+// the JWKS URL, sets up observability (non-fatal on failure), installs the
+// bootstrap data and finally blocks in application.Start.
+func main() {
+	ctx := context.Background()
+	log := logger.GetLogger()
+
+	cfg := config.GetGlobal()
+	if cfg == nil {
+		log.Fatal().Msg("config not loaded")
+	}
+
+	application, err := CreateApplication()
+	if err != nil {
+		log.Fatal().Err(err).Msg("create application")
+	}
+
+	dataInitializer, err := CreateDataInitializer()
+	if err != nil {
+		log.Fatal().Err(err).Msg("create data initializer")
+	}
+
+	jwksURL, err := cfg.ResolveJWKSURL(ctx)
+	if err != nil {
+		log.Fatal().Err(err).Msg("resolve jwks url")
+	}
+	_ = jwksURL // Will be used by auth middleware
+
+	// flushTelemetry stays a no-op when observability could not be set up.
+	flushTelemetry := func() {}
+	otelShutdown, err := observability.Setup(ctx, cfg, log)
+	if err != nil {
+		log.Error().Err(err).Msg("initialize observability")
+	} else {
+		flushTelemetry = func() {
+			// Bound the shutdown so a stuck exporter cannot hang process exit.
+			shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+			defer cancel()
+			if err := otelShutdown(shutdownCtx); err != nil {
+				log.Error().Err(err).Msg("shutdown telemetry")
+			}
+		}
+	}
+	defer flushTelemetry()
+
+	if err := dataInitializer.Install(ctx); err != nil {
+		// A Fatal log event ends the process without running deferred calls,
+		// so flush telemetry explicitly before reporting the failure.
+		flushTelemetry()
+		log.Fatal().Err(err).Msg("install data")
+	}
+
+	application.Start()
+}
diff --git a/services/llm-api/cmd/server/wire.go b/services/llm-api/cmd/server/wire.go
new file mode 100644
index 00000000..ad45e486
--- /dev/null
+++ b/services/llm-api/cmd/server/wire.go
@@ -0,0 +1,32 @@
+//go:build wireinject
+
+package main
+
+import (
+	"jan-server/services/llm-api/internal/domain"
+	"jan-server/services/llm-api/internal/infrastructure"
+	"jan-server/services/llm-api/internal/interfaces"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes"
+
+	"github.com/google/wire"
+)
+
+// CreateApplication is the Wire injector template for *Application. It is only
+// compiled under the wireinject build tag; the real implementation is the
+// generated function of the same name in wire_gen.go.
+func CreateApplication() (*Application, error) {
+	// Provider sets Wire may draw from, plus a struct provider that fills
+	// every field of Application ("*").
+	wire.Build(
+		domain.ServiceProvider,
+		infrastructure.InfrastructureProvider,
+		routes.RouteProvider,
+		interfaces.InterfacesProvider,
+		wire.Struct(new(Application), "*"),
+	)
+	// Placeholder return so the template type-checks; never executed in a
+	// normal build.
+	return nil, nil
+}
+
+// CreateDataInitializer is the Wire injector template for *DataInitializer. It
+// is only compiled under the wireinject build tag; the real implementation is
+// the generated function of the same name in wire_gen.go.
+func CreateDataInitializer() (*DataInitializer, error) {
+	// Only the domain and infrastructure provider sets are listed here; the
+	// struct provider fills every field of DataInitializer ("*").
+	wire.Build(
+		domain.ServiceProvider,
+		infrastructure.InfrastructureProvider,
+		wire.Struct(new(DataInitializer), "*"),
+	)
+	// Placeholder return so the template type-checks; never executed in a
+	// normal build.
+	return nil, nil
+}
diff --git a/services/llm-api/cmd/server/wire_gen.go b/services/llm-api/cmd/server/wire_gen.go
new file mode 100644
index 00000000..978f222e
--- /dev/null
+++ b/services/llm-api/cmd/server/wire_gen.go
@@ -0,0 +1,156 @@
+// Code generated by Wire. DO NOT EDIT.
+
+//go:generate go run -mod=mod github.com/google/wire/cmd/wire
+//go:build !wireinject
+// +build !wireinject
+
+package main
+
+import (
+	"jan-server/services/llm-api/internal/domain"
+	"jan-server/services/llm-api/internal/domain/apikey"
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/domain/project"
+	"jan-server/services/llm-api/internal/domain/prompt"
+	"jan-server/services/llm-api/internal/domain/user"
+	"jan-server/services/llm-api/internal/domain/usersettings"
+	"jan-server/services/llm-api/internal/infrastructure"
+	"jan-server/services/llm-api/internal/infrastructure/crontab"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/apikeyrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/conversationrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/modelrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/projectrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/userrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/usersettingsrepo"
+	"jan-server/services/llm-api/internal/infrastructure/inference"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/internal/interfaces/httpserver"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/apikeyhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/chathandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/guesthandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/projecthandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/usersettingshandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/auth"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin"
+	model3 "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/model"
+	provider2 "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/provider"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/chat"
+	conversation2 "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/conversation"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/llm/projects"
+	model2 "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/model"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/model/provider"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/users"
+)
+
+import (
+	_ "net/http/pprof"
+)
+
+// Injectors from wire.go:
+
+// CreateApplication builds the fully wired *Application: configuration and
+// database first, then repositories and domain services, HTTP handlers and
+// routes, and finally the HTTP server and crontab stored on the returned
+// struct. It returns the first error from ProvideConfig, ProvideDatabase or
+// ProvideKeycloakValidator.
+//
+// This function is generated from the injector in wire.go; regenerate it
+// instead of editing it by hand.
+func CreateApplication() (*Application, error) {
+	config, err := infrastructure.ProvideConfig()
+	if err != nil {
+		return nil, err
+	}
+	zerologLogger := logger.GetLogger()
+	db, err := infrastructure.ProvideDatabase(config, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	database := infrastructure.ProvideTransactionDatabase(db)
+	// Model/provider repositories, services and handlers.
+	providerRepository := modelrepo.NewProviderGormRepository(database)
+	providerModelRepository := modelrepo.NewProviderModelGormRepository(database)
+	modelCatalogRepository := modelrepo.NewModelCatalogGormRepository(database)
+	providerModelService := model.NewProviderModelService(providerModelRepository, modelCatalogRepository)
+	modelCatalogService := model.NewModelCatalogService(modelCatalogRepository)
+	providerService := model.NewProviderService(providerRepository, providerModelService, modelCatalogService)
+	modelHandler := modelhandler.NewModelHandler(providerService, providerModelService)
+	modelCatalogHandler := modelhandler.NewModelCatalogHandler(modelCatalogService, providerModelService)
+	modelProviderRoute := provider.NewModelProviderRoute(modelHandler)
+	repository := userrepo.NewUserGormRepository(db)
+	service := user.NewService(repository)
+	authHandler := authhandler.NewAuthHandler(service, zerologLogger)
+	modelRoute := model2.NewModelRoute(modelHandler, modelCatalogHandler, modelProviderRoute, authHandler)
+	inferenceProvider := inference.NewInferenceProvider()
+	providerHandler := modelhandler.NewProviderHandler(providerService, providerModelService, inferenceProvider)
+	// Conversation, project, prompt, memory and chat wiring.
+	conversationRepository := conversationrepo.NewConversationGormRepository(database)
+	conversationService := conversation.NewConversationService(conversationRepository)
+	projectRepository := projectrepo.NewProjectGormRepository(db)
+	projectService := project.NewProjectService(projectRepository)
+	conversationHandler := conversationhandler.NewConversationHandler(conversationService, projectService)
+	client := infrastructure.ProvideKeycloakClient(config, zerologLogger)
+	resolver := infrastructure.ProvideMediaResolver(config, zerologLogger, client)
+	processorConfig := domain.ProvidePromptProcessorConfig(config, zerologLogger)
+	processorImpl := prompt.NewProcessor(processorConfig, zerologLogger)
+	memoryClient := infrastructure.ProvideMemoryClient(config, zerologLogger)
+	usersettingsRepository := usersettingsrepo.NewUserSettingsGormRepository(db)
+	usersettingsService := usersettings.NewService(usersettingsRepository)
+	memoryHandler := handlers.ProvideMemoryHandler(memoryClient, config, usersettingsService)
+	chatHandler := chathandler.NewChatHandler(inferenceProvider, providerHandler, conversationHandler, conversationService, projectService, resolver, processorImpl, memoryHandler, usersettingsService)
+	chatCompletionRoute := chat.NewChatCompletionRoute(chatHandler, authHandler)
+	chatRoute := chat.NewChatRoute(chatCompletionRoute)
+	conversationRoute := conversation2.NewConversationRoute(conversationHandler, authHandler)
+	projectHandler := projecthandler.NewProjectHandler(projectService)
+	projectRoute := projects.NewProjectRoute(projectHandler, authHandler)
+	// Admin and user-settings routes, combined into the v1 route.
+	providerModelHandler := modelhandler.NewProviderModelHandler(providerModelService, providerService, modelCatalogService)
+	adminModelRoute := model3.NewAdminModelRoute(modelHandler, modelCatalogHandler, providerModelHandler)
+	adminProviderRoute := provider2.NewAdminProviderRoute(providerHandler)
+	adminRoute := admin.NewAdminRoute(adminModelRoute, adminProviderRoute)
+	userSettingsHandler := usersettingshandler.NewUserSettingsHandler(usersettingsService, zerologLogger)
+	usersRoute := users.NewUsersRoute(userSettingsHandler, authHandler)
+	v1Route := v1.NewV1Route(modelRoute, chatRoute, conversationRoute, projectRoute, adminRoute, usersRoute)
+	// Auth route wiring.
+	// NOTE(review): `guestauth` is not an alias in this file's import block
+	// (the matching import path ends in handlers/guesthandler); presumably that
+	// directory declares `package guestauth` — confirm.
+	guestHandler := guestauth.NewGuestHandler(client, zerologLogger)
+	upgradeHandler := guestauth.NewUpgradeHandler(client, zerologLogger)
+	tokenHandler := authhandler.NewTokenHandler(client, zerologLogger)
+	apikeyRepository := apikeyrepo.NewAPIKeyRepository(db)
+	apikeyConfig := domain.ProvideAPIKeyConfig(config)
+	apikeyService := apikey.NewService(apikeyRepository, repository, client, apikeyConfig, zerologLogger)
+	handler := apikeyhandler.NewHandler(apikeyService, zerologLogger)
+	keycloakOAuthHandler := authhandler.ProvideKeycloakOAuthHandler(config)
+	authRoute := auth.NewAuthRoute(guestHandler, upgradeHandler, tokenHandler, handler, authHandler, keycloakOAuthHandler)
+	keycloakValidator, err := infrastructure.ProvideKeycloakValidator(config, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	infrastructureInfrastructure := infrastructure.NewInfrastructure(db, keycloakValidator, zerologLogger)
+	httpServer := httpserver.NewHttpServer(v1Route, authRoute, infrastructureInfrastructure, config)
+	crontabCrontab := crontab.NewCrontab(providerService, inferenceProvider)
+	application := &Application{
+		httpServer: httpServer,
+		crontab:    crontabCrontab,
+	}
+	return application, nil
+}
+
+// CreateDataInitializer builds a *DataInitializer from a provider service, a
+// model catalog service and an inference provider. It calls ProvideConfig and
+// ProvideDatabase itself rather than reusing anything built by
+// CreateApplication, and returns the first error from either of them.
+//
+// This function is generated from the injector in wire.go; regenerate it
+// instead of editing it by hand.
+func CreateDataInitializer() (*DataInitializer, error) {
+	config, err := infrastructure.ProvideConfig()
+	if err != nil {
+		return nil, err
+	}
+	zerologLogger := logger.GetLogger()
+	db, err := infrastructure.ProvideDatabase(config, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	database := infrastructure.ProvideTransactionDatabase(db)
+	providerRepository := modelrepo.NewProviderGormRepository(database)
+	providerModelRepository := modelrepo.NewProviderModelGormRepository(database)
+	modelCatalogRepository := modelrepo.NewModelCatalogGormRepository(database)
+	providerModelService := model.NewProviderModelService(providerModelRepository, modelCatalogRepository)
+	modelCatalogService := model.NewModelCatalogService(modelCatalogRepository)
+	providerService := model.NewProviderService(providerRepository, providerModelService, modelCatalogService)
+	inferenceProvider := inference.NewInferenceProvider()
+	dataInitializer := &DataInitializer{
+		provider:            providerService,
+		modelCatalogService: modelCatalogService,
+		inferenceProvider:   inferenceProvider,
+	}
+	return dataInitializer, nil
+}
diff --git a/services/llm-api/config/providers.yml b/services/llm-api/config/providers.yml
new file mode 100644
index 00000000..412eb130
--- /dev/null
+++ b/services/llm-api/config/providers.yml
@@ -0,0 +1,60 @@
+providers:
+  default:
+    - name: Local vLLM Provider
+      type: vllm
+      url: ${VLLM_PROVIDER_URL}
+      api_key: ${VLLM_INTERNAL_KEY}
+      description: Default access to vLLM Provider
+      auto_enable_new_models: true
+      sync_models: true
+      metadata:
+        environment: local-gpu
+        tool_support: ${VLLM_TOOL_SUPPORT:-false}
+        # Provider capabilities - Override defaults with JSON structure
+        image_input: '{"supported":true,"url":true,"base64":true,"schema":"..."}'
+        file_attachment: '{"supported":false,"url":false,"base64":false,"file_upload":false}'
+        
+    # - name: Local Jan vLLM Provider
+    #   type: jan
+    #   url: https://inference.jan.ai/v1
+    #   api_key: ${VLLM_INTERNAL_KEY}
+    #   description: "Local Jan vLLM instance (Docker profile: full)"
+    #   auto_enable_new_models: true
+    #   sync_models: true
+    #   metadata:
+    #     environment: internal
+    #     image_input: '{"supported":true,"url":true,"base64":true,"schema":"messages[].content[].type=image_url; supports https:// or data:image/...;base64,..."}'
+    #     file_attachment: '{"supported":true,"url":true,"base64":true,"file_upload":true,"schema":"messages[].content[].type=input_file; supports url, inline base64, or file_id"}'
+      
+    # - name: External Gemini
+    #   type: gemini
+    #   url: https://generativelanguage.googleapis.com/v1beta/openai
+    #   api_key: ${GEMINI_API_KEY}
+    #   description: Shared Gemini workspace
+    #   auto_enable_new_models: true
+    #   sync_models: true
+    #   # Note: Gemini capabilities are loaded from providers_metadata_default.yml.
+    #   # Override if needed:
+    #   metadata:
+    #     image_input: '{"supported":true,"url":false,"base64":true,"schema":"Gemini inline_data format"}'
+      
+  production:
+    # - name: External OpenAI
+    #   type: openai
+    #   url: https://api.openai.com/v1
+    #   api_key: ${OPENAI_API_KEY}
+    #   description: Shared OpenAI workspace
+    #   auto_enable_new_models: false
+    #   metadata:
+    #     # OpenAI capabilities loaded from providers_metadata_default.yml by default
+    #     # Override example - disable URL support:
+    #     image_input: '{"supported":true,"url":false,"base64":true,"schema":"Base64 only"}'
+    #     file_attachment: '{"supported":true,"url":false,"base64":false,"file_upload":true}'
+    
+    # - name: External Gemini
+    #   type: gemini
+    #   url: https://generativelanguage.googleapis.com/v1beta/openai
+    #   api_key: ${GEMINI_API_KEY}
+    #   description: Shared Gemini workspace
+    #   auto_enable_new_models: true
+    #   sync_models: true
diff --git a/services/llm-api/config/providers_metadata_default.yml b/services/llm-api/config/providers_metadata_default.yml
new file mode 100644
index 00000000..33cba1f2
--- /dev/null
+++ b/services/llm-api/config/providers_metadata_default.yml
@@ -0,0 +1,257 @@
+{
+  "vllm": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "Python: prompt + multi_modal_data.image (PIL/URL/base64). OpenAI-compatible: messages[].content[].type='image_url'."
+    },
+    "file_attachment": {
+      "supported": false,
+      "url": false,
+      "base64": false,
+      "file_upload": false,
+      "schema": "No native file attachments. Preprocess to text or images; or use tool-calling to fetch/convert."
+    }
+  },
+  "openai": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "messages[].content[].type='image_url'; image_url.url=https:// or data:image/...;base64,..."
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": false,
+      "base64": false,
+      "file_upload": true,
+      "schema": "messages[].content[].type='input_file'; file_id from Files API upload"
+    }
+  },
+  "azure_openai": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "identical to OpenAI vision; supports https:// and data:image/...;base64,..."
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": false,
+      "base64": false,
+      "file_upload": true,
+      "schema": "Files uploaded to Azure resource → reference with file_id"
+    }
+  },
+  "google": {
+    "image_input": {
+      "supported": true,
+      "url": false,
+      "base64": true,
+      "schema": "messages[].parts[].inline_data={mime_type,data} or file_data={file_uri,mime_type}"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": false,
+      "base64": true,
+      "file_upload": true,
+      "schema": "Gemini: inline_data for small files; file_data.file_uri for uploaded files"
+    }
+  },
+  "anthropic": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "messages[].content[].type='image'; image_url=https:// or data:image/...;base64,... or file_id"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": true,
+      "schema": "messages[].content[].type='input_file'; supports url, inline base64, or file_id"
+    }
+  },
+  "mistral": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "messages[].content[].type='image_url'; supports https:// or data:image/...;base64,..."
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": true,
+      "schema": "OpenAI-compatible input_file with url, base64, or file_id"
+    }
+  },
+  "groq": {
+    "image_input": {
+      "supported": false,
+      "url": false,
+      "base64": false,
+      "schema": ""
+    },
+    "file_attachment": {
+      "supported": false,
+      "url": false,
+      "base64": false,
+      "file_upload": false,
+      "schema": ""
+    }
+  },
+  "cohere": {
+    "image_input": {
+      "supported": true,
+      "url": false,
+      "base64": true,
+      "schema": "embed/image endpoints use inline base64 (data:image/...;base64,...)"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": false,
+      "base64": true,
+      "file_upload": false,
+      "schema": "inline base64 content; no separate Files API"
+    }
+  },
+  "ollama": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "local path or data URI; multimodal models like llava accept both"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": false,
+      "schema": "file={path or base64}; handled locally by Ollama server"
+    }
+  },
+  "replicate": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "inputs[].url=https:// or data:image/...;base64,..."
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": true,
+      "schema": "inputs[].url=https:// or data URI (base64 <1MB) or uploaded file link"
+    }
+  },
+  "openrouter": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "messages[].content[].type='image_url'; image_url.url=https:// or data:image/...;base64,..."
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": false,
+      "base64": true,
+      "file_upload": false,
+      "schema": "messages[].content[].type='file'; file={filename, file_data:data:<mime>;base64,<...>}; optional annotations reuse"
+    }
+  },
+  "togetherai": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "image_url.url=https:// or data:image/...;base64,...; OpenAI-compatible"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": true,
+      "schema": "input_file type; supports url, base64, or file_id"
+    }
+  },
+  "perplexity": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "type='image_url'; image_url.url=https:// or data:image/...;base64,... (≤50MB)"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": false,
+      "schema": "type='input_file'; supports public https:// URLs or data:<mime>;base64,..."
+    }
+  },
+  "vercel_ai": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "{type:'file', mediaType:'image/...', url or base64}; AI SDK forwards to provider"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": true,
+      "schema": "same as image but for any file; SDK handles upload or inline data"
+    }
+  },
+  "deepinfra": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "OpenAI-style image_url.url=https:// or data:image/...;base64,..."
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": false,
+      "schema": "type='input_file'; supports https:// or base64; no file_id API"
+    }
+  },
+  "huggingface": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "Inference API: accepts image URL or base64 string; multipart upload supported"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": true,
+      "schema": "multipart/form-data or URL/base64; client auto-detects type"
+    }
+  },
+  "aws_bedrock": {
+    "image_input": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "schema": "ImageSource={s3Uri:..., bytes:base64,...}"
+    },
+    "file_attachment": {
+      "supported": true,
+      "url": true,
+      "base64": true,
+      "file_upload": true,
+      "schema": "files attached via S3 URI or inline base64 bytes"
+    }
+  }
+}
diff --git a/apps/jan-api-gateway/application/docs/docs.go b/services/llm-api/docs/swagger/docs.go
similarity index 53%
rename from apps/jan-api-gateway/application/docs/docs.go
rename to services/llm-api/docs/swagger/docs.go
index 39eb2e89..ed100c70 100644
--- a/apps/jan-api-gateway/application/docs/docs.go
+++ b/services/llm-api/docs/swagger/docs.go
@@ -1,5 +1,6 @@
-// Package docs Code generated by swaggo/swag. DO NOT EDIT
-package docs
+// Code generated by swaggo/swag. DO NOT EDIT.
+
+package swagger
 
 import "github.com/swaggo/swag"
 
@@ -9,15 +10,61 @@ const docTemplate = `{
     "info": {
         "description": "{{escape .Description}}",
         "title": "{{.Title}}",
-        "contact": {},
+        "contact": {
+            "name": "Jan Server Team",
+            "url": "https://github.com/janhq/jan-server"
+        },
         "version": "{{.Version}}"
     },
     "host": "{{.Host}}",
     "basePath": "{{.BasePath}}",
     "paths": {
-        "/v1/auth/google/callback": {
+        "/auth/api-keys": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Returns all API keys created by the authenticated user. Key values are not returned, only metadata.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Authentication API"
+                ],
+                "summary": "List user's API keys",
+                "responses": {
+                    "200": {
+                        "description": "List of API keys with metadata",
+                        "schema": {
+                            "type": "object"
+                        }
+                    },
+                    "401": {
+                        "description": "Unauthorized - invalid or expired token",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            },
             "post": {
-                "description": "Handles the callback from the Google OAuth2 provider to exchange the authorization code for a token, verify the user, and issue access and refresh tokens.",
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Creates a new API key for the authenticated user. API keys provide programmatic access without requiring user credentials.",
                 "consumes": [
                     "application/json"
                 ],
@@ -27,104 +74,172 @@ const docTemplate = `{
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Google OAuth2 Callback",
+                "summary": "Create API key",
                 "parameters": [
                     {
-                        "description": "Request body containing the authorization code and state",
+                        "description": "API key creation request with name and optional scopes",
                         "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest"
+                            "type": "object"
                         }
                     }
                 ],
                 "responses": {
-                    "200": {
-                        "description": "Successfully authenticated and returned tokens",
+                    "201": {
+                        "description": "API key created successfully with key value",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.AccessTokenResponse"
+                            "type": "object"
                         }
                     },
                     "400": {
-                        "description": "Bad request (e.g., invalid state, missing code, or invalid claims)",
+                        "description": "Invalid request - missing required fields",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized (e.g., a user claim is not found or is invalid in the context)",
+                        "description": "Unauthorized - invalid or expired token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/google/login": {
-            "get": {
-                "description": "Redirects the user to the Google OAuth2 authorization page to initiate the login process.",
+        "/auth/api-keys/{id}": {
+            "delete": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Revokes and deletes an API key by ID. Deleted keys can no longer be used for authentication.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Google OAuth2 Login",
+                "summary": "Delete API key",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "API key ID",
+                        "name": "id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
                 "responses": {
-                    "200": {
-                        "description": "redirect url",
+                    "204": {
+                        "description": "API key deleted successfully"
+                    },
+                    "401": {
+                        "description": "Unauthorized - invalid or expired token",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "API key not found",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleLoginUrl"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/guest-login": {
-            "post": {
-                "description": "JWT-base Guest Login.",
+        "/auth/callback": {
+            "get": {
+                "description": "Handles the OAuth2 callback from Keycloak, exchanges authorization code for JWT tokens",
+                "consumes": [
+                    "application/json"
+                ],
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Guest Login",
+                "summary": "Handle Keycloak OAuth2 callback",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Authorization code from Keycloak",
+                        "name": "code",
+                        "in": "query",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "State parameter for CSRF protection",
+                        "name": "state",
+                        "in": "query",
+                        "required": true
+                    }
+                ],
                 "responses": {
                     "200": {
-                        "description": "Successfully refreshed the access token",
+                        "description": "JWT tokens",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "type": "object",
+                            "properties": {
+                                "access_token": {
+                                    "type": "string"
+                                },
+                                "expires_in": {
+                                    "type": "integer"
+                                },
+                                "refresh_token": {
+                                    "type": "string"
+                                },
+                                "token_type": {
+                                    "type": "string"
+                                }
+                            }
                         }
                     },
                     "400": {
-                        "description": "Bad Request (e.g., invalid refresh token)",
+                        "description": "Missing code or state",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized (e.g., expired or missing refresh token)",
+                        "description": "Invalid state parameter",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to exchange code for tokens",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/logout": {
-            "get": {
-                "description": "Use a valid refresh token to obtain a new access token. The refresh token is typically sent in a cookie.",
+        "/auth/guest-login": {
+            "post": {
+                "description": "Creates a temporary guest user account and returns JWT tokens. Guest users have limited access and can be upgraded to full accounts later.",
                 "consumes": [
                     "application/json"
                 ],
@@ -134,60 +249,76 @@ const docTemplate = `{
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Refresh an access token",
+                "summary": "Create guest user account",
                 "responses": {
                     "200": {
-                        "description": "Successfully logout"
-                    },
-                    "400": {
-                        "description": "Bad Request (e.g., invalid refresh token)",
+                        "description": "Guest user created with access and refresh tokens",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized (e.g., expired or missing refresh token)",
+                    "500": {
+                        "description": "Internal server error - failed to create guest user",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/me": {
+        "/auth/login": {
             "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
+                "description": "Returns the Keycloak authorization URL for frontend to redirect users. Supports OAuth2 authorization code flow with PKCE.",
+                "consumes": [
+                    "application/json"
                 ],
-                "description": "Retrieves the profile of the authenticated user based on the provided JWT.",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Get user profile",
+                "summary": "Initiate Keycloak OAuth2 login",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "URL to redirect after successful login",
+                        "name": "redirect_url",
+                        "in": "query"
+                    }
+                ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved user profile",
+                        "description": "Authorization URL and state parameter",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.GetMeResponse"
+                            "type": "object",
+                            "properties": {
+                                "authorization_url": {
+                                    "type": "string"
+                                },
+                                "state": {
+                                    "type": "string"
+                                }
+                            }
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized (e.g., missing or invalid JWT)",
+                    "500": {
+                        "description": "Failed to initiate login",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/refresh-token": {
+        "/auth/logout": {
             "get": {
-                "description": "Use a valid refresh token to obtain a new access token. The refresh token is typically sent in a cookie.",
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Revokes the current access token and clears authentication cookies. After logout, the user must re-authenticate.",
                 "consumes": [
                     "application/json"
                 ],
@@ -197,196 +328,178 @@ const docTemplate = `{
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Refresh an access token",
+                "summary": "Logout user",
                 "responses": {
                     "200": {
-                        "description": "Successfully refreshed the access token",
+                        "description": "Successfully logged out",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "type": "object"
                         }
                     },
-                    "400": {
-                        "description": "Bad Request (e.g., invalid refresh token)",
+                    "401": {
+                        "description": "Unauthorized - invalid token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized (e.g., expired or missing refresh token)",
+                    "500": {
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/chat/completions": {
-            "post": {
+        "/auth/me": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes without conversation persistence.\n\n**Streaming Mode (stream=true):**\n- Returns Server-Sent Events (SSE) with real-time streaming\n- Streams completion chunks directly from the inference model\n- Final event contains \"[DONE]\" marker\n\n**Non-Streaming Mode (stream=false or omitted):**\n- Returns single JSON response with complete completion\n- Standard OpenAI ChatCompletionResponse format\n\n**Features:**\n- Supports all OpenAI ChatCompletionRequest parameters\n- User authentication required\n- Direct inference model integration\n- No conversation persistence (stateless)",
+                "description": "Returns the authenticated user's profile information including user ID, email, roles, and guest status.",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "application/json",
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Chat Completions API"
-                ],
-                "summary": "Create a chat completion",
-                "parameters": [
-                    {
-                        "description": "Chat completion request with streaming options",
-                        "name": "request",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/openai.ChatCompletionRequest"
-                        }
-                    }
+                    "Authentication API"
                 ],
+                "summary": "Get current user information",
                 "responses": {
                     "200": {
-                        "description": "Successful streaming response (when stream=true) - SSE format with data: {json} events",
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request payload, empty messages, or inference failure",
+                        "description": "User profile information",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - missing or invalid authentication",
+                        "description": "Unauthorized - invalid or expired token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conv/chat/completions": {
+        "/auth/refresh-token": {
             "post": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Generates a model response for the given chat conversation with conversation persistence and management. This is the conversation-aware version of the chat completion API that supports both streaming and non-streaming modes with conversation management and storage options.\n\n**Streaming Mode (stream=true):**\n- Returns Server-Sent Events (SSE) with real-time streaming\n- First event contains conversation metadata\n- Subsequent events contain completion chunks\n- Final event contains \"[DONE]\" marker\n\n**Non-Streaming Mode (stream=false or omitted):**\n- Returns single JSON response with complete completion\n- Includes conversation metadata in response\n\n**Storage Options:**\n- ` + "`" + `store=true` + "`" + `: Saves user message and assistant response to conversation\n- ` + "`" + `store_reasoning=true` + "`" + `: Includes reasoning content in stored messages\n- ` + "`" + `conversation` + "`" + `: ID of existing conversation or empty for new conversation\n\n**Features:**\n- Conversation persistence and history management\n- Extended request format with conversation and storage options\n- User authentication required\n- Automatic conversation creation and management",
+                "description": "Exchanges a valid refresh token for a new access token. Refresh token must be provided in Authorization header or refresh_token cookie.",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "application/json",
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Conversation-aware Chat API"
+                    "Authentication API"
                 ],
-                "summary": "Create a conversation-aware chat completion",
+                "summary": "Refresh access token",
                 "parameters": [
                     {
-                        "description": "Extended chat completion request with streaming, storage, and conversation options",
-                        "name": "request",
+                        "description": "Refresh token (can also be in Authorization header)",
+                        "name": "refresh_token",
                         "in": "body",
-                        "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.ExtendedChatCompletionRequest"
+                            "type": "string"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successful streaming response (when stream=true) - SSE format with data: {json} events",
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request payload or conversation not found",
+                        "description": "New access token and refresh token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - missing or invalid authentication",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Conversation not found or user not found",
+                        "description": "Unauthorized - invalid or expired refresh token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conv/mcp": {
+        "/auth/revoke": {
             "post": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Handles Model Context Protocol (MCP) requests over an HTTP stream for conversation-aware chat functionality. The response is sent as a continuous stream of data with conversation context.",
+                "description": "Revokes a refresh token to invalidate it",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Conversation-aware Chat API"
+                    "Authentication API"
                 ],
-                "summary": "MCP streamable endpoint for conversation-aware chat",
+                "summary": "Revoke Keycloak refresh token",
                 "parameters": [
                     {
-                        "description": "MCP request payload",
+                        "description": "Token to revoke",
                         "name": "request",
                         "in": "body",
                         "required": true,
-                        "schema": {}
+                        "schema": {
+                            "type": "object",
+                            "properties": {
+                                "refresh_token": {
+                                    "type": "string"
+                                }
+                            }
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Streamed response (SSE or chunked transfer)",
+                        "description": "Token revoked successfully",
                         "schema": {
-                            "type": "string"
+                            "type": "object",
+                            "properties": {
+                                "message": {
+                                    "type": "string"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request body",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Keycloak OAuth is not configured",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conv/models": {
-            "get": {
+        "/auth/upgrade": {
+            "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a list of available models that can be used for conversation-aware chat completions. This endpoint provides the same model list as the standard /v1/models endpoint but is specifically designed for conversation-aware chat functionality.",
+                "description": "Converts a guest user account to a permanent account with email/password credentials. Guest flag is removed and user gains full access.",
                 "consumes": [
                     "application/json"
                 ],
@@ -394,92 +507,103 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Conversation-aware Chat API"
+                    "Authentication API"
+                ],
+                "summary": "Upgrade guest to permanent account",
+                "parameters": [
+                    {
+                        "description": "Upgrade request with email and password",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "type": "object"
+                        }
+                    }
                 ],
-                "summary": "List available models for conversation-aware chat",
                 "responses": {
                     "200": {
-                        "description": "Successful response",
+                        "description": "Account upgraded successfully with new tokens",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.ModelsResponse"
+                            "type": "object"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request - missing email or password",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - missing or invalid authentication",
+                        "description": "Unauthorized - not a guest user or invalid token",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations": {
-            "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
+        "/auth/validate": {
+            "post": {
+                "description": "Validates an access token against Keycloak's userinfo endpoint",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
                 ],
-                "description": "Retrieves a paginated list of conversations for the authenticated user with OpenAI-compatible response format.",
                 "tags": [
-                    "Conversations API"
+                    "Authentication API"
                 ],
-                "summary": "List Conversations",
+                "summary": "Validate Keycloak access token",
                 "parameters": [
-                    {
-                        "type": "integer",
-                        "default": 20,
-                        "description": "The maximum number of items to return",
-                        "name": "limit",
-                        "in": "query"
-                    },
-                    {
-                        "type": "string",
-                        "description": "A cursor for use in pagination. The ID of the last object from the previous page",
-                        "name": "after",
-                        "in": "query"
-                    },
                     {
                         "type": "string",
-                        "description": "Order of items (asc/desc)",
-                        "name": "order",
-                        "in": "query"
+                        "description": "Bearer token",
+                        "name": "Authorization",
+                        "in": "header",
+                        "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the list of conversations",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ExtendedConversationResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Bad Request - Invalid pagination parameters",
+                        "description": "Token is valid with user information",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object",
+                            "properties": {
+                                "user_info": {
+                                    "type": "object"
+                                },
+                                "valid": {
+                                    "type": "boolean"
+                                }
+                            }
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Invalid or expired token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Keycloak OAuth is not configured",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
+            }
+        },
+        "/auth/validate-api-key": {
             "post": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Creates a new conversation for the authenticated user with optional items",
+                "description": "Internal endpoint used by Kong API Gateway to validate API keys. Not intended for direct client use.",
                 "consumes": [
                     "application/json"
                 ],
@@ -487,157 +611,215 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Authentication API"
                 ],
-                "summary": "Create a conversation",
+                "summary": "Validate API key (Kong Plugin)",
                 "parameters": [
                     {
-                        "description": "Create conversation request",
+                        "description": "API key validation request",
                         "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateConversationRequest"
+                            "type": "object"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Created conversation",
+                        "description": "API key is valid with user information",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request - Bad payload, too many items, or invalid item format",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized",
+                        "description": "Invalid API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations/{conversation_id}": {
+        "/v1/admin/models/catalogs": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a conversation by its ID with full metadata and title",
+                "description": "Retrieves a paginated list of model catalogs with optional filtering and searching",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Get a conversation",
+                "summary": "List all model catalogs",
                 "parameters": [
+                    {
+                        "type": "integer",
+                        "description": "Number of records to return (default: 20, max: 100)",
+                        "name": "limit",
+                        "in": "query"
+                    },
+                    {
+                        "type": "integer",
+                        "description": "Number of records to skip for pagination",
+                        "name": "offset",
+                        "in": "query"
+                    },
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Sort order: asc or desc (default: desc)",
+                        "name": "order",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Filter by status: init, filled, updated",
+                        "name": "status",
+                        "in": "query"
+                    },
+                    {
+                        "type": "boolean",
+                        "description": "Filter by moderation status",
+                        "name": "is_moderated",
+                        "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Conversation details",
+                        "description": "List of model catalogs",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized",
+                    "400": {
+                        "description": "Invalid query parameters",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/admin/models/catalogs/bulk-toggle": {
+            "post": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Enable or disable provider models for specific catalogs or ALL catalogs, with optional exception list. Supports \"enable/disable all except\" patterns globally or scoped to catalogs.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Admin Model API"
+                ],
+                "summary": "Bulk enable/disable provider models by catalog IDs or all catalogs",
+                "parameters": [
+                    {
+                        "description": "Bulk toggle request. If catalog_ids is empty, applies to ALL catalogs. Use except_models to exclude specific models.",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.BulkToggleCatalogsRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Bulk operation result with counts and status",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/modelresponses.BulkOperationResponse"
                         }
                     },
-                    "403": {
-                        "description": "Access denied",
+                    "400": {
+                        "description": "Invalid request - exceeds limits or validation error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation not found",
+                        "description": "One or more catalog IDs not found (when catalog_ids provided)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal server error during bulk operation",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
-            "delete": {
+            }
+        },
+        "/v1/admin/models/catalogs/{model_public_id}": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes a conversation and all its items permanently",
+                "description": "Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes)",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Delete a conversation",
+                "summary": "Get a model catalog entry",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
+                        "description": "Model Catalog Public ID (can contain slashes)",
+                        "name": "model_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Deleted conversation",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.DeletedConversationResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
+                        "description": "Model catalog details",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
-                    "403": {
-                        "description": "Access denied",
+                    "400": {
+                        "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation not found",
+                        "description": "Model catalog not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -648,7 +830,7 @@ const docTemplate = `{
                         "BearerAuth": []
                     }
                 ],
-                "description": "Updates conversation title and/or metadata",
+                "description": "Updates metadata for a model catalog entry. Marks it as manually updated to prevent auto-sync overwrites.",
                 "consumes": [
                     "application/json"
                 ],
@@ -656,155 +838,144 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Update a conversation",
+                "summary": "Update a model catalog entry",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
+                        "description": "Model Catalog Public ID (can contain slashes)",
+                        "name": "model_public_id",
                         "in": "path",
                         "required": true
                     },
                     {
-                        "description": "Update conversation request",
-                        "name": "request",
+                        "description": "Update payload",
+                        "name": "payload",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.UpdateConversationRequest"
+                            "$ref": "#/definitions/requestmodels.UpdateModelCatalogRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Updated conversation",
+                        "description": "Updated model catalog",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload or update failed",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
+                        "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation not found",
+                        "description": "Model catalog not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations/{conversation_id}/items": {
+        "/v1/admin/models/provider-models": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Lists all items in a conversation with OpenAI-compatible pagination",
+                "description": "Retrieves a paginated list of provider models with optional filtering",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "List items in a conversation",
+                "summary": "List all provider models",
                 "parameters": [
                     {
-                        "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
+                        "type": "integer",
+                        "description": "Number of records to return (default: 20, max: 100)",
+                        "name": "limit",
+                        "in": "query"
                     },
                     {
                         "type": "integer",
-                        "description": "Number of items to return (1-100)",
-                        "name": "limit",
+                        "description": "Number of records to skip for pagination",
+                        "name": "offset",
                         "in": "query"
                     },
                     {
                         "type": "string",
-                        "description": "Cursor for pagination - ID of the last item from previous page",
-                        "name": "after",
+                        "description": "Sort order: asc or desc (default: desc)",
+                        "name": "order",
                         "in": "query"
                     },
                     {
                         "type": "string",
-                        "description": "Order of items (asc/desc)",
-                        "name": "order",
+                        "description": "Filter by provider public ID",
+                        "name": "provider_id",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Filter by model key",
+                        "name": "model_key",
+                        "in": "query"
+                    },
+                    {
+                        "type": "boolean",
+                        "description": "Filter by active status",
+                        "name": "active",
+                        "in": "query"
+                    },
+                    {
+                        "type": "boolean",
+                        "description": "Filter by image support",
+                        "name": "supports_images",
                         "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "List of items",
+                        "description": "List of provider models",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderModelResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad Request - Invalid pagination parameters",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Conversation not found",
+                        "description": "Invalid query parameters",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
+            }
+        },
+        "/v1/admin/models/provider-models/bulk-toggle": {
             "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Adds multiple items to a conversation with OpenAI-compatible format",
+                "description": "Enables or disables provider models with flexible patterns: enable all, disable all, enable all except, or disable all except. Optionally filter by provider.",
                 "consumes": [
                     "application/json"
                 ],
@@ -812,246 +983,196 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Create items in a conversation",
+                "summary": "Bulk enable or disable provider models",
                 "parameters": [
                     {
-                        "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
-                    },
-                    {
-                        "description": "Create items request",
-                        "name": "request",
+                        "description": "Bulk toggle payload with enable flag, optional provider filter, and exception list",
+                        "name": "payload",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateItemsRequest"
+                            "$ref": "#/definitions/requestmodels.BulkEnableModelsRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Created items",
+                        "description": "Bulk operation result with counts and status",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse"
+                            "$ref": "#/definitions/modelresponses.BulkOperationResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload or invalid item format",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Conversation not found",
+                        "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations/{conversation_id}/items/{item_id}": {
+        "/v1/admin/models/provider-models/{provider_model_public_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific item from a conversation with full content details",
+                "description": "Retrieves detailed information about a provider model by its public ID",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Get an item from a conversation",
+                "summary": "Get a provider model",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
-                    },
-                    {
-                        "type": "string",
-                        "description": "Item ID",
-                        "name": "item_id",
+                        "description": "Provider Model Public ID",
+                        "name": "provider_model_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Item details",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
+                        "description": "Provider model details",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderModelResponse"
                         }
                     },
-                    "403": {
-                        "description": "Access denied",
+                    "400": {
+                        "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation or item not found",
+                        "description": "Provider model not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             },
-            "delete": {
+            "patch": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes a specific item from a conversation and returns the deleted item details",
+                "description": "Updates configuration for a provider model including pricing, limits, and feature flags",
+                "consumes": [
+                    "application/json"
+                ],
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Delete an item from a conversation",
+                "summary": "Update a provider model",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
+                        "description": "Provider Model Public ID",
+                        "name": "provider_model_public_id",
                         "in": "path",
                         "required": true
                     },
                     {
-                        "type": "string",
-                        "description": "Item ID",
-                        "name": "item_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Update payload",
+                        "name": "payload",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.UpdateProviderModelRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Deleted item details",
+                        "description": "Updated provider model",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderModelResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad Request - Deletion failed",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
+                        "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation or item not found",
+                        "description": "Provider model not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/mcp": {
-            "post": {
+        "/v1/admin/providers": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Handles Model Context Protocol (MCP) requests over an HTTP stream. The response is sent as a continuous stream of data.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Retrieves all providers with their model counts",
                 "produces": [
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Chat Completions API"
-                ],
-                "summary": "MCP streamable endpoint",
-                "parameters": [
-                    {
-                        "description": "MCP request payload",
-                        "name": "request",
-                        "in": "body",
-                        "required": true,
-                        "schema": {}
-                    }
+                    "Admin Provider API"
                 ],
+                "summary": "Get all providers",
                 "responses": {
                     "200": {
-                        "description": "Streamed response (SSE or chunked transfer)",
+                        "description": "List of providers with model counts",
                         "schema": {
-                            "type": "string"
+                            "type": "array",
+                            "items": {
+                                "$ref": "#/definitions/modelresponses.ProviderWithModelCountResponse"
+                            }
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to retrieve providers",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/models": {
-            "get": {
+            },
+            "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a list of available models that can be used for chat completions or other tasks.",
+                "description": "Registers a new provider and synchronizes its available models.",
                 "consumes": [
                     "application/json"
                 ],
@@ -1059,250 +1180,234 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Chat Completions API"
+                    "Admin Provider API"
+                ],
+                "summary": "Register a provider",
+                "parameters": [
+                    {
+                        "description": "Provider registration payload",
+                        "name": "payload",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.AddProviderRequest"
+                        }
+                    }
                 ],
-                "summary": "List available models",
                 "responses": {
                     "200": {
-                        "description": "Successful response",
+                        "description": "Registered provider with synced models",
+                        "schema": {
+                            "$ref": "#/definitions/modelresponses.ProviderWithModelsResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request payload",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to register provider",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1.ModelsResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/admin_api_keys": {
-            "get": {
+        "/v1/admin/providers/{provider_public_id}": {
+            "patch": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a paginated list of all admin API keys for the authenticated organization.",
+                "description": "Updates an existing provider's configuration",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Admin Provider API"
                 ],
-                "summary": "List Admin API Keys",
+                "summary": "Update a provider",
                 "parameters": [
-                    {
-                        "type": "integer",
-                        "default": 20,
-                        "description": "The maximum number of items to return",
-                        "name": "limit",
-                        "in": "query"
-                    },
                     {
                         "type": "string",
-                        "description": "A cursor for use in pagination. The ID of the last object from the previous page",
-                        "name": "after",
-                        "in": "query"
+                        "description": "Provider public ID",
+                        "name": "provider_public_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "description": "Provider update payload",
+                        "name": "payload",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.UpdateProviderRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the list of admin API keys",
+                        "description": "Updated provider",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                    "400": {
+                        "description": "Invalid request payload",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Provider not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Failed to update provider",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
+            }
+        },
+        "/v1/chat/completions": {
             "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new admin API key for an organization. Requires a valid admin API key in the Authorization header.",
+                "description": "Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes without conversation persistence.\n\n**Streaming Mode (stream=true):**\n- Returns Server-Sent Events (SSE) with real-time streaming\n- Streams completion chunks directly from the inference model\n- Final event contains \"[DONE]\" marker\n\n**Non-Streaming Mode (stream=false or omitted):**\n- Returns single JSON response with complete completion\n- Standard OpenAI ChatCompletionResponse format\n\n**Storage Options:**\n- ` + "`" + `store=true` + "`" + `: Persist the latest input message and assistant response to the active conversation\n- ` + "`" + `store_reasoning=true` + "`" + `: Additionally persist reasoning content provided by the model\n- When ` + "`" + `store` + "`" + ` is omitted or false, the conversation remains read-only\n\n**Features:**\n- Supports all OpenAI ChatCompletionRequest parameters\n- Optional conversation context for conversation persistence\n- User authentication required\n- Direct inference model integration",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "application/json"
+                    "application/json",
+                    "text/event-stream"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Chat Completions API"
                 ],
-                "summary": "Create Admin API Key",
+                "summary": "Create a chat completion",
                 "parameters": [
                     {
-                        "description": "API key creation request",
-                        "name": "body",
+                        "description": "Chat completion request with streaming options and optional conversation",
+                        "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest"
+                            "$ref": "#/definitions/chatrequests.ChatCompletionRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully created admin API key",
+                        "description": "Successful streaming response (when stream=true) - SSE format with data: {json} events",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
+                            "type": "string"
                         }
                     },
                     "400": {
-                        "description": "Bad request - invalid payload",
+                        "description": "Invalid request payload, empty messages, or inference failure",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/admin_api_keys/{id}": {
+        "/v1/conversations": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific admin API key by its ID.",
+                "description": "List conversations for the authenticated user with optional referrer filtering.",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Get Admin API Key",
+                "summary": "List conversations",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "ID of the admin API key",
-                        "name": "id",
-                        "in": "path",
-                        "required": true
-                    }
-                ],
-                "responses": {
-                    "200": {
-                        "description": "Successfully retrieved the admin API key",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
+                        "description": "Referrer filter",
+                        "name": "referrer",
+                        "in": "query"
                     },
-                    "404": {
-                        "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    }
-                }
-            },
-            "delete": {
-                "security": [
                     {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Deletes an admin API key by its ID.",
-                "tags": [
-                    "Administration API"
-                ],
-                "summary": "Delete Admin API Key",
-                "parameters": [
+                        "type": "integer",
+                        "description": "Maximum number of conversations to return",
+                        "name": "limit",
+                        "in": "query"
+                    },
                     {
                         "type": "string",
-                        "description": "ID of the admin API key to delete",
-                        "name": "id",
-                        "in": "path",
-                        "required": true
-                    }
-                ],
-                "responses": {
-                    "200": {
-                        "description": "Successfully deleted the admin API key",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
+                        "description": "Return conversations created after the given numeric ID",
+                        "name": "after",
+                        "in": "query"
                     },
-                    "404": {
-                        "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    }
-                }
-            }
-        },
-        "/v1/organization/invites": {
-            "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Retrieves a paginated list of invites for the current organization.",
-                "tags": [
-                    "Administration API"
-                ],
-                "summary": "List Organization Invites",
-                "parameters": [
                     {
                         "type": "string",
-                        "description": "Cursor pointing to a record after which to fetch results",
-                        "name": "after",
+                        "description": "Sort order (asc or desc)",
+                        "name": "order",
                         "in": "query"
                     },
                     {
-                        "type": "integer",
-                        "description": "Maximum number of results to return",
-                        "name": "limit",
+                        "type": "string",
+                        "description": "Set to 'all' to list conversations across the workspace (requires elevated permissions)",
+                        "name": "scope",
                         "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved list of invites",
+                        "description": "Successfully retrieved conversations",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_organization_invites_InviteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationListResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid or missing query parameter",
+                        "description": "Invalid request parameters",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1313,7 +1418,7 @@ const docTemplate = `{
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new invite for a user to join the organization.",
+                "description": "Create a new conversation to store and retrieve conversation state across Response API calls\n\n**Features:**\n- Create conversation with optional metadata (max 16 key-value pairs)\n- Add up to 20 initial items to the conversation\n- Returns conversation ID with ` + "`" + `conv_` + "`" + ` prefix\n- Supports OpenAI Conversations API format\n\n**Metadata Constraints:**\n- Maximum 16 key-value pairs\n- Keys: max 64 characters\n- Values: max 512 characters",
                 "consumes": [
                     "application/json"
                 ],
@@ -1321,143 +1426,169 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Create Invite",
+                "summary": "Create a conversation",
                 "parameters": [
                     {
-                        "description": "Invite request payload",
-                        "name": "invite",
+                        "description": "Create conversation request with optional items and metadata",
+                        "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.CreateInviteUserRequest"
+                            "$ref": "#/definitions/conversationrequests.CreateConversationRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully created invite",
+                        "description": "Successfully created conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload or user already exists",
+                        "description": "Invalid request - validation failed or too many items",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal server error - conversation creation failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/invites/verification": {
-            "post": {
+        "/v1/conversations/{conv_public_id}": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Verifies an invitation code, checks expiration, registers the user if necessary, and assigns project memberships.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Retrieve a conversation by ID with ownership verification\n\n**Features:**\n- Retrieves conversation metadata including creation timestamp\n- Automatic ownership verification (user can only access their own conversations)\n- Returns OpenAI-compatible conversation object\n\n**Response Fields:**\n- ` + "`" + `id` + "`" + `: Conversation ID with ` + "`" + `conv_` + "`" + ` prefix\n- ` + "`" + `object` + "`" + `: Always \"conversation\"\n- ` + "`" + `created_at` + "`" + `: Unix timestamp\n- ` + "`" + `metadata` + "`" + `: User-defined key-value pairs",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Verify Invite",
+                "summary": "Get a conversation",
                 "parameters": [
                     {
-                        "description": "Verification request payload",
-                        "name": "verification",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.VerifyInviteUserRequest"
-                        }
+                        "type": "string",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully verified invite",
+                        "description": "Successfully retrieved conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid or expired invite code",
+                        "description": "Invalid conversation ID format",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Conversation not found or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/organization/invites/{invite_id}": {
-            "get": {
+            },
+            "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific invite by its ID.",
+                "description": "Update a conversation's metadata while preserving existing items\n\n**Features:**\n- Update metadata key-value pairs\n- Replaces entire metadata object (not merged)\n- Items remain unchanged\n- Automatic ownership verification\n\n**Metadata Constraints:**\n- Maximum 16 key-value pairs\n- Keys: max 64 characters\n- Values: max 512 characters",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Retrieve Invite",
+                "summary": "Update a conversation",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Public ID of the invite",
-                        "name": "invite_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
                         "in": "path",
                         "required": true
+                    },
+                    {
+                        "description": "Update conversation request with new metadata",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/conversationrequests.UpdateConversationRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved invite",
+                        "description": "Successfully updated conversation",
+                        "schema": {
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request - validation failed or invalid metadata",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Invite not found",
+                        "description": "Conversation not found or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error - update failed",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1468,98 +1599,146 @@ const docTemplate = `{
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes a specific invite by its ID. Only organization owners can delete invites.",
+                "description": "Delete a conversation (soft delete). Items in the conversation will not be deleted but will be inaccessible.\n\n**Features:**\n- Soft delete (conversation marked as deleted, not physically removed)\n- Items remain in database but become inaccessible\n- Automatic ownership verification\n- Returns deletion confirmation with conversation ID\n\n**Response:**\n- ` + "`" + `id` + "`" + `: Deleted conversation ID\n- ` + "`" + `object` + "`" + `: Always \"conversation.deleted\"\n- ` + "`" + `deleted` + "`" + `: Always true",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Delete Invite",
+                "summary": "Delete a conversation",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Public ID of the invite",
-                        "name": "invite_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully deleted invite",
+                        "description": "Successfully deleted conversation",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.DeleteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationDeletedResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                    "400": {
+                        "description": "Invalid conversation ID format",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
-                    "403": {
-                        "description": "Forbidden - only owners can delete invites",
+                    "401": {
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Invite not found",
+                        "description": "Conversation not found or access denied",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error - deletion failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects": {
+        "/v1/conversations/{conv_public_id}/items": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a paginated list of all projects for the authenticated organization.",
+                "description": "List all items in a conversation with cursor-based pagination support\n\n**Features:**\n- Cursor-based pagination using item IDs\n- Configurable page size (1-100 items, default 20)\n- Sort order control (ascending or descending)\n- Optional include parameter for additional fields\n- Returns paginated list with navigation cursors\n\n**Pagination:**\n- Use ` + "`" + `after` + "`" + ` cursor from previous response for next page\n- ` + "`" + `has_more` + "`" + ` indicates if more items are available\n- ` + "`" + `first_id` + "`" + ` and ` + "`" + `last_id` + "`" + ` provide cursor references\n\n**Query Parameters:**\n- ` + "`" + `limit` + "`" + `: Number of items (1-100, default 20)\n- ` + "`" + `order` + "`" + `: Sort order (\"asc\" or \"desc\", default \"desc\")\n- ` + "`" + `after` + "`" + `: Item ID cursor for pagination\n- ` + "`" + `include` + "`" + `: Additional fields to include (optional)",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "List Projects",
+                "summary": "List conversation items",
                 "parameters": [
                     {
+                        "type": "string",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Item ID cursor to list items after (pagination)",
+                        "name": "after",
+                        "in": "query"
+                    },
+                    {
+                        "maximum": 100,
+                        "minimum": 1,
                         "type": "integer",
                         "default": 20,
-                        "description": "The maximum number of items to return",
+                        "description": "Number of items to return (1-100)",
                         "name": "limit",
                         "in": "query"
                     },
                     {
+                        "enum": [
+                            "asc",
+                            "desc"
+                        ],
                         "type": "string",
-                        "description": "A cursor for use in pagination. The ID of the last object from the previous page",
-                        "name": "after",
+                        "default": "desc",
+                        "description": "Sort order: asc or desc",
+                        "name": "order",
                         "in": "query"
                     },
                     {
-                        "type": "string",
-                        "description": "Whether to include archived projects.",
-                        "name": "include_archived",
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "collectionFormat": "csv",
+                        "description": "Additional fields to include in response",
+                        "name": "include",
                         "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the list of projects",
+                        "description": "Successfully retrieved items list",
+                        "schema": {
+                            "$ref": "#/definitions/conversationresponses.ItemListResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request - invalid parameters or conversation ID",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectListResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Conversation not found or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error - listing failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1570,7 +1749,7 @@ const docTemplate = `{
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new project for an organization.",
+                "description": "Add items to a conversation. You may add up to 20 items at a time.\n\n**Features:**\n- Bulk item creation (max 20 items per request)\n- Automatic item ID generation with ` + "`" + `msg_` + "`" + ` prefix\n- Items added to conversation's active branch (default: MAIN)\n- Returns list of created items with generated IDs\n\n**Item Types:**\n- ` + "`" + `message` + "`" + `: User or assistant messages\n- ` + "`" + `tool_call` + "`" + `: Tool/function call items\n- ` + "`" + `tool_response` + "`" + `: Tool/function response items\n- Other OpenAI-compatible item types\n\n**Constraints:**\n- Maximum 20 items per request\n- Each item must have valid type and content\n- Items are immutable after creation",
                 "consumes": [
                     "application/json"
                 ],
@@ -1578,263 +1757,344 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Create Project",
+                "summary": "Create conversation items",
                 "parameters": [
                     {
-                        "description": "Project creation request",
-                        "name": "body",
+                        "type": "string",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "collectionFormat": "csv",
+                        "description": "Additional fields to include in response",
+                        "name": "include",
+                        "in": "query"
+                    },
+                    {
+                        "description": "Create items request with array of items",
+                        "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest"
+                            "$ref": "#/definitions/conversationrequests.CreateItemsRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully created project",
+                        "description": "Successfully created items",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationItemCreatedResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad request - invalid payload",
+                        "description": "Invalid request - too many items, invalid format, or validation failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Conversation not found or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error - item creation failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects/{project_id}": {
+        "/v1/conversations/{conv_public_id}/items/{item_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific project by its ID.",
+                "description": "Retrieve a single item from a conversation by item ID\n\n**Features:**\n- Retrieve specific item by ID\n- Returns complete item with all content\n- Automatic ownership verification via conversation\n- Optional include parameter for additional fields\n\n**Response Fields:**\n- ` + "`" + `id` + "`" + `: Item ID with ` + "`" + `msg_` + "`" + ` prefix\n- ` + "`" + `type` + "`" + `: Item type (message, tool_call, etc.)\n- ` + "`" + `role` + "`" + `: Role for message items (user, assistant)\n- ` + "`" + `content` + "`" + `: Item content array\n- ` + "`" + `status` + "`" + `: Item status (completed, incomplete, etc.)\n- ` + "`" + `created_at` + "`" + `: Unix timestamp",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Get Project",
+                "summary": "Get a conversation item",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "ID of the project",
-                        "name": "project_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Item ID (format: msg_xxxxx)",
+                        "name": "item_id",
                         "in": "path",
                         "required": true
+                    },
+                    {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "collectionFormat": "csv",
+                        "description": "Additional fields to include in response",
+                        "name": "include",
+                        "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the project",
+                        "description": "Successfully retrieved item",
+                        "schema": {
+                            "$ref": "#/definitions/conversationresponses.ItemResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid conversation ID or item ID format",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Not Found - project with the given ID does not exist or does not belong to the organization",
+                        "description": "Conversation or item not found, or access denied",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             },
-            "post": {
+            "delete": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Updates a specific project by its ID.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Delete an item from a conversation. The item will be removed from the conversation.\n\n**Features:**\n- Remove specific item from conversation\n- Automatic ownership verification\n- Returns updated conversation object after deletion\n- Items are permanently removed (not soft delete)\n\n**Important:**\n- Deleting an item may affect conversation flow\n- Item IDs are not reused after deletion\n- Other items in conversation remain unchanged\n- Consider creating a new branch instead of deleting items\n\n**Response:**\nReturns the conversation object (not the deleted item)",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Update Project",
+                "summary": "Delete a conversation item",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "ID of the project to update",
-                        "name": "project_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
                         "in": "path",
                         "required": true
                     },
                     {
-                        "description": "Project update request",
-                        "name": "body",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest"
-                        }
+                        "type": "string",
+                        "description": "Item ID to delete (format: msg_xxxxx)",
+                        "name": "item_id",
+                        "in": "path",
+                        "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully updated the project",
+                        "description": "Successfully deleted item, returns conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad request - invalid payload",
+                        "description": "Invalid conversation ID or item ID format",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Not Found - project with the given ID does not exist",
+                        "description": "Conversation or item not found, or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error - deletion failed",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects/{project_id}/archive": {
-            "post": {
+        "/v1/healthz": {
+            "get": {
+                "description": "Returns the health status of the API server. Used by orchestrators and monitoring systems.",
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Server API"
+                ],
+                "summary": "Health check endpoint",
+                "responses": {
+                    "200": {
+                        "description": "Health status OK",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/models": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Archives a specific project by its ID, making it inactive.",
+                "description": "Retrieves a list of available models that can be used for chat completions or other tasks. Returns either simple model list or detailed list with provider metadata based on X-PROVIDER-DATA header.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Chat Completions API"
                 ],
-                "summary": "Archive Project",
+                "summary": "List available models",
                 "parameters": [
                     {
+                        "enum": [
+                            "true",
+                            "false"
+                        ],
                         "type": "string",
-                        "description": "ID of the project to archive",
-                        "name": "project_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Set to 'true' to include provider metadata in response",
+                        "name": "X-PROVIDER-DATA",
+                        "in": "header"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully archived the project",
+                        "description": "List of models with provider metadata (when X-PROVIDER-DATA=true)",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/modelresponses.ModelWithProviderResponseList"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                    "404": {
+                        "description": "Models or providers not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
-                    "404": {
-                        "description": "Not Found - project with the given ID does not exist",
+                    "500": {
+                        "description": "Failed to retrieve models",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects/{project_public_id}/api_keys": {
+        "/v1/models/catalogs/{model_public_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "List API keys for a specific project.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes like openrouter/nova-lite-v1)",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Model API"
                 ],
-                "summary": "List new project API key",
+                "summary": "Get a model catalog entry",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Project Public ID",
-                        "name": "project_public_id",
+                        "description": "Model Catalog Public ID (can contain slashes)",
+                        "name": "model_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "API key created successfully",
+                        "description": "Model catalog details",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad request, e.g., invalid payload or missing IDs",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized, e.g., invalid or missing token",
+                        "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Not Found, e.g., project or organization not found",
+                        "description": "Model catalog not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
-            "post": {
+            }
+        },
+        "/v1/models/providers": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new API key for a specific project.",
+                "description": "Retrieves a list of available model providers that can be used for inference.",
                 "consumes": [
                     "application/json"
                 ],
@@ -1842,69 +2102,88 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Model API"
+                ],
+                "summary": "List model providers",
+                "responses": {
+                    "200": {
+                        "description": "List of providers",
+                        "schema": {
+                            "$ref": "#/definitions/modelresponses.ProviderResponseList"
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to retrieve providers",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/projects": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "List all projects for the authenticated user",
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Projects API"
                 ],
-                "summary": "Create a new project API key",
+                "summary": "List projects",
                 "parameters": [
+                    {
+                        "type": "integer",
+                        "description": "Maximum number of projects to return",
+                        "name": "limit",
+                        "in": "query"
+                    },
                     {
                         "type": "string",
-                        "description": "Project Public ID",
-                        "name": "project_public_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Return projects after the given numeric ID",
+                        "name": "after",
+                        "in": "query"
                     },
                     {
-                        "description": "Request body for creating an API key",
-                        "name": "requestBody",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest"
-                        }
+                        "type": "string",
+                        "description": "Sort order (asc or desc)",
+                        "name": "order",
+                        "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "API key created successfully",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Bad request, e.g., invalid payload or missing IDs",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/projectres.ProjectListResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized, e.g., invalid or missing token",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Not Found, e.g., project or organization not found",
+                        "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/responses": {
+            },
             "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.\n\n**Supported Input Types:**\n- ` + "`" + `text` + "`" + `: Plain text input\n- ` + "`" + `image` + "`" + `: Image input (URL or base64)\n- ` + "`" + `file` + "`" + `: File input by file ID\n- ` + "`" + `web_search` + "`" + `: Web search input\n- ` + "`" + `file_search` + "`" + `: File search input\n- ` + "`" + `streaming` + "`" + `: Streaming input\n- ` + "`" + `function_calls` + "`" + `: Function calls input\n- ` + "`" + `reasoning` + "`" + `: Reasoning input\n\n**Example Request:**\n` + "`" + `` + "`" + `` + "`" + `json\n{\n\"model\": \"gpt-4\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"max_tokens\": 100,\n\"temperature\": 0.7,\n\"stream\": false,\n\"background\": false\n}\n` + "`" + `` + "`" + `` + "`" + `\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `output` + "`" + `: Generated output\n\n**Example Response:**\n` + "`" + `` + "`" + `` + "`" + `json\n{\n\"jan_status\": \"000000\",\n\"id\": \"resp_1234567890\",\n\"object\": \"response\",\n\"created\": 1234567890,\n\"model\": \"gpt-4\",\n\"status\": \"completed\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"output\": {\n\"type\": \"text\",\n\"text\": {\n\"value\": \"I'm doing well, thank you!\"\n}\n}\n}\n` + "`" + `` + "`" + `` + "`" + `\n\n**Response Status:**\n- ` + "`" + `completed` + "`" + `: Response generation finished successfully\n- ` + "`" + `processing` 
+ "`" + `: Response is being generated\n- ` + "`" + `failed` + "`" + `: Response generation failed\n- ` + "`" + `cancelled` + "`" + `: Response was cancelled",
+                "description": "Create a new project for grouping conversations",
                 "consumes": [
                     "application/json"
                 ],
@@ -1912,128 +2191,95 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Create a response",
+                "summary": "Create project",
                 "parameters": [
                     {
-                        "description": "Request payload containing model, input, and generation parameters",
+                        "description": "Create project request",
                         "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.CreateResponseRequest"
+                            "$ref": "#/definitions/projectreq.CreateProjectRequest"
                         }
                     }
                 ],
                 "responses": {
-                    "200": {
-                        "description": "Created response",
+                    "201": {
+                        "description": "Created",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
-                        }
-                    },
-                    "202": {
-                        "description": "Response accepted for background processing",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
+                            "$ref": "#/definitions/projectres.ProjectResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload",
+                        "description": "Bad Request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "422": {
-                        "description": "Validation error",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "429": {
-                        "description": "Rate limit exceeded",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/responses/{response_id}": {
+        "/v1/projects/{project_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves an LLM response by its ID. Returns the complete response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `output` + "`" + `: Generated output",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Get a single project by ID",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Get a response",
+                "summary": "Get project",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Unique identifier of the response",
-                        "name": "response_id",
+                        "description": "Project ID",
+                        "name": "project_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Response details",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/projectres.ProjectResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Response not found",
+                        "description": "Not Found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -2044,74 +2290,57 @@ const docTemplate = `{
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes an LLM response by its ID. Returns the deleted response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status (will be \"cancelled\")\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `cancelled_at` + "`" + `: Cancellation timestamp",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Soft-delete a project",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Delete a response",
+                "summary": "Delete project",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Unique identifier of the response",
-                        "name": "response_id",
+                        "description": "Project ID",
+                        "name": "project_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Deleted response",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/projectres.ProjectDeletedResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Response not found",
+                        "description": "Not Found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/responses/{response_id}/cancel": {
-            "post": {
+            },
+            "patch": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `id` + "`" + `: Response identifier\n- ` + "`" + `object` + "`" + `: Object type (\"response\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `model` + "`" + `: Model used\n- ` + "`" + `status` + "`" + `: Response status (will be \"cancelled\")\n- ` + "`" + `input` + "`" + `: Input data\n- ` + "`" + `cancelled_at` + "`" + `: Cancellation timestamp",
+                "description": "Update project name, instruction, or archived status",
                 "consumes": [
                     "application/json"
                 ],
@@ -2119,138 +2348,79 @@ const docTemplate = `{
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Cancel a response",
+                "summary": "Update project",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Unique identifier of the response to cancel",
-                        "name": "response_id",
+                        "description": "Project ID",
+                        "name": "project_id",
                         "in": "path",
                         "required": true
+                    },
+                    {
+                        "description": "Update request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/projectreq.UpdateProjectRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Response cancelled successfully",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
+                            "$ref": "#/definitions/projectres.ProjectResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request or response cannot be cancelled",
+                        "description": "Bad Request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Response not found",
+                        "description": "Not Found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/responses/{response_id}/input_items": {
+        "/v1/readyz": {
             "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- ` + "`" + `jan_status` + "`" + `: Jan API status code (optional)\n- ` + "`" + `first_id` + "`" + `: First item ID for pagination (optional)\n- ` + "`" + `last_id` + "`" + `: Last item ID for pagination (optional)\n- ` + "`" + `has_more` + "`" + `: Whether more items are available (optional)\n- ` + "`" + `id` + "`" + `: Input item identifier\n- ` + "`" + `object` + "`" + `: Object type (\"input_item\")\n- ` + "`" + `created` + "`" + `: Unix timestamp\n- ` + "`" + `type` + "`" + `: Input type\n- ` + "`" + `text` + "`" + `: Text content (for text type)\n- ` + "`" + `image` + "`" + `: Image content (for image type)\n- ` + "`" + `file` + "`" + `: File content (for file type)\n\n**Example Response:**\n` + "`" + `` + "`" + `` + "`" + `json\n{\n\"jan_status\": \"000000\",\n\"first_id\": \"input_123\",\n\"last_id\": \"input_456\",\n\"has_more\": false,\n\"id\": \"input_1234567890\",\n\"object\": \"input_item\",\n\"created\": 1234567890,\n\"type\": \"text\",\n\"text\": \"Hello, world!\"\n}\n` + "`" + `` + "`" + `` + "`" + `",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Returns the readiness status of the API server. Indicates if the service is ready to accept traffic.",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
-                ],
-                "summary": "List input items",
-                "parameters": [
-                    {
-                        "type": "string",
-                        "description": "Unique identifier of the response",
-                        "name": "response_id",
-                        "in": "path",
-                        "required": true
-                    },
-                    {
-                        "type": "integer",
-                        "description": "Maximum number of items to return (default: 20, max: 100)",
-                        "name": "limit",
-                        "in": "query"
-                    },
-                    {
-                        "type": "string",
-                        "description": "Cursor for pagination - return items after this ID",
-                        "name": "after",
-                        "in": "query"
-                    },
-                    {
-                        "type": "string",
-                        "description": "Cursor for pagination - return items before this ID",
-                        "name": "before",
-                        "in": "query"
-                    }
+                    "Server API"
                 ],
+                "summary": "Readiness check endpoint",
                 "responses": {
                     "200": {
-                        "description": "List of input items",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ListInputItemsResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request or pagination parameters",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Response not found",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "500": {
-                        "description": "Internal server error",
+                        "description": "Readiness status ready",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
                         }
                     }
                 }
@@ -2258,7 +2428,7 @@ const docTemplate = `{
         },
         "/v1/version": {
             "get": {
-                "description": "Returns the current build version of the API server.",
+                "description": "Returns the current build version of the API server and environment reload timestamp.",
                 "produces": [
                     "application/json"
                 ],
@@ -2268,7 +2438,7 @@ const docTemplate = `{
                 "summary": "Get API build version",
                 "responses": {
                     "200": {
-                        "description": "version info",
+                        "description": "Version information including version number and environment reload timestamp",
                         "schema": {
                             "type": "object",
                             "additionalProperties": {
@@ -2281,120 +2451,24 @@ const docTemplate = `{
         }
     },
     "definitions": {
-        "app_interfaces_http_routes_v1.Model": {
+        "chatrequests.ChatCompletionRequest": {
             "type": "object",
             "properties": {
-                "created": {
-                    "type": "integer"
+                "chat_template_kwargs": {
+                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
+                    "type": "object",
+                    "additionalProperties": {}
                 },
-                "id": {
-                    "type": "string"
+                "conversation": {
+                    "description": "Conversation can be either a string (conversation ID) or a conversation object\nItems from this conversation are prepended to Messages for this response request.\nInput items and output items from this response are automatically added to this conversation after completion.",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/chatrequests.ConversationReference"
+                        }
+                    ]
                 },
-                "object": {
-                    "type": "string"
-                },
-                "owned_by": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1.ModelsResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1.Model"
-                    }
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth.AccessTokenResponse": {
-            "type": "object",
-            "properties": {
-                "access_token": {
-                    "type": "string"
-                },
-                "expires_in": {
-                    "type": "integer"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth.GetMeResponse": {
-            "type": "object",
-            "properties": {
-                "email": {
-                    "type": "string"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.AccessTokenResponse": {
-            "type": "object",
-            "properties": {
-                "access_token": {
-                    "type": "string"
-                },
-                "expires_in": {
-                    "type": "integer"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest": {
-            "type": "object",
-            "required": [
-                "code"
-            ],
-            "properties": {
-                "code": {
-                    "type": "string"
-                },
-                "state": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.GoogleLoginUrl": {
-            "type": "object",
-            "properties": {
-                "object": {
-                    "type": "string"
-                },
-                "url": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conv.ExtendedChatCompletionRequest": {
-            "type": "object",
-            "properties": {
-                "chat_template_kwargs": {
-                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "conversation": {
-                    "type": "string"
-                },
-                "frequency_penalty": {
-                    "type": "number"
+                "frequency_penalty": {
+                    "type": "number"
                 },
                 "function_call": {
                     "description": "Deprecated: use ToolChoice instead."
@@ -2494,11 +2568,11 @@ const docTemplate = `{
                     }
                 },
                 "store": {
-                    "description": "If true, the response will be stored in the conversation, default is false",
+                    "description": "Store controls whether the latest input and generated response should be persisted",
                     "type": "boolean"
                 },
                 "store_reasoning": {
-                    "description": "If true, the reasoning will be stored in the conversation, default is false",
+                    "description": "StoreReasoning controls whether reasoning content (if present) should also be persisted",
                     "type": "boolean"
                 },
                 "stream": {
@@ -2533,10 +2607,17 @@ const docTemplate = `{
                 },
                 "user": {
                     "type": "string"
+                },
+                "verbosity": {
+                    "description": "Verbosity determines how many output tokens are generated. Lowering the number of\ntokens reduces overall latency. It can be set to \"low\", \"medium\", or \"high\".\nNote: This field is only confirmed to work with gpt-5, gpt-5-mini and gpt-5-nano.\nAlso, it is not in the API reference of chat completion at the time of writing,\nthough it is supported by the API.",
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conv.ExtendedCompletionResponse": {
+        "chatrequests.ConversationReference": {
+            "type": "object"
+        },
+        "chatresponses.ChatCompletionResponse": {
             "type": "object",
             "properties": {
                 "choices": {
@@ -2545,15 +2626,15 @@ const docTemplate = `{
                         "$ref": "#/definitions/openai.ChatCompletionChoice"
                     }
                 },
+                "conversation": {
+                    "$ref": "#/definitions/chatresponses.ConversationContext"
+                },
                 "created": {
                     "type": "integer"
                 },
                 "id": {
                     "type": "string"
                 },
-                "metadata": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.ResponseMetadata"
-                },
                 "model": {
                     "type": "string"
                 },
@@ -2577,1568 +2658,1403 @@ const docTemplate = `{
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conv.Model": {
+        "chatresponses.ConversationContext": {
             "type": "object",
             "properties": {
-                "created": {
-                    "type": "integer"
-                },
                 "id": {
+                    "description": "The unique ID of the conversation",
                     "type": "string"
                 },
-                "object": {
-                    "type": "string"
-                },
-                "owned_by": {
+                "title": {
+                    "description": "The title of the conversation (optional)",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conv.ModelsResponse": {
+        "conversation.Annotation": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.Model"
-                    }
+                "bounding_box": {
+                    "description": "Bounding box for image/PDF annotations",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.BBox"
+                        }
+                    ]
                 },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conv.ResponseMetadata": {
-            "type": "object",
-            "properties": {
-                "ask_item_id": {
-                    "type": "string"
+                "confidence": {
+                    "description": "Citation confidence score (0.0-1.0)",
+                    "type": "number"
                 },
-                "completion_item_id": {
+                "container_id": {
+                    "description": "Document container reference",
                     "type": "string"
                 },
-                "conversation_created": {
-                    "type": "boolean"
+                "end_index": {
+                    "description": "End position in text",
+                    "type": "integer"
                 },
-                "conversation_id": {
+                "file_id": {
+                    "description": "For file citations",
                     "type": "string"
                 },
-                "conversation_title": {
+                "filename": {
+                    "description": "File name for citations",
                     "type": "string"
                 },
-                "store": {
-                    "type": "boolean"
+                "index": {
+                    "description": "Citation index",
+                    "type": "integer"
                 },
-                "store_reasoning": {
-                    "type": "boolean"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.AnnotationResponse": {
-            "type": "object",
-            "properties": {
-                "end_index": {
+                "page_number": {
+                    "description": "Page reference for documents",
                     "type": "integer"
                 },
-                "file_id": {
+                "quote": {
+                    "description": "Actual quoted text from source",
                     "type": "string"
                 },
-                "index": {
-                    "type": "integer"
-                },
                 "start_index": {
+                    "description": "Start position in text",
                     "type": "integer"
                 },
                 "text": {
+                    "description": "Display text",
                     "type": "string"
                 },
                 "type": {
+                    "description": "\"file_citation\", \"url_citation\", \"file_path\", etc.",
                     "type": "string"
                 },
                 "url": {
+                    "description": "For URL citations",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ContentResponse": {
+        "conversation.AudioContent": {
             "type": "object",
             "properties": {
-                "file": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.FileContentResponse"
-                },
-                "finish_reason": {
+                "data": {
+                    "description": "Base64 encoded audio data",
                     "type": "string"
                 },
-                "image": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ImageContentResponse"
-                },
-                "input_text": {
+                "format": {
+                    "description": "Audio format: mp3, wav, pcm16, etc.",
                     "type": "string"
                 },
-                "output_text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.OutputTextResponse"
-                },
-                "reasoning_content": {
+                "id": {
                     "type": "string"
                 },
-                "text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.TextResponse"
-                },
-                "type": {
+                "transcript": {
+                    "description": "Text transcription of audio",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ConversationContentRequest": {
+        "conversation.BBox": {
             "type": "object",
-            "required": [
-                "type"
-            ],
             "properties": {
-                "text": {
-                    "type": "string"
+                "height": {
+                    "type": "number"
                 },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemRequest": {
-            "type": "object",
-            "required": [
-                "content",
-                "type"
-            ],
-            "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationContentRequest"
-                    }
+                "width": {
+                    "type": "number"
                 },
-                "role": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole"
+                "x": {
+                    "type": "number"
                 },
-                "type": {
-                    "type": "string"
+                "y": {
+                    "type": "number"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemResponse": {
+        "conversation.CodeContent": {
             "type": "object",
             "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ContentResponse"
-                    }
-                },
-                "created_at": {
-                    "type": "integer"
-                },
-                "id": {
+                "code": {
+                    "description": "Code content",
                     "type": "string"
                 },
-                "object": {
+                "error": {
+                    "description": "Execution error",
                     "type": "string"
                 },
-                "role": {
+                "execution_id": {
+                    "description": "Execution session ID",
                     "type": "string"
                 },
-                "status": {
-                    "type": "string"
+                "exit_code": {
+                    "description": "Process exit code",
+                    "type": "integer"
                 },
-                "type": {
+                "language": {
+                    "description": "Programming language",
                     "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.CreateConversationRequest": {
-            "type": "object",
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
                 },
                 "metadata": {
+                    "description": "Additional metadata",
                     "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
+                    "additionalProperties": {}
                 },
-                "title": {
+                "output": {
+                    "description": "Execution output",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.CreateItemsRequest": {
-            "type": "object",
-            "required": [
-                "items"
-            ],
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.DeletedConversationResponse": {
+        "conversation.ComputerAction": {
             "type": "object",
             "properties": {
-                "deleted": {
-                    "type": "boolean"
-                },
-                "id": {
+                "action": {
+                    "description": "Action type: \"click\", \"type\", \"key\", \"scroll\", \"move\", etc.",
                     "type": "string"
                 },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer"
+                "coordinates": {
+                    "description": "Screen coordinates for mouse actions",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.Coordinates"
+                        }
+                    ]
                 },
-                "id": {
+                "key": {
+                    "description": "Key for keyboard actions",
                     "type": "string"
                 },
                 "metadata": {
+                    "description": "Additional action metadata",
                     "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
+                    "additionalProperties": {}
                 },
-                "object": {
-                    "type": "string"
+                "scroll_delta": {
+                    "description": "Scroll amount",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ScrollDelta"
+                        }
+                    ]
                 },
-                "title": {
+                "text": {
+                    "description": "Text for typing actions",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.FileContentResponse": {
+        "conversation.Content": {
             "type": "object",
             "properties": {
-                "file_id": {
-                    "type": "string"
-                },
-                "mime_type": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
+                "audio": {
+                    "description": "Audio content for speech",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.AudioContent"
+                        }
+                    ]
                 },
-                "size": {
-                    "type": "integer"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ImageContentResponse": {
-            "type": "object",
-            "properties": {
-                "detail": {
+                "code": {
+                    "description": "Code block with execution metadata",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.CodeContent"
+                        }
+                    ]
+                },
+                "computer_action": {
+                    "description": "Computer interaction details",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ComputerAction"
+                        }
+                    ]
+                },
+                "computer_screenshot": {
+                    "description": "Screenshot from computer use",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ScreenshotContent"
+                        }
+                    ]
+                },
+                "file": {
+                    "description": "File content",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.FileContent"
+                        }
+                    ]
+                },
+                "finish_reason": {
+                    "description": "Finish reason",
                     "type": "string"
                 },
-                "file_id": {
+                "function_call": {
+                    "description": "Function call content (deprecated, use tool_calls)",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.FunctionCall"
+                        }
+                    ]
+                },
+                "function_call_output": {
+                    "description": "Function call output",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.FunctionCallOut"
+                        }
+                    ]
+                },
+                "image": {
+                    "description": "Image content",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ImageContent"
+                        }
+                    ]
+                },
+                "input_audio": {
+                    "description": "User audio input",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.InputAudio"
+                        }
+                    ]
+                },
+                "input_text": {
+                    "description": "User input text (simple)",
                     "type": "string"
                 },
-                "url": {
+                "output_text": {
+                    "description": "AI output text (with annotations)",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.OutputText"
+                        }
+                    ]
+                },
+                "reasoning_content": {
+                    "description": "AI reasoning content",
                     "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.OutputTextResponse": {
-            "type": "object",
-            "properties": {
-                "annotations": {
+                },
+                "refusal": {
+                    "description": "Model refusal message",
+                    "type": "string"
+                },
+                "summary_text": {
+                    "description": "Summary content",
+                    "type": "string"
+                },
+                "text": {
+                    "description": "Generic text content",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.Text"
+                        }
+                    ]
+                },
+                "thinking": {
+                    "description": "Internal reasoning (o1 models)",
+                    "type": "string"
+                },
+                "tool_call_id": {
+                    "description": "Tool call ID (for tool responses)",
+                    "type": "string"
+                },
+                "tool_calls": {
+                    "description": "Tool calls (for assistant messages)",
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.AnnotationResponse"
+                        "$ref": "#/definitions/conversation.ToolCall"
                     }
                 },
-                "text": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.TextResponse": {
-            "type": "object",
-            "properties": {
-                "value": {
+                "type": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.UpdateConversationRequest": {
+        "conversation.Coordinates": {
             "type": "object",
             "properties": {
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
+                "x": {
+                    "type": "integer"
                 },
-                "title": {
-                    "type": "string"
+                "y": {
+                    "type": "integer"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse": {
+        "conversation.FileContent": {
             "type": "object",
             "properties": {
-                "deleted": {
-                    "type": "boolean"
+                "file_id": {
+                    "type": "string"
                 },
-                "id": {
+                "mime_type": {
                     "type": "string"
                 },
-                "object": {
+                "name": {
                     "type": "string"
+                },
+                "size": {
+                    "type": "integer"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse": {
+        "conversation.FunctionCall": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
-                    }
-                },
-                "first_id": {
+                "arguments": {
+                    "description": "JSON-encoded arguments",
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "id": {
+                    "description": "Call ID",
                     "type": "string"
                 },
-                "object": {
-                    "type": "string",
-                    "example": "list"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
                 "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
+                    "description": "Function name",
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse": {
+        "conversation.FunctionCallOut": {
             "type": "object",
             "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "key_1234567890"
-                },
-                "last_used_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "api_key"
-                },
-                "owner": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.Owner"
-                },
-                "redacted_value": {
-                    "type": "string",
-                    "example": "sk-...abcd"
+                "call_id": {
+                    "description": "ID of the function call this responds to",
+                    "type": "string"
                 },
-                "value": {
-                    "type": "string",
-                    "example": "sk-abcdef1234567890"
+                "output": {
+                    "description": "String output from the function",
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.Owner": {
+        "conversation.ImageContent": {
             "type": "object",
             "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "user_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "John Doe"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "user"
+                "detail": {
+                    "description": "\"low\", \"high\", \"auto\"",
+                    "type": "string"
                 },
-                "role": {
-                    "type": "string",
-                    "example": "admin"
+                "file_id": {
+                    "type": "string"
                 },
-                "type": {
-                    "type": "string",
-                    "example": "user"
+                "url": {
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_invites.CreateInviteUserRequest": {
+        "conversation.IncompleteDetails": {
             "type": "object",
             "properties": {
-                "email": {
+                "error": {
+                    "description": "Error message if applicable",
                     "type": "string"
                 },
-                "projects": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteProject"
-                    }
-                },
-                "role": {
+                "reason": {
+                    "description": "\"max_tokens\", \"content_filter\", \"tool_calls\", etc.",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_invites.InviteProject": {
+        "conversation.InputAudio": {
             "type": "object",
             "properties": {
-                "id": {
+                "data": {
+                    "description": "Base64 encoded audio data",
                     "type": "string"
                 },
-                "role": {
+                "format": {
+                    "description": "Audio format: mp3, wav, pcm16, etc.",
+                    "type": "string"
+                },
+                "transcript": {
+                    "description": "Optional text transcription",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_invites.InviteResponse": {
+        "conversation.Item": {
             "type": "object",
             "properties": {
-                "accepted_at": {
+                "branch": {
+                    "description": "Branch identifier (MAIN, EDIT_1, etc.)",
                     "type": "string"
                 },
-                "email": {
+                "completed_at": {
                     "type": "string"
                 },
-                "expires_at": {
+                "content": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Content"
+                    }
+                },
+                "created_at": {
                     "type": "string"
                 },
                 "id": {
                     "type": "string"
                 },
-                "invited_at": {
+                "incomplete_at": {
                     "type": "string"
                 },
+                "incomplete_details": {
+                    "$ref": "#/definitions/conversation.IncompleteDetails"
+                },
                 "object": {
+                    "description": "Always \"conversation.item\" for OpenAI compatibility",
                     "type": "string"
                 },
-                "projects": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteProject"
-                    }
-                },
-                "role": {
+                "rated_at": {
+                    "description": "When rating was given",
                     "type": "string"
                 },
-                "status": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_invites.VerifyInviteUserRequest": {
-            "type": "object",
-            "properties": {
-                "code": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "New AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
-                    }
+                "rating": {
+                    "description": "User feedback/rating",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ItemRating"
+                        }
+                    ]
                 },
-                "first_id": {
+                "rating_comment": {
+                    "description": "Optional comment with rating",
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
+                "role": {
+                    "$ref": "#/definitions/conversation.ItemRole"
                 },
-                "last_id": {
-                    "type": "string"
+                "sequence_number": {
+                    "description": "Order within branch",
+                    "type": "integer"
                 },
-                "object": {
-                    "type": "string",
-                    "example": "list"
+                "status": {
+                    "$ref": "#/definitions/conversation.ItemStatus"
+                },
+                "type": {
+                    "$ref": "#/definitions/conversation.ItemType"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectResponse": {
-            "type": "object",
-            "properties": {
-                "archived_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "proj_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My First Project"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "project"
-                },
-                "status": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest": {
-            "type": "object",
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "Updated AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse": {
-            "type": "object",
-            "properties": {
-                "apikeyType": {
-                    "type": "string"
-                },
-                "description": {
-                    "type": "string"
-                },
-                "enabled": {
-                    "type": "boolean"
-                },
-                "expiresAt": {
-                    "type": "string"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "key": {
-                    "type": "string"
-                },
-                "last_usedAt": {
-                    "type": "string"
-                },
-                "permissions": {
-                    "type": "string"
-                },
-                "plaintextHint": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest": {
-            "type": "object",
-            "properties": {
-                "description": {
-                    "type": "string"
-                },
-                "expiresAt": {
-                    "type": "string"
-                }
-            }
+        "conversation.ItemRating": {
+            "type": "string",
+            "enum": [
+                "like",
+                "unlike"
+            ],
+            "x-enum-comments": {
+                "ItemRatingLike": "Positive feedback (like)",
+                "ItemRatingUnlike": "Negative feedback (unlike)"
+            },
+            "x-enum-varnames": [
+                "ItemRatingLike",
+                "ItemRatingUnlike"
+            ]
         },
-        "menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole": {
+        "conversation.ItemRole": {
             "type": "string",
             "enum": [
                 "system",
                 "user",
                 "assistant",
-                "tool"
+                "tool",
+                "developer",
+                "critic",
+                "discriminator",
+                "unknown"
             ],
+            "x-enum-comments": {
+                "ItemRoleCritic": "For critique/evaluation workflows",
+                "ItemRoleDeveloper": "System-level instructions (OpenAI replacement for system)",
+                "ItemRoleDiscriminator": "For adversarial/validation workflows",
+                "ItemRoleUnknown": "Fallback for unrecognized roles"
+            },
             "x-enum-varnames": [
                 "ItemRoleSystem",
                 "ItemRoleUser",
                 "ItemRoleAssistant",
-                "ItemRoleTool"
+                "ItemRoleTool",
+                "ItemRoleDeveloper",
+                "ItemRoleCritic",
+                "ItemRoleDiscriminator",
+                "ItemRoleUnknown"
             ]
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.CreateResponseRequest": {
-            "type": "object",
-            "required": [
-                "input",
-                "model"
+        "conversation.ItemStatus": {
+            "type": "string",
+            "enum": [
+                "incomplete",
+                "in_progress",
+                "completed",
+                "failed",
+                "cancelled",
+                "searching",
+                "generating",
+                "calling",
+                "streaming",
+                "rate_limited"
+            ],
+            "x-enum-comments": {
+                "ItemStatusCalling": "Function/tool call in progress",
+                "ItemStatusCancelled": "Cancelled by user or system",
+                "ItemStatusCompleted": "Successfully finished",
+                "ItemStatusFailed": "Failed with error",
+                "ItemStatusGenerating": "Image generation in progress",
+                "ItemStatusInProgress": "Currently processing",
+                "ItemStatusIncomplete": "Not started or partially complete (OpenAI uses this instead of \"pending\")",
+                "ItemStatusRateLimited": "Rate limit hit",
+                "ItemStatusSearching": "File/web search in progress",
+                "ItemStatusStreaming": "Streaming response in progress"
+            },
+            "x-enum-varnames": [
+                "ItemStatusIncomplete",
+                "ItemStatusInProgress",
+                "ItemStatusCompleted",
+                "ItemStatusFailed",
+                "ItemStatusCancelled",
+                "ItemStatusSearching",
+                "ItemStatusGenerating",
+                "ItemStatusCalling",
+                "ItemStatusStreaming",
+                "ItemStatusRateLimited"
+            ]
+        },
+        "conversation.ItemType": {
+            "type": "string",
+            "enum": [
+                "message",
+                "function_call",
+                "function_call_output",
+                "reasoning",
+                "file_search",
+                "web_search",
+                "code_interpreter",
+                "computer_use",
+                "custom_tool_call",
+                "mcp_item",
+                "image_generation"
             ],
+            "x-enum-comments": {
+                "ItemTypeCodeInterpreter": "Code execution",
+                "ItemTypeComputerUse": "Computer interaction",
+                "ItemTypeCustomToolCall": "Custom tool invocations",
+                "ItemTypeFileSearch": "RAG/retrieval operations",
+                "ItemTypeImageGeneration": "DALL-E image generation",
+                "ItemTypeMCPItem": "Model Context Protocol items",
+                "ItemTypeReasoning": "For o1/reasoning models",
+                "ItemTypeWebSearch": "Web browsing operations"
+            },
+            "x-enum-varnames": [
+                "ItemTypeMessage",
+                "ItemTypeFunctionCall",
+                "ItemTypeFunctionCallOut",
+                "ItemTypeReasoning",
+                "ItemTypeFileSearch",
+                "ItemTypeWebSearch",
+                "ItemTypeCodeInterpreter",
+                "ItemTypeComputerUse",
+                "ItemTypeCustomToolCall",
+                "ItemTypeMCPItem",
+                "ItemTypeImageGeneration"
+            ]
+        },
+        "conversation.LogProb": {
+            "type": "object",
             "properties": {
-                "background": {
-                    "description": "Whether to run the response in the background.",
-                    "type": "boolean"
-                },
-                "conversation": {
-                    "description": "The conversation ID to append items to. If not set or set to ClientCreatedRootConversationID, a new conversation will be created.",
-                    "type": "string"
-                },
-                "frequency_penalty": {
-                    "description": "The frequency penalty to use for this response.",
-                    "type": "number"
-                },
-                "input": {
-                    "description": "The input to the model. Can be a string or array of strings."
-                },
-                "logit_bias": {
-                    "description": "The logit bias to use for this response.",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "number",
-                        "format": "float64"
+                "bytes": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer"
                     }
                 },
-                "max_tokens": {
-                    "description": "The maximum number of tokens to generate.",
-                    "type": "integer"
-                },
-                "metadata": {
-                    "description": "The metadata to use for this response.",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "model": {
-                    "description": "The ID of the model to use for this response.",
-                    "type": "string"
-                },
-                "presence_penalty": {
-                    "description": "The presence penalty to use for this response.",
+                "logprob": {
                     "type": "number"
                 },
-                "previous_response_id": {
-                    "description": "The ID of the previous response to continue from. If set, the conversation will be loaded from the previous response.",
+                "token": {
                     "type": "string"
                 },
-                "repetition_penalty": {
-                    "description": "The repetition penalty to use for this response.",
-                    "type": "number"
-                },
-                "response_format": {
-                    "description": "The response format to use for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat"
-                        }
-                    ]
-                },
-                "seed": {
-                    "description": "The seed to use for this response.",
-                    "type": "integer"
+                "top_logprobs": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.TopLogProb"
+                    }
+                }
+            }
+        },
+        "conversation.OutputText": {
+            "type": "object",
+            "properties": {
+                "annotations": {
+                    "description": "Required for OpenAI compatibility",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Annotation"
+                    }
                 },
-                "stop": {
-                    "description": "The stop sequences to use for this response.",
+                "logprobs": {
+                    "description": "Token probabilities",
                     "type": "array",
                     "items": {
-                        "type": "string"
+                        "$ref": "#/definitions/conversation.LogProb"
                     }
                 },
-                "store": {
-                    "description": "Whether to store the conversation. If false, no conversation will be created or used.",
-                    "type": "boolean"
+                "text": {
+                    "type": "string"
+                }
+            }
+        },
+        "conversation.ScreenshotContent": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "description": "Optional description",
+                    "type": "string"
                 },
-                "stream": {
-                    "description": "Whether to stream the response.",
-                    "type": "boolean"
+                "height": {
+                    "description": "Image height in pixels",
+                    "type": "integer"
                 },
-                "system_prompt": {
-                    "description": "The system prompt to use for this response.",
+                "image_data": {
+                    "description": "Base64 encoded image data",
                     "type": "string"
                 },
-                "temperature": {
-                    "description": "The temperature to use for this response.",
-                    "type": "number"
+                "image_url": {
+                    "description": "URL to screenshot image",
+                    "type": "string"
                 },
-                "timeout": {
-                    "description": "The timeout in seconds for this response.",
+                "timestamp": {
+                    "description": "Unix timestamp when screenshot was taken",
                     "type": "integer"
                 },
-                "tool_choice": {
-                    "description": "The tool choice to use for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice"
-                        }
-                    ]
+                "width": {
+                    "description": "Image width in pixels",
+                    "type": "integer"
+                }
+            }
+        },
+        "conversation.ScrollDelta": {
+            "type": "object",
+            "properties": {
+                "x": {
+                    "type": "integer"
                 },
-                "tools": {
-                    "description": "The tools to use for this response.",
+                "y": {
+                    "type": "integer"
+                }
+            }
+        },
+        "conversation.Text": {
+            "type": "object",
+            "properties": {
+                "annotations": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool"
+                        "$ref": "#/definitions/conversation.Annotation"
                     }
                 },
-                "top_k": {
-                    "description": "The top_k to use for this response.",
-                    "type": "integer"
-                },
-                "top_p": {
-                    "description": "The top_p to use for this response.",
-                    "type": "number"
-                },
-                "user": {
-                    "description": "The user to use for this response.",
+                "text": {
+                    "description": "Changed from \"value\" to match OpenAI spec",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileInput": {
+        "conversation.ToolCall": {
             "type": "object",
-            "required": [
-                "file_id"
-            ],
             "properties": {
-                "file_id": {
-                    "description": "The ID of the file.",
+                "function": {
+                    "$ref": "#/definitions/conversation.FunctionCall"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "type": {
+                    "description": "\"function\", \"file_search\", \"code_interpreter\"",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileSearchInput": {
+        "conversation.TopLogProb": {
             "type": "object",
-            "required": [
-                "file_ids",
-                "query"
-            ],
             "properties": {
-                "file_ids": {
-                    "description": "The IDs of the files to search in.",
+                "bytes": {
                     "type": "array",
                     "items": {
-                        "type": "string"
+                        "type": "integer"
                     }
                 },
-                "max_results": {
-                    "description": "The number of results to return.",
-                    "type": "integer"
+                "logprob": {
+                    "type": "number"
                 },
-                "query": {
-                    "description": "The query to search for.",
+                "token": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCall": {
+        "conversationrequests.CreateConversationRequest": {
             "type": "object",
-            "required": [
-                "name"
-            ],
             "properties": {
-                "arguments": {
-                    "description": "The arguments to pass to the function.",
+                "items": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Item"
+                    }
+                },
+                "metadata": {
                     "type": "object",
-                    "additionalProperties": {}
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "name": {
-                    "description": "The name of the function to call.",
+                "project_id": {
+                    "type": "string"
+                },
+                "referrer": {
+                    "type": "string"
+                },
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCallsInput": {
+        "conversationrequests.CreateItemsRequest": {
             "type": "object",
             "required": [
-                "calls"
+                "items"
             ],
             "properties": {
-                "calls": {
-                    "description": "The function calls to make.",
+                "items": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCall"
+                        "$ref": "#/definitions/conversation.Item"
                     }
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionChoice": {
+        "conversationrequests.UpdateConversationRequest": {
             "type": "object",
-            "required": [
-                "name"
-            ],
             "properties": {
-                "name": {
-                    "description": "The name of the function.",
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "referrer": {
+                    "type": "string"
+                },
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionDefinition": {
+        "conversationresponses.ConversationDeletedResponse": {
             "type": "object",
-            "required": [
-                "name"
-            ],
             "properties": {
-                "description": {
-                    "description": "The description of the function.",
-                    "type": "string"
+                "deleted": {
+                    "type": "boolean"
                 },
-                "name": {
-                    "description": "The name of the function.",
+                "id": {
                     "type": "string"
                 },
-                "parameters": {
-                    "description": "The parameters of the function.",
-                    "type": "object",
-                    "additionalProperties": {}
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ImageInput": {
+        "conversationresponses.ConversationItemCreatedResponse": {
             "type": "object",
             "properties": {
                 "data": {
-                    "description": "The base64 encoded image data.",
-                    "type": "string"
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Item"
+                    }
                 },
-                "detail": {
-                    "description": "The detail level for the image.",
+                "first_id": {
                     "type": "string"
                 },
-                "url": {
-                    "description": "The URL of the image.",
-                    "type": "string"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.InputType": {
-            "type": "string",
-            "enum": [
-                "text",
-                "image",
-                "file",
-                "web_search",
-                "file_search",
-                "streaming",
-                "function_calls",
-                "reasoning"
-            ],
-            "x-enum-varnames": [
-                "InputTypeText",
-                "InputTypeImage",
-                "InputTypeFile",
-                "InputTypeWebSearch",
-                "InputTypeFileSearch",
-                "InputTypeStreaming",
-                "InputTypeFunctionCalls",
-                "InputTypeReasoning"
-            ]
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ReasoningInput": {
-            "type": "object",
-            "required": [
-                "task"
-            ],
-            "properties": {
-                "context": {
-                    "description": "The context for the reasoning task.",
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
                     "type": "string"
                 },
-                "task": {
-                    "description": "The reasoning task to perform.",
+                "object": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat": {
+        "conversationresponses.ConversationListResponse": {
             "type": "object",
-            "required": [
-                "type"
-            ],
             "properties": {
-                "type": {
-                    "description": "The type of response format.",
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversationresponses.ConversationResponse"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
                     "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "total": {
+                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.StreamingInput": {
+        "conversationresponses.ConversationResponse": {
             "type": "object",
-            "required": [
-                "url"
-            ],
             "properties": {
-                "body": {
-                    "description": "The body to send with the request.",
+                "created_at": {
+                    "type": "integer"
+                },
+                "id": {
                     "type": "string"
                 },
-                "headers": {
-                    "description": "The headers to send with the request.",
+                "metadata": {
                     "type": "object",
                     "additionalProperties": {
                         "type": "string"
                     }
                 },
-                "method": {
-                    "description": "The method to use for the request.",
+                "object": {
                     "type": "string"
                 },
-                "url": {
-                    "description": "The URL to stream from.",
+                "project_id": {
+                    "type": "string"
+                },
+                "referrer": {
+                    "type": "string"
+                },
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool": {
+        "conversationresponses.ItemListResponse": {
             "type": "object",
-            "required": [
-                "type"
-            ],
             "properties": {
-                "function": {
-                    "description": "The function definition for function tools.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionDefinition"
-                        }
-                    ]
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Item"
+                    }
                 },
-                "type": {
-                    "description": "The type of tool.",
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "object": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice": {
+        "conversationresponses.ItemResponse": {
             "type": "object",
-            "required": [
-                "type"
-            ],
             "properties": {
-                "function": {
-                    "description": "The function to use for function tool choice.",
+                "branch": {
+                    "description": "Branch identifier (MAIN, EDIT_1, etc.)",
+                    "type": "string"
+                },
+                "completed_at": {
+                    "type": "string"
+                },
+                "content": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Content"
+                    }
+                },
+                "created_at": {
+                    "type": "string"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "incomplete_at": {
+                    "type": "string"
+                },
+                "incomplete_details": {
+                    "$ref": "#/definitions/conversation.IncompleteDetails"
+                },
+                "object": {
+                    "description": "Always \"conversation.item\" for OpenAI compatibility",
+                    "type": "string"
+                },
+                "rated_at": {
+                    "description": "When rating was given",
+                    "type": "string"
+                },
+                "rating": {
+                    "description": "User feedback/rating",
                     "allOf": [
                         {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionChoice"
+                            "$ref": "#/definitions/conversation.ItemRating"
                         }
                     ]
                 },
-                "type": {
-                    "description": "The type of tool choice.",
+                "rating_comment": {
+                    "description": "Optional comment with rating",
                     "type": "string"
+                },
+                "role": {
+                    "$ref": "#/definitions/conversation.ItemRole"
+                },
+                "sequence_number": {
+                    "description": "Order within branch",
+                    "type": "integer"
+                },
+                "status": {
+                    "$ref": "#/definitions/conversation.ItemStatus"
+                },
+                "type": {
+                    "$ref": "#/definitions/conversation.ItemType"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.WebSearchInput": {
+        "model.Architecture": {
             "type": "object",
-            "required": [
-                "query"
-            ],
             "properties": {
-                "max_results": {
-                    "description": "The number of results to return.",
-                    "type": "integer"
+                "input_modalities": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "instruct_type": {
+                    "description": "nullable",
+                    "type": "string"
                 },
-                "query": {
-                    "description": "The query to search for.",
+                "modality": {
+                    "description": "\"text+image-\u003etext\"",
                     "type": "string"
                 },
-                "search_engine": {
-                    "description": "The search engine to use.",
+                "output_modalities": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "tokenizer": {
+                    "description": "\"GPT\" / \"SentencePiece\" / etc.",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ConversationInfo": {
+        "model.ModelCatalogStatus": {
+            "type": "string",
+            "enum": [
+                "init",
+                "filled",
+                "updated"
+            ],
+            "x-enum-comments": {
+                "ModelCatalogStatusFilled": "may update from Provider like OpenRouter",
+                "ModelCatalogStatusInit": "default status when creating entry",
+                "ModelCatalogStatusUpdated": "manually updated by admin (cannot be auto-updated anymore)"
+            },
+            "x-enum-varnames": [
+                "ModelCatalogStatusInit",
+                "ModelCatalogStatusFilled",
+                "ModelCatalogStatusUpdated"
+            ]
+        },
+        "model.PriceLine": {
             "type": "object",
             "properties": {
-                "id": {
-                    "description": "The unique ID of the conversation.",
+                "amount_micro_usd": {
+                    "description": "e.g., 15000 -\u003e $0.0150",
+                    "type": "integer"
+                },
+                "currency": {
+                    "description": "\"USD\" (fixed if you only bill in USD)",
                     "type": "string"
+                },
+                "unit": {
+                    "$ref": "#/definitions/model.PriceUnit"
+                }
+            }
+        },
+        "model.PriceUnit": {
+            "type": "string",
+            "enum": [
+                "per_1k_prompt_tokens",
+                "per_1k_completion_tokens",
+                "per_request",
+                "per_image",
+                "per_web_search",
+                "per_internal_reasoning"
+            ],
+            "x-enum-varnames": [
+                "Per1KPromptTokens",
+                "Per1KCompletionTokens",
+                "PerRequest",
+                "PerImage",
+                "PerWebSearch",
+                "PerInternalReasoning"
+            ]
+        },
+        "model.Pricing": {
+            "type": "object",
+            "properties": {
+                "lines": {
+                    "description": "flexible: add/remove units without schema churn",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/model.PriceLine"
+                    }
+                }
+            }
+        },
+        "model.SupportedParameters": {
+            "type": "object",
+            "properties": {
+                "default": {
+                    "description": "temperature/top_p/frequency_penalty, null allowed",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "number"
+                    }
+                },
+                "names": {
+                    "description": "e.g., [\"include_reasoning\",\"max_tokens\",...]",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.DetailedUsage": {
+        "model.TokenLimits": {
             "type": "object",
             "properties": {
-                "input_tokens": {
-                    "description": "The number of tokens in the prompt.",
+                "context_length": {
+                    "description": "e.g., 400000",
                     "type": "integer"
                 },
-                "input_tokens_details": {
-                    "description": "Details about input tokens.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails"
-                        }
-                    ]
+                "max_completion_tokens": {
+                    "description": "e.g., 128000",
+                    "type": "integer"
+                }
+            }
+        },
+        "modelresponses.BulkOperationResponse": {
+            "type": "object",
+            "properties": {
+                "failed_count": {
+                    "type": "integer"
+                },
+                "failed_models": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
                 },
-                "output_tokens": {
-                    "description": "The number of tokens in the completion.",
+                "skipped_count": {
                     "type": "integer"
                 },
-                "output_tokens_details": {
-                    "description": "Details about output tokens.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails"
-                        }
-                    ]
+                "total_checked": {
+                    "type": "integer"
                 },
-                "total_tokens": {
-                    "description": "The total number of tokens used.",
+                "updated_count": {
                     "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse": {
+        "modelresponses.ModelCatalogResponse": {
             "type": "object",
             "properties": {
-                "code": {
+                "active": {
+                    "type": "boolean"
+                },
+                "architecture": {
+                    "$ref": "#/definitions/model.Architecture"
+                },
+                "created_at": {
+                    "type": "integer"
+                },
+                "extras": {
+                    "type": "object",
+                    "additionalProperties": {}
+                },
+                "id": {
                     "type": "string"
                 },
-                "error": {
+                "is_moderated": {
+                    "type": "boolean"
+                },
+                "last_synced_at": {
+                    "type": "integer"
+                },
+                "notes": {
                     "type": "string"
+                },
+                "status": {
+                    "$ref": "#/definitions/model.ModelCatalogStatus"
+                },
+                "supported_parameters": {
+                    "$ref": "#/definitions/model.SupportedParameters"
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "updated_at": {
+                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.FormatType": {
+        "modelresponses.ModelResponse": {
             "type": "object",
             "properties": {
-                "type": {
-                    "description": "The type of format.",
+                "created": {
+                    "type": "integer"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "owned_by": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse": {
+        "modelresponses.ModelResponseList": {
             "type": "object",
             "properties": {
-                "result": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/modelresponses.ModelResponse"
+                    }
                 },
-                "status": {
+                "object": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.InputItem": {
+        "modelresponses.ModelResponseWithProvider": {
             "type": "object",
             "properties": {
                 "created": {
-                    "description": "The Unix timestamp (in seconds) when the input item was created.",
                     "type": "integer"
                 },
-                "file": {
-                    "description": "The file content (for file type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileInput"
-                        }
-                    ]
-                },
-                "file_search": {
-                    "description": "The file search content (for file_search type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileSearchInput"
-                        }
-                    ]
-                },
-                "function_calls": {
-                    "description": "The function calls content (for function_calls type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCallsInput"
-                        }
-                    ]
-                },
                 "id": {
-                    "description": "The unique identifier for the input item.",
                     "type": "string"
                 },
-                "image": {
-                    "description": "The image content (for image type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ImageInput"
-                        }
-                    ]
-                },
                 "object": {
-                    "description": "The object type, which is always \"input_item\".",
                     "type": "string"
                 },
-                "reasoning": {
-                    "description": "The reasoning content (for reasoning type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ReasoningInput"
-                        }
-                    ]
-                },
-                "streaming": {
-                    "description": "The streaming content (for streaming type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.StreamingInput"
-                        }
-                    ]
+                "owned_by": {
+                    "type": "string"
                 },
-                "text": {
-                    "description": "The text content (for text type).",
+                "provider_id": {
                     "type": "string"
                 },
-                "type": {
-                    "description": "The type of input item.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.InputType"
-                        }
-                    ]
+                "provider_name": {
+                    "type": "string"
                 },
-                "web_search": {
-                    "description": "The web search content (for web_search type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.WebSearchInput"
-                        }
-                    ]
+                "provider_vendor": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ListInputItemsResponse": {
+        "modelresponses.ModelWithProviderResponseList": {
             "type": "object",
             "properties": {
                 "data": {
-                    "description": "The list of input items.",
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.InputItem"
+                        "$ref": "#/definitions/modelresponses.ModelResponseWithProvider"
                     }
                 },
-                "first_id": {
-                    "description": "The first ID in the list.",
-                    "type": "string"
-                },
-                "has_more": {
-                    "description": "Whether there are more items available.",
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "description": "The last ID in the list.",
-                    "type": "string"
-                },
                 "object": {
-                    "description": "The object type, which is always \"list\".",
-                    "type": "string"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.Reasoning": {
-            "type": "object",
-            "properties": {
-                "effort": {
-                    "description": "The effort level for reasoning.",
-                    "type": "string"
-                },
-                "summary": {
-                    "description": "The summary of reasoning.",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response": {
+        "modelresponses.ProviderModelResponse": {
             "type": "object",
             "properties": {
-                "background": {
-                    "description": "Whether the response was run in the background.",
+                "active": {
                     "type": "boolean"
                 },
-                "cancelled_at": {
-                    "description": "The Unix timestamp (in seconds) when the response was cancelled.",
-                    "type": "integer"
-                },
-                "completed_at": {
-                    "description": "The Unix timestamp (in seconds) when the response was completed.",
-                    "type": "integer"
-                },
-                "conversation": {
-                    "description": "The conversation that this response belongs to.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ConversationInfo"
-                        }
-                    ]
-                },
-                "created": {
-                    "description": "The Unix timestamp (in seconds) when the response was created.",
+                "created_at": {
                     "type": "integer"
                 },
-                "error": {
-                    "description": "The error that occurred during processing, if any.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseError"
-                        }
-                    ]
-                },
-                "failed_at": {
-                    "description": "The Unix timestamp (in seconds) when the response was failed.",
-                    "type": "integer"
+                "display_name": {
+                    "type": "string"
                 },
-                "frequency_penalty": {
-                    "description": "The frequency penalty that was used for this response.",
-                    "type": "number"
+                "family": {
+                    "type": "string"
                 },
                 "id": {
-                    "description": "The unique identifier for the response.",
                     "type": "string"
                 },
-                "incomplete_details": {
-                    "description": "OpenAI API response fields"
-                },
-                "input": {
-                    "description": "The input that was provided to the model. Can be a string or array of strings."
-                },
-                "instructions": {},
-                "logit_bias": {
-                    "description": "The logit bias that was used for this response.",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "number",
-                        "format": "float64"
-                    }
-                },
-                "max_output_tokens": {
-                    "type": "integer"
-                },
-                "max_tokens": {
-                    "description": "The maximum number of tokens that were generated.",
-                    "type": "integer"
-                },
-                "metadata": {
-                    "description": "The metadata that was provided for this response.",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "model": {
-                    "description": "The ID of the model used for this response.",
+                "model_catalog_id": {
                     "type": "string"
                 },
-                "object": {
-                    "description": "The object type, which is always \"response\".",
+                "model_public_id": {
                     "type": "string"
                 },
-                "output": {
-                    "description": "The output generated by the model."
-                },
-                "parallel_tool_calls": {
-                    "type": "boolean"
+                "pricing": {
+                    "$ref": "#/definitions/model.Pricing"
                 },
-                "presence_penalty": {
-                    "description": "The presence penalty that was used for this response.",
-                    "type": "number"
-                },
-                "previous_response_id": {
+                "provider_id": {
                     "type": "string"
                 },
-                "reasoning": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Reasoning"
-                },
-                "repetition_penalty": {
-                    "description": "The repetition penalty that was used for this response.",
-                    "type": "number"
-                },
-                "response_format": {
-                    "description": "The response format that was used for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat"
-                        }
-                    ]
-                },
-                "seed": {
-                    "description": "The seed that was used for this response.",
-                    "type": "integer"
-                },
-                "status": {
-                    "description": "The status of the response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseStatus"
-                        }
-                    ]
+                "provider_original_model_id": {
+                    "type": "string"
                 },
-                "stop": {
-                    "description": "The stop sequences that were used for this response.",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
+                "provider_vendor": {
+                    "type": "string"
                 },
-                "store": {
+                "supports_audio": {
                     "type": "boolean"
                 },
-                "stream": {
-                    "description": "Whether the response was streamed.",
+                "supports_embeddings": {
                     "type": "boolean"
                 },
-                "system_prompt": {
-                    "description": "The system prompt that was used for this response.",
-                    "type": "string"
-                },
-                "temperature": {
-                    "description": "The temperature that was used for this response.",
-                    "type": "number"
-                },
-                "text": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TextFormat"
+                "supports_images": {
+                    "type": "boolean"
                 },
-                "timeout": {
-                    "description": "The timeout in seconds that was used for this response.",
-                    "type": "integer"
+                "supports_reasoning": {
+                    "type": "boolean"
                 },
-                "tool_choice": {
-                    "description": "The tool choice that was used for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice"
-                        }
-                    ]
+                "supports_video": {
+                    "type": "boolean"
                 },
-                "tools": {
-                    "description": "The tools that were used for this response.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool"
-                    }
+                "token_limits": {
+                    "$ref": "#/definitions/model.TokenLimits"
                 },
-                "top_k": {
-                    "description": "The top_k that was used for this response.",
+                "updated_at": {
                     "type": "integer"
-                },
-                "top_p": {
-                    "description": "The top_p that was used for this response.",
-                    "type": "number"
-                },
-                "truncation": {
-                    "type": "string"
-                },
-                "usage": {
-                    "description": "The usage statistics for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.DetailedUsage"
-                        }
-                    ]
-                },
-                "user": {
-                    "description": "The user that was provided for this response.",
-                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseError": {
+        "modelresponses.ProviderResponse": {
             "type": "object",
             "properties": {
-                "code": {
-                    "description": "The error code.",
+                "active": {
+                    "type": "boolean"
+                },
+                "base_url": {
+                    "type": "string"
+                },
+                "id": {
                     "type": "string"
                 },
-                "details": {
-                    "description": "The error details.",
+                "metadata": {
                     "type": "object",
-                    "additionalProperties": {}
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "message": {
-                    "description": "The error message.",
+                "name": {
+                    "type": "string"
+                },
+                "vendor": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseStatus": {
-            "type": "string",
-            "enum": [
-                "pending",
-                "running",
-                "completed",
-                "cancelled",
-                "failed"
-            ],
-            "x-enum-varnames": [
-                "ResponseStatusPending",
-                "ResponseStatusRunning",
-                "ResponseStatusCompleted",
-                "ResponseStatusCancelled",
-                "ResponseStatusFailed"
-            ]
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.TextFormat": {
-            "type": "object",
-            "properties": {
-                "format": {
-                    "description": "The format type.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.FormatType"
-                        }
-                    ]
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails": {
+        "modelresponses.ProviderResponseList": {
             "type": "object",
             "properties": {
-                "cached_tokens": {
-                    "description": "The number of cached tokens.",
-                    "type": "integer"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/modelresponses.ProviderResponse"
+                    }
                 },
-                "reasoning_tokens": {
-                    "description": "The number of reasoning tokens.",
-                    "type": "integer"
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.DeleteResponse": {
+        "modelresponses.ProviderWithModelCountResponse": {
             "type": "object",
             "properties": {
-                "deleted": {
+                "active": {
                     "type": "boolean"
                 },
-                "id": {
+                "base_url": {
                     "type": "string"
                 },
-                "object": {
+                "id": {
                     "type": "string"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
                     }
                 },
-                "first_id": {
-                    "type": "string"
+                "model_active_count": {
+                    "type": "integer"
                 },
-                "has_more": {
-                    "type": "boolean"
+                "model_count": {
+                    "type": "integer"
                 },
-                "last_id": {
+                "name": {
                     "type": "string"
                 },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
+                "vendor": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ExtendedConversationResponse": {
+        "modelresponses.ProviderWithModelsResponse": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
-                    }
+                "active": {
+                    "type": "boolean"
                 },
-                "first_id": {
+                "base_url": {
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "id": {
                     "type": "string"
                 },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "total": {
-                    "type": "integer"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_organization_invites_InviteResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
+                "models": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                        "$ref": "#/definitions/modelresponses.ModelResponse"
                     }
                 },
-                "first_id": {
+                "name": {
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "vendor": {
                     "type": "string"
-                },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList": {
-            "type": "string",
-            "enum": [
-                "list"
-            ],
-            "x-enum-varnames": [
-                "ObjectTypeListList"
-            ]
-        },
         "openai.ChatCompletionChoice": {
             "type": "object",
             "properties": {
@@ -4171,223 +4087,38 @@ const docTemplate = `{
                     "type": "string"
                 },
                 "function_call": {
-                    "$ref": "#/definitions/openai.FunctionCall"
-                },
-                "multiContent": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ChatMessagePart"
-                    }
-                },
-                "name": {
-                    "description": "This property isn't in the official documentation, but it's in\nthe documentation for the official library for python:\n- https://github.com/openai/openai-python/blob/main/chatml.md\n- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
-                    "type": "string"
-                },
-                "reasoning_content": {
-                    "description": "This property is used for the \"reasoning\" feature supported by deepseek-reasoner\nwhich is not in the official documentation.\nthe doc from deepseek:\n- https://api-docs.deepseek.com/api/create-chat-completion#responses",
-                    "type": "string"
-                },
-                "refusal": {
-                    "type": "string"
-                },
-                "role": {
-                    "type": "string"
-                },
-                "tool_call_id": {
-                    "description": "For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.",
-                    "type": "string"
-                },
-                "tool_calls": {
-                    "description": "For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ToolCall"
-                    }
-                }
-            }
-        },
-        "openai.ChatCompletionRequest": {
-            "type": "object",
-            "properties": {
-                "chat_template_kwargs": {
-                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "frequency_penalty": {
-                    "type": "number"
-                },
-                "function_call": {
-                    "description": "Deprecated: use ToolChoice instead."
-                },
-                "functions": {
-                    "description": "Deprecated: use Tools instead.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.FunctionDefinition"
-                    }
-                },
-                "guided_choice": {
-                    "description": "GuidedChoice is a vLLM-specific extension that restricts the model's output\nto one of the predefined string choices provided in this field. This feature\nis used to constrain the model's responses to a controlled set of options,\nensuring predictable and consistent outputs in scenarios where specific\nchoices are required.",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
-                "logit_bias": {
-                    "description": "LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.\nincorrect: ` + "`" + `\"logit_bias\":{\"You\": 6}` + "`" + `, correct: ` + "`" + `\"logit_bias\":{\"1639\": 6}` + "`" + `\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "integer"
-                    }
-                },
-                "logprobs": {
-                    "description": "LogProbs indicates whether to return log probabilities of the output tokens or not.\nIf true, returns the log probabilities of each output token returned in the content of message.\nThis option is currently not available on the gpt-4-vision-preview model.",
-                    "type": "boolean"
-                },
-                "max_completion_tokens": {
-                    "description": "MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,\nincluding visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning",
-                    "type": "integer"
-                },
-                "max_tokens": {
-                    "description": "MaxTokens The maximum number of tokens that can be generated in the chat completion.\nThis value can be used to control costs for text generated via API.\nDeprecated: use MaxCompletionTokens. Not compatible with o1-series models.\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens",
-                    "type": "integer"
-                },
-                "messages": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionMessage"
-                    }
-                },
-                "metadata": {
-                    "description": "Metadata to store with the completion.",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "model": {
-                    "type": "string"
-                },
-                "n": {
-                    "type": "integer"
-                },
-                "parallel_tool_calls": {
-                    "description": "Disable the default behavior of parallel tool calls by setting it: false."
-                },
-                "prediction": {
-                    "description": "Configuration for a predicted output.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.Prediction"
-                        }
-                    ]
-                },
-                "presence_penalty": {
-                    "type": "number"
-                },
-                "reasoning_effort": {
-                    "description": "Controls effort on reasoning for reasoning models. It can be set to \"low\", \"medium\", or \"high\".",
-                    "type": "string"
-                },
-                "response_format": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormat"
-                },
-                "safety_identifier": {
-                    "description": "A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.\nThe IDs should be a string that uniquely identifies each user.\nWe recommend hashing their username or email address, in order to avoid sending us any identifying information.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier",
-                    "type": "string"
-                },
-                "seed": {
-                    "type": "integer"
-                },
-                "service_tier": {
-                    "description": "Specifies the latency tier to use for processing the request.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.ServiceTier"
-                        }
-                    ]
-                },
-                "stop": {
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
-                "store": {
-                    "description": "Store can be set to true to store the output of this completion request for use in distillations and evals.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat-create-store",
-                    "type": "boolean"
-                },
-                "stream": {
-                    "type": "boolean"
-                },
-                "stream_options": {
-                    "description": "Options for streaming response. Only set this when you set stream: true.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.StreamOptions"
-                        }
-                    ]
-                },
-                "temperature": {
-                    "type": "number"
-                },
-                "tool_choice": {
-                    "description": "This can be either a string or an ToolChoice object."
-                },
-                "tools": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.Tool"
-                    }
-                },
-                "top_logprobs": {
-                    "description": "TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each\ntoken position, each with an associated log probability.\nlogprobs must be set to true if this parameter is used.",
-                    "type": "integer"
-                },
-                "top_p": {
-                    "type": "number"
-                },
-                "user": {
-                    "type": "string"
-                }
-            }
-        },
-        "openai.ChatCompletionResponse": {
-            "type": "object",
-            "properties": {
-                "choices": {
+                    "$ref": "#/definitions/openai.FunctionCall"
+                },
+                "multiContent": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionChoice"
+                        "$ref": "#/definitions/openai.ChatMessagePart"
                     }
                 },
-                "created": {
-                    "type": "integer"
+                "name": {
+                    "description": "This property isn't in the official documentation, but it's in\nthe documentation for the official library for python:\n- https://github.com/openai/openai-python/blob/main/chatml.md\n- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
+                    "type": "string"
                 },
-                "id": {
+                "reasoning_content": {
+                    "description": "This property is used for the \"reasoning\" feature supported by deepseek-reasoner\nwhich is not in the official documentation.\nthe doc from deepseek:\n- https://api-docs.deepseek.com/api/create-chat-completion#responses",
                     "type": "string"
                 },
-                "model": {
+                "refusal": {
                     "type": "string"
                 },
-                "object": {
+                "role": {
                     "type": "string"
                 },
-                "prompt_filter_results": {
+                "tool_call_id": {
+                    "description": "For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.",
+                    "type": "string"
+                },
+                "tool_calls": {
+                    "description": "For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.",
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.PromptFilterResult"
+                        "$ref": "#/definitions/openai.ToolCall"
                     }
-                },
-                "service_tier": {
-                    "$ref": "#/definitions/openai.ServiceTier"
-                },
-                "system_fingerprint": {
-                    "type": "string"
-                },
-                "usage": {
-                    "$ref": "#/definitions/openai.Usage"
                 }
             }
         },
@@ -4801,6 +4532,296 @@ const docTemplate = `{
                     "type": "string"
                 }
             }
+        },
+        "projectreq.CreateProjectRequest": {
+            "type": "object",
+            "required": [
+                "name"
+            ],
+            "properties": {
+                "instruction": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "projectreq.UpdateProjectRequest": {
+            "type": "object",
+            "properties": {
+                "instruction": {
+                    "type": "string"
+                },
+                "is_archived": {
+                    "type": "boolean"
+                },
+                "is_favorite": {
+                    "type": "boolean"
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "projectres.ProjectDeletedResponse": {
+            "type": "object",
+            "properties": {
+                "deleted": {
+                    "type": "boolean"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                }
+            }
+        },
+        "projectres.ProjectListResponse": {
+            "type": "object",
+            "properties": {
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/projectres.ProjectResponse"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "next_cursor": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "total": {
+                    "type": "integer"
+                }
+            }
+        },
+        "projectres.ProjectResponse": {
+            "type": "object",
+            "properties": {
+                "archived_at": {
+                    "type": "integer"
+                },
+                "created_at": {
+                    "type": "integer"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "instruction": {
+                    "type": "string"
+                },
+                "is_archived": {
+                    "type": "boolean"
+                },
+                "is_favorite": {
+                    "type": "boolean"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "updated_at": {
+                    "type": "integer"
+                }
+            }
+        },
+        "requestmodels.AddProviderRequest": {
+            "type": "object",
+            "required": [
+                "base_url",
+                "name",
+                "vendor"
+            ],
+            "properties": {
+                "active": {
+                    "type": "boolean"
+                },
+                "api_key": {
+                    "type": "string"
+                },
+                "base_url": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "name": {
+                    "type": "string"
+                },
+                "vendor": {
+                    "type": "string"
+                }
+            }
+        },
+        "requestmodels.BulkEnableModelsRequest": {
+            "type": "object",
+            "required": [
+                "enable"
+            ],
+            "properties": {
+                "enable": {
+                    "description": "Required: true to enable, false to disable",
+                    "type": "boolean"
+                },
+                "except_models": {
+                    "description": "List of model keys to exclude",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "provider_id": {
+                    "description": "Optional: filter by provider",
+                    "type": "string",
+                    "minLength": 1
+                }
+            }
+        },
+        "requestmodels.BulkToggleCatalogsRequest": {
+            "type": "object",
+            "required": [
+                "enable"
+            ],
+            "properties": {
+                "catalog_ids": {
+                    "description": "Optional: specific catalog public IDs. If empty, applies to all catalogs",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "enable": {
+                    "description": "Required: true to enable, false to disable",
+                    "type": "boolean"
+                },
+                "except_models": {
+                    "description": "List of model keys to exclude from the operation",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                }
+            }
+        },
+        "requestmodels.UpdateModelCatalogRequest": {
+            "type": "object",
+            "properties": {
+                "architecture": {
+                    "$ref": "#/definitions/model.Architecture"
+                },
+                "extras": {
+                    "type": "object",
+                    "additionalProperties": {}
+                },
+                "is_moderated": {
+                    "type": "boolean"
+                },
+                "notes": {
+                    "type": "string"
+                },
+                "supported_parameters": {
+                    "$ref": "#/definitions/model.SupportedParameters"
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                }
+            }
+        },
+        "requestmodels.UpdateProviderModelRequest": {
+            "type": "object",
+            "properties": {
+                "active": {
+                    "type": "boolean"
+                },
+                "display_name": {
+                    "type": "string"
+                },
+                "family": {
+                    "type": "string"
+                },
+                "pricing": {
+                    "$ref": "#/definitions/model.Pricing"
+                },
+                "supports_audio": {
+                    "type": "boolean"
+                },
+                "supports_embeddings": {
+                    "type": "boolean"
+                },
+                "supports_images": {
+                    "type": "boolean"
+                },
+                "supports_reasoning": {
+                    "type": "boolean"
+                },
+                "supports_video": {
+                    "type": "boolean"
+                },
+                "token_limits": {
+                    "$ref": "#/definitions/model.TokenLimits"
+                }
+            }
+        },
+        "requestmodels.UpdateProviderRequest": {
+            "type": "object",
+            "properties": {
+                "active": {
+                    "type": "boolean"
+                },
+                "api_key": {
+                    "type": "string"
+                },
+                "base_url": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "responses.ErrorResponse": {
+            "type": "object",
+            "properties": {
+                "code": {
+                    "description": "UUID from PlatformError",
+                    "type": "string"
+                },
+                "error": {
+                    "type": "string"
+                },
+                "message": {
+                    "type": "string"
+                },
+                "request_id": {
+                    "type": "string"
+                }
+            }
         }
     },
     "securityDefinitions": {
@@ -4815,16 +4836,14 @@ const docTemplate = `{
 
 // SwaggerInfo holds exported Swagger Info so clients can modify it
 var SwaggerInfo = &swag.Spec{
-	Version:          "1.0",
+	Version:          "2.0",
 	Host:             "",
 	BasePath:         "/",
 	Schemes:          []string{},
-	Title:            "Jan Server",
-	Description:      "This is the API gateway for Jan Server.",
+	Title:            "Jan Server LLM API",
+	Description:      "OpenAI-compatible LLM API platform with enterprise authentication, conversation management, and streaming support.",
 	InfoInstanceName: "swagger",
 	SwaggerTemplate:  docTemplate,
-	LeftDelim:        "{{",
-	RightDelim:       "}}",
 }
 
 func init() {
diff --git a/services/llm-api/docs/swagger/swagger-combined.json b/services/llm-api/docs/swagger/swagger-combined.json
new file mode 100644
index 00000000..3553df18
--- /dev/null
+++ b/services/llm-api/docs/swagger/swagger-combined.json
@@ -0,0 +1,4894 @@
+{
+  "basePath": "/",
+  "definitions": {
+    "MCP_responses.ErrorResponse": {
+      "properties": {
+        "code": {
+          "description": "UUID from PlatformError",
+          "type": "string"
+        },
+        "error": {
+          "type": "string"
+        },
+        "request_id": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "chatrequests.ChatCompletionRequest": {
+      "properties": {
+        "chat_template_kwargs": {
+          "additionalProperties": {},
+          "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer; they will be accessible by the chat template.\nFor example, to disable thinking mode for qwen3: \"chat_template_kwargs\": {\"enable_thinking\": false}\nSee https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
+          "type": "object"
+        },
+        "conversation": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/chatrequests.ConversationReference"
+            }
+          ],
+          "description": "Conversation can be either a string (conversation ID) or a conversation object.\nItems from this conversation are prepended to Messages for this response request.\nInput items and output items from this response are automatically added to this conversation after completion."
+        },
+        "frequency_penalty": {
+          "type": "number"
+        },
+        "function_call": {
+          "description": "Deprecated: use ToolChoice instead."
+        },
+        "functions": {
+          "description": "Deprecated: use Tools instead.",
+          "items": {
+            "$ref": "#/definitions/openai.FunctionDefinition"
+          },
+          "type": "array"
+        },
+        "guided_choice": {
+          "description": "GuidedChoice is a vLLM-specific extension that restricts the model's output\nto one of the predefined string choices provided in this field. This feature\nis used to constrain the model's responses to a controlled set of options,\nensuring predictable and consistent outputs in scenarios where specific\nchoices are required.",
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "logit_bias": {
+          "additionalProperties": {
+            "type": "integer"
+          },
+          "description": "LogitBias keys must be token ID strings (as specified by the tokenizer), not word strings.\nincorrect: `\"logit_bias\":{\"You\": 6}`, correct: `\"logit_bias\":{\"1639\": 6}`\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias",
+          "type": "object"
+        },
+        "logprobs": {
+          "description": "LogProbs indicates whether to return log probabilities of the output tokens or not.\nIf true, returns the log probabilities of each output token returned in the content of message.\nThis option is currently not available on the gpt-4-vision-preview model.",
+          "type": "boolean"
+        },
+        "max_completion_tokens": {
+          "description": "MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,\nincluding visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning",
+          "type": "integer"
+        },
+        "max_tokens": {
+          "description": "MaxTokens The maximum number of tokens that can be generated in the chat completion.\nThis value can be used to control costs for text generated via API.\nDeprecated: use MaxCompletionTokens. Not compatible with o1-series models.\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens",
+          "type": "integer"
+        },
+        "messages": {
+          "items": {
+            "$ref": "#/definitions/openai.ChatCompletionMessage"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "description": "Metadata to store with the completion.",
+          "type": "object"
+        },
+        "model": {
+          "type": "string"
+        },
+        "n": {
+          "type": "integer"
+        },
+        "parallel_tool_calls": {
+          "description": "Disable the default behavior of parallel tool calls by setting it to false."
+        },
+        "prediction": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/openai.Prediction"
+            }
+          ],
+          "description": "Configuration for a predicted output."
+        },
+        "presence_penalty": {
+          "type": "number"
+        },
+        "reasoning_effort": {
+          "description": "Controls effort on reasoning for reasoning models. It can be set to \"low\", \"medium\", or \"high\".",
+          "type": "string"
+        },
+        "response_format": {
+          "$ref": "#/definitions/openai.ChatCompletionResponseFormat"
+        },
+        "safety_identifier": {
+          "description": "A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.\nThe IDs should be a string that uniquely identifies each user.\nWe recommend hashing their username or email address, in order to avoid sending us any identifying information.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier",
+          "type": "string"
+        },
+        "seed": {
+          "type": "integer"
+        },
+        "service_tier": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/openai.ServiceTier"
+            }
+          ],
+          "description": "Specifies the latency tier to use for processing the request."
+        },
+        "stop": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "store": {
+          "description": "Store controls whether the latest input and generated response should be persisted",
+          "type": "boolean"
+        },
+        "store_reasoning": {
+          "description": "StoreReasoning controls whether reasoning content (if present) should also be persisted",
+          "type": "boolean"
+        },
+        "stream": {
+          "type": "boolean"
+        },
+        "stream_options": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/openai.StreamOptions"
+            }
+          ],
+          "description": "Options for streaming response. Only set this when you set stream: true."
+        },
+        "temperature": {
+          "type": "number"
+        },
+        "tool_choice": {
+          "description": "This can be either a string or a ToolChoice object."
+        },
+        "tools": {
+          "items": {
+            "$ref": "#/definitions/openai.Tool"
+          },
+          "type": "array"
+        },
+        "top_logprobs": {
+          "description": "TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each\ntoken position, each with an associated log probability.\nlogprobs must be set to true if this parameter is used.",
+          "type": "integer"
+        },
+        "top_p": {
+          "type": "number"
+        },
+        "user": {
+          "type": "string"
+        },
+        "verbosity": {
+          "description": "Verbosity determines how many output tokens are generated. Lowering the number of\ntokens reduces overall latency. It can be set to \"low\", \"medium\", or \"high\".\nNote: This field is only confirmed to work with gpt-5, gpt-5-mini and gpt-5-nano.\nAlso, it is not in the API reference of chat completion at the time of writing,\nthough it is supported by the API.",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "chatrequests.ConversationReference": {
+      "type": "object"
+    },
+    "chatresponses.ChatCompletionResponse": {
+      "properties": {
+        "choices": {
+          "items": {
+            "$ref": "#/definitions/openai.ChatCompletionChoice"
+          },
+          "type": "array"
+        },
+        "conversation": {
+          "$ref": "#/definitions/chatresponses.ConversationContext"
+        },
+        "created": {
+          "type": "integer"
+        },
+        "id": {
+          "type": "string"
+        },
+        "model": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        },
+        "prompt_filter_results": {
+          "items": {
+            "$ref": "#/definitions/openai.PromptFilterResult"
+          },
+          "type": "array"
+        },
+        "service_tier": {
+          "$ref": "#/definitions/openai.ServiceTier"
+        },
+        "system_fingerprint": {
+          "type": "string"
+        },
+        "usage": {
+          "$ref": "#/definitions/openai.Usage"
+        }
+      },
+      "type": "object"
+    },
+    "chatresponses.ConversationContext": {
+      "properties": {
+        "id": {
+          "description": "The unique ID of the conversation",
+          "type": "string"
+        },
+        "title": {
+          "description": "The title of the conversation (optional)",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.Annotation": {
+      "properties": {
+        "bounding_box": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.BBox"
+            }
+          ],
+          "description": "Bounding box for image/PDF annotations"
+        },
+        "confidence": {
+          "description": "Citation confidence score (0.0-1.0)",
+          "type": "number"
+        },
+        "container_id": {
+          "description": "Document container reference",
+          "type": "string"
+        },
+        "end_index": {
+          "description": "End position in text",
+          "type": "integer"
+        },
+        "file_id": {
+          "description": "For file citations",
+          "type": "string"
+        },
+        "filename": {
+          "description": "File name for citations",
+          "type": "string"
+        },
+        "index": {
+          "description": "Citation index",
+          "type": "integer"
+        },
+        "page_number": {
+          "description": "Page reference for documents",
+          "type": "integer"
+        },
+        "quote": {
+          "description": "Actual quoted text from source",
+          "type": "string"
+        },
+        "start_index": {
+          "description": "Start position in text",
+          "type": "integer"
+        },
+        "text": {
+          "description": "Display text",
+          "type": "string"
+        },
+        "type": {
+          "description": "\"file_citation\", \"url_citation\", \"file_path\", etc.",
+          "type": "string"
+        },
+        "url": {
+          "description": "For URL citations",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.AudioContent": {
+      "properties": {
+        "data": {
+          "description": "Base64 encoded audio data",
+          "type": "string"
+        },
+        "format": {
+          "description": "Audio format: mp3, wav, pcm16, etc.",
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "transcript": {
+          "description": "Text transcription of audio",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.BBox": {
+      "properties": {
+        "height": {
+          "type": "number"
+        },
+        "width": {
+          "type": "number"
+        },
+        "x": {
+          "type": "number"
+        },
+        "y": {
+          "type": "number"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.CodeContent": {
+      "properties": {
+        "code": {
+          "description": "Code content",
+          "type": "string"
+        },
+        "error": {
+          "description": "Execution error",
+          "type": "string"
+        },
+        "execution_id": {
+          "description": "Execution session ID",
+          "type": "string"
+        },
+        "exit_code": {
+          "description": "Process exit code",
+          "type": "integer"
+        },
+        "language": {
+          "description": "Programming language",
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {},
+          "description": "Additional metadata",
+          "type": "object"
+        },
+        "output": {
+          "description": "Execution output",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.ComputerAction": {
+      "properties": {
+        "action": {
+          "description": "Action type: \"click\", \"type\", \"key\", \"scroll\", \"move\", etc.",
+          "type": "string"
+        },
+        "coordinates": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.Coordinates"
+            }
+          ],
+          "description": "Screen coordinates for mouse actions"
+        },
+        "key": {
+          "description": "Key for keyboard actions",
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {},
+          "description": "Additional action metadata",
+          "type": "object"
+        },
+        "scroll_delta": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.ScrollDelta"
+            }
+          ],
+          "description": "Scroll amount"
+        },
+        "text": {
+          "description": "Text for typing actions",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.Content": {
+      "properties": {
+        "audio": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.AudioContent"
+            }
+          ],
+          "description": "Audio content for speech"
+        },
+        "code": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.CodeContent"
+            }
+          ],
+          "description": "Code block with execution metadata"
+        },
+        "computer_action": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.ComputerAction"
+            }
+          ],
+          "description": "Computer interaction details"
+        },
+        "computer_screenshot": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.ScreenshotContent"
+            }
+          ],
+          "description": "Screenshot from computer use"
+        },
+        "file": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.FileContent"
+            }
+          ],
+          "description": "File content"
+        },
+        "finish_reason": {
+          "description": "Finish reason",
+          "type": "string"
+        },
+        "function_call": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.FunctionCall"
+            }
+          ],
+          "description": "Function call content (deprecated, use tool_calls)"
+        },
+        "function_call_output": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.FunctionCallOut"
+            }
+          ],
+          "description": "Function call output"
+        },
+        "image": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.ImageContent"
+            }
+          ],
+          "description": "Image content"
+        },
+        "input_audio": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.InputAudio"
+            }
+          ],
+          "description": "User audio input"
+        },
+        "input_text": {
+          "description": "User input text (simple)",
+          "type": "string"
+        },
+        "output_text": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.OutputText"
+            }
+          ],
+          "description": "AI output text (with annotations)"
+        },
+        "reasoning_content": {
+          "description": "AI reasoning content",
+          "type": "string"
+        },
+        "refusal": {
+          "description": "Model refusal message",
+          "type": "string"
+        },
+        "summary_text": {
+          "description": "Summary content",
+          "type": "string"
+        },
+        "text": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.Text"
+            }
+          ],
+          "description": "Generic text content"
+        },
+        "thinking": {
+          "description": "Internal reasoning (o1 models)",
+          "type": "string"
+        },
+        "tool_call_id": {
+          "description": "Tool call ID (for tool responses)",
+          "type": "string"
+        },
+        "tool_calls": {
+          "description": "Tool calls (for assistant messages)",
+          "items": {
+            "$ref": "#/definitions/conversation.ToolCall"
+          },
+          "type": "array"
+        },
+        "type": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.Coordinates": {
+      "properties": {
+        "x": {
+          "type": "integer"
+        },
+        "y": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.FileContent": {
+      "properties": {
+        "file_id": {
+          "type": "string"
+        },
+        "mime_type": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "size": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.FunctionCall": {
+      "properties": {
+        "arguments": {
+          "description": "JSON-encoded arguments",
+          "type": "string"
+        },
+        "id": {
+          "description": "Call ID",
+          "type": "string"
+        },
+        "name": {
+          "description": "Function name",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.FunctionCallOut": {
+      "properties": {
+        "call_id": {
+          "description": "ID of the function call this responds to",
+          "type": "string"
+        },
+        "output": {
+          "description": "String output from the function",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.ImageContent": {
+      "properties": {
+        "detail": {
+          "description": "\"low\", \"high\", \"auto\"",
+          "type": "string"
+        },
+        "file_id": {
+          "type": "string"
+        },
+        "url": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.IncompleteDetails": {
+      "properties": {
+        "error": {
+          "description": "Error message if applicable",
+          "type": "string"
+        },
+        "reason": {
+          "description": "\"max_tokens\", \"content_filter\", \"tool_calls\", etc.",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.InputAudio": {
+      "properties": {
+        "data": {
+          "description": "Base64 encoded audio data",
+          "type": "string"
+        },
+        "format": {
+          "description": "Audio format: mp3, wav, pcm16, etc.",
+          "type": "string"
+        },
+        "transcript": {
+          "description": "Optional text transcription",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.Item": {
+      "properties": {
+        "branch": {
+          "description": "Branch identifier (MAIN, EDIT_1, etc.)",
+          "type": "string"
+        },
+        "completed_at": {
+          "type": "string"
+        },
+        "content": {
+          "items": {
+            "$ref": "#/definitions/conversation.Content"
+          },
+          "type": "array"
+        },
+        "created_at": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "incomplete_at": {
+          "type": "string"
+        },
+        "incomplete_details": {
+          "$ref": "#/definitions/conversation.IncompleteDetails"
+        },
+        "object": {
+          "description": "Always \"conversation.item\" for OpenAI compatibility",
+          "type": "string"
+        },
+        "rated_at": {
+          "description": "When rating was given",
+          "type": "string"
+        },
+        "rating": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.ItemRating"
+            }
+          ],
+          "description": "User feedback/rating"
+        },
+        "rating_comment": {
+          "description": "Optional comment with rating",
+          "type": "string"
+        },
+        "role": {
+          "$ref": "#/definitions/conversation.ItemRole"
+        },
+        "sequence_number": {
+          "description": "Order within branch",
+          "type": "integer"
+        },
+        "status": {
+          "$ref": "#/definitions/conversation.ItemStatus"
+        },
+        "type": {
+          "$ref": "#/definitions/conversation.ItemType"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.ItemRating": {
+      "enum": [
+        "like",
+        "unlike"
+      ],
+      "type": "string",
+      "x-enum-comments": {
+        "ItemRatingLike": "Positive feedback (like)",
+        "ItemRatingUnlike": "Negative feedback (unlike)"
+      },
+      "x-enum-varnames": [
+        "ItemRatingLike",
+        "ItemRatingUnlike"
+      ]
+    },
+    "conversation.ItemRole": {
+      "enum": [
+        "system",
+        "user",
+        "assistant",
+        "tool",
+        "developer",
+        "critic",
+        "discriminator",
+        "unknown"
+      ],
+      "type": "string",
+      "x-enum-comments": {
+        "ItemRoleCritic": "For critique/evaluation workflows",
+        "ItemRoleDeveloper": "System-level instructions (OpenAI replacement for system)",
+        "ItemRoleDiscriminator": "For adversarial/validation workflows",
+        "ItemRoleUnknown": "Fallback for unrecognized roles"
+      },
+      "x-enum-varnames": [
+        "ItemRoleSystem",
+        "ItemRoleUser",
+        "ItemRoleAssistant",
+        "ItemRoleTool",
+        "ItemRoleDeveloper",
+        "ItemRoleCritic",
+        "ItemRoleDiscriminator",
+        "ItemRoleUnknown"
+      ]
+    },
+    "conversation.ItemStatus": {
+      "enum": [
+        "incomplete",
+        "in_progress",
+        "completed",
+        "failed",
+        "cancelled",
+        "searching",
+        "generating",
+        "calling",
+        "streaming",
+        "rate_limited"
+      ],
+      "type": "string",
+      "x-enum-comments": {
+        "ItemStatusCalling": "Function/tool call in progress",
+        "ItemStatusCancelled": "Cancelled by user or system",
+        "ItemStatusCompleted": "Successfully finished",
+        "ItemStatusFailed": "Failed with error",
+        "ItemStatusGenerating": "Image generation in progress",
+        "ItemStatusInProgress": "Currently processing",
+        "ItemStatusIncomplete": "Not started or partially complete (OpenAI uses this instead of \"pending\")",
+        "ItemStatusRateLimited": "Rate limit hit",
+        "ItemStatusSearching": "File/web search in progress",
+        "ItemStatusStreaming": "Streaming response in progress"
+      },
+      "x-enum-varnames": [
+        "ItemStatusIncomplete",
+        "ItemStatusInProgress",
+        "ItemStatusCompleted",
+        "ItemStatusFailed",
+        "ItemStatusCancelled",
+        "ItemStatusSearching",
+        "ItemStatusGenerating",
+        "ItemStatusCalling",
+        "ItemStatusStreaming",
+        "ItemStatusRateLimited"
+      ]
+    },
+    "conversation.ItemType": {
+      "enum": [
+        "message",
+        "function_call",
+        "function_call_output",
+        "reasoning",
+        "file_search",
+        "web_search",
+        "code_interpreter",
+        "computer_use",
+        "custom_tool_call",
+        "mcp_item",
+        "image_generation"
+      ],
+      "type": "string",
+      "x-enum-comments": {
+        "ItemTypeCodeInterpreter": "Code execution",
+        "ItemTypeComputerUse": "Computer interaction",
+        "ItemTypeCustomToolCall": "Custom tool invocations",
+        "ItemTypeFileSearch": "RAG/retrieval operations",
+        "ItemTypeImageGeneration": "DALL-E image generation",
+        "ItemTypeMCPItem": "Model Context Protocol items",
+        "ItemTypeReasoning": "For o1/reasoning models",
+        "ItemTypeWebSearch": "Web browsing operations"
+      },
+      "x-enum-varnames": [
+        "ItemTypeMessage",
+        "ItemTypeFunctionCall",
+        "ItemTypeFunctionCallOut",
+        "ItemTypeReasoning",
+        "ItemTypeFileSearch",
+        "ItemTypeWebSearch",
+        "ItemTypeCodeInterpreter",
+        "ItemTypeComputerUse",
+        "ItemTypeCustomToolCall",
+        "ItemTypeMCPItem",
+        "ItemTypeImageGeneration"
+      ]
+    },
+    "conversation.LogProb": {
+      "properties": {
+        "bytes": {
+          "items": {
+            "type": "integer"
+          },
+          "type": "array"
+        },
+        "logprob": {
+          "type": "number"
+        },
+        "token": {
+          "type": "string"
+        },
+        "top_logprobs": {
+          "items": {
+            "$ref": "#/definitions/conversation.TopLogProb"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.OutputText": {
+      "properties": {
+        "annotations": {
+          "description": "Required for OpenAI compatibility",
+          "items": {
+            "$ref": "#/definitions/conversation.Annotation"
+          },
+          "type": "array"
+        },
+        "logprobs": {
+          "description": "Token probabilities",
+          "items": {
+            "$ref": "#/definitions/conversation.LogProb"
+          },
+          "type": "array"
+        },
+        "text": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.ScreenshotContent": {
+      "properties": {
+        "description": {
+          "description": "Optional description",
+          "type": "string"
+        },
+        "height": {
+          "description": "Image height in pixels",
+          "type": "integer"
+        },
+        "image_data": {
+          "description": "Base64 encoded image data",
+          "type": "string"
+        },
+        "image_url": {
+          "description": "URL to screenshot image",
+          "type": "string"
+        },
+        "timestamp": {
+          "description": "Unix timestamp when screenshot was taken",
+          "type": "integer"
+        },
+        "width": {
+          "description": "Image width in pixels",
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.ScrollDelta": {
+      "properties": {
+        "x": {
+          "type": "integer"
+        },
+        "y": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.Text": {
+      "properties": {
+        "annotations": {
+          "items": {
+            "$ref": "#/definitions/conversation.Annotation"
+          },
+          "type": "array"
+        },
+        "text": {
+          "description": "Changed from \"value\" to match OpenAI spec",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.ToolCall": {
+      "properties": {
+        "function": {
+          "$ref": "#/definitions/conversation.FunctionCall"
+        },
+        "id": {
+          "type": "string"
+        },
+        "type": {
+          "description": "\"function\", \"file_search\", \"code_interpreter\"",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversation.TopLogProb": {
+      "properties": {
+        "bytes": {
+          "items": {
+            "type": "integer"
+          },
+          "type": "array"
+        },
+        "logprob": {
+          "type": "number"
+        },
+        "token": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversationrequests.CreateConversationRequest": {
+      "properties": {
+        "items": {
+          "items": {
+            "$ref": "#/definitions/conversation.Item"
+          },
+          "type": "array"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "project_id": {
+          "type": "string"
+        },
+        "referrer": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversationrequests.CreateItemsRequest": {
+      "properties": {
+        "items": {
+          "items": {
+            "$ref": "#/definitions/conversation.Item"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "items"
+      ],
+      "type": "object"
+    },
+    "conversationrequests.UpdateConversationRequest": {
+      "properties": {
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "referrer": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversationresponses.ConversationDeletedResponse": {
+      "properties": {
+        "deleted": {
+          "type": "boolean"
+        },
+        "id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversationresponses.ConversationItemCreatedResponse": {
+      "properties": {
+        "data": {
+          "items": {
+            "$ref": "#/definitions/conversation.Item"
+          },
+          "type": "array"
+        },
+        "first_id": {
+          "type": "string"
+        },
+        "has_more": {
+          "type": "boolean"
+        },
+        "last_id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversationresponses.ConversationListResponse": {
+      "properties": {
+        "data": {
+          "items": {
+            "$ref": "#/definitions/conversationresponses.ConversationResponse"
+          },
+          "type": "array"
+        },
+        "first_id": {
+          "type": "string"
+        },
+        "has_more": {
+          "type": "boolean"
+        },
+        "last_id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        },
+        "total": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "conversationresponses.ConversationResponse": {
+      "properties": {
+        "created_at": {
+          "type": "integer"
+        },
+        "id": {
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "object": {
+          "type": "string"
+        },
+        "project_id": {
+          "type": "string"
+        },
+        "referrer": {
+          "type": "string"
+        },
+        "title": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversationresponses.ItemListResponse": {
+      "properties": {
+        "data": {
+          "items": {
+            "$ref": "#/definitions/conversation.Item"
+          },
+          "type": "array"
+        },
+        "first_id": {
+          "type": "string"
+        },
+        "has_more": {
+          "type": "boolean"
+        },
+        "last_id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "conversationresponses.ItemResponse": {
+      "properties": {
+        "branch": {
+          "description": "Branch identifier (MAIN, EDIT_1, etc.)",
+          "type": "string"
+        },
+        "completed_at": {
+          "type": "string"
+        },
+        "content": {
+          "items": {
+            "$ref": "#/definitions/conversation.Content"
+          },
+          "type": "array"
+        },
+        "created_at": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "incomplete_at": {
+          "type": "string"
+        },
+        "incomplete_details": {
+          "$ref": "#/definitions/conversation.IncompleteDetails"
+        },
+        "object": {
+          "description": "Always \"conversation.item\" for OpenAI compatibility",
+          "type": "string"
+        },
+        "rated_at": {
+          "description": "When rating was given",
+          "type": "string"
+        },
+        "rating": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/conversation.ItemRating"
+            }
+          ],
+          "description": "User feedback/rating"
+        },
+        "rating_comment": {
+          "description": "Optional comment with rating",
+          "type": "string"
+        },
+        "role": {
+          "$ref": "#/definitions/conversation.ItemRole"
+        },
+        "sequence_number": {
+          "description": "Order within branch",
+          "type": "integer"
+        },
+        "status": {
+          "$ref": "#/definitions/conversation.ItemStatus"
+        },
+        "type": {
+          "$ref": "#/definitions/conversation.ItemType"
+        }
+      },
+      "type": "object"
+    },
+    "model.Architecture": {
+      "properties": {
+        "input_modalities": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "instruct_type": {
+          "description": "nullable",
+          "type": "string"
+        },
+        "modality": {
+          "description": "\"text+image-\u003etext\"",
+          "type": "string"
+        },
+        "output_modalities": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "tokenizer": {
+          "description": "\"GPT\" / \"SentencePiece\" / etc.",
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "model.ModelCatalogStatus": {
+      "enum": [
+        "init",
+        "filled",
+        "updated"
+      ],
+      "type": "string",
+      "x-enum-comments": {
+        "ModelCatalogStatusFilled": "may update from Provider like OpenRouter",
+        "ModelCatalogStatusInit": "default status when creating entry",
+        "ModelCatalogStatusUpdated": "manually updated by admin (cannot be auto-updated anymore)"
+      },
+      "x-enum-varnames": [
+        "ModelCatalogStatusInit",
+        "ModelCatalogStatusFilled",
+        "ModelCatalogStatusUpdated"
+      ]
+    },
+    "model.PriceLine": {
+      "properties": {
+        "amount_micro_usd": {
+          "description": "e.g., 15000 -\u003e $0.0150",
+          "type": "integer"
+        },
+        "currency": {
+          "description": "\"USD\" (fixed if you only bill in USD)",
+          "type": "string"
+        },
+        "unit": {
+          "$ref": "#/definitions/model.PriceUnit"
+        }
+      },
+      "type": "object"
+    },
+    "model.PriceUnit": {
+      "enum": [
+        "per_1k_prompt_tokens",
+        "per_1k_completion_tokens",
+        "per_request",
+        "per_image",
+        "per_web_search",
+        "per_internal_reasoning"
+      ],
+      "type": "string",
+      "x-enum-varnames": [
+        "Per1KPromptTokens",
+        "Per1KCompletionTokens",
+        "PerRequest",
+        "PerImage",
+        "PerWebSearch",
+        "PerInternalReasoning"
+      ]
+    },
+    "model.Pricing": {
+      "properties": {
+        "lines": {
+          "description": "flexible: add/remove units without schema churn",
+          "items": {
+            "$ref": "#/definitions/model.PriceLine"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
+    "model.SupportedParameters": {
+      "properties": {
+        "default": {
+          "additionalProperties": {
+            "type": "number"
+          },
+          "description": "temperature/top_p/frequency_penalty, null allowed",
+          "type": "object"
+        },
+        "names": {
+          "description": "e.g., [\"include_reasoning\",\"max_tokens\",...]",
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
+    "model.TokenLimits": {
+      "properties": {
+        "context_length": {
+          "description": "e.g., 400000",
+          "type": "integer"
+        },
+        "max_completion_tokens": {
+          "description": "e.g., 128000",
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.BulkOperationResponse": {
+      "properties": {
+        "failed_count": {
+          "type": "integer"
+        },
+        "failed_models": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "skipped_count": {
+          "type": "integer"
+        },
+        "total_checked": {
+          "type": "integer"
+        },
+        "updated_count": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ModelCatalogResponse": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "architecture": {
+          "$ref": "#/definitions/model.Architecture"
+        },
+        "created_at": {
+          "type": "integer"
+        },
+        "extras": {
+          "additionalProperties": {},
+          "type": "object"
+        },
+        "id": {
+          "type": "string"
+        },
+        "is_moderated": {
+          "type": "boolean"
+        },
+        "last_synced_at": {
+          "type": "integer"
+        },
+        "notes": {
+          "type": "string"
+        },
+        "status": {
+          "$ref": "#/definitions/model.ModelCatalogStatus"
+        },
+        "supported_parameters": {
+          "$ref": "#/definitions/model.SupportedParameters"
+        },
+        "tags": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "updated_at": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ModelResponse": {
+      "properties": {
+        "created": {
+          "type": "integer"
+        },
+        "id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        },
+        "owned_by": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ModelResponseList": {
+      "properties": {
+        "data": {
+          "items": {
+            "$ref": "#/definitions/modelresponses.ModelResponse"
+          },
+          "type": "array"
+        },
+        "object": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ModelResponseWithProvider": {
+      "properties": {
+        "created": {
+          "type": "integer"
+        },
+        "id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        },
+        "owned_by": {
+          "type": "string"
+        },
+        "provider_id": {
+          "type": "string"
+        },
+        "provider_name": {
+          "type": "string"
+        },
+        "provider_vendor": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ModelWithProviderResponseList": {
+      "properties": {
+        "data": {
+          "items": {
+            "$ref": "#/definitions/modelresponses.ModelResponseWithProvider"
+          },
+          "type": "array"
+        },
+        "object": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ProviderModelResponse": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "created_at": {
+          "type": "integer"
+        },
+        "display_name": {
+          "type": "string"
+        },
+        "family": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "model_catalog_id": {
+          "type": "string"
+        },
+        "model_public_id": {
+          "type": "string"
+        },
+        "pricing": {
+          "$ref": "#/definitions/model.Pricing"
+        },
+        "provider_id": {
+          "type": "string"
+        },
+        "provider_original_model_id": {
+          "type": "string"
+        },
+        "provider_vendor": {
+          "type": "string"
+        },
+        "supports_audio": {
+          "type": "boolean"
+        },
+        "supports_embeddings": {
+          "type": "boolean"
+        },
+        "supports_images": {
+          "type": "boolean"
+        },
+        "supports_reasoning": {
+          "type": "boolean"
+        },
+        "supports_video": {
+          "type": "boolean"
+        },
+        "token_limits": {
+          "$ref": "#/definitions/model.TokenLimits"
+        },
+        "updated_at": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ProviderResponse": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "base_url": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "vendor": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ProviderResponseList": {
+      "properties": {
+        "data": {
+          "items": {
+            "$ref": "#/definitions/modelresponses.ProviderResponse"
+          },
+          "type": "array"
+        },
+        "object": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ProviderWithModelCountResponse": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "base_url": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "model_active_count": {
+          "type": "integer"
+        },
+        "model_count": {
+          "type": "integer"
+        },
+        "name": {
+          "type": "string"
+        },
+        "vendor": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "modelresponses.ProviderWithModelsResponse": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "base_url": {
+          "type": "string"
+        },
+        "id": {
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "models": {
+          "items": {
+            "$ref": "#/definitions/modelresponses.ModelResponse"
+          },
+          "type": "array"
+        },
+        "name": {
+          "type": "string"
+        },
+        "vendor": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ChatCompletionChoice": {
+      "properties": {
+        "content_filter_results": {
+          "$ref": "#/definitions/openai.ContentFilterResults"
+        },
+        "finish_reason": {
+          "allOf": [
+            {
+              "$ref": "#/definitions/openai.FinishReason"
+            }
+          ],
+          "description": "FinishReason\nstop: API returned complete message,\nor a message terminated by one of the stop sequences provided via the stop parameter\nlength: Incomplete model output due to max_tokens parameter or token limit\nfunction_call: The model decided to call a function\ncontent_filter: Omitted content due to a flag from our content filters\nnull: API response still in progress or incomplete"
+        },
+        "index": {
+          "type": "integer"
+        },
+        "logprobs": {
+          "$ref": "#/definitions/openai.LogProbs"
+        },
+        "message": {
+          "$ref": "#/definitions/openai.ChatCompletionMessage"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ChatCompletionMessage": {
+      "properties": {
+        "content": {
+          "type": "string"
+        },
+        "function_call": {
+          "$ref": "#/definitions/openai.FunctionCall"
+        },
+        "multiContent": {
+          "items": {
+            "$ref": "#/definitions/openai.ChatMessagePart"
+          },
+          "type": "array"
+        },
+        "name": {
+          "description": "This property isn't in the official documentation, but it's in\nthe documentation for the official library for python:\n- https://github.com/openai/openai-python/blob/main/chatml.md\n- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
+          "type": "string"
+        },
+        "reasoning_content": {
+          "description": "This property is used for the \"reasoning\" feature supported by deepseek-reasoner\nwhich is not in the official documentation.\nthe doc from deepseek:\n- https://api-docs.deepseek.com/api/create-chat-completion#responses",
+          "type": "string"
+        },
+        "refusal": {
+          "type": "string"
+        },
+        "role": {
+          "type": "string"
+        },
+        "tool_call_id": {
+          "description": "For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.",
+          "type": "string"
+        },
+        "tool_calls": {
+          "description": "For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.",
+          "items": {
+            "$ref": "#/definitions/openai.ToolCall"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ChatCompletionResponseFormat": {
+      "properties": {
+        "json_schema": {
+          "$ref": "#/definitions/openai.ChatCompletionResponseFormatJSONSchema"
+        },
+        "type": {
+          "$ref": "#/definitions/openai.ChatCompletionResponseFormatType"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ChatCompletionResponseFormatJSONSchema": {
+      "properties": {
+        "description": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "schema": {},
+        "strict": {
+          "type": "boolean"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ChatCompletionResponseFormatType": {
+      "enum": [
+        "json_object",
+        "json_schema",
+        "text"
+      ],
+      "type": "string",
+      "x-enum-varnames": [
+        "ChatCompletionResponseFormatTypeJSONObject",
+        "ChatCompletionResponseFormatTypeJSONSchema",
+        "ChatCompletionResponseFormatTypeText"
+      ]
+    },
+    "openai.ChatMessageImageURL": {
+      "properties": {
+        "detail": {
+          "$ref": "#/definitions/openai.ImageURLDetail"
+        },
+        "url": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ChatMessagePart": {
+      "properties": {
+        "image_url": {
+          "$ref": "#/definitions/openai.ChatMessageImageURL"
+        },
+        "text": {
+          "type": "string"
+        },
+        "type": {
+          "$ref": "#/definitions/openai.ChatMessagePartType"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ChatMessagePartType": {
+      "enum": [
+        "text",
+        "image_url"
+      ],
+      "type": "string",
+      "x-enum-varnames": [
+        "ChatMessagePartTypeText",
+        "ChatMessagePartTypeImageURL"
+      ]
+    },
+    "openai.CompletionTokensDetails": {
+      "properties": {
+        "accepted_prediction_tokens": {
+          "type": "integer"
+        },
+        "audio_tokens": {
+          "type": "integer"
+        },
+        "reasoning_tokens": {
+          "type": "integer"
+        },
+        "rejected_prediction_tokens": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ContentFilterResults": {
+      "properties": {
+        "hate": {
+          "$ref": "#/definitions/openai.Hate"
+        },
+        "jailbreak": {
+          "$ref": "#/definitions/openai.JailBreak"
+        },
+        "profanity": {
+          "$ref": "#/definitions/openai.Profanity"
+        },
+        "self_harm": {
+          "$ref": "#/definitions/openai.SelfHarm"
+        },
+        "sexual": {
+          "$ref": "#/definitions/openai.Sexual"
+        },
+        "violence": {
+          "$ref": "#/definitions/openai.Violence"
+        }
+      },
+      "type": "object"
+    },
+    "openai.FinishReason": {
+      "enum": [
+        "stop",
+        "length",
+        "function_call",
+        "tool_calls",
+        "content_filter",
+        "null"
+      ],
+      "type": "string",
+      "x-enum-varnames": [
+        "FinishReasonStop",
+        "FinishReasonLength",
+        "FinishReasonFunctionCall",
+        "FinishReasonToolCalls",
+        "FinishReasonContentFilter",
+        "FinishReasonNull"
+      ]
+    },
+    "openai.FunctionCall": {
+      "properties": {
+        "arguments": {
+          "description": "call function with arguments in JSON format",
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.FunctionDefinition": {
+      "properties": {
+        "description": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        },
+        "parameters": {
+          "description": "Parameters is an object describing the function.\nYou can pass json.RawMessage to describe the schema,\nor you can pass in a struct which serializes to the proper JSON schema.\nThe jsonschema package is provided for convenience, but you should\nconsider another specialized library if you require more complex schemas."
+        },
+        "strict": {
+          "type": "boolean"
+        }
+      },
+      "type": "object"
+    },
+    "openai.Hate": {
+      "properties": {
+        "filtered": {
+          "type": "boolean"
+        },
+        "severity": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ImageURLDetail": {
+      "enum": [
+        "high",
+        "low",
+        "auto"
+      ],
+      "type": "string",
+      "x-enum-varnames": [
+        "ImageURLDetailHigh",
+        "ImageURLDetailLow",
+        "ImageURLDetailAuto"
+      ]
+    },
+    "openai.JailBreak": {
+      "properties": {
+        "detected": {
+          "type": "boolean"
+        },
+        "filtered": {
+          "type": "boolean"
+        }
+      },
+      "type": "object"
+    },
+    "openai.LogProb": {
+      "properties": {
+        "bytes": {
+          "description": "Omitting the field if it is null",
+          "items": {
+            "type": "integer"
+          },
+          "type": "array"
+        },
+        "logprob": {
+          "type": "number"
+        },
+        "token": {
+          "type": "string"
+        },
+        "top_logprobs": {
+          "description": "TopLogProbs is a list of the most likely tokens and their log probability, at this token position.\nIn rare cases, there may be fewer than the number of requested top_logprobs returned.",
+          "items": {
+            "$ref": "#/definitions/openai.TopLogProbs"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
+    "openai.LogProbs": {
+      "properties": {
+        "content": {
+          "description": "Content is a list of message content tokens with log probability information.",
+          "items": {
+            "$ref": "#/definitions/openai.LogProb"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
+    "openai.Prediction": {
+      "properties": {
+        "content": {
+          "type": "string"
+        },
+        "type": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.Profanity": {
+      "properties": {
+        "detected": {
+          "type": "boolean"
+        },
+        "filtered": {
+          "type": "boolean"
+        }
+      },
+      "type": "object"
+    },
+    "openai.PromptFilterResult": {
+      "properties": {
+        "content_filter_results": {
+          "$ref": "#/definitions/openai.ContentFilterResults"
+        },
+        "index": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "openai.PromptTokensDetails": {
+      "properties": {
+        "audio_tokens": {
+          "type": "integer"
+        },
+        "cached_tokens": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "openai.SelfHarm": {
+      "properties": {
+        "filtered": {
+          "type": "boolean"
+        },
+        "severity": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ServiceTier": {
+      "enum": [
+        "auto",
+        "default",
+        "flex",
+        "priority"
+      ],
+      "type": "string",
+      "x-enum-varnames": [
+        "ServiceTierAuto",
+        "ServiceTierDefault",
+        "ServiceTierFlex",
+        "ServiceTierPriority"
+      ]
+    },
+    "openai.Sexual": {
+      "properties": {
+        "filtered": {
+          "type": "boolean"
+        },
+        "severity": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.StreamOptions": {
+      "properties": {
+        "include_usage": {
+          "description": "If set, an additional chunk will be streamed before the data: [DONE] message.\nThe usage field on this chunk shows the token usage statistics for the entire request,\nand the choices field will always be an empty array.\nAll other chunks will also include a usage field, but with a null value.",
+          "type": "boolean"
+        }
+      },
+      "type": "object"
+    },
+    "openai.Tool": {
+      "properties": {
+        "function": {
+          "$ref": "#/definitions/openai.FunctionDefinition"
+        },
+        "type": {
+          "$ref": "#/definitions/openai.ToolType"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ToolCall": {
+      "properties": {
+        "function": {
+          "$ref": "#/definitions/openai.FunctionCall"
+        },
+        "id": {
+          "type": "string"
+        },
+        "index": {
+          "description": "Index is not nil only in chat completion chunk object",
+          "type": "integer"
+        },
+        "type": {
+          "$ref": "#/definitions/openai.ToolType"
+        }
+      },
+      "type": "object"
+    },
+    "openai.ToolType": {
+      "enum": [
+        "function"
+      ],
+      "type": "string",
+      "x-enum-varnames": [
+        "ToolTypeFunction"
+      ]
+    },
+    "openai.TopLogProbs": {
+      "properties": {
+        "bytes": {
+          "items": {
+            "type": "integer"
+          },
+          "type": "array"
+        },
+        "logprob": {
+          "type": "number"
+        },
+        "token": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "openai.Usage": {
+      "properties": {
+        "completion_tokens": {
+          "type": "integer"
+        },
+        "completion_tokens_details": {
+          "$ref": "#/definitions/openai.CompletionTokensDetails"
+        },
+        "prompt_tokens": {
+          "type": "integer"
+        },
+        "prompt_tokens_details": {
+          "$ref": "#/definitions/openai.PromptTokensDetails"
+        },
+        "total_tokens": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "openai.Violence": {
+      "properties": {
+        "filtered": {
+          "type": "boolean"
+        },
+        "severity": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "projectreq.CreateProjectRequest": {
+      "properties": {
+        "instruction": {
+          "type": "string"
+        },
+        "name": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "name"
+      ],
+      "type": "object"
+    },
+    "projectreq.UpdateProjectRequest": {
+      "properties": {
+        "instruction": {
+          "type": "string"
+        },
+        "is_archived": {
+          "type": "boolean"
+        },
+        "is_favorite": {
+          "type": "boolean"
+        },
+        "name": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "projectres.ProjectDeletedResponse": {
+      "properties": {
+        "deleted": {
+          "type": "boolean"
+        },
+        "id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "projectres.ProjectListResponse": {
+      "properties": {
+        "data": {
+          "items": {
+            "$ref": "#/definitions/projectres.ProjectResponse"
+          },
+          "type": "array"
+        },
+        "first_id": {
+          "type": "string"
+        },
+        "has_more": {
+          "type": "boolean"
+        },
+        "last_id": {
+          "type": "string"
+        },
+        "next_cursor": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        },
+        "total": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "projectres.ProjectResponse": {
+      "properties": {
+        "archived_at": {
+          "type": "integer"
+        },
+        "created_at": {
+          "type": "integer"
+        },
+        "id": {
+          "type": "string"
+        },
+        "instruction": {
+          "type": "string"
+        },
+        "is_archived": {
+          "type": "boolean"
+        },
+        "is_favorite": {
+          "type": "boolean"
+        },
+        "name": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string"
+        },
+        "updated_at": {
+          "type": "integer"
+        }
+      },
+      "type": "object"
+    },
+    "requestmodels.AddProviderRequest": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "api_key": {
+          "type": "string"
+        },
+        "base_url": {
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        },
+        "vendor": {
+          "type": "string"
+        }
+      },
+      "required": [
+        "base_url",
+        "name",
+        "vendor"
+      ],
+      "type": "object"
+    },
+    "requestmodels.BulkEnableModelsRequest": {
+      "properties": {
+        "enable": {
+          "description": "Required: true to enable, false to disable",
+          "type": "boolean"
+        },
+        "except_models": {
+          "description": "List of model keys to exclude",
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "provider_id": {
+          "description": "Optional: filter by provider",
+          "minLength": 1,
+          "type": "string"
+        }
+      },
+      "required": [
+        "enable"
+      ],
+      "type": "object"
+    },
+    "requestmodels.BulkToggleCatalogsRequest": {
+      "properties": {
+        "catalog_ids": {
+          "description": "Optional: specific catalog public IDs. If empty, applies to all catalogs",
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        },
+        "enable": {
+          "description": "Required: true to enable, false to disable",
+          "type": "boolean"
+        },
+        "except_models": {
+          "description": "List of model keys to exclude from the operation",
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "required": [
+        "enable"
+      ],
+      "type": "object"
+    },
+    "requestmodels.UpdateModelCatalogRequest": {
+      "properties": {
+        "architecture": {
+          "$ref": "#/definitions/model.Architecture"
+        },
+        "extras": {
+          "additionalProperties": {},
+          "type": "object"
+        },
+        "is_moderated": {
+          "type": "boolean"
+        },
+        "notes": {
+          "type": "string"
+        },
+        "supported_parameters": {
+          "$ref": "#/definitions/model.SupportedParameters"
+        },
+        "tags": {
+          "items": {
+            "type": "string"
+          },
+          "type": "array"
+        }
+      },
+      "type": "object"
+    },
+    "requestmodels.UpdateProviderModelRequest": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "display_name": {
+          "type": "string"
+        },
+        "family": {
+          "type": "string"
+        },
+        "pricing": {
+          "$ref": "#/definitions/model.Pricing"
+        },
+        "supports_audio": {
+          "type": "boolean"
+        },
+        "supports_embeddings": {
+          "type": "boolean"
+        },
+        "supports_images": {
+          "type": "boolean"
+        },
+        "supports_reasoning": {
+          "type": "boolean"
+        },
+        "supports_video": {
+          "type": "boolean"
+        },
+        "token_limits": {
+          "$ref": "#/definitions/model.TokenLimits"
+        }
+      },
+      "type": "object"
+    },
+    "requestmodels.UpdateProviderRequest": {
+      "properties": {
+        "active": {
+          "type": "boolean"
+        },
+        "api_key": {
+          "type": "string"
+        },
+        "base_url": {
+          "type": "string"
+        },
+        "metadata": {
+          "additionalProperties": {
+            "type": "string"
+          },
+          "type": "object"
+        },
+        "name": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    },
+    "responses.ErrorResponse": {
+      "properties": {
+        "code": {
+          "description": "UUID from PlatformError",
+          "type": "string"
+        },
+        "error": {
+          "type": "string"
+        },
+        "message": {
+          "type": "string"
+        },
+        "request_id": {
+          "type": "string"
+        }
+      },
+      "type": "object"
+    }
+  },
+  "info": {
+    "contact": {
+      "name": "Jan Server Team",
+      "url": "https://github.com/janhq/jan-server"
+    },
+    "description": "Unified API documentation for Jan Server including LLM API (OpenAI-compatible) and MCP Tools",
+    "title": "Jan Server API (LLM API + MCP Tools)",
+    "version": "2.0"
+  },
+  "paths": {
+    "/auth/api-keys": {
+      "get": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Returns all API keys created by the authenticated user. Key values are not returned, only metadata.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of API keys with metadata",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - invalid or expired token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List user's API keys",
+        "tags": [
+          "Authentication API"
+        ]
+      },
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Creates a new API key for the authenticated user. API keys provide programmatic access without requiring user credentials.",
+        "parameters": [
+          {
+            "description": "API key creation request with name and optional scopes",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "type": "object"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "201": {
+            "description": "API key created successfully with key value",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "400": {
+            "description": "Invalid request - missing required fields",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - invalid or expired token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Create API key",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/api-keys/{id}": {
+      "delete": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Revokes and deletes an API key by ID. Deleted keys can no longer be used for authentication.",
+        "parameters": [
+          {
+            "description": "API key ID",
+            "in": "path",
+            "name": "id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "204": {
+            "description": "API key deleted successfully"
+          },
+          "401": {
+            "description": "Unauthorized - invalid or expired token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "API key not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Delete API key",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/callback": {
+      "get": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Handles the OAuth2 callback from Keycloak, exchanges authorization code for JWT tokens",
+        "parameters": [
+          {
+            "description": "Authorization code from Keycloak",
+            "in": "query",
+            "name": "code",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "State parameter for CSRF protection",
+            "in": "query",
+            "name": "state",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "JWT tokens",
+            "schema": {
+              "properties": {
+                "access_token": {
+                  "type": "string"
+                },
+                "expires_in": {
+                  "type": "integer"
+                },
+                "refresh_token": {
+                  "type": "string"
+                },
+                "token_type": {
+                  "type": "string"
+                }
+              },
+              "type": "object"
+            }
+          },
+          "400": {
+            "description": "Missing code or state",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Invalid state parameter",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Failed to exchange code for tokens",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "Handle Keycloak OAuth2 callback",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/guest-login": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Creates a temporary guest user account and returns JWT tokens. Guest users have limited access and can be upgraded to full accounts later.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Guest user created with access and refresh tokens",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "500": {
+            "description": "Internal server error - failed to create guest user",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "Create guest user account",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/login": {
+      "get": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Returns the Keycloak authorization URL for frontend to redirect users. Supports OAuth2 authorization code flow with PKCE.",
+        "parameters": [
+          {
+            "description": "URL to redirect after successful login",
+            "in": "query",
+            "name": "redirect_url",
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Authorization URL and state parameter",
+            "schema": {
+              "properties": {
+                "authorization_url": {
+                  "type": "string"
+                },
+                "state": {
+                  "type": "string"
+                }
+              },
+              "type": "object"
+            }
+          },
+          "500": {
+            "description": "Failed to initiate login",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "Initiate Keycloak OAuth2 login",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/logout": {
+      "get": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Revokes the current access token and clears authentication cookies. After logout, the user must re-authenticate.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully logged out",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - invalid token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Logout user",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/me": {
+      "get": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Returns the authenticated user's profile information including user ID, email, roles, and guest status.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "User profile information",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - invalid or expired token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get current user information",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/refresh-token": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Exchanges a valid refresh token for a new access token. Refresh token must be provided in Authorization header or refresh_token cookie.",
+        "parameters": [
+          {
+            "description": "Refresh token (can also be in Authorization header)",
+            "in": "body",
+            "name": "refresh_token",
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "New access token and refresh token",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - invalid or expired refresh token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "Refresh access token",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/revoke": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Revokes a refresh token to invalidate it",
+        "parameters": [
+          {
+            "description": "Token to revoke",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "properties": {
+                "refresh_token": {
+                  "type": "string"
+                }
+              },
+              "type": "object"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Token revoked successfully",
+            "schema": {
+              "properties": {
+                "message": {
+                  "type": "string"
+                }
+              },
+              "type": "object"
+            }
+          },
+          "400": {
+            "description": "Invalid request body",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Keycloak OAuth is not configured",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "Revoke Keycloak refresh token",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/upgrade": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Converts a guest user account to a permanent account with email/password credentials. Guest flag is removed and user gains full access.",
+        "parameters": [
+          {
+            "description": "Upgrade request with email and password",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "type": "object"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Account upgraded successfully with new tokens",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "400": {
+            "description": "Invalid request - missing email or password",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - not a guest user or invalid token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Upgrade guest to permanent account",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/validate": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Validates an access token against Keycloak's userinfo endpoint",
+        "parameters": [
+          {
+            "description": "Bearer token",
+            "in": "header",
+            "name": "Authorization",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Token is valid with user information",
+            "schema": {
+              "properties": {
+                "user_info": {
+                  "type": "object"
+                },
+                "valid": {
+                  "type": "boolean"
+                }
+              },
+              "type": "object"
+            }
+          },
+          "401": {
+            "description": "Invalid or expired token",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Keycloak OAuth is not configured",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "Validate Keycloak access token",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/auth/validate-api-key": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Internal endpoint used by Kong API Gateway to validate API keys. Not intended for direct client use.",
+        "parameters": [
+          {
+            "description": "API key validation request",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "type": "object"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "API key is valid with user information",
+            "schema": {
+              "type": "object"
+            }
+          },
+          "401": {
+            "description": "Invalid API key",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "Validate API key (Kong Plugin)",
+        "tags": [
+          "Authentication API"
+        ]
+      }
+    },
+    "/mcp/v1/mcp": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Handles Model Context Protocol (MCP) requests over HTTP. Supports MCP methods: initialize, ping, tools/list, tools/call, prompts/list, prompts/call, resources/list, resources/read.\n\n**Available Tools:**\n- `google_search`: Web search via pluggable engines (Serper/SearXNG/duckduckgo) with params: q, gl, hl, location, num, tbs, page, autocorrect, domain_allow_list, location_hint, offline_mode. Returns structured citations.\n- `scrape`: Web page scraping (params: url, includeMarkdown) returning text, preview, cache_status, and metadata.\n- `file_search_index` / `file_search_query`: Index arbitrary text and run similarity queries against the lightweight vector store.\n- `python_exec`: Execute trusted code through SandboxFusion (params: code, language, session_id, approved) to retrieve stdout/stderr/artifacts.\n\n**MCP Protocol:**\n- Request format: JSON-RPC 2.0 with method and params\n- Response format: Server-Sent Events (SSE) stream\n- Stateless mode (no session management)",
+        "parameters": [
+          {
+            "description": "MCP JSON-RPC request payload (e.g., {\"jsonrpc\": \"2.0\", \"method\": \"tools/list\", \"id\": 1})",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "type": "object"
+            }
+          }
+        ],
+        "produces": [
+          "text/event-stream"
+        ],
+        "responses": {
+          "200": {
+            "description": "Streamed MCP response in SSE format",
+            "schema": {
+              "type": "string"
+            }
+          },
+          "400": {
+            "description": "Invalid MCP request payload or unsupported method",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "summary": "MCP endpoint for tool execution",
+        "tags": [
+          "MCP API"
+        ]
+      }
+    },
+    "/v1/admin/models/catalogs": {
+      "get": {
+        "description": "Retrieves a paginated list of model catalogs with optional filtering and searching",
+        "parameters": [
+          {
+            "description": "Number of records to return (default: 20, max: 100)",
+            "in": "query",
+            "name": "limit",
+            "type": "integer"
+          },
+          {
+            "description": "Number of records to skip for pagination",
+            "in": "query",
+            "name": "offset",
+            "type": "integer"
+          },
+          {
+            "description": "Sort order: asc or desc (default: desc)",
+            "in": "query",
+            "name": "order",
+            "type": "string"
+          },
+          {
+            "description": "Filter by status: init, filled, updated",
+            "in": "query",
+            "name": "status",
+            "type": "string"
+          },
+          {
+            "description": "Filter by moderation status",
+            "in": "query",
+            "name": "is_moderated",
+            "type": "boolean"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of model catalogs",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid query parameters",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List all model catalogs",
+        "tags": [
+          "Admin Model API"
+        ]
+      }
+    },
+    "/v1/admin/models/catalogs/bulk-toggle": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Enable or disable provider models for specific catalogs or ALL catalogs, with optional exception list. Supports \"enable/disable all except\" patterns globally or scoped to catalogs.",
+        "parameters": [
+          {
+            "description": "Bulk toggle request. If catalog_ids is empty, applies to ALL catalogs. Use except_models to exclude specific models.",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/requestmodels.BulkToggleCatalogsRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Bulk operation result with counts and status",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.BulkOperationResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request - exceeds limits or validation error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "One or more catalog IDs not found (when catalog_ids provided)",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error during bulk operation",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Bulk enable/disable provider models by catalog IDs or all catalogs",
+        "tags": [
+          "Admin Model API"
+        ]
+      }
+    },
+    "/v1/admin/models/catalogs/{model_public_id}": {
+      "get": {
+        "description": "Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes)",
+        "parameters": [
+          {
+            "description": "Model Catalog Public ID (can contain slashes)",
+            "in": "path",
+            "name": "model_public_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Model catalog details",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Model catalog not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get a model catalog entry",
+        "tags": [
+          "Admin Model API"
+        ]
+      },
+      "patch": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Updates metadata for a model catalog entry. Marks it as manually updated to prevent auto-sync overwrites.",
+        "parameters": [
+          {
+            "description": "Model Catalog Public ID (can contain slashes)",
+            "in": "path",
+            "name": "model_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Update payload",
+            "in": "body",
+            "name": "payload",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/requestmodels.UpdateModelCatalogRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Updated model catalog",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request payload",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Model catalog not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Update a model catalog entry",
+        "tags": [
+          "Admin Model API"
+        ]
+      }
+    },
+    "/v1/admin/models/provider-models": {
+      "get": {
+        "description": "Retrieves a paginated list of provider models with optional filtering",
+        "parameters": [
+          {
+            "description": "Number of records to return (default: 20, max: 100)",
+            "in": "query",
+            "name": "limit",
+            "type": "integer"
+          },
+          {
+            "description": "Number of records to skip for pagination",
+            "in": "query",
+            "name": "offset",
+            "type": "integer"
+          },
+          {
+            "description": "Sort order: asc or desc (default: desc)",
+            "in": "query",
+            "name": "order",
+            "type": "string"
+          },
+          {
+            "description": "Filter by provider public ID",
+            "in": "query",
+            "name": "provider_id",
+            "type": "string"
+          },
+          {
+            "description": "Filter by model key",
+            "in": "query",
+            "name": "model_key",
+            "type": "string"
+          },
+          {
+            "description": "Filter by active status",
+            "in": "query",
+            "name": "active",
+            "type": "boolean"
+          },
+          {
+            "description": "Filter by image support",
+            "in": "query",
+            "name": "supports_images",
+            "type": "boolean"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of provider models",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ProviderModelResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid query parameters",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List all provider models",
+        "tags": [
+          "Admin Model API"
+        ]
+      }
+    },
+    "/v1/admin/models/provider-models/bulk-toggle": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Enables or disables provider models with flexible patterns: enable all, disable all, enable all except, or disable all except. Optionally filter by provider.",
+        "parameters": [
+          {
+            "description": "Bulk toggle payload with enable flag, optional provider filter, and exception list",
+            "in": "body",
+            "name": "payload",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/requestmodels.BulkEnableModelsRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Bulk operation result with counts and status",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.BulkOperationResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request payload",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Bulk enable or disable provider models",
+        "tags": [
+          "Admin Model API"
+        ]
+      }
+    },
+    "/v1/admin/models/provider-models/{provider_model_public_id}": {
+      "get": {
+        "description": "Retrieves detailed information about a provider model by its public ID",
+        "parameters": [
+          {
+            "description": "Provider Model Public ID",
+            "in": "path",
+            "name": "provider_model_public_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Provider model details",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ProviderModelResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Provider model not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get a provider model",
+        "tags": [
+          "Admin Model API"
+        ]
+      },
+      "patch": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Updates configuration for a provider model including pricing, limits, and feature flags",
+        "parameters": [
+          {
+            "description": "Provider Model Public ID",
+            "in": "path",
+            "name": "provider_model_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Update payload",
+            "in": "body",
+            "name": "payload",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/requestmodels.UpdateProviderModelRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Updated provider model",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ProviderModelResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request payload",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Provider model not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Update a provider model",
+        "tags": [
+          "Admin Model API"
+        ]
+      }
+    },
+    "/v1/admin/providers": {
+      "get": {
+        "description": "Retrieves all providers with their model counts",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of providers with model counts",
+            "schema": {
+              "items": {
+                "$ref": "#/definitions/modelresponses.ProviderWithModelCountResponse"
+              },
+              "type": "array"
+            }
+          },
+          "500": {
+            "description": "Failed to retrieve providers",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get all providers",
+        "tags": [
+          "Admin Provider API"
+        ]
+      },
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Registers a new provider and synchronizes its available models.",
+        "parameters": [
+          {
+            "description": "Provider registration payload",
+            "in": "body",
+            "name": "payload",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/requestmodels.AddProviderRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Registered provider with synced models",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ProviderWithModelsResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request payload",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Failed to register provider",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Register a provider",
+        "tags": [
+          "Admin Provider API"
+        ]
+      }
+    },
+    "/v1/admin/providers/{provider_public_id}": {
+      "patch": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Updates an existing provider's configuration",
+        "parameters": [
+          {
+            "description": "Provider public ID",
+            "in": "path",
+            "name": "provider_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Provider update payload",
+            "in": "body",
+            "name": "payload",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/requestmodels.UpdateProviderRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Updated provider",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ProviderResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request payload",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Provider not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Failed to update provider",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Update a provider",
+        "tags": [
+          "Admin Provider API"
+        ]
+      }
+    },
+    "/v1/chat/completions": {
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes. Conversation persistence is disabled by default and can be enabled per request (see Storage Options).\n\n**Streaming Mode (stream=true):**\n- Returns Server-Sent Events (SSE) with real-time streaming\n- Streams completion chunks directly from the inference model\n- Final event contains \"[DONE]\" marker\n\n**Non-Streaming Mode (stream=false or omitted):**\n- Returns a single JSON response with the complete completion\n- Standard OpenAI ChatCompletionResponse format\n\n**Storage Options:**\n- `store=true`: Persist the latest input message and assistant response to the active conversation\n- `store_reasoning=true`: Additionally persist reasoning content provided by the model\n- When `store` is omitted or false, the conversation remains read-only\n\n**Features:**\n- Supports all OpenAI ChatCompletionRequest parameters\n- Optional conversation context for conversation persistence\n- User authentication required\n- Direct inference model integration",
+        "parameters": [
+          {
+            "description": "Chat completion request with streaming options and optional conversation",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/chatrequests.ChatCompletionRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json",
+          "text/event-stream"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successful response - SSE format with data: {json} events when stream=true; a single JSON ChatCompletionResponse when stream=false or omitted",
+            "schema": {
+              "type": "string"
+            }
+          },
+          "400": {
+            "description": "Invalid request payload, empty messages, or inference failure",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Create a chat completion",
+        "tags": [
+          "Chat Completions API"
+        ]
+      }
+    },
+    "/v1/conversations": {
+      "get": {
+        "description": "List conversations for the authenticated user with optional referrer filtering.",
+        "parameters": [
+          {
+            "description": "Referrer filter",
+            "in": "query",
+            "name": "referrer",
+            "type": "string"
+          },
+          {
+            "description": "Maximum number of conversations to return",
+            "in": "query",
+            "name": "limit",
+            "type": "integer"
+          },
+          {
+            "description": "Return conversations created after the given numeric ID",
+            "in": "query",
+            "name": "after",
+            "type": "string"
+          },
+          {
+            "description": "Sort order (asc or desc)",
+            "in": "query",
+            "name": "order",
+            "type": "string"
+          },
+          {
+            "description": "Set to 'all' to list conversations across the workspace (requires elevated permissions)",
+            "in": "query",
+            "name": "scope",
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully retrieved conversations",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ConversationListResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request parameters",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List conversations",
+        "tags": [
+          "Conversations API"
+        ]
+      },
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Create a new conversation to store and retrieve conversation state across Response API calls\n\n**Features:**\n- Create conversation with optional metadata (max 16 key-value pairs)\n- Add up to 20 initial items to the conversation\n- Returns conversation ID with `conv_` prefix\n- Supports OpenAI Conversations API format\n\n**Metadata Constraints:**\n- Maximum 16 key-value pairs\n- Keys: max 64 characters\n- Values: max 512 characters",
+        "parameters": [
+          {
+            "description": "Create conversation request with optional items and metadata",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/conversationrequests.CreateConversationRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully created conversation",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ConversationResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request - validation failed or too many items",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error - conversation creation failed",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Create a conversation",
+        "tags": [
+          "Conversations API"
+        ]
+      }
+    },
+    "/v1/conversations/{conv_public_id}": {
+      "delete": {
+        "description": "Delete a conversation (soft delete). Items in the conversation will not be deleted but will be inaccessible.\n\n**Features:**\n- Soft delete (conversation marked as deleted, not physically removed)\n- Items remain in database but become inaccessible\n- Automatic ownership verification\n- Returns deletion confirmation with conversation ID\n\n**Response:**\n- `id`: Deleted conversation ID\n- `object`: Always \"conversation.deleted\"\n- `deleted`: Always true",
+        "parameters": [
+          {
+            "description": "Conversation ID (format: conv_xxxxx)",
+            "in": "path",
+            "name": "conv_public_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully deleted conversation",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ConversationDeletedResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid conversation ID format",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Conversation not found or access denied",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error - deletion failed",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Delete a conversation",
+        "tags": [
+          "Conversations API"
+        ]
+      },
+      "get": {
+        "description": "Retrieve a conversation by ID with ownership verification\n\n**Features:**\n- Retrieves conversation metadata including creation timestamp\n- Automatic ownership verification (user can only access their own conversations)\n- Returns OpenAI-compatible conversation object\n\n**Response Fields:**\n- `id`: Conversation ID with `conv_` prefix\n- `object`: Always \"conversation\"\n- `created_at`: Unix timestamp\n- `metadata`: User-defined key-value pairs",
+        "parameters": [
+          {
+            "description": "Conversation ID (format: conv_xxxxx)",
+            "in": "path",
+            "name": "conv_public_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully retrieved conversation",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ConversationResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid conversation ID format",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Conversation not found or access denied",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get a conversation",
+        "tags": [
+          "Conversations API"
+        ]
+      },
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Update a conversation's metadata while preserving existing items\n\n**Features:**\n- Update metadata key-value pairs\n- Replaces entire metadata object (not merged)\n- Items remain unchanged\n- Automatic ownership verification\n\n**Metadata Constraints:**\n- Maximum 16 key-value pairs\n- Keys: max 64 characters\n- Values: max 512 characters",
+        "parameters": [
+          {
+            "description": "Conversation ID (format: conv_xxxxx)",
+            "in": "path",
+            "name": "conv_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Update conversation request with new metadata",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/conversationrequests.UpdateConversationRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully updated conversation",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ConversationResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request - validation failed or invalid metadata",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Conversation not found or access denied",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error - update failed",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Update a conversation",
+        "tags": [
+          "Conversations API"
+        ]
+      }
+    },
+    "/v1/conversations/{conv_public_id}/items": {
+      "get": {
+        "description": "List all items in a conversation with cursor-based pagination support\n\n**Features:**\n- Cursor-based pagination using item IDs\n- Configurable page size (1-100 items, default 20)\n- Sort order control (ascending or descending)\n- Optional include parameter for additional fields\n- Returns paginated list with navigation cursors\n\n**Pagination:**\n- Use `after` cursor from previous response for next page\n- `has_more` indicates if more items are available\n- `first_id` and `last_id` provide cursor references\n\n**Query Parameters:**\n- `limit`: Number of items (1-100, default 20)\n- `order`: Sort order (\"asc\" or \"desc\", default \"desc\")\n- `after`: Item ID cursor for pagination\n- `include`: Additional fields to include (optional)",
+        "parameters": [
+          {
+            "description": "Conversation ID (format: conv_xxxxx)",
+            "in": "path",
+            "name": "conv_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Item ID cursor to list items after (pagination)",
+            "in": "query",
+            "name": "after",
+            "type": "string"
+          },
+          {
+            "default": 20,
+            "description": "Number of items to return (1-100)",
+            "in": "query",
+            "maximum": 100,
+            "minimum": 1,
+            "name": "limit",
+            "type": "integer"
+          },
+          {
+            "default": "desc",
+            "description": "Sort order: asc or desc",
+            "enum": [
+              "asc",
+              "desc"
+            ],
+            "in": "query",
+            "name": "order",
+            "type": "string"
+          },
+          {
+            "collectionFormat": "csv",
+            "description": "Additional fields to include in response",
+            "in": "query",
+            "items": {
+              "type": "string"
+            },
+            "name": "include",
+            "type": "array"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully retrieved items list",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ItemListResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request - invalid parameters or conversation ID",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Conversation not found or access denied",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error - listing failed",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List conversation items",
+        "tags": [
+          "Conversations API"
+        ]
+      },
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Add items to a conversation. You may add up to 20 items at a time.\n\n**Features:**\n- Bulk item creation (max 20 items per request)\n- Automatic item ID generation with `msg_` prefix\n- Items added to conversation's active branch (default: MAIN)\n- Returns list of created items with generated IDs\n\n**Item Types:**\n- `message`: User or assistant messages\n- `tool_call`: Tool/function call items\n- `tool_response`: Tool/function response items\n- Other OpenAI-compatible item types\n\n**Constraints:**\n- Maximum 20 items per request\n- Each item must have valid type and content\n- Items are immutable after creation",
+        "parameters": [
+          {
+            "description": "Conversation ID (format: conv_xxxxx)",
+            "in": "path",
+            "name": "conv_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "collectionFormat": "csv",
+            "description": "Additional fields to include in response",
+            "in": "query",
+            "items": {
+              "type": "string"
+            },
+            "name": "include",
+            "type": "array"
+          },
+          {
+            "description": "Create items request with array of items",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/conversationrequests.CreateItemsRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully created items",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ConversationItemCreatedResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request - too many items, invalid format, or validation failed",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Conversation not found or access denied",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error - item creation failed",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Create conversation items",
+        "tags": [
+          "Conversations API"
+        ]
+      }
+    },
+    "/v1/conversations/{conv_public_id}/items/{item_id}": {
+      "delete": {
+        "description": "Delete an item from a conversation. The item will be removed from the conversation.\n\n**Features:**\n- Remove specific item from conversation\n- Automatic ownership verification\n- Returns updated conversation object after deletion\n- Items are permanently removed (not soft delete)\n\n**Important:**\n- Deleting an item may affect conversation flow\n- Item IDs are not reused after deletion\n- Other items in conversation remain unchanged\n- Consider creating a new branch instead of deleting items\n\n**Response:**\nReturns the conversation object (not the deleted item)",
+        "parameters": [
+          {
+            "description": "Conversation ID (format: conv_xxxxx)",
+            "in": "path",
+            "name": "conv_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Item ID to delete (format: msg_xxxxx)",
+            "in": "path",
+            "name": "item_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully deleted item, returns conversation",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ConversationResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid conversation ID or item ID format",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Conversation or item not found, or access denied",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error - deletion failed",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Delete a conversation item",
+        "tags": [
+          "Conversations API"
+        ]
+      },
+      "get": {
+        "description": "Retrieve a single item from a conversation by item ID\n\n**Features:**\n- Retrieve specific item by ID\n- Returns complete item with all content\n- Automatic ownership verification via conversation\n- Optional include parameter for additional fields\n\n**Response Fields:**\n- `id`: Item ID with `msg_` prefix\n- `type`: Item type (message, tool_call, etc.)\n- `role`: Role for message items (user, assistant)\n- `content`: Item content array\n- `status`: Item status (completed, incomplete, etc.)\n- `created_at`: Unix timestamp",
+        "parameters": [
+          {
+            "description": "Conversation ID (format: conv_xxxxx)",
+            "in": "path",
+            "name": "conv_public_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Item ID (format: msg_xxxxx)",
+            "in": "path",
+            "name": "item_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "collectionFormat": "csv",
+            "description": "Additional fields to include in response",
+            "in": "query",
+            "items": {
+              "type": "string"
+            },
+            "name": "include",
+            "type": "array"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Successfully retrieved item",
+            "schema": {
+              "$ref": "#/definitions/conversationresponses.ItemResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid conversation ID or item ID format",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized - missing or invalid authentication",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Conversation or item not found, or access denied",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get a conversation item",
+        "tags": [
+          "Conversations API"
+        ]
+      }
+    },
+    "/v1/healthz": {
+      "get": {
+        "description": "Returns the health status of the API server. Used by orchestrators and monitoring systems.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Health status OK",
+            "schema": {
+              "additionalProperties": {
+                "type": "string"
+              },
+              "type": "object"
+            }
+          }
+        },
+        "summary": "Health check endpoint",
+        "tags": [
+          "Server API"
+        ]
+      }
+    },
+    "/v1/models": {
+      "get": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Retrieves a list of available models that can be used for chat completions or other tasks. Returns either a simple model list or a detailed list with provider metadata, depending on the X-PROVIDER-DATA header.",
+        "parameters": [
+          {
+            "description": "Set to 'true' to include provider metadata in response",
+            "enum": [
+              "true",
+              "false"
+            ],
+            "in": "header",
+            "name": "X-PROVIDER-DATA",
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of models with provider metadata (when X-PROVIDER-DATA=true)",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ModelWithProviderResponseList"
+            }
+          },
+          "404": {
+            "description": "Models or providers not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Failed to retrieve models",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List available models",
+        "tags": [
+          "Chat Completions API"
+        ]
+      }
+    },
+    "/v1/models/catalogs/{model_public_id}": {
+      "get": {
+        "description": "Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes like openrouter/nova-lite-v1)",
+        "parameters": [
+          {
+            "description": "Model Catalog Public ID (can contain slashes)",
+            "in": "path",
+            "name": "model_public_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Model catalog details",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
+            }
+          },
+          "400": {
+            "description": "Invalid request",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Model catalog not found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal server error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get a model catalog entry",
+        "tags": [
+          "Model API"
+        ]
+      }
+    },
+    "/v1/models/providers": {
+      "get": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Retrieves a list of available model providers that can be used for inference.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "List of providers",
+            "schema": {
+              "$ref": "#/definitions/modelresponses.ProviderResponseList"
+            }
+          },
+          "500": {
+            "description": "Failed to retrieve providers",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List model providers",
+        "tags": [
+          "Model API"
+        ]
+      }
+    },
+    "/v1/projects": {
+      "get": {
+        "description": "List all projects for the authenticated user",
+        "parameters": [
+          {
+            "description": "Maximum number of projects to return",
+            "in": "query",
+            "name": "limit",
+            "type": "integer"
+          },
+          {
+            "description": "Return projects after the given numeric ID",
+            "in": "query",
+            "name": "after",
+            "type": "string"
+          },
+          {
+            "description": "Sort order (asc or desc)",
+            "in": "query",
+            "name": "order",
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "schema": {
+              "$ref": "#/definitions/projectres.ProjectListResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal Server Error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "List projects",
+        "tags": [
+          "Projects API"
+        ]
+      },
+      "post": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Create a new project for grouping conversations",
+        "parameters": [
+          {
+            "description": "Create project request",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/projectreq.CreateProjectRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "201": {
+            "description": "Created",
+            "schema": {
+              "$ref": "#/definitions/projectres.ProjectResponse"
+            }
+          },
+          "400": {
+            "description": "Bad Request",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal Server Error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Create project",
+        "tags": [
+          "Projects API"
+        ]
+      }
+    },
+    "/v1/projects/{project_id}": {
+      "delete": {
+        "description": "Soft-delete a project",
+        "parameters": [
+          {
+            "description": "Project ID",
+            "in": "path",
+            "name": "project_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "schema": {
+              "$ref": "#/definitions/projectres.ProjectDeletedResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Not Found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal Server Error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Delete project",
+        "tags": [
+          "Projects API"
+        ]
+      },
+      "get": {
+        "description": "Get a single project by ID",
+        "parameters": [
+          {
+            "description": "Project ID",
+            "in": "path",
+            "name": "project_id",
+            "required": true,
+            "type": "string"
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "schema": {
+              "$ref": "#/definitions/projectres.ProjectResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Not Found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal Server Error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Get project",
+        "tags": [
+          "Projects API"
+        ]
+      },
+      "patch": {
+        "consumes": [
+          "application/json"
+        ],
+        "description": "Update project name, instruction, or archived status",
+        "parameters": [
+          {
+            "description": "Project ID",
+            "in": "path",
+            "name": "project_id",
+            "required": true,
+            "type": "string"
+          },
+          {
+            "description": "Update request",
+            "in": "body",
+            "name": "request",
+            "required": true,
+            "schema": {
+              "$ref": "#/definitions/projectreq.UpdateProjectRequest"
+            }
+          }
+        ],
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "schema": {
+              "$ref": "#/definitions/projectres.ProjectResponse"
+            }
+          },
+          "400": {
+            "description": "Bad Request",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "401": {
+            "description": "Unauthorized",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "404": {
+            "description": "Not Found",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          },
+          "500": {
+            "description": "Internal Server Error",
+            "schema": {
+              "$ref": "#/definitions/responses.ErrorResponse"
+            }
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ],
+        "summary": "Update project",
+        "tags": [
+          "Projects API"
+        ]
+      }
+    },
+    "/v1/readyz": {
+      "get": {
+        "description": "Returns the readiness status of the API server. Indicates if the service is ready to accept traffic.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Readiness status ready",
+            "schema": {
+              "additionalProperties": {
+                "type": "string"
+              },
+              "type": "object"
+            }
+          }
+        },
+        "summary": "Readiness check endpoint",
+        "tags": [
+          "Server API"
+        ]
+      }
+    },
+    "/v1/version": {
+      "get": {
+        "description": "Returns the current build version of the API server and environment reload timestamp.",
+        "produces": [
+          "application/json"
+        ],
+        "responses": {
+          "200": {
+            "description": "Version information including version number and environment reload timestamp",
+            "schema": {
+              "additionalProperties": {
+                "type": "string"
+              },
+              "type": "object"
+            }
+          }
+        },
+        "summary": "Get API build version",
+        "tags": [
+          "Server API"
+        ]
+      }
+    }
+  },
+  "securityDefinitions": {
+    "BearerAuth": {
+      "description": "Type \"Bearer\" followed by a space and the JWT token.",
+      "in": "header",
+      "name": "Authorization",
+      "type": "apiKey"
+    }
+  },
+  "swagger": "2.0",
+  "tags": [
+    {
+      "description": "Model Context Protocol tools",
+      "name": "MCP Tools"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/apps/jan-api-gateway/application/docs/swagger.json b/services/llm-api/docs/swagger/swagger.json
similarity index 54%
rename from apps/jan-api-gateway/application/docs/swagger.json
rename to services/llm-api/docs/swagger/swagger.json
index 37284e55..871d09b0 100644
--- a/apps/jan-api-gateway/application/docs/swagger.json
+++ b/services/llm-api/docs/swagger/swagger.json
@@ -1,16 +1,62 @@
 {
     "swagger": "2.0",
     "info": {
-        "description": "This is the API gateway for Jan Server.",
-        "title": "Jan Server",
-        "contact": {},
-        "version": "1.0"
+        "description": "OpenAI-compatible LLM API platform with enterprise authentication, conversation management, and streaming support.",
+        "title": "Jan Server LLM API",
+        "contact": {
+            "name": "Jan Server Team",
+            "url": "https://github.com/janhq/jan-server"
+        },
+        "version": "2.0"
     },
     "basePath": "/",
     "paths": {
-        "/v1/auth/google/callback": {
+        "/auth/api-keys": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Returns all API keys created by the authenticated user. Key values are not returned, only metadata.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Authentication API"
+                ],
+                "summary": "List user's API keys",
+                "responses": {
+                    "200": {
+                        "description": "List of API keys with metadata",
+                        "schema": {
+                            "type": "object"
+                        }
+                    },
+                    "401": {
+                        "description": "Unauthorized - invalid or expired token",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            },
             "post": {
-                "description": "Handles the callback from the Google OAuth2 provider to exchange the authorization code for a token, verify the user, and issue access and refresh tokens.",
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Creates a new API key for the authenticated user. API keys provide programmatic access without requiring user credentials.",
                 "consumes": [
                     "application/json"
                 ],
@@ -20,104 +66,172 @@
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Google OAuth2 Callback",
+                "summary": "Create API key",
                 "parameters": [
                     {
-                        "description": "Request body containing the authorization code and state",
+                        "description": "API key creation request with name and optional scopes",
                         "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest"
+                            "type": "object"
                         }
                     }
                 ],
                 "responses": {
-                    "200": {
-                        "description": "Successfully authenticated and returned tokens",
+                    "201": {
+                        "description": "API key created successfully with key value",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.AccessTokenResponse"
+                            "type": "object"
                         }
                     },
                     "400": {
-                        "description": "Bad request (e.g., invalid state, missing code, or invalid claims)",
+                        "description": "Invalid request - missing required fields",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized (e.g., a user claim is not found or is invalid in the context)",
+                        "description": "Unauthorized - invalid or expired token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/google/login": {
-            "get": {
-                "description": "Redirects the user to the Google OAuth2 authorization page to initiate the login process.",
+        "/auth/api-keys/{id}": {
+            "delete": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Revokes and deletes an API key by ID. Deleted keys can no longer be used for authentication.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Google OAuth2 Login",
+                "summary": "Delete API key",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "API key ID",
+                        "name": "id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
                 "responses": {
-                    "200": {
-                        "description": "redirect url",
+                    "204": {
+                        "description": "API key deleted successfully"
+                    },
+                    "401": {
+                        "description": "Unauthorized - invalid or expired token",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth_google.GoogleLoginUrl"
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "API key not found",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/guest-login": {
-            "post": {
-                "description": "JWT-base Guest Login.",
+        "/auth/callback": {
+            "get": {
+                "description": "Handles the OAuth2 callback from Keycloak and exchanges the authorization code for JWT tokens",
+                "consumes": [
+                    "application/json"
+                ],
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Guest Login",
+                "summary": "Handle Keycloak OAuth2 callback",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Authorization code from Keycloak",
+                        "name": "code",
+                        "in": "query",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "State parameter for CSRF protection",
+                        "name": "state",
+                        "in": "query",
+                        "required": true
+                    }
+                ],
                 "responses": {
                     "200": {
-                        "description": "Successfully refreshed the access token",
+                        "description": "JWT tokens",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "type": "object",
+                            "properties": {
+                                "access_token": {
+                                    "type": "string"
+                                },
+                                "expires_in": {
+                                    "type": "integer"
+                                },
+                                "refresh_token": {
+                                    "type": "string"
+                                },
+                                "token_type": {
+                                    "type": "string"
+                                }
+                            }
                         }
                     },
                     "400": {
-                        "description": "Bad Request (e.g., invalid refresh token)",
+                        "description": "Missing code or state",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized (e.g., expired or missing refresh token)",
+                        "description": "Invalid state parameter",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to exchange code for tokens",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/logout": {
-            "get": {
-                "description": "Use a valid refresh token to obtain a new access token. The refresh token is typically sent in a cookie.",
+        "/auth/guest-login": {
+            "post": {
+                "description": "Creates a temporary guest user account and returns JWT tokens. Guest users have limited access and can be upgraded to full accounts later.",
                 "consumes": [
                     "application/json"
                 ],
@@ -127,60 +241,76 @@
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Refresh an access token",
+                "summary": "Create guest user account",
                 "responses": {
                     "200": {
-                        "description": "Successfully logout"
-                    },
-                    "400": {
-                        "description": "Bad Request (e.g., invalid refresh token)",
+                        "description": "Guest user created with access and refresh tokens",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized (e.g., expired or missing refresh token)",
+                    "500": {
+                        "description": "Internal server error - failed to create guest user",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/me": {
+        "/auth/login": {
             "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
+                "description": "Returns the Keycloak authorization URL so the frontend can redirect users to it. Supports the OAuth2 authorization code flow with PKCE.",
+                "consumes": [
+                    "application/json"
                 ],
-                "description": "Retrieves the profile of the authenticated user based on the provided JWT.",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Get user profile",
+                "summary": "Initiate Keycloak OAuth2 login",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "URL to redirect after successful login",
+                        "name": "redirect_url",
+                        "in": "query"
+                    }
+                ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved user profile",
+                        "description": "Authorization URL and state parameter",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.GetMeResponse"
+                            "type": "object",
+                            "properties": {
+                                "authorization_url": {
+                                    "type": "string"
+                                },
+                                "state": {
+                                    "type": "string"
+                                }
+                            }
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized (e.g., missing or invalid JWT)",
+                    "500": {
+                        "description": "Failed to initiate login",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/auth/refresh-token": {
+        "/auth/logout": {
             "get": {
-                "description": "Use a valid refresh token to obtain a new access token. The refresh token is typically sent in a cookie.",
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Revokes the current access token and clears authentication cookies. After logout, the user must re-authenticate.",
                 "consumes": [
                     "application/json"
                 ],
@@ -190,196 +320,178 @@
                 "tags": [
                     "Authentication API"
                 ],
-                "summary": "Refresh an access token",
+                "summary": "Logout user",
                 "responses": {
                     "200": {
-                        "description": "Successfully refreshed the access token",
+                        "description": "Successfully logged out",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_auth.AccessTokenResponse"
+                            "type": "object"
                         }
                     },
-                    "400": {
-                        "description": "Bad Request (e.g., invalid refresh token)",
+                    "401": {
+                        "description": "Unauthorized - invalid token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized (e.g., expired or missing refresh token)",
+                    "500": {
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/chat/completions": {
-            "post": {
+        "/auth/me": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes without conversation persistence.\n\n**Streaming Mode (stream=true):**\n- Returns Server-Sent Events (SSE) with real-time streaming\n- Streams completion chunks directly from the inference model\n- Final event contains \"[DONE]\" marker\n\n**Non-Streaming Mode (stream=false or omitted):**\n- Returns single JSON response with complete completion\n- Standard OpenAI ChatCompletionResponse format\n\n**Features:**\n- Supports all OpenAI ChatCompletionRequest parameters\n- User authentication required\n- Direct inference model integration\n- No conversation persistence (stateless)",
+                "description": "Returns the authenticated user's profile information including user ID, email, roles, and guest status.",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "application/json",
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Chat Completions API"
-                ],
-                "summary": "Create a chat completion",
-                "parameters": [
-                    {
-                        "description": "Chat completion request with streaming options",
-                        "name": "request",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/openai.ChatCompletionRequest"
-                        }
-                    }
+                    "Authentication API"
                 ],
+                "summary": "Get current user information",
                 "responses": {
                     "200": {
-                        "description": "Successful streaming response (when stream=true) - SSE format with data: {json} events",
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request payload, empty messages, or inference failure",
+                        "description": "User profile information",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - missing or invalid authentication",
+                        "description": "Unauthorized - invalid or expired token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conv/chat/completions": {
+        "/auth/refresh-token": {
             "post": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Generates a model response for the given chat conversation with conversation persistence and management. This is the conversation-aware version of the chat completion API that supports both streaming and non-streaming modes with conversation management and storage options.\n\n**Streaming Mode (stream=true):**\n- Returns Server-Sent Events (SSE) with real-time streaming\n- First event contains conversation metadata\n- Subsequent events contain completion chunks\n- Final event contains \"[DONE]\" marker\n\n**Non-Streaming Mode (stream=false or omitted):**\n- Returns single JSON response with complete completion\n- Includes conversation metadata in response\n\n**Storage Options:**\n- `store=true`: Saves user message and assistant response to conversation\n- `store_reasoning=true`: Includes reasoning content in stored messages\n- `conversation`: ID of existing conversation or empty for new conversation\n\n**Features:**\n- Conversation persistence and history management\n- Extended request format with conversation and storage options\n- User authentication required\n- Automatic conversation creation and management",
+                "description": "Exchanges a valid refresh token for a new access token. Refresh token must be provided in Authorization header or refresh_token cookie.",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "application/json",
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Conversation-aware Chat API"
+                    "Authentication API"
                 ],
-                "summary": "Create a conversation-aware chat completion",
+                "summary": "Refresh access token",
                 "parameters": [
                     {
-                        "description": "Extended chat completion request with streaming, storage, and conversation options",
-                        "name": "request",
+                        "description": "Refresh token (can also be in Authorization header)",
+                        "name": "refresh_token",
                         "in": "body",
-                        "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.ExtendedChatCompletionRequest"
+                            "type": "string"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successful streaming response (when stream=true) - SSE format with data: {json} events",
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request payload or conversation not found",
+                        "description": "New access token and refresh token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - missing or invalid authentication",
+                        "description": "Unauthorized - invalid or expired refresh token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Conversation not found or user not found",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conv/mcp": {
+        "/auth/revoke": {
             "post": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Handles Model Context Protocol (MCP) requests over an HTTP stream for conversation-aware chat functionality. The response is sent as a continuous stream of data with conversation context.",
+                "description": "Revokes a refresh token to invalidate it",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Conversation-aware Chat API"
+                    "Authentication API"
                 ],
-                "summary": "MCP streamable endpoint for conversation-aware chat",
+                "summary": "Revoke Keycloak refresh token",
                 "parameters": [
                     {
-                        "description": "MCP request payload",
+                        "description": "Token to revoke",
                         "name": "request",
                         "in": "body",
                         "required": true,
-                        "schema": {}
+                        "schema": {
+                            "type": "object",
+                            "properties": {
+                                "refresh_token": {
+                                    "type": "string"
+                                }
+                            }
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Streamed response (SSE or chunked transfer)",
+                        "description": "Token revoked successfully",
                         "schema": {
-                            "type": "string"
+                            "type": "object",
+                            "properties": {
+                                "message": {
+                                    "type": "string"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request body",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Keycloak OAuth is not configured",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conv/models": {
-            "get": {
+        "/auth/upgrade": {
+            "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a list of available models that can be used for conversation-aware chat completions. This endpoint provides the same model list as the standard /v1/models endpoint but is specifically designed for conversation-aware chat functionality.",
+                "description": "Converts a guest user account to a permanent account with email/password credentials. Guest flag is removed and user gains full access.",
                 "consumes": [
                     "application/json"
                 ],
@@ -387,92 +499,103 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Conversation-aware Chat API"
+                    "Authentication API"
+                ],
+                "summary": "Upgrade guest to permanent account",
+                "parameters": [
+                    {
+                        "description": "Upgrade request with email and password",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "type": "object"
+                        }
+                    }
                 ],
-                "summary": "List available models for conversation-aware chat",
                 "responses": {
                     "200": {
-                        "description": "Successful response",
+                        "description": "Account upgraded successfully with new tokens",
+                        "schema": {
+                            "type": "object"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request - missing email or password",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.ModelsResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - missing or invalid authentication",
+                        "description": "Unauthorized - not a guest user or invalid token",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations": {
-            "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
+        "/auth/validate": {
+            "post": {
+                "description": "Validates an access token against Keycloak's userinfo endpoint",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
                 ],
-                "description": "Retrieves a paginated list of conversations for the authenticated user with OpenAI-compatible response format.",
                 "tags": [
-                    "Conversations API"
+                    "Authentication API"
                 ],
-                "summary": "List Conversations",
+                "summary": "Validate Keycloak access token",
                 "parameters": [
-                    {
-                        "type": "integer",
-                        "default": 20,
-                        "description": "The maximum number of items to return",
-                        "name": "limit",
-                        "in": "query"
-                    },
-                    {
-                        "type": "string",
-                        "description": "A cursor for use in pagination. The ID of the last object from the previous page",
-                        "name": "after",
-                        "in": "query"
-                    },
                     {
                         "type": "string",
-                        "description": "Order of items (asc/desc)",
-                        "name": "order",
-                        "in": "query"
+                        "description": "Bearer token",
+                        "name": "Authorization",
+                        "in": "header",
+                        "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the list of conversations",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ExtendedConversationResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Bad Request - Invalid pagination parameters",
+                        "description": "Token is valid with user information",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object",
+                            "properties": {
+                                "user_info": {
+                                    "type": "object"
+                                },
+                                "valid": {
+                                    "type": "boolean"
+                                }
+                            }
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Invalid or expired token",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Keycloak OAuth is not configured",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
+            }
+        },
+        "/auth/validate-api-key": {
             "post": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Creates a new conversation for the authenticated user with optional items",
+                "description": "Internal endpoint used by Kong API Gateway to validate API keys. Not intended for direct client use.",
                 "consumes": [
                     "application/json"
                 ],
@@ -480,157 +603,215 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Authentication API"
                 ],
-                "summary": "Create a conversation",
+                "summary": "Validate API key (Kong Plugin)",
                 "parameters": [
                     {
-                        "description": "Create conversation request",
+                        "description": "API key validation request",
                         "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateConversationRequest"
+                            "type": "object"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Created conversation",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request - Bad payload, too many items, or invalid item format",
+                        "description": "API key is valid with user information",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized",
+                        "description": "Invalid API key",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations/{conversation_id}": {
+        "/v1/admin/models/catalogs": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a conversation by its ID with full metadata and title",
+                "description": "Retrieves a paginated list of model catalogs with optional filtering and searching",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Get a conversation",
+                "summary": "List all model catalogs",
                 "parameters": [
+                    {
+                        "type": "integer",
+                        "description": "Number of records to return (default: 20, max: 100)",
+                        "name": "limit",
+                        "in": "query"
+                    },
+                    {
+                        "type": "integer",
+                        "description": "Number of records to skip for pagination",
+                        "name": "offset",
+                        "in": "query"
+                    },
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Sort order: asc or desc (default: desc)",
+                        "name": "order",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Filter by status: init, filled, updated",
+                        "name": "status",
+                        "in": "query"
+                    },
+                    {
+                        "type": "boolean",
+                        "description": "Filter by moderation status",
+                        "name": "is_moderated",
+                        "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Conversation details",
+                        "description": "List of model catalogs",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized",
+                    "400": {
+                        "description": "Invalid query parameters",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/admin/models/catalogs/bulk-toggle": {
+            "post": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "Enable or disable provider models for specific catalogs or ALL catalogs, with optional exception list. Supports \"enable/disable all except\" patterns globally or scoped to catalogs.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Admin Model API"
+                ],
+                "summary": "Bulk enable/disable provider models by catalog IDs or all catalogs",
+                "parameters": [
+                    {
+                        "description": "Bulk toggle request. If catalog_ids is empty, applies to ALL catalogs. Use except_models to exclude specific models.",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.BulkToggleCatalogsRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Bulk operation result with counts and status",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/modelresponses.BulkOperationResponse"
                         }
                     },
-                    "403": {
-                        "description": "Access denied",
+                    "400": {
+                        "description": "Invalid request - exceeds limits or validation error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation not found",
+                        "description": "One or more catalog IDs not found (when catalog_ids provided)",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal server error during bulk operation",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
-            "delete": {
+            }
+        },
+        "/v1/admin/models/catalogs/{model_public_id}": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes a conversation and all its items permanently",
+                "description": "Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes)",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Delete a conversation",
+                "summary": "Get a model catalog entry",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
+                        "description": "Model Catalog Public ID (can contain slashes)",
+                        "name": "model_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Deleted conversation",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.DeletedConversationResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
+                        "description": "Model catalog details",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
-                    "403": {
-                        "description": "Access denied",
+                    "400": {
+                        "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation not found",
+                        "description": "Model catalog not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -641,7 +822,7 @@
                         "BearerAuth": []
                     }
                 ],
-                "description": "Updates conversation title and/or metadata",
+                "description": "Updates metadata for a model catalog entry. Marks it as manually updated to prevent auto-sync overwrites.",
                 "consumes": [
                     "application/json"
                 ],
@@ -649,155 +830,144 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Update a conversation",
+                "summary": "Update a model catalog entry",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
+                        "description": "Model Catalog Public ID (can contain slashes)",
+                        "name": "model_public_id",
                         "in": "path",
                         "required": true
                     },
                     {
-                        "description": "Update conversation request",
-                        "name": "request",
+                        "description": "Update payload",
+                        "name": "payload",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.UpdateConversationRequest"
+                            "$ref": "#/definitions/requestmodels.UpdateModelCatalogRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Updated conversation",
+                        "description": "Updated model catalog",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload or update failed",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
+                        "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation not found",
+                        "description": "Model catalog not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations/{conversation_id}/items": {
+        "/v1/admin/models/provider-models": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Lists all items in a conversation with OpenAI-compatible pagination",
+                "description": "Retrieves a paginated list of provider models with optional filtering",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "List items in a conversation",
+                "summary": "List all provider models",
                 "parameters": [
                     {
-                        "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
+                        "type": "integer",
+                        "description": "Number of records to return (default: 20, max: 100)",
+                        "name": "limit",
+                        "in": "query"
                     },
                     {
                         "type": "integer",
-                        "description": "Number of items to return (1-100)",
-                        "name": "limit",
+                        "description": "Number of records to skip for pagination",
+                        "name": "offset",
                         "in": "query"
                     },
                     {
                         "type": "string",
-                        "description": "Cursor for pagination - ID of the last item from previous page",
-                        "name": "after",
+                        "description": "Sort order: asc or desc (default: desc)",
+                        "name": "order",
                         "in": "query"
                     },
                     {
                         "type": "string",
-                        "description": "Order of items (asc/desc)",
-                        "name": "order",
+                        "description": "Filter by provider public ID",
+                        "name": "provider_id",
+                        "in": "query"
+                    },
+                    {
+                        "type": "string",
+                        "description": "Filter by model key",
+                        "name": "model_key",
+                        "in": "query"
+                    },
+                    {
+                        "type": "boolean",
+                        "description": "Filter by active status",
+                        "name": "active",
+                        "in": "query"
+                    },
+                    {
+                        "type": "boolean",
+                        "description": "Filter by image support",
+                        "name": "supports_images",
                         "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "List of items",
+                        "description": "List of provider models",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderModelResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad Request - Invalid pagination parameters",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
+                        "description": "Invalid query parameters",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Conversation not found",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
+            }
+        },
+        "/v1/admin/models/provider-models/bulk-toggle": {
             "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Adds multiple items to a conversation with OpenAI-compatible format",
+                "description": "Enables or disables provider models with flexible patterns: enable all, disable all, enable all except, or disable all except. Optionally filter by provider.",
                 "consumes": [
                     "application/json"
                 ],
@@ -805,246 +975,196 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Create items in a conversation",
+                "summary": "Bulk enable or disable provider models",
                 "parameters": [
                     {
-                        "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
-                    },
-                    {
-                        "description": "Create items request",
-                        "name": "request",
+                        "description": "Bulk toggle payload with enable flag, optional provider filter, and exception list",
+                        "name": "payload",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.CreateItemsRequest"
+                            "$ref": "#/definitions/requestmodels.BulkEnableModelsRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Created items",
+                        "description": "Bulk operation result with counts and status",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse"
+                            "$ref": "#/definitions/modelresponses.BulkOperationResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload or invalid item format",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Conversation not found",
+                        "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/conversations/{conversation_id}/items/{item_id}": {
+        "/v1/admin/models/provider-models/{provider_model_public_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific item from a conversation with full content details",
+                "description": "Retrieves detailed information about a provider model by its public ID",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Get an item from a conversation",
+                "summary": "Get a provider model",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
-                        "in": "path",
-                        "required": true
-                    },
-                    {
-                        "type": "string",
-                        "description": "Item ID",
-                        "name": "item_id",
+                        "description": "Provider Model Public ID",
+                        "name": "provider_model_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Item details",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
+                        "description": "Provider model details",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderModelResponse"
                         }
                     },
-                    "403": {
-                        "description": "Access denied",
+                    "400": {
+                        "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation or item not found",
+                        "description": "Provider model not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             },
-            "delete": {
+            "patch": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes a specific item from a conversation and returns the deleted item details",
+                "description": "Updates configuration for a provider model including pricing, limits, and feature flags",
+                "consumes": [
+                    "application/json"
+                ],
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Conversations API"
+                    "Admin Model API"
                 ],
-                "summary": "Delete an item from a conversation",
+                "summary": "Update a provider model",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Conversation ID",
-                        "name": "conversation_id",
+                        "description": "Provider Model Public ID",
+                        "name": "provider_model_public_id",
                         "in": "path",
                         "required": true
                     },
                     {
-                        "type": "string",
-                        "description": "Item ID",
-                        "name": "item_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Update payload",
+                        "name": "payload",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.UpdateProviderModelRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Deleted item details",
+                        "description": "Updated provider model",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderModelResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad Request - Deletion failed",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
+                        "description": "Invalid request payload",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Conversation or item not found",
+                        "description": "Provider model not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/mcp": {
-            "post": {
+        "/v1/admin/providers": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Handles Model Context Protocol (MCP) requests over an HTTP stream. The response is sent as a continuous stream of data.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Retrieves all providers with their model counts",
                 "produces": [
-                    "text/event-stream"
+                    "application/json"
                 ],
                 "tags": [
-                    "Chat Completions API"
-                ],
-                "summary": "MCP streamable endpoint",
-                "parameters": [
-                    {
-                        "description": "MCP request payload",
-                        "name": "request",
-                        "in": "body",
-                        "required": true,
-                        "schema": {}
-                    }
+                    "Admin Provider API"
                 ],
+                "summary": "Get all providers",
                 "responses": {
                     "200": {
-                        "description": "Streamed response (SSE or chunked transfer)",
+                        "description": "List of providers with model counts",
                         "schema": {
-                            "type": "string"
+                            "type": "array",
+                            "items": {
+                                "$ref": "#/definitions/modelresponses.ProviderWithModelCountResponse"
+                            }
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to retrieve providers",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/models": {
-            "get": {
+            },
+            "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a list of available models that can be used for chat completions or other tasks.",
+                "description": "Registers a new provider and synchronizes its available models.",
                 "consumes": [
                     "application/json"
                 ],
@@ -1052,250 +1172,234 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Chat Completions API"
+                    "Admin Provider API"
+                ],
+                "summary": "Register a provider",
+                "parameters": [
+                    {
+                        "description": "Provider registration payload",
+                        "name": "payload",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.AddProviderRequest"
+                        }
+                    }
                 ],
-                "summary": "List available models",
                 "responses": {
                     "200": {
-                        "description": "Successful response",
+                        "description": "Registered provider with synced models",
+                        "schema": {
+                            "$ref": "#/definitions/modelresponses.ProviderWithModelsResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request payload",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to register provider",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1.ModelsResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/admin_api_keys": {
-            "get": {
+        "/v1/admin/providers/{provider_public_id}": {
+            "patch": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a paginated list of all admin API keys for the authenticated organization.",
+                "description": "Updates an existing provider's configuration.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Admin Provider API"
                 ],
-                "summary": "List Admin API Keys",
+                "summary": "Update a provider",
                 "parameters": [
                     {
-                        "type": "integer",
-                        "default": 20,
-                        "description": "The maximum number of items to return",
-                        "name": "limit",
-                        "in": "query"
+                        "type": "string",
+                        "description": "Provider public ID",
+                        "name": "provider_public_id",
+                        "in": "path",
+                        "required": true
                     },
                     {
-                        "type": "string",
-                        "description": "A cursor for use in pagination. The ID of the last object from the previous page",
-                        "name": "after",
-                        "in": "query"
+                        "description": "Provider update payload",
+                        "name": "payload",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requestmodels.UpdateProviderRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the list of admin API keys",
+                        "description": "Updated provider",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse"
+                            "$ref": "#/definitions/modelresponses.ProviderResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                    "400": {
+                        "description": "Invalid request payload",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Provider not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Failed to update provider",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
+            }
+        },
+        "/v1/chat/completions": {
             "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new admin API key for an organization. Requires a valid admin API key in the Authorization header.",
+                "description": "Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes; conversation persistence is opt-in via the storage options below.\n\n**Streaming Mode (stream=true):**\n- Returns Server-Sent Events (SSE) with real-time streaming\n- Streams completion chunks directly from the inference model\n- Final event contains \"[DONE]\" marker\n\n**Non-Streaming Mode (stream=false or omitted):**\n- Returns single JSON response with complete completion\n- Standard OpenAI ChatCompletionResponse format\n\n**Storage Options:**\n- `store=true`: Persist the latest input message and assistant response to the active conversation\n- `store_reasoning=true`: Additionally persist reasoning content provided by the model\n- When `store` is omitted or false, the conversation remains read-only\n\n**Features:**\n- Supports all OpenAI ChatCompletionRequest parameters\n- Optional conversation context for conversation persistence\n- User authentication required\n- Direct inference model integration",
                 "consumes": [
                     "application/json"
                 ],
                 "produces": [
-                    "application/json"
+                    "application/json",
+                    "text/event-stream"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Chat Completions API"
                 ],
-                "summary": "Create Admin API Key",
+                "summary": "Create a chat completion",
                 "parameters": [
                     {
-                        "description": "API key creation request",
-                        "name": "body",
+                        "description": "Chat completion request with streaming options and optional conversation",
+                        "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest"
+                            "$ref": "#/definitions/chatrequests.ChatCompletionRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully created admin API key",
+                        "description": "Successful response - SSE stream of data: {json} events when stream=true; a single JSON ChatCompletionResponse when stream=false or omitted",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
+                            "type": "string"
                         }
                     },
                     "400": {
-                        "description": "Bad request - invalid payload",
+                        "description": "Invalid request payload, empty messages, or inference failure",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/admin_api_keys/{id}": {
+        "/v1/conversations": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific admin API key by its ID.",
+                "description": "List conversations for the authenticated user with optional referrer filtering.",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Get Admin API Key",
+                "summary": "List conversations",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "ID of the admin API key",
-                        "name": "id",
-                        "in": "path",
-                        "required": true
-                    }
-                ],
-                "responses": {
-                    "200": {
-                        "description": "Successfully retrieved the admin API key",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
+                        "description": "Referrer filter",
+                        "name": "referrer",
+                        "in": "query"
                     },
-                    "404": {
-                        "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    }
-                }
-            },
-            "delete": {
-                "security": [
                     {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Deletes an admin API key by its ID.",
-                "tags": [
-                    "Administration API"
-                ],
-                "summary": "Delete Admin API Key",
-                "parameters": [
+                        "type": "integer",
+                        "description": "Maximum number of conversations to return",
+                        "name": "limit",
+                        "in": "query"
+                    },
                     {
                         "type": "string",
-                        "description": "ID of the admin API key to delete",
-                        "name": "id",
-                        "in": "path",
-                        "required": true
-                    }
-                ],
-                "responses": {
-                    "200": {
-                        "description": "Successfully deleted the admin API key",
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
+                        "description": "Return conversations created after the given numeric ID",
+                        "name": "after",
+                        "in": "query"
                     },
-                    "404": {
-                        "description": "Not Found - API key with the given ID does not exist or does not belong to the organization",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    }
-                }
-            }
-        },
-        "/v1/organization/invites": {
-            "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Retrieves a paginated list of invites for the current organization.",
-                "tags": [
-                    "Administration API"
-                ],
-                "summary": "List Organization Invites",
-                "parameters": [
                     {
                         "type": "string",
-                        "description": "Cursor pointing to a record after which to fetch results",
-                        "name": "after",
+                        "description": "Sort order (asc or desc)",
+                        "name": "order",
                         "in": "query"
                     },
                     {
-                        "type": "integer",
-                        "description": "Maximum number of results to return",
-                        "name": "limit",
+                        "type": "string",
+                        "description": "Set to 'all' to list conversations across the workspace (requires elevated permissions)",
+                        "name": "scope",
                         "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved list of invites",
+                        "description": "Successfully retrieved conversations",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_organization_invites_InviteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationListResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid or missing query parameter",
+                        "description": "Invalid request parameters",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1306,7 +1410,7 @@
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new invite for a user to join the organization.",
+                "description": "Create a new conversation to store and retrieve conversation state across Response API calls\n\n**Features:**\n- Create conversation with optional metadata (max 16 key-value pairs)\n- Add up to 20 initial items to the conversation\n- Returns conversation ID with `conv_` prefix\n- Supports OpenAI Conversations API format\n\n**Metadata Constraints:**\n- Maximum 16 key-value pairs\n- Keys: max 64 characters\n- Values: max 512 characters",
                 "consumes": [
                     "application/json"
                 ],
@@ -1314,143 +1418,169 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Create Invite",
+                "summary": "Create a conversation",
                 "parameters": [
                     {
-                        "description": "Invite request payload",
-                        "name": "invite",
+                        "description": "Create conversation request with optional items and metadata",
+                        "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.CreateInviteUserRequest"
+                            "$ref": "#/definitions/conversationrequests.CreateConversationRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully created invite",
+                        "description": "Successfully created conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload or user already exists",
+                        "description": "Invalid request - validation failed or too many items",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal server error - conversation creation failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/invites/verification": {
-            "post": {
+        "/v1/conversations/{conv_public_id}": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Verifies an invitation code, checks expiration, registers the user if necessary, and assigns project memberships.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Retrieve a conversation by ID with ownership verification\n\n**Features:**\n- Retrieves conversation metadata including creation timestamp\n- Automatic ownership verification (user can only access their own conversations)\n- Returns OpenAI-compatible conversation object\n\n**Response Fields:**\n- `id`: Conversation ID with `conv_` prefix\n- `object`: Always \"conversation\"\n- `created_at`: Unix timestamp\n- `metadata`: User-defined key-value pairs",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Verify Invite",
+                "summary": "Get a conversation",
                 "parameters": [
                     {
-                        "description": "Verification request payload",
-                        "name": "verification",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.VerifyInviteUserRequest"
-                        }
+                        "type": "string",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully verified invite",
+                        "description": "Successfully retrieved conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid or expired invite code",
+                        "description": "Invalid conversation ID format",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Conversation not found or access denied",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/organization/invites/{invite_id}": {
-            "get": {
+            },
+            "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific invite by its ID.",
+                "description": "Update a conversation's metadata while preserving existing items\n\n**Features:**\n- Update metadata key-value pairs\n- Replaces entire metadata object (not merged)\n- Items remain unchanged\n- Automatic ownership verification\n\n**Metadata Constraints:**\n- Maximum 16 key-value pairs\n- Keys: max 64 characters\n- Values: max 512 characters",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Retrieve Invite",
+                "summary": "Update a conversation",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Public ID of the invite",
-                        "name": "invite_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
                         "in": "path",
                         "required": true
+                    },
+                    {
+                        "description": "Update conversation request with new metadata",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/conversationrequests.UpdateConversationRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved invite",
+                        "description": "Successfully updated conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request - validation failed or invalid metadata",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Invite not found",
+                        "description": "Conversation not found or access denied",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error - update failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1461,98 +1591,146 @@
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes a specific invite by its ID. Only organization owners can delete invites.",
+                "description": "Delete a conversation (soft delete). Items in the conversation will not be deleted but will be inaccessible.\n\n**Features:**\n- Soft delete (conversation marked as deleted, not physically removed)\n- Items remain in database but become inaccessible\n- Automatic ownership verification\n- Returns deletion confirmation with conversation ID\n\n**Response:**\n- `id`: Deleted conversation ID\n- `object`: Always \"conversation.deleted\"\n- `deleted`: Always true",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Delete Invite",
+                "summary": "Delete a conversation",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Public ID of the invite",
-                        "name": "invite_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully deleted invite",
+                        "description": "Successfully deleted conversation",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.DeleteResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationDeletedResponse"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                    "400": {
+                        "description": "Invalid conversation ID format",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
-                    "403": {
-                        "description": "Forbidden - only owners can delete invites",
+                    "401": {
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Invite not found",
+                        "description": "Conversation not found or access denied",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error - deletion failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects": {
+        "/v1/conversations/{conv_public_id}/items": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a paginated list of all projects for the authenticated organization.",
+                "description": "List all items in a conversation with cursor-based pagination support\n\n**Features:**\n- Cursor-based pagination using item IDs\n- Configurable page size (1-100 items, default 20)\n- Sort order control (ascending or descending)\n- Optional include parameter for additional fields\n- Returns paginated list with navigation cursors\n\n**Pagination:**\n- Use `after` cursor from previous response for next page\n- `has_more` indicates if more items are available\n- `first_id` and `last_id` provide cursor references\n\n**Query Parameters:**\n- `limit`: Number of items (1-100, default 20)\n- `order`: Sort order (\"asc\" or \"desc\", default \"desc\")\n- `after`: Item ID cursor for pagination\n- `include`: Additional fields to include (optional)",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "List Projects",
+                "summary": "List conversation items",
                 "parameters": [
                     {
+                        "type": "string",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Item ID cursor to list items after (pagination)",
+                        "name": "after",
+                        "in": "query"
+                    },
+                    {
+                        "maximum": 100,
+                        "minimum": 1,
                         "type": "integer",
                         "default": 20,
-                        "description": "The maximum number of items to return",
+                        "description": "Number of items to return (1-100)",
                         "name": "limit",
                         "in": "query"
                     },
                     {
+                        "enum": [
+                            "asc",
+                            "desc"
+                        ],
                         "type": "string",
-                        "description": "A cursor for use in pagination. The ID of the last object from the previous page",
-                        "name": "after",
+                        "default": "desc",
+                        "description": "Sort order: asc or desc",
+                        "name": "order",
                         "in": "query"
                     },
                     {
-                        "type": "string",
-                        "description": "Whether to include archived projects.",
-                        "name": "include_archived",
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "collectionFormat": "csv",
+                        "description": "Additional fields to include in response",
+                        "name": "include",
                         "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the list of projects",
+                        "description": "Successfully retrieved items list",
+                        "schema": {
+                            "$ref": "#/definitions/conversationresponses.ItemListResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request - invalid parameters or conversation ID",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectListResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Conversation not found or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error - listing failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -1563,7 +1741,7 @@
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new project for an organization.",
+                "description": "Add items to a conversation. You may add up to 20 items at a time.\n\n**Features:**\n- Bulk item creation (max 20 items per request)\n- Automatic item ID generation with `msg_` prefix\n- Items added to conversation's active branch (default: MAIN)\n- Returns list of created items with generated IDs\n\n**Item Types:**\n- `message`: User or assistant messages\n- `tool_call`: Tool/function call items\n- `tool_response`: Tool/function response items\n- Other OpenAI-compatible item types\n\n**Constraints:**\n- Maximum 20 items per request\n- Each item must have valid type and content\n- Items are immutable after creation",
                 "consumes": [
                     "application/json"
                 ],
@@ -1571,263 +1749,344 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Create Project",
+                "summary": "Create conversation items",
                 "parameters": [
                     {
-                        "description": "Project creation request",
-                        "name": "body",
+                        "type": "string",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "collectionFormat": "csv",
+                        "description": "Additional fields to include in response",
+                        "name": "include",
+                        "in": "query"
+                    },
+                    {
+                        "description": "Create items request with array of items",
+                        "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest"
+                            "$ref": "#/definitions/conversationrequests.CreateItemsRequest"
                         }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully created project",
+                        "description": "Successfully created items",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationItemCreatedResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad request - invalid payload",
+                        "description": "Invalid request - too many items, invalid format, or validation failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "404": {
+                        "description": "Conversation not found or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal Server Error",
+                        "description": "Internal server error - item creation failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects/{project_id}": {
+        "/v1/conversations/{conv_public_id}/items/{item_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves a specific project by its ID.",
+                "description": "Retrieve a single item from a conversation by item ID\n\n**Features:**\n- Retrieve specific item by ID\n- Returns complete item with all content\n- Automatic ownership verification via conversation\n- Optional include parameter for additional fields\n\n**Response Fields:**\n- `id`: Item ID with `msg_` prefix\n- `type`: Item type (message, tool_call, etc.)\n- `role`: Role for message items (user, assistant)\n- `content`: Item content array\n- `status`: Item status (completed, incomplete, etc.)\n- `created_at`: Unix timestamp",
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Get Project",
+                "summary": "Get a conversation item",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "ID of the project",
-                        "name": "project_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Item ID (format: msg_xxxxx)",
+                        "name": "item_id",
                         "in": "path",
                         "required": true
+                    },
+                    {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        },
+                        "collectionFormat": "csv",
+                        "description": "Additional fields to include in response",
+                        "name": "include",
+                        "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully retrieved the project",
+                        "description": "Successfully retrieved item",
+                        "schema": {
+                            "$ref": "#/definitions/conversationresponses.ItemResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid conversation ID or item ID format",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Not Found - project with the given ID does not exist or does not belong to the organization",
+                        "description": "Conversation or item not found, or access denied",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             },
-            "post": {
+            "delete": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Updates a specific project by its ID.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Delete an item from a conversation. The item will be removed from the conversation.\n\n**Features:**\n- Remove specific item from conversation\n- Automatic ownership verification\n- Returns updated conversation object after deletion\n- Items are permanently removed (not soft delete)\n\n**Important:**\n- Deleting an item may affect conversation flow\n- Item IDs are not reused after deletion\n- Other items in conversation remain unchanged\n- Consider creating a new branch instead of deleting items\n\n**Response:**\nReturns the conversation object (not the deleted item)",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Conversations API"
                 ],
-                "summary": "Update Project",
+                "summary": "Delete a conversation item",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "ID of the project to update",
-                        "name": "project_id",
+                        "description": "Conversation ID (format: conv_xxxxx)",
+                        "name": "conv_public_id",
                         "in": "path",
                         "required": true
                     },
                     {
-                        "description": "Project update request",
-                        "name": "body",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest"
-                        }
+                        "type": "string",
+                        "description": "Item ID to delete (format: msg_xxxxx)",
+                        "name": "item_id",
+                        "in": "path",
+                        "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully updated the project",
+                        "description": "Successfully deleted item, returns conversation",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/conversationresponses.ConversationResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad request - invalid payload",
+                        "description": "Invalid conversation ID or item ID format",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                        "description": "Unauthorized - missing or invalid authentication",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Not Found - project with the given ID does not exist",
+                        "description": "Conversation or item not found, or access denied",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error - deletion failed",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects/{project_id}/archive": {
-            "post": {
+        "/v1/healthz": {
+            "get": {
+                "description": "Returns the health status of the API server. Used by orchestrators and monitoring systems.",
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Server API"
+                ],
+                "summary": "Health check endpoint",
+                "responses": {
+                    "200": {
+                        "description": "Health status OK",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/models": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Archives a specific project by its ID, making it inactive.",
+                "description": "Retrieves a list of available models that can be used for chat completions or other tasks. Returns either a simple model list or a detailed list with provider metadata, depending on the X-PROVIDER-DATA header.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
                 "tags": [
-                    "Administration API"
+                    "Chat Completions API"
                 ],
-                "summary": "Archive Project",
+                "summary": "List available models",
                 "parameters": [
                     {
+                        "enum": [
+                            "true",
+                            "false"
+                        ],
                         "type": "string",
-                        "description": "ID of the project to archive",
-                        "name": "project_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Set to 'true' to include provider metadata in response",
+                        "name": "X-PROVIDER-DATA",
+                        "in": "header"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Successfully archived the project",
+                        "description": "List of models with provider metadata (when X-PROVIDER-DATA=true)",
                         "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
+                            "$ref": "#/definitions/modelresponses.ModelWithProviderResponseList"
                         }
                     },
-                    "401": {
-                        "description": "Unauthorized - invalid or missing API key",
+                    "404": {
+                        "description": "Models or providers not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
-                    "404": {
-                        "description": "Not Found - project with the given ID does not exist",
+                    "500": {
+                        "description": "Failed to retrieve models",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/organization/projects/{project_public_id}/api_keys": {
+        "/v1/models/catalogs/{model_public_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "List API keys for a specific project.",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes like openrouter/nova-lite-v1)",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Model API"
                 ],
-                "summary": "List new project API key",
+                "summary": "Get a model catalog entry",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Project Public ID",
-                        "name": "project_public_id",
+                        "description": "Model Catalog Public ID (can contain slashes)",
+                        "name": "model_public_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "API key created successfully",
+                        "description": "Model catalog details",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
+                            "$ref": "#/definitions/modelresponses.ModelCatalogResponse"
                         }
                     },
                     "400": {
-                        "description": "Bad request, e.g., invalid payload or missing IDs",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized, e.g., invalid or missing token",
+                        "description": "Invalid request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Not Found, e.g., project or organization not found",
+                        "description": "Model catalog not found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
                         "description": "Internal server error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            },
-            "post": {
+            }
+        },
+        "/v1/models/providers": {
+            "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new API key for a specific project.",
+                "description": "Retrieves a list of available model providers that can be used for inference.",
                 "consumes": [
                     "application/json"
                 ],
@@ -1835,69 +2094,88 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Administration API"
+                    "Model API"
+                ],
+                "summary": "List model providers",
+                "responses": {
+                    "200": {
+                        "description": "List of providers",
+                        "schema": {
+                            "$ref": "#/definitions/modelresponses.ProviderResponseList"
+                        }
+                    },
+                    "500": {
+                        "description": "Failed to retrieve providers",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/projects": {
+            "get": {
+                "security": [
+                    {
+                        "BearerAuth": []
+                    }
+                ],
+                "description": "List all projects for the authenticated user",
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Projects API"
                 ],
-                "summary": "Create a new project API key",
+                "summary": "List projects",
                 "parameters": [
+                    {
+                        "type": "integer",
+                        "description": "Maximum number of projects to return",
+                        "name": "limit",
+                        "in": "query"
+                    },
                     {
                         "type": "string",
-                        "description": "Project Public ID",
-                        "name": "project_public_id",
-                        "in": "path",
-                        "required": true
+                        "description": "Return projects after the given numeric ID",
+                        "name": "after",
+                        "in": "query"
                     },
                     {
-                        "description": "Request body for creating an API key",
-                        "name": "requestBody",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest"
-                        }
+                        "type": "string",
+                        "description": "Sort order (asc or desc)",
+                        "name": "order",
+                        "in": "query"
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "API key created successfully",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Bad request, e.g., invalid payload or missing IDs",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/projectres.ProjectListResponse"
                         }
                     },
                     "401": {
-                        "description": "Unauthorized, e.g., invalid or missing token",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Not Found, e.g., project or organization not found",
+                        "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/responses": {
+            },
             "post": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Creates a new LLM response for the given input. Supports multiple input types including text, images, files, web search, and more.\n\n**Supported Input Types:**\n- `text`: Plain text input\n- `image`: Image input (URL or base64)\n- `file`: File input by file ID\n- `web_search`: Web search input\n- `file_search`: File search input\n- `streaming`: Streaming input\n- `function_calls`: Function calls input\n- `reasoning`: Reasoning input\n\n**Example Request:**\n```json\n{\n\"model\": \"gpt-4\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"max_tokens\": 100,\n\"temperature\": 0.7,\n\"stream\": false,\n\"background\": false\n}\n```\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status\n- `input`: Input data\n- `output`: Generated output\n\n**Example Response:**\n```json\n{\n\"jan_status\": \"000000\",\n\"id\": \"resp_1234567890\",\n\"object\": \"response\",\n\"created\": 1234567890,\n\"model\": \"gpt-4\",\n\"status\": \"completed\",\n\"input\": {\n\"type\": \"text\",\n\"text\": \"Hello, how are you?\"\n},\n\"output\": {\n\"type\": \"text\",\n\"text\": {\n\"value\": \"I'm doing well, thank you!\"\n}\n}\n}\n```\n\n**Response Status:**\n- `completed`: Response generation finished successfully\n- `processing`: Response is being generated\n- `failed`: Response generation failed\n- `cancelled`: Response was cancelled",
+                "description": "Create a new project for grouping conversations",
                 "consumes": [
                     "application/json"
                 ],
@@ -1905,128 +2183,95 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Create a response",
+                "summary": "Create project",
                 "parameters": [
                     {
-                        "description": "Request payload containing model, input, and generation parameters",
+                        "description": "Create project request",
                         "name": "request",
                         "in": "body",
                         "required": true,
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.CreateResponseRequest"
+                            "$ref": "#/definitions/projectreq.CreateProjectRequest"
                         }
                     }
                 ],
                 "responses": {
-                    "200": {
-                        "description": "Created response",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
-                        }
-                    },
-                    "202": {
-                        "description": "Response accepted for background processing",
+                    "201": {
+                        "description": "Created",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
+                            "$ref": "#/definitions/projectres.ProjectResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request payload",
+                        "description": "Bad Request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "422": {
-                        "description": "Validation error",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "429": {
-                        "description": "Rate limit exceeded",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/responses/{response_id}": {
+        "/v1/projects/{project_id}": {
             "get": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Retrieves an LLM response by its ID. Returns the complete response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status\n- `input`: Input data\n- `output`: Generated output",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Get a single project by ID",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Get a response",
+                "summary": "Get project",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Unique identifier of the response",
-                        "name": "response_id",
+                        "description": "Project ID",
+                        "name": "project_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Response details",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/projectres.ProjectResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Response not found",
+                        "description": "Not Found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
@@ -2037,74 +2282,57 @@
                         "BearerAuth": []
                     }
                 ],
-                "description": "Deletes an LLM response by its ID. Returns the deleted response object with embedded structure where all fields are at the top level.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status (will be \"cancelled\")\n- `input`: Input data\n- `cancelled_at`: Cancellation timestamp",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Soft-delete a project",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Delete a response",
+                "summary": "Delete project",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Unique identifier of the response",
-                        "name": "response_id",
+                        "description": "Project ID",
+                        "name": "project_id",
                         "in": "path",
                         "required": true
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Deleted response",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/projectres.ProjectDeletedResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Response not found",
+                        "description": "Not Found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
-            }
-        },
-        "/v1/responses/{response_id}/cancel": {
-            "post": {
+            },
+            "patch": {
                 "security": [
                     {
                         "BearerAuth": []
                     }
                 ],
-                "description": "Cancels a running LLM response that was created with background=true. Only responses that are currently processing can be cancelled.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `id`: Response identifier\n- `object`: Object type (\"response\")\n- `created`: Unix timestamp\n- `model`: Model used\n- `status`: Response status (will be \"cancelled\")\n- `input`: Input data\n- `cancelled_at`: Cancellation timestamp",
+                "description": "Update project name, instruction, or archived status",
                 "consumes": [
                     "application/json"
                 ],
@@ -2112,138 +2340,79 @@
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
+                    "Projects API"
                 ],
-                "summary": "Cancel a response",
+                "summary": "Update project",
                 "parameters": [
                     {
                         "type": "string",
-                        "description": "Unique identifier of the response to cancel",
-                        "name": "response_id",
+                        "description": "Project ID",
+                        "name": "project_id",
                         "in": "path",
                         "required": true
+                    },
+                    {
+                        "description": "Update request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/projectreq.UpdateProjectRequest"
+                        }
                     }
                 ],
                 "responses": {
                     "200": {
-                        "description": "Response cancelled successfully",
+                        "description": "OK",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response"
+                            "$ref": "#/definitions/projectres.ProjectResponse"
                         }
                     },
                     "400": {
-                        "description": "Invalid request or response cannot be cancelled",
+                        "description": "Bad Request",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "401": {
                         "description": "Unauthorized",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "404": {
-                        "description": "Response not found",
+                        "description": "Not Found",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     },
                     "500": {
-                        "description": "Internal server error",
+                        "description": "Internal Server Error",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "$ref": "#/definitions/responses.ErrorResponse"
                         }
                     }
                 }
             }
         },
-        "/v1/responses/{response_id}/input_items": {
+        "/v1/readyz": {
             "get": {
-                "security": [
-                    {
-                        "BearerAuth": []
-                    }
-                ],
-                "description": "Retrieves a paginated list of input items for a response. Supports cursor-based pagination for efficient retrieval of large datasets.\n\n**Response Format:**\nThe response uses embedded structure where all fields are at the top level:\n- `jan_status`: Jan API status code (optional)\n- `first_id`: First item ID for pagination (optional)\n- `last_id`: Last item ID for pagination (optional)\n- `has_more`: Whether more items are available (optional)\n- `id`: Input item identifier\n- `object`: Object type (\"input_item\")\n- `created`: Unix timestamp\n- `type`: Input type\n- `text`: Text content (for text type)\n- `image`: Image content (for image type)\n- `file`: File content (for file type)\n\n**Example Response:**\n```json\n{\n\"jan_status\": \"000000\",\n\"first_id\": \"input_123\",\n\"last_id\": \"input_456\",\n\"has_more\": false,\n\"id\": \"input_1234567890\",\n\"object\": \"input_item\",\n\"created\": 1234567890,\n\"type\": \"text\",\n\"text\": \"Hello, world!\"\n}\n```",
-                "consumes": [
-                    "application/json"
-                ],
+                "description": "Returns the readiness status of the API server. Indicates if the service is ready to accept traffic.",
                 "produces": [
                     "application/json"
                 ],
                 "tags": [
-                    "Responses API"
-                ],
-                "summary": "List input items",
-                "parameters": [
-                    {
-                        "type": "string",
-                        "description": "Unique identifier of the response",
-                        "name": "response_id",
-                        "in": "path",
-                        "required": true
-                    },
-                    {
-                        "type": "integer",
-                        "description": "Maximum number of items to return (default: 20, max: 100)",
-                        "name": "limit",
-                        "in": "query"
-                    },
-                    {
-                        "type": "string",
-                        "description": "Cursor for pagination - return items after this ID",
-                        "name": "after",
-                        "in": "query"
-                    },
-                    {
-                        "type": "string",
-                        "description": "Cursor for pagination - return items before this ID",
-                        "name": "before",
-                        "in": "query"
-                    }
+                    "Server API"
                 ],
+                "summary": "Readiness check endpoint",
                 "responses": {
                     "200": {
-                        "description": "List of input items",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ListInputItemsResponse"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request or pagination parameters",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "401": {
-                        "description": "Unauthorized",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "403": {
-                        "description": "Access denied",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "404": {
-                        "description": "Response not found",
-                        "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
-                        }
-                    },
-                    "500": {
-                        "description": "Internal server error",
+                        "description": "Readiness status ready",
                         "schema": {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse"
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
                         }
                     }
                 }
@@ -2251,7 +2420,7 @@
         },
         "/v1/version": {
             "get": {
-                "description": "Returns the current build version of the API server.",
+                "description": "Returns the current build version of the API server and environment reload timestamp.",
                 "produces": [
                     "application/json"
                 ],
@@ -2261,7 +2430,7 @@
                 "summary": "Get API build version",
                 "responses": {
                     "200": {
-                        "description": "version info",
+                        "description": "Version information including version number and environment reload timestamp",
                         "schema": {
                             "type": "object",
                             "additionalProperties": {
@@ -2274,126 +2443,30 @@
         }
     },
     "definitions": {
-        "app_interfaces_http_routes_v1.Model": {
+        "chatrequests.ChatCompletionRequest": {
             "type": "object",
             "properties": {
-                "created": {
-                    "type": "integer"
+                "chat_template_kwargs": {
+                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
+                    "type": "object",
+                    "additionalProperties": {}
                 },
-                "id": {
-                    "type": "string"
+                "conversation": {
+                    "description": "Conversation can be either a string (conversation ID) or a conversation object\nItems from this conversation are prepended to Messages for this response request.\nInput items and output items from this response are automatically added to this conversation after completion.",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/chatrequests.ConversationReference"
+                        }
+                    ]
                 },
-                "object": {
-                    "type": "string"
+                "frequency_penalty": {
+                    "type": "number"
                 },
-                "owned_by": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1.ModelsResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1.Model"
-                    }
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth.AccessTokenResponse": {
-            "type": "object",
-            "properties": {
-                "access_token": {
-                    "type": "string"
-                },
-                "expires_in": {
-                    "type": "integer"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth.GetMeResponse": {
-            "type": "object",
-            "properties": {
-                "email": {
-                    "type": "string"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "name": {
-                    "type": "string"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.AccessTokenResponse": {
-            "type": "object",
-            "properties": {
-                "access_token": {
-                    "type": "string"
-                },
-                "expires_in": {
-                    "type": "integer"
-                },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.GoogleCallbackRequest": {
-            "type": "object",
-            "required": [
-                "code"
-            ],
-            "properties": {
-                "code": {
-                    "type": "string"
-                },
-                "state": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_auth_google.GoogleLoginUrl": {
-            "type": "object",
-            "properties": {
-                "object": {
-                    "type": "string"
-                },
-                "url": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conv.ExtendedChatCompletionRequest": {
-            "type": "object",
-            "properties": {
-                "chat_template_kwargs": {
-                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "conversation": {
-                    "type": "string"
-                },
-                "frequency_penalty": {
-                    "type": "number"
-                },
-                "function_call": {
-                    "description": "Deprecated: use ToolChoice instead."
-                },
-                "functions": {
-                    "description": "Deprecated: use Tools instead.",
+                "function_call": {
+                    "description": "Deprecated: use ToolChoice instead."
+                },
+                "functions": {
+                    "description": "Deprecated: use Tools instead.",
                     "type": "array",
                     "items": {
                         "$ref": "#/definitions/openai.FunctionDefinition"
@@ -2487,11 +2560,11 @@
                     }
                 },
                 "store": {
-                    "description": "If true, the response will be stored in the conversation, default is false",
+                    "description": "Store controls whether the latest input and generated response should be persisted",
                     "type": "boolean"
                 },
                 "store_reasoning": {
-                    "description": "If true, the reasoning will be stored in the conversation, default is false",
+                    "description": "StoreReasoning controls whether reasoning content (if present) should also be persisted",
                     "type": "boolean"
                 },
                 "stream": {
@@ -2526,10 +2599,17 @@
                 },
                 "user": {
                     "type": "string"
+                },
+                "verbosity": {
+                    "description": "Verbosity determines how many output tokens are generated. Lowering the number of\ntokens reduces overall latency. It can be set to \"low\", \"medium\", or \"high\".\nNote: This field is only confirmed to work with gpt-5, gpt-5-mini and gpt-5-nano.\nAlso, it is not in the API reference of chat completion at the time of writing,\nthough it is supported by the API.",
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conv.ExtendedCompletionResponse": {
+        "chatrequests.ConversationReference": {
+            "type": "object"
+        },
+        "chatresponses.ChatCompletionResponse": {
             "type": "object",
             "properties": {
                 "choices": {
@@ -2538,15 +2618,15 @@
                         "$ref": "#/definitions/openai.ChatCompletionChoice"
                     }
                 },
+                "conversation": {
+                    "$ref": "#/definitions/chatresponses.ConversationContext"
+                },
                 "created": {
                     "type": "integer"
                 },
                 "id": {
                     "type": "string"
                 },
-                "metadata": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.ResponseMetadata"
-                },
                 "model": {
                     "type": "string"
                 },
@@ -2570,1568 +2650,1403 @@
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conv.Model": {
+        "chatresponses.ConversationContext": {
             "type": "object",
             "properties": {
-                "created": {
-                    "type": "integer"
-                },
                 "id": {
+                    "description": "The unique ID of the conversation",
                     "type": "string"
                 },
-                "object": {
-                    "type": "string"
-                },
-                "owned_by": {
+                "title": {
+                    "description": "The title of the conversation (optional)",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conv.ModelsResponse": {
+        "conversation.Annotation": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conv.Model"
-                    }
+                "bounding_box": {
+                    "description": "Bounding box for image/PDF annotations",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.BBox"
+                        }
+                    ]
                 },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conv.ResponseMetadata": {
-            "type": "object",
-            "properties": {
-                "ask_item_id": {
-                    "type": "string"
+                "confidence": {
+                    "description": "Citation confidence score (0.0-1.0)",
+                    "type": "number"
                 },
-                "completion_item_id": {
+                "container_id": {
+                    "description": "Document container reference",
                     "type": "string"
                 },
-                "conversation_created": {
-                    "type": "boolean"
+                "end_index": {
+                    "description": "End position in text",
+                    "type": "integer"
                 },
-                "conversation_id": {
+                "file_id": {
+                    "description": "For file citations",
                     "type": "string"
                 },
-                "conversation_title": {
+                "filename": {
+                    "description": "File name for citations",
                     "type": "string"
                 },
-                "store": {
-                    "type": "boolean"
+                "index": {
+                    "description": "Citation index",
+                    "type": "integer"
                 },
-                "store_reasoning": {
-                    "type": "boolean"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.AnnotationResponse": {
-            "type": "object",
-            "properties": {
-                "end_index": {
+                "page_number": {
+                    "description": "Page reference for documents",
                     "type": "integer"
                 },
-                "file_id": {
+                "quote": {
+                    "description": "Actual quoted text from source",
                     "type": "string"
                 },
-                "index": {
-                    "type": "integer"
-                },
                 "start_index": {
+                    "description": "Start position in text",
                     "type": "integer"
                 },
                 "text": {
+                    "description": "Display text",
                     "type": "string"
                 },
                 "type": {
+                    "description": "\"file_citation\", \"url_citation\", \"file_path\", etc.",
                     "type": "string"
                 },
                 "url": {
+                    "description": "For URL citations",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ContentResponse": {
+        "conversation.AudioContent": {
             "type": "object",
             "properties": {
-                "file": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.FileContentResponse"
-                },
-                "finish_reason": {
+                "data": {
+                    "description": "Base64 encoded audio data",
                     "type": "string"
                 },
-                "image": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ImageContentResponse"
-                },
-                "input_text": {
+                "format": {
+                    "description": "Audio format: mp3, wav, pcm16, etc.",
                     "type": "string"
                 },
-                "output_text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.OutputTextResponse"
-                },
-                "reasoning_content": {
+                "id": {
                     "type": "string"
                 },
-                "text": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.TextResponse"
-                },
-                "type": {
+                "transcript": {
+                    "description": "Text transcription of audio",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ConversationContentRequest": {
+        "conversation.BBox": {
             "type": "object",
-            "required": [
-                "type"
-            ],
             "properties": {
-                "text": {
-                    "type": "string"
+                "height": {
+                    "type": "number"
                 },
-                "type": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemRequest": {
-            "type": "object",
-            "required": [
-                "content",
-                "type"
-            ],
-            "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationContentRequest"
-                    }
+                "width": {
+                    "type": "number"
                 },
-                "role": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole"
+                "x": {
+                    "type": "number"
                 },
-                "type": {
-                    "type": "string"
+                "y": {
+                    "type": "number"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.ConversationItemResponse": {
+        "conversation.CodeContent": {
             "type": "object",
             "properties": {
-                "content": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ContentResponse"
-                    }
-                },
-                "created_at": {
-                    "type": "integer"
-                },
-                "id": {
+                "code": {
+                    "description": "Code content",
                     "type": "string"
                 },
-                "object": {
+                "error": {
+                    "description": "Execution error",
                     "type": "string"
                 },
-                "role": {
+                "execution_id": {
+                    "description": "Execution session ID",
                     "type": "string"
                 },
-                "status": {
-                    "type": "string"
+                "exit_code": {
+                    "description": "Process exit code",
+                    "type": "integer"
                 },
-                "type": {
+                "language": {
+                    "description": "Programming language",
                     "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.CreateConversationRequest": {
-            "type": "object",
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
                 },
                 "metadata": {
+                    "description": "Additional metadata",
                     "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
+                    "additionalProperties": {}
                 },
-                "title": {
+                "output": {
+                    "description": "Execution output",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.CreateItemsRequest": {
-            "type": "object",
-            "required": [
-                "items"
-            ],
-            "properties": {
-                "items": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemRequest"
-                    }
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.DeletedConversationResponse": {
+        "conversation.ComputerAction": {
             "type": "object",
             "properties": {
-                "deleted": {
-                    "type": "boolean"
-                },
-                "id": {
+                "action": {
+                    "description": "Action type: \"click\", \"type\", \"key\", \"scroll\", \"move\", etc.",
                     "type": "string"
                 },
-                "object": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse": {
-            "type": "object",
-            "properties": {
-                "created_at": {
-                    "type": "integer"
+                "coordinates": {
+                    "description": "Screen coordinates for mouse actions",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.Coordinates"
+                        }
+                    ]
                 },
-                "id": {
+                "key": {
+                    "description": "Key for keyboard actions",
                     "type": "string"
                 },
                 "metadata": {
+                    "description": "Additional action metadata",
                     "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
+                    "additionalProperties": {}
                 },
-                "object": {
-                    "type": "string"
+                "scroll_delta": {
+                    "description": "Scroll amount",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ScrollDelta"
+                        }
+                    ]
                 },
-                "title": {
+                "text": {
+                    "description": "Text for typing actions",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.FileContentResponse": {
+        "conversation.Content": {
             "type": "object",
             "properties": {
-                "file_id": {
-                    "type": "string"
-                },
-                "mime_type": {
-                    "type": "string"
+                "audio": {
+                    "description": "Audio content for speech",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.AudioContent"
+                        }
+                    ]
                 },
-                "name": {
+                "code": {
+                    "description": "Code block with execution metadata",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.CodeContent"
+                        }
+                    ]
+                },
+                "computer_action": {
+                    "description": "Computer interaction details",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ComputerAction"
+                        }
+                    ]
+                },
+                "computer_screenshot": {
+                    "description": "Screenshot from computer use",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ScreenshotContent"
+                        }
+                    ]
+                },
+                "file": {
+                    "description": "File content",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.FileContent"
+                        }
+                    ]
+                },
+                "finish_reason": {
+                    "description": "Finish reason",
                     "type": "string"
                 },
-                "size": {
-                    "type": "integer"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.ImageContentResponse": {
-            "type": "object",
-            "properties": {
-                "detail": {
+                "function_call": {
+                    "description": "Function call content (deprecated, use tool_calls)",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.FunctionCall"
+                        }
+                    ]
+                },
+                "function_call_output": {
+                    "description": "Function call output",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.FunctionCallOut"
+                        }
+                    ]
+                },
+                "image": {
+                    "description": "Image content",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ImageContent"
+                        }
+                    ]
+                },
+                "input_audio": {
+                    "description": "User audio input",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.InputAudio"
+                        }
+                    ]
+                },
+                "input_text": {
+                    "description": "User input text (simple)",
                     "type": "string"
                 },
-                "file_id": {
+                "output_text": {
+                    "description": "AI output text (with annotations)",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.OutputText"
+                        }
+                    ]
+                },
+                "reasoning_content": {
+                    "description": "AI reasoning content",
                     "type": "string"
                 },
-                "url": {
+                "refusal": {
+                    "description": "Model refusal message",
                     "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.OutputTextResponse": {
-            "type": "object",
-            "properties": {
-                "annotations": {
+                },
+                "summary_text": {
+                    "description": "Summary content",
+                    "type": "string"
+                },
+                "text": {
+                    "description": "Generic text content",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.Text"
+                        }
+                    ]
+                },
+                "thinking": {
+                    "description": "Internal reasoning (o1 models)",
+                    "type": "string"
+                },
+                "tool_call_id": {
+                    "description": "Tool call ID (for tool responses)",
+                    "type": "string"
+                },
+                "tool_calls": {
+                    "description": "Tool calls (for assistant messages)",
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.AnnotationResponse"
+                        "$ref": "#/definitions/conversation.ToolCall"
                     }
                 },
-                "text": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_conversations.TextResponse": {
-            "type": "object",
-            "properties": {
-                "value": {
+                "type": {
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_conversations.UpdateConversationRequest": {
+        "conversation.Coordinates": {
             "type": "object",
             "properties": {
-                "metadata": {
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
+                "x": {
+                    "type": "integer"
                 },
-                "title": {
-                    "type": "string"
+                "y": {
+                    "type": "integer"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.AdminAPIKeyDeletedResponse": {
+        "conversation.FileContent": {
             "type": "object",
             "properties": {
-                "deleted": {
-                    "type": "boolean"
+                "file_id": {
+                    "type": "string"
                 },
-                "id": {
+                "mime_type": {
                     "type": "string"
                 },
-                "object": {
+                "name": {
                     "type": "string"
+                },
+                "size": {
+                    "type": "integer"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.AdminApiKeyListResponse": {
+        "conversation.FunctionCall": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse"
-                    }
-                },
-                "first_id": {
+                "arguments": {
+                    "description": "JSON-encoded arguments",
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "id": {
+                    "description": "Call ID",
                     "type": "string"
                 },
-                "object": {
-                    "type": "string",
-                    "example": "list"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization.CreateOrganizationAdminAPIKeyRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
                 "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
+                    "description": "Function name",
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.OrganizationAdminAPIKeyResponse": {
+        "conversation.FunctionCallOut": {
             "type": "object",
             "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "key_1234567890"
-                },
-                "last_used_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My Admin API Key"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "api_key"
-                },
-                "owner": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization.Owner"
-                },
-                "redacted_value": {
-                    "type": "string",
-                    "example": "sk-...abcd"
+                "call_id": {
+                    "description": "ID of the function call this responds to",
+                    "type": "string"
                 },
-                "value": {
-                    "type": "string",
-                    "example": "sk-abcdef1234567890"
+                "output": {
+                    "description": "String output from the function",
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization.Owner": {
+        "conversation.ImageContent": {
             "type": "object",
             "properties": {
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "user_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "John Doe"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "user"
+                "detail": {
+                    "description": "\"low\", \"high\", \"auto\"",
+                    "type": "string"
                 },
-                "role": {
-                    "type": "string",
-                    "example": "admin"
+                "file_id": {
+                    "type": "string"
                 },
-                "type": {
-                    "type": "string",
-                    "example": "user"
+                "url": {
+                    "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_invites.CreateInviteUserRequest": {
+        "conversation.IncompleteDetails": {
             "type": "object",
             "properties": {
-                "email": {
+                "error": {
+                    "description": "Error message if applicable",
                     "type": "string"
                 },
-                "projects": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteProject"
-                    }
-                },
-                "role": {
+                "reason": {
+                    "description": "\"max_tokens\", \"content_filter\", \"tool_calls\", etc.",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_invites.InviteProject": {
+        "conversation.InputAudio": {
             "type": "object",
             "properties": {
-                "id": {
+                "data": {
+                    "description": "Base64 encoded audio data",
                     "type": "string"
                 },
-                "role": {
+                "format": {
+                    "description": "Audio format: mp3, wav, pcm16, etc.",
+                    "type": "string"
+                },
+                "transcript": {
+                    "description": "Optional text transcription",
                     "type": "string"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_invites.InviteResponse": {
+        "conversation.Item": {
             "type": "object",
             "properties": {
-                "accepted_at": {
+                "branch": {
+                    "description": "Branch identifier (MAIN, EDIT_1, etc.)",
                     "type": "string"
                 },
-                "email": {
+                "completed_at": {
                     "type": "string"
                 },
-                "expires_at": {
+                "content": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Content"
+                    }
+                },
+                "created_at": {
                     "type": "string"
                 },
                 "id": {
                     "type": "string"
                 },
-                "invited_at": {
+                "incomplete_at": {
                     "type": "string"
                 },
+                "incomplete_details": {
+                    "$ref": "#/definitions/conversation.IncompleteDetails"
+                },
                 "object": {
+                    "description": "Always \"conversation.item\" for OpenAI compatibility",
                     "type": "string"
                 },
-                "projects": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteProject"
-                    }
-                },
-                "role": {
+                "rated_at": {
+                    "description": "When rating was given",
                     "type": "string"
                 },
-                "status": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_invites.VerifyInviteUserRequest": {
-            "type": "object",
-            "properties": {
-                "code": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.CreateProjectRequest": {
-            "type": "object",
-            "required": [
-                "name"
-            ],
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "New AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectListResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects.ProjectResponse"
-                    }
+                "rating": {
+                    "description": "User feedback/rating",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/conversation.ItemRating"
+                        }
+                    ]
                 },
-                "first_id": {
+                "rating_comment": {
+                    "description": "Optional comment with rating",
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
+                "role": {
+                    "$ref": "#/definitions/conversation.ItemRole"
                 },
-                "last_id": {
-                    "type": "string"
+                "sequence_number": {
+                    "description": "Order within branch",
+                    "type": "integer"
                 },
-                "object": {
-                    "type": "string",
-                    "example": "list"
+                "status": {
+                    "$ref": "#/definitions/conversation.ItemStatus"
+                },
+                "type": {
+                    "$ref": "#/definitions/conversation.ItemType"
                 }
             }
         },
-        "app_interfaces_http_routes_v1_organization_projects.ProjectResponse": {
-            "type": "object",
-            "properties": {
-                "archived_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "created_at": {
-                    "type": "integer",
-                    "example": 1698765432
-                },
-                "id": {
-                    "type": "string",
-                    "example": "proj_1234567890"
-                },
-                "name": {
-                    "type": "string",
-                    "example": "My First Project"
-                },
-                "object": {
-                    "type": "string",
-                    "example": "project"
-                },
-                "status": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects.UpdateProjectRequest": {
-            "type": "object",
-            "properties": {
-                "name": {
-                    "type": "string",
-                    "example": "Updated AI Project"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse": {
-            "type": "object",
-            "properties": {
-                "apikeyType": {
-                    "type": "string"
-                },
-                "description": {
-                    "type": "string"
-                },
-                "enabled": {
-                    "type": "boolean"
-                },
-                "expiresAt": {
-                    "type": "string"
-                },
-                "id": {
-                    "type": "string"
-                },
-                "key": {
-                    "type": "string"
-                },
-                "last_usedAt": {
-                    "type": "string"
-                },
-                "permissions": {
-                    "type": "string"
-                },
-                "plaintextHint": {
-                    "type": "string"
-                }
-            }
-        },
-        "app_interfaces_http_routes_v1_organization_projects_api_keys.CreateApiKeyRequest": {
-            "type": "object",
-            "properties": {
-                "description": {
-                    "type": "string"
-                },
-                "expiresAt": {
-                    "type": "string"
-                }
-            }
+        "conversation.ItemRating": {
+            "type": "string",
+            "enum": [
+                "like",
+                "unlike"
+            ],
+            "x-enum-comments": {
+                "ItemRatingLike": "Positive feedback (like)",
+                "ItemRatingUnlike": "Negative feedback (unlike)"
+            },
+            "x-enum-varnames": [
+                "ItemRatingLike",
+                "ItemRatingUnlike"
+            ]
         },
-        "menlo_ai_jan-api-gateway_app_domain_conversation.ItemRole": {
+        "conversation.ItemRole": {
             "type": "string",
             "enum": [
                 "system",
                 "user",
                 "assistant",
-                "tool"
+                "tool",
+                "developer",
+                "critic",
+                "discriminator",
+                "unknown"
             ],
+            "x-enum-comments": {
+                "ItemRoleCritic": "For critique/evaluation workflows",
+                "ItemRoleDeveloper": "System-level instructions (OpenAI replacement for system)",
+                "ItemRoleDiscriminator": "For adversarial/validation workflows",
+                "ItemRoleUnknown": "Fallback for unrecognized roles"
+            },
             "x-enum-varnames": [
                 "ItemRoleSystem",
                 "ItemRoleUser",
                 "ItemRoleAssistant",
-                "ItemRoleTool"
+                "ItemRoleTool",
+                "ItemRoleDeveloper",
+                "ItemRoleCritic",
+                "ItemRoleDiscriminator",
+                "ItemRoleUnknown"
             ]
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.CreateResponseRequest": {
-            "type": "object",
-            "required": [
-                "input",
-                "model"
+        "conversation.ItemStatus": {
+            "type": "string",
+            "enum": [
+                "incomplete",
+                "in_progress",
+                "completed",
+                "failed",
+                "cancelled",
+                "searching",
+                "generating",
+                "calling",
+                "streaming",
+                "rate_limited"
+            ],
+            "x-enum-comments": {
+                "ItemStatusCalling": "Function/tool call in progress",
+                "ItemStatusCancelled": "Cancelled by user or system",
+                "ItemStatusCompleted": "Successfully finished",
+                "ItemStatusFailed": "Failed with error",
+                "ItemStatusGenerating": "Image generation in progress",
+                "ItemStatusInProgress": "Currently processing",
+                "ItemStatusIncomplete": "Not started or partially complete (OpenAI uses this instead of \"pending\")",
+                "ItemStatusRateLimited": "Rate limit hit",
+                "ItemStatusSearching": "File/web search in progress",
+                "ItemStatusStreaming": "Streaming response in progress"
+            },
+            "x-enum-varnames": [
+                "ItemStatusIncomplete",
+                "ItemStatusInProgress",
+                "ItemStatusCompleted",
+                "ItemStatusFailed",
+                "ItemStatusCancelled",
+                "ItemStatusSearching",
+                "ItemStatusGenerating",
+                "ItemStatusCalling",
+                "ItemStatusStreaming",
+                "ItemStatusRateLimited"
+            ]
+        },
+        "conversation.ItemType": {
+            "type": "string",
+            "enum": [
+                "message",
+                "function_call",
+                "function_call_output",
+                "reasoning",
+                "file_search",
+                "web_search",
+                "code_interpreter",
+                "computer_use",
+                "custom_tool_call",
+                "mcp_item",
+                "image_generation"
             ],
+            "x-enum-comments": {
+                "ItemTypeCodeInterpreter": "Code execution",
+                "ItemTypeComputerUse": "Computer interaction",
+                "ItemTypeCustomToolCall": "Custom tool invocations",
+                "ItemTypeFileSearch": "RAG/retrieval operations",
+                "ItemTypeImageGeneration": "DALL-E image generation",
+                "ItemTypeMCPItem": "Model Context Protocol items",
+                "ItemTypeReasoning": "For o1/reasoning models",
+                "ItemTypeWebSearch": "Web browsing operations"
+            },
+            "x-enum-varnames": [
+                "ItemTypeMessage",
+                "ItemTypeFunctionCall",
+                "ItemTypeFunctionCallOut",
+                "ItemTypeReasoning",
+                "ItemTypeFileSearch",
+                "ItemTypeWebSearch",
+                "ItemTypeCodeInterpreter",
+                "ItemTypeComputerUse",
+                "ItemTypeCustomToolCall",
+                "ItemTypeMCPItem",
+                "ItemTypeImageGeneration"
+            ]
+        },
+        "conversation.LogProb": {
+            "type": "object",
             "properties": {
-                "background": {
-                    "description": "Whether to run the response in the background.",
-                    "type": "boolean"
-                },
-                "conversation": {
-                    "description": "The conversation ID to append items to. If not set or set to ClientCreatedRootConversationID, a new conversation will be created.",
-                    "type": "string"
-                },
-                "frequency_penalty": {
-                    "description": "The frequency penalty to use for this response.",
-                    "type": "number"
-                },
-                "input": {
-                    "description": "The input to the model. Can be a string or array of strings."
-                },
-                "logit_bias": {
-                    "description": "The logit bias to use for this response.",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "number",
-                        "format": "float64"
+                "bytes": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer"
                     }
                 },
-                "max_tokens": {
-                    "description": "The maximum number of tokens to generate.",
-                    "type": "integer"
-                },
-                "metadata": {
-                    "description": "The metadata to use for this response.",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "model": {
-                    "description": "The ID of the model to use for this response.",
-                    "type": "string"
-                },
-                "presence_penalty": {
-                    "description": "The presence penalty to use for this response.",
+                "logprob": {
                     "type": "number"
                 },
-                "previous_response_id": {
-                    "description": "The ID of the previous response to continue from. If set, the conversation will be loaded from the previous response.",
+                "token": {
                     "type": "string"
                 },
-                "repetition_penalty": {
-                    "description": "The repetition penalty to use for this response.",
-                    "type": "number"
-                },
-                "response_format": {
-                    "description": "The response format to use for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat"
-                        }
-                    ]
-                },
-                "seed": {
-                    "description": "The seed to use for this response.",
-                    "type": "integer"
+                "top_logprobs": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.TopLogProb"
+                    }
+                }
+            }
+        },
+        "conversation.OutputText": {
+            "type": "object",
+            "properties": {
+                "annotations": {
+                    "description": "Required for OpenAI compatibility",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Annotation"
+                    }
                 },
-                "stop": {
-                    "description": "The stop sequences to use for this response.",
+                "logprobs": {
+                    "description": "Token probabilities",
                     "type": "array",
                     "items": {
-                        "type": "string"
+                        "$ref": "#/definitions/conversation.LogProb"
                     }
                 },
-                "store": {
-                    "description": "Whether to store the conversation. If false, no conversation will be created or used.",
-                    "type": "boolean"
+                "text": {
+                    "type": "string"
+                }
+            }
+        },
+        "conversation.ScreenshotContent": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "description": "Optional description",
+                    "type": "string"
                 },
-                "stream": {
-                    "description": "Whether to stream the response.",
-                    "type": "boolean"
+                "height": {
+                    "description": "Image height in pixels",
+                    "type": "integer"
                 },
-                "system_prompt": {
-                    "description": "The system prompt to use for this response.",
+                "image_data": {
+                    "description": "Base64 encoded image data",
                     "type": "string"
                 },
-                "temperature": {
-                    "description": "The temperature to use for this response.",
-                    "type": "number"
+                "image_url": {
+                    "description": "URL to screenshot image",
+                    "type": "string"
                 },
-                "timeout": {
-                    "description": "The timeout in seconds for this response.",
+                "timestamp": {
+                    "description": "Unix timestamp when screenshot was taken",
                     "type": "integer"
                 },
-                "tool_choice": {
-                    "description": "The tool choice to use for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice"
-                        }
-                    ]
+                "width": {
+                    "description": "Image width in pixels",
+                    "type": "integer"
+                }
+            }
+        },
+        "conversation.ScrollDelta": {
+            "type": "object",
+            "properties": {
+                "x": {
+                    "type": "integer"
                 },
-                "tools": {
-                    "description": "The tools to use for this response.",
+                "y": {
+                    "type": "integer"
+                }
+            }
+        },
+        "conversation.Text": {
+            "type": "object",
+            "properties": {
+                "annotations": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool"
+                        "$ref": "#/definitions/conversation.Annotation"
                     }
                 },
-                "top_k": {
-                    "description": "The top_k to use for this response.",
-                    "type": "integer"
-                },
-                "top_p": {
-                    "description": "The top_p to use for this response.",
-                    "type": "number"
-                },
-                "user": {
-                    "description": "The user to use for this response.",
+                "text": {
+                    "description": "Changed from \"value\" to match OpenAI spec",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileInput": {
+        "conversation.ToolCall": {
             "type": "object",
-            "required": [
-                "file_id"
-            ],
             "properties": {
-                "file_id": {
-                    "description": "The ID of the file.",
+                "function": {
+                    "$ref": "#/definitions/conversation.FunctionCall"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "type": {
+                    "description": "\"function\", \"file_search\", \"code_interpreter\"",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileSearchInput": {
+        "conversation.TopLogProb": {
             "type": "object",
-            "required": [
-                "file_ids",
-                "query"
-            ],
             "properties": {
-                "file_ids": {
-                    "description": "The IDs of the files to search in.",
+                "bytes": {
                     "type": "array",
                     "items": {
-                        "type": "string"
+                        "type": "integer"
                     }
                 },
-                "max_results": {
-                    "description": "The number of results to return.",
-                    "type": "integer"
+                "logprob": {
+                    "type": "number"
                 },
-                "query": {
-                    "description": "The query to search for.",
+                "token": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCall": {
+        "conversationrequests.CreateConversationRequest": {
             "type": "object",
-            "required": [
-                "name"
-            ],
             "properties": {
-                "arguments": {
-                    "description": "The arguments to pass to the function.",
+                "items": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Item"
+                    }
+                },
+                "metadata": {
                     "type": "object",
-                    "additionalProperties": {}
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "name": {
-                    "description": "The name of the function to call.",
+                "project_id": {
+                    "type": "string"
+                },
+                "referrer": {
+                    "type": "string"
+                },
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCallsInput": {
+        "conversationrequests.CreateItemsRequest": {
             "type": "object",
             "required": [
-                "calls"
+                "items"
             ],
             "properties": {
-                "calls": {
-                    "description": "The function calls to make.",
+                "items": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCall"
+                        "$ref": "#/definitions/conversation.Item"
                     }
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionChoice": {
+        "conversationrequests.UpdateConversationRequest": {
             "type": "object",
-            "required": [
-                "name"
-            ],
             "properties": {
-                "name": {
-                    "description": "The name of the function.",
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "referrer": {
+                    "type": "string"
+                },
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionDefinition": {
+        "conversationresponses.ConversationDeletedResponse": {
             "type": "object",
-            "required": [
-                "name"
-            ],
             "properties": {
-                "description": {
-                    "description": "The description of the function.",
-                    "type": "string"
+                "deleted": {
+                    "type": "boolean"
                 },
-                "name": {
-                    "description": "The name of the function.",
+                "id": {
                     "type": "string"
                 },
-                "parameters": {
-                    "description": "The parameters of the function.",
-                    "type": "object",
-                    "additionalProperties": {}
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ImageInput": {
+        "conversationresponses.ConversationItemCreatedResponse": {
             "type": "object",
             "properties": {
                 "data": {
-                    "description": "The base64 encoded image data.",
-                    "type": "string"
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Item"
+                    }
                 },
-                "detail": {
-                    "description": "The detail level for the image.",
+                "first_id": {
                     "type": "string"
                 },
-                "url": {
-                    "description": "The URL of the image.",
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "object": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.InputType": {
-            "type": "string",
-            "enum": [
-                "text",
-                "image",
-                "file",
-                "web_search",
-                "file_search",
-                "streaming",
-                "function_calls",
-                "reasoning"
-            ],
-            "x-enum-varnames": [
-                "InputTypeText",
-                "InputTypeImage",
-                "InputTypeFile",
-                "InputTypeWebSearch",
-                "InputTypeFileSearch",
-                "InputTypeStreaming",
-                "InputTypeFunctionCalls",
-                "InputTypeReasoning"
-            ]
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ReasoningInput": {
+        "conversationresponses.ConversationListResponse": {
             "type": "object",
-            "required": [
-                "task"
-            ],
             "properties": {
-                "context": {
-                    "description": "The context for the reasoning task.",
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversationresponses.ConversationResponse"
+                    }
+                },
+                "first_id": {
                     "type": "string"
                 },
-                "task": {
-                    "description": "The reasoning task to perform.",
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
                     "type": "string"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat": {
-            "type": "object",
-            "required": [
-                "type"
-            ],
-            "properties": {
-                "type": {
-                    "description": "The type of response format.",
+                },
+                "object": {
                     "type": "string"
+                },
+                "total": {
+                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.StreamingInput": {
+        "conversationresponses.ConversationResponse": {
             "type": "object",
-            "required": [
-                "url"
-            ],
             "properties": {
-                "body": {
-                    "description": "The body to send with the request.",
+                "created_at": {
+                    "type": "integer"
+                },
+                "id": {
                     "type": "string"
                 },
-                "headers": {
-                    "description": "The headers to send with the request.",
+                "metadata": {
                     "type": "object",
                     "additionalProperties": {
                         "type": "string"
                     }
                 },
-                "method": {
-                    "description": "The method to use for the request.",
+                "object": {
                     "type": "string"
                 },
-                "url": {
-                    "description": "The URL to stream from.",
+                "project_id": {
+                    "type": "string"
+                },
+                "referrer": {
+                    "type": "string"
+                },
+                "title": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool": {
+        "conversationresponses.ItemListResponse": {
             "type": "object",
-            "required": [
-                "type"
-            ],
             "properties": {
-                "function": {
-                    "description": "The function definition for function tools.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionDefinition"
-                        }
-                    ]
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Item"
+                    }
                 },
-                "type": {
-                    "description": "The type of tool.",
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "object": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice": {
+        "conversationresponses.ItemResponse": {
             "type": "object",
-            "required": [
-                "type"
-            ],
             "properties": {
-                "function": {
-                    "description": "The function to use for function tool choice.",
+                "branch": {
+                    "description": "Branch identifier (MAIN, EDIT_1, etc.)",
+                    "type": "string"
+                },
+                "completed_at": {
+                    "type": "string"
+                },
+                "content": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/conversation.Content"
+                    }
+                },
+                "created_at": {
+                    "type": "string"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "incomplete_at": {
+                    "type": "string"
+                },
+                "incomplete_details": {
+                    "$ref": "#/definitions/conversation.IncompleteDetails"
+                },
+                "object": {
+                    "description": "Always \"conversation.item\" for OpenAI compatibility",
+                    "type": "string"
+                },
+                "rated_at": {
+                    "description": "When rating was given",
+                    "type": "string"
+                },
+                "rating": {
+                    "description": "User feedback/rating",
                     "allOf": [
                         {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionChoice"
+                            "$ref": "#/definitions/conversation.ItemRating"
                         }
                     ]
                 },
-                "type": {
-                    "description": "The type of tool choice.",
+                "rating_comment": {
+                    "description": "Optional comment with rating",
                     "type": "string"
+                },
+                "role": {
+                    "$ref": "#/definitions/conversation.ItemRole"
+                },
+                "sequence_number": {
+                    "description": "Order within branch",
+                    "type": "integer"
+                },
+                "status": {
+                    "$ref": "#/definitions/conversation.ItemStatus"
+                },
+                "type": {
+                    "$ref": "#/definitions/conversation.ItemType"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_requests.WebSearchInput": {
+        "model.Architecture": {
             "type": "object",
-            "required": [
-                "query"
-            ],
             "properties": {
-                "max_results": {
-                    "description": "The number of results to return.",
-                    "type": "integer"
+                "input_modalities": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "instruct_type": {
+                    "description": "nullable",
+                    "type": "string"
                 },
-                "query": {
-                    "description": "The query to search for.",
+                "modality": {
+                    "description": "\"text+image-\u003etext\"",
                     "type": "string"
                 },
-                "search_engine": {
-                    "description": "The search engine to use.",
+                "output_modalities": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "tokenizer": {
+                    "description": "\"GPT\" / \"SentencePiece\" / etc.",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ConversationInfo": {
+        "model.ModelCatalogStatus": {
+            "type": "string",
+            "enum": [
+                "init",
+                "filled",
+                "updated"
+            ],
+            "x-enum-comments": {
+                "ModelCatalogStatusFilled": "may update from Provider like OpenRouter",
+                "ModelCatalogStatusInit": "default status when creating entry",
+                "ModelCatalogStatusUpdated": "manually updated by admin (cannot be auto-updated anymore)"
+            },
+            "x-enum-varnames": [
+                "ModelCatalogStatusInit",
+                "ModelCatalogStatusFilled",
+                "ModelCatalogStatusUpdated"
+            ]
+        },
+        "model.PriceLine": {
             "type": "object",
             "properties": {
-                "id": {
-                    "description": "The unique ID of the conversation.",
+                "amount_micro_usd": {
+                    "description": "e.g., 15000 -\u003e $0.0150",
+                    "type": "integer"
+                },
+                "currency": {
+                    "description": "\"USD\" (fixed if you only bill in USD)",
                     "type": "string"
+                },
+                "unit": {
+                    "$ref": "#/definitions/model.PriceUnit"
+                }
+            }
+        },
+        "model.PriceUnit": {
+            "type": "string",
+            "enum": [
+                "per_1k_prompt_tokens",
+                "per_1k_completion_tokens",
+                "per_request",
+                "per_image",
+                "per_web_search",
+                "per_internal_reasoning"
+            ],
+            "x-enum-varnames": [
+                "Per1KPromptTokens",
+                "Per1KCompletionTokens",
+                "PerRequest",
+                "PerImage",
+                "PerWebSearch",
+                "PerInternalReasoning"
+            ]
+        },
+        "model.Pricing": {
+            "type": "object",
+            "properties": {
+                "lines": {
+                    "description": "flexible: add/remove units without schema churn",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/model.PriceLine"
+                    }
+                }
+            }
+        },
+        "model.SupportedParameters": {
+            "type": "object",
+            "properties": {
+                "default": {
+                    "description": "temperature/top_p/frequency_penalty, null allowed",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "number"
+                    }
+                },
+                "names": {
+                    "description": "e.g., [\"include_reasoning\",\"max_tokens\",...]",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.DetailedUsage": {
+        "model.TokenLimits": {
             "type": "object",
             "properties": {
-                "input_tokens": {
-                    "description": "The number of tokens in the prompt.",
+                "context_length": {
+                    "description": "e.g., 400000",
                     "type": "integer"
                 },
-                "input_tokens_details": {
-                    "description": "Details about input tokens.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails"
-                        }
-                    ]
+                "max_completion_tokens": {
+                    "description": "e.g., 128000",
+                    "type": "integer"
+                }
+            }
+        },
+        "modelresponses.BulkOperationResponse": {
+            "type": "object",
+            "properties": {
+                "failed_count": {
+                    "type": "integer"
+                },
+                "failed_models": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
                 },
-                "output_tokens": {
-                    "description": "The number of tokens in the completion.",
+                "skipped_count": {
                     "type": "integer"
                 },
-                "output_tokens_details": {
-                    "description": "Details about output tokens.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails"
-                        }
-                    ]
+                "total_checked": {
+                    "type": "integer"
                 },
-                "total_tokens": {
-                    "description": "The total number of tokens used.",
+                "updated_count": {
                     "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ErrorResponse": {
+        "modelresponses.ModelCatalogResponse": {
             "type": "object",
             "properties": {
-                "code": {
+                "active": {
+                    "type": "boolean"
+                },
+                "architecture": {
+                    "$ref": "#/definitions/model.Architecture"
+                },
+                "created_at": {
+                    "type": "integer"
+                },
+                "extras": {
+                    "type": "object",
+                    "additionalProperties": {}
+                },
+                "id": {
                     "type": "string"
                 },
-                "error": {
+                "is_moderated": {
+                    "type": "boolean"
+                },
+                "last_synced_at": {
+                    "type": "integer"
+                },
+                "notes": {
                     "type": "string"
+                },
+                "status": {
+                    "$ref": "#/definitions/model.ModelCatalogStatus"
+                },
+                "supported_parameters": {
+                    "$ref": "#/definitions/model.SupportedParameters"
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "updated_at": {
+                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.FormatType": {
+        "modelresponses.ModelResponse": {
             "type": "object",
             "properties": {
-                "type": {
-                    "description": "The type of format.",
+                "created": {
+                    "type": "integer"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "owned_by": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.GeneralResponse-app_interfaces_http_routes_v1_organization_projects_api_keys_ApiKeyResponse": {
+        "modelresponses.ModelResponseList": {
             "type": "object",
             "properties": {
-                "result": {
-                    "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_projects_api_keys.ApiKeyResponse"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/modelresponses.ModelResponse"
+                    }
                 },
-                "status": {
+                "object": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.InputItem": {
+        "modelresponses.ModelResponseWithProvider": {
             "type": "object",
             "properties": {
                 "created": {
-                    "description": "The Unix timestamp (in seconds) when the input item was created.",
                     "type": "integer"
                 },
-                "file": {
-                    "description": "The file content (for file type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileInput"
-                        }
-                    ]
-                },
-                "file_search": {
-                    "description": "The file search content (for file_search type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FileSearchInput"
-                        }
-                    ]
-                },
-                "function_calls": {
-                    "description": "The function calls content (for function_calls type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.FunctionCallsInput"
-                        }
-                    ]
-                },
                 "id": {
-                    "description": "The unique identifier for the input item.",
                     "type": "string"
                 },
-                "image": {
-                    "description": "The image content (for image type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ImageInput"
-                        }
-                    ]
-                },
                 "object": {
-                    "description": "The object type, which is always \"input_item\".",
                     "type": "string"
                 },
-                "reasoning": {
-                    "description": "The reasoning content (for reasoning type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ReasoningInput"
-                        }
-                    ]
-                },
-                "streaming": {
-                    "description": "The streaming content (for streaming type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.StreamingInput"
-                        }
-                    ]
+                "owned_by": {
+                    "type": "string"
                 },
-                "text": {
-                    "description": "The text content (for text type).",
+                "provider_id": {
                     "type": "string"
                 },
-                "type": {
-                    "description": "The type of input item.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.InputType"
-                        }
-                    ]
+                "provider_name": {
+                    "type": "string"
                 },
-                "web_search": {
-                    "description": "The web search content (for web_search type).",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.WebSearchInput"
-                        }
-                    ]
+                "provider_vendor": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ListInputItemsResponse": {
+        "modelresponses.ModelWithProviderResponseList": {
             "type": "object",
             "properties": {
                 "data": {
-                    "description": "The list of input items.",
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.InputItem"
+                        "$ref": "#/definitions/modelresponses.ModelResponseWithProvider"
                     }
                 },
-                "first_id": {
-                    "description": "The first ID in the list.",
-                    "type": "string"
-                },
-                "has_more": {
-                    "description": "Whether there are more items available.",
-                    "type": "boolean"
-                },
-                "last_id": {
-                    "description": "The last ID in the list.",
-                    "type": "string"
-                },
                 "object": {
-                    "description": "The object type, which is always \"list\".",
-                    "type": "string"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.Reasoning": {
-            "type": "object",
-            "properties": {
-                "effort": {
-                    "description": "The effort level for reasoning.",
-                    "type": "string"
-                },
-                "summary": {
-                    "description": "The summary of reasoning.",
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.Response": {
+        "modelresponses.ProviderModelResponse": {
             "type": "object",
             "properties": {
-                "background": {
-                    "description": "Whether the response was run in the background.",
+                "active": {
                     "type": "boolean"
                 },
-                "cancelled_at": {
-                    "description": "The Unix timestamp (in seconds) when the response was cancelled.",
-                    "type": "integer"
-                },
-                "completed_at": {
-                    "description": "The Unix timestamp (in seconds) when the response was completed.",
-                    "type": "integer"
-                },
-                "conversation": {
-                    "description": "The conversation that this response belongs to.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ConversationInfo"
-                        }
-                    ]
-                },
-                "created": {
-                    "description": "The Unix timestamp (in seconds) when the response was created.",
+                "created_at": {
                     "type": "integer"
                 },
-                "error": {
-                    "description": "The error that occurred during processing, if any.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseError"
-                        }
-                    ]
-                },
-                "failed_at": {
-                    "description": "The Unix timestamp (in seconds) when the response was failed.",
-                    "type": "integer"
+                "display_name": {
+                    "type": "string"
                 },
-                "frequency_penalty": {
-                    "description": "The frequency penalty that was used for this response.",
-                    "type": "number"
+                "family": {
+                    "type": "string"
                 },
                 "id": {
-                    "description": "The unique identifier for the response.",
                     "type": "string"
                 },
-                "incomplete_details": {
-                    "description": "OpenAI API response fields"
-                },
-                "input": {
-                    "description": "The input that was provided to the model. Can be a string or array of strings."
-                },
-                "instructions": {},
-                "logit_bias": {
-                    "description": "The logit bias that was used for this response.",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "number",
-                        "format": "float64"
-                    }
-                },
-                "max_output_tokens": {
-                    "type": "integer"
-                },
-                "max_tokens": {
-                    "description": "The maximum number of tokens that were generated.",
-                    "type": "integer"
-                },
-                "metadata": {
-                    "description": "The metadata that was provided for this response.",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "model": {
-                    "description": "The ID of the model used for this response.",
+                "model_catalog_id": {
                     "type": "string"
                 },
-                "object": {
-                    "description": "The object type, which is always \"response\".",
+                "model_public_id": {
                     "type": "string"
                 },
-                "output": {
-                    "description": "The output generated by the model."
-                },
-                "parallel_tool_calls": {
-                    "type": "boolean"
+                "pricing": {
+                    "$ref": "#/definitions/model.Pricing"
                 },
-                "presence_penalty": {
-                    "description": "The presence penalty that was used for this response.",
-                    "type": "number"
-                },
-                "previous_response_id": {
+                "provider_id": {
                     "type": "string"
                 },
-                "reasoning": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.Reasoning"
-                },
-                "repetition_penalty": {
-                    "description": "The repetition penalty that was used for this response.",
-                    "type": "number"
-                },
-                "response_format": {
-                    "description": "The response format that was used for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ResponseFormat"
-                        }
-                    ]
-                },
-                "seed": {
-                    "description": "The seed that was used for this response.",
-                    "type": "integer"
-                },
-                "status": {
-                    "description": "The status of the response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseStatus"
-                        }
-                    ]
+                "provider_original_model_id": {
+                    "type": "string"
                 },
-                "stop": {
-                    "description": "The stop sequences that were used for this response.",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
+                "provider_vendor": {
+                    "type": "string"
                 },
-                "store": {
+                "supports_audio": {
                     "type": "boolean"
                 },
-                "stream": {
-                    "description": "Whether the response was streamed.",
+                "supports_embeddings": {
                     "type": "boolean"
                 },
-                "system_prompt": {
-                    "description": "The system prompt that was used for this response.",
-                    "type": "string"
-                },
-                "temperature": {
-                    "description": "The temperature that was used for this response.",
-                    "type": "number"
-                },
-                "text": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.TextFormat"
+                "supports_images": {
+                    "type": "boolean"
                 },
-                "timeout": {
-                    "description": "The timeout in seconds that was used for this response.",
-                    "type": "integer"
+                "supports_reasoning": {
+                    "type": "boolean"
                 },
-                "tool_choice": {
-                    "description": "The tool choice that was used for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.ToolChoice"
-                        }
-                    ]
+                "supports_video": {
+                    "type": "boolean"
                 },
-                "tools": {
-                    "description": "The tools that were used for this response.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_requests.Tool"
-                    }
+                "token_limits": {
+                    "$ref": "#/definitions/model.TokenLimits"
                 },
-                "top_k": {
-                    "description": "The top_k that was used for this response.",
+                "updated_at": {
                     "type": "integer"
-                },
-                "top_p": {
-                    "description": "The top_p that was used for this response.",
-                    "type": "number"
-                },
-                "truncation": {
-                    "type": "string"
-                },
-                "usage": {
-                    "description": "The usage statistics for this response.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.DetailedUsage"
-                        }
-                    ]
-                },
-                "user": {
-                    "description": "The user that was provided for this response.",
-                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseError": {
+        "modelresponses.ProviderResponse": {
             "type": "object",
             "properties": {
-                "code": {
-                    "description": "The error code.",
+                "active": {
+                    "type": "boolean"
+                },
+                "base_url": {
+                    "type": "string"
+                },
+                "id": {
                     "type": "string"
                 },
-                "details": {
-                    "description": "The error details.",
+                "metadata": {
                     "type": "object",
-                    "additionalProperties": {}
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "message": {
-                    "description": "The error message.",
+                "name": {
+                    "type": "string"
+                },
+                "vendor": {
                     "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.ResponseStatus": {
-            "type": "string",
-            "enum": [
-                "pending",
-                "running",
-                "completed",
-                "cancelled",
-                "failed"
-            ],
-            "x-enum-varnames": [
-                "ResponseStatusPending",
-                "ResponseStatusRunning",
-                "ResponseStatusCompleted",
-                "ResponseStatusCancelled",
-                "ResponseStatusFailed"
-            ]
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.TextFormat": {
-            "type": "object",
-            "properties": {
-                "format": {
-                    "description": "The format type.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses.FormatType"
-                        }
-                    ]
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses.TokenDetails": {
+        "modelresponses.ProviderResponseList": {
             "type": "object",
             "properties": {
-                "cached_tokens": {
-                    "description": "The number of cached tokens.",
-                    "type": "integer"
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/modelresponses.ProviderResponse"
+                    }
                 },
-                "reasoning_tokens": {
-                    "description": "The number of reasoning tokens.",
-                    "type": "integer"
+                "object": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.DeleteResponse": {
+        "modelresponses.ProviderWithModelCountResponse": {
             "type": "object",
             "properties": {
-                "deleted": {
+                "active": {
                     "type": "boolean"
                 },
-                "id": {
+                "base_url": {
                     "type": "string"
                 },
-                "object": {
+                "id": {
                     "type": "string"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ConversationItemResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ConversationItemResponse"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
                     }
                 },
-                "first_id": {
-                    "type": "string"
+                "model_active_count": {
+                    "type": "integer"
                 },
-                "has_more": {
-                    "type": "boolean"
+                "model_count": {
+                    "type": "integer"
                 },
-                "last_id": {
+                "name": {
                     "type": "string"
                 },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
+                "vendor": {
+                    "type": "string"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_conversations_ExtendedConversationResponse": {
+        "modelresponses.ProviderWithModelsResponse": {
             "type": "object",
             "properties": {
-                "data": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_conversations.ExtendedConversationResponse"
-                    }
+                "active": {
+                    "type": "boolean"
                 },
-                "first_id": {
+                "base_url": {
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "id": {
                     "type": "string"
                 },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
                 },
-                "total": {
-                    "type": "integer"
-                }
-            }
-        },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ListResponse-app_interfaces_http_routes_v1_organization_invites_InviteResponse": {
-            "type": "object",
-            "properties": {
-                "data": {
+                "models": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/app_interfaces_http_routes_v1_organization_invites.InviteResponse"
+                        "$ref": "#/definitions/modelresponses.ModelResponse"
                     }
                 },
-                "first_id": {
+                "name": {
                     "type": "string"
                 },
-                "has_more": {
-                    "type": "boolean"
-                },
-                "last_id": {
+                "vendor": {
                     "type": "string"
-                },
-                "object": {
-                    "$ref": "#/definitions/menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList"
-                },
-                "total": {
-                    "type": "integer"
                 }
             }
         },
-        "menlo_ai_jan-api-gateway_app_interfaces_http_responses_openai.ObjectTypeList": {
-            "type": "string",
-            "enum": [
-                "list"
-            ],
-            "x-enum-varnames": [
-                "ObjectTypeListList"
-            ]
-        },
         "openai.ChatCompletionChoice": {
             "type": "object",
             "properties": {
@@ -4164,223 +4079,38 @@
                     "type": "string"
                 },
                 "function_call": {
-                    "$ref": "#/definitions/openai.FunctionCall"
-                },
-                "multiContent": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ChatMessagePart"
-                    }
-                },
-                "name": {
-                    "description": "This property isn't in the official documentation, but it's in\nthe documentation for the official library for python:\n- https://github.com/openai/openai-python/blob/main/chatml.md\n- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
-                    "type": "string"
-                },
-                "reasoning_content": {
-                    "description": "This property is used for the \"reasoning\" feature supported by deepseek-reasoner\nwhich is not in the official documentation.\nthe doc from deepseek:\n- https://api-docs.deepseek.com/api/create-chat-completion#responses",
-                    "type": "string"
-                },
-                "refusal": {
-                    "type": "string"
-                },
-                "role": {
-                    "type": "string"
-                },
-                "tool_call_id": {
-                    "description": "For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.",
-                    "type": "string"
-                },
-                "tool_calls": {
-                    "description": "For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ToolCall"
-                    }
-                }
-            }
-        },
-        "openai.ChatCompletionRequest": {
-            "type": "object",
-            "properties": {
-                "chat_template_kwargs": {
-                    "description": "ChatTemplateKwargs provides a way to add non-standard parameters to the request body.\nAdditional kwargs to pass to the template renderer. Will be accessible by the chat template.\nSuch as think mode for qwen3. \"chat_template_kwargs\": {\"enable_thinking\": false}\nhttps://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes",
-                    "type": "object",
-                    "additionalProperties": {}
-                },
-                "frequency_penalty": {
-                    "type": "number"
-                },
-                "function_call": {
-                    "description": "Deprecated: use ToolChoice instead."
-                },
-                "functions": {
-                    "description": "Deprecated: use Tools instead.",
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.FunctionDefinition"
-                    }
-                },
-                "guided_choice": {
-                    "description": "GuidedChoice is a vLLM-specific extension that restricts the model's output\nto one of the predefined string choices provided in this field. This feature\nis used to constrain the model's responses to a controlled set of options,\nensuring predictable and consistent outputs in scenarios where specific\nchoices are required.",
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
-                "logit_bias": {
-                    "description": "LogitBias is must be a token id string (specified by their token ID in the tokenizer), not a word string.\nincorrect: `\"logit_bias\":{\"You\": 6}`, correct: `\"logit_bias\":{\"1639\": 6}`\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "integer"
-                    }
-                },
-                "logprobs": {
-                    "description": "LogProbs indicates whether to return log probabilities of the output tokens or not.\nIf true, returns the log probabilities of each output token returned in the content of message.\nThis option is currently not available on the gpt-4-vision-preview model.",
-                    "type": "boolean"
-                },
-                "max_completion_tokens": {
-                    "description": "MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,\nincluding visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning",
-                    "type": "integer"
-                },
-                "max_tokens": {
-                    "description": "MaxTokens The maximum number of tokens that can be generated in the chat completion.\nThis value can be used to control costs for text generated via API.\nDeprecated: use MaxCompletionTokens. Not compatible with o1-series models.\nrefs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens",
-                    "type": "integer"
-                },
-                "messages": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionMessage"
-                    }
-                },
-                "metadata": {
-                    "description": "Metadata to store with the completion.",
-                    "type": "object",
-                    "additionalProperties": {
-                        "type": "string"
-                    }
-                },
-                "model": {
-                    "type": "string"
-                },
-                "n": {
-                    "type": "integer"
-                },
-                "parallel_tool_calls": {
-                    "description": "Disable the default behavior of parallel tool calls by setting it: false."
-                },
-                "prediction": {
-                    "description": "Configuration for a predicted output.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.Prediction"
-                        }
-                    ]
-                },
-                "presence_penalty": {
-                    "type": "number"
-                },
-                "reasoning_effort": {
-                    "description": "Controls effort on reasoning for reasoning models. It can be set to \"low\", \"medium\", or \"high\".",
-                    "type": "string"
-                },
-                "response_format": {
-                    "$ref": "#/definitions/openai.ChatCompletionResponseFormat"
-                },
-                "safety_identifier": {
-                    "description": "A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.\nThe IDs should be a string that uniquely identifies each user.\nWe recommend hashing their username or email address, in order to avoid sending us any identifying information.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier",
-                    "type": "string"
-                },
-                "seed": {
-                    "type": "integer"
-                },
-                "service_tier": {
-                    "description": "Specifies the latency tier to use for processing the request.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.ServiceTier"
-                        }
-                    ]
-                },
-                "stop": {
-                    "type": "array",
-                    "items": {
-                        "type": "string"
-                    }
-                },
-                "store": {
-                    "description": "Store can be set to true to store the output of this completion request for use in distillations and evals.\nhttps://platform.openai.com/docs/api-reference/chat/create#chat-create-store",
-                    "type": "boolean"
-                },
-                "stream": {
-                    "type": "boolean"
-                },
-                "stream_options": {
-                    "description": "Options for streaming response. Only set this when you set stream: true.",
-                    "allOf": [
-                        {
-                            "$ref": "#/definitions/openai.StreamOptions"
-                        }
-                    ]
-                },
-                "temperature": {
-                    "type": "number"
-                },
-                "tool_choice": {
-                    "description": "This can be either a string or an ToolChoice object."
-                },
-                "tools": {
-                    "type": "array",
-                    "items": {
-                        "$ref": "#/definitions/openai.Tool"
-                    }
-                },
-                "top_logprobs": {
-                    "description": "TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each\ntoken position, each with an associated log probability.\nlogprobs must be set to true if this parameter is used.",
-                    "type": "integer"
-                },
-                "top_p": {
-                    "type": "number"
-                },
-                "user": {
-                    "type": "string"
-                }
-            }
-        },
-        "openai.ChatCompletionResponse": {
-            "type": "object",
-            "properties": {
-                "choices": {
+                    "$ref": "#/definitions/openai.FunctionCall"
+                },
+                "multiContent": {
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.ChatCompletionChoice"
+                        "$ref": "#/definitions/openai.ChatMessagePart"
                     }
                 },
-                "created": {
-                    "type": "integer"
+                "name": {
+                    "description": "This property isn't in the official documentation, but it's in\nthe documentation for the official library for python:\n- https://github.com/openai/openai-python/blob/main/chatml.md\n- https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb",
+                    "type": "string"
                 },
-                "id": {
+                "reasoning_content": {
+                    "description": "This property is used for the \"reasoning\" feature supported by deepseek-reasoner\nwhich is not in the official documentation.\nthe doc from deepseek:\n- https://api-docs.deepseek.com/api/create-chat-completion#responses",
                     "type": "string"
                 },
-                "model": {
+                "refusal": {
                     "type": "string"
                 },
-                "object": {
+                "role": {
                     "type": "string"
                 },
-                "prompt_filter_results": {
+                "tool_call_id": {
+                    "description": "For Role=tool prompts this should be set to the ID given in the assistant's prior request to call a tool.",
+                    "type": "string"
+                },
+                "tool_calls": {
+                    "description": "For Role=assistant prompts this may be set to the tool calls generated by the model, such as function calls.",
                     "type": "array",
                     "items": {
-                        "$ref": "#/definitions/openai.PromptFilterResult"
+                        "$ref": "#/definitions/openai.ToolCall"
                     }
-                },
-                "service_tier": {
-                    "$ref": "#/definitions/openai.ServiceTier"
-                },
-                "system_fingerprint": {
-                    "type": "string"
-                },
-                "usage": {
-                    "$ref": "#/definitions/openai.Usage"
                 }
             }
         },
@@ -4794,6 +4524,296 @@
                     "type": "string"
                 }
             }
+        },
+        "projectreq.CreateProjectRequest": {
+            "type": "object",
+            "required": [
+                "name"
+            ],
+            "properties": {
+                "instruction": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "projectreq.UpdateProjectRequest": {
+            "type": "object",
+            "properties": {
+                "instruction": {
+                    "type": "string"
+                },
+                "is_archived": {
+                    "type": "boolean"
+                },
+                "is_favorite": {
+                    "type": "boolean"
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "projectres.ProjectDeletedResponse": {
+            "type": "object",
+            "properties": {
+                "deleted": {
+                    "type": "boolean"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                }
+            }
+        },
+        "projectres.ProjectListResponse": {
+            "type": "object",
+            "properties": {
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/projectres.ProjectResponse"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "next_cursor": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "total": {
+                    "type": "integer"
+                }
+            }
+        },
+        "projectres.ProjectResponse": {
+            "type": "object",
+            "properties": {
+                "archived_at": {
+                    "type": "integer"
+                },
+                "created_at": {
+                    "type": "integer"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "instruction": {
+                    "type": "string"
+                },
+                "is_archived": {
+                    "type": "boolean"
+                },
+                "is_favorite": {
+                    "type": "boolean"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "updated_at": {
+                    "type": "integer"
+                }
+            }
+        },
+        "requestmodels.AddProviderRequest": {
+            "type": "object",
+            "required": [
+                "base_url",
+                "name",
+                "vendor"
+            ],
+            "properties": {
+                "active": {
+                    "type": "boolean"
+                },
+                "api_key": {
+                    "type": "string"
+                },
+                "base_url": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "name": {
+                    "type": "string"
+                },
+                "vendor": {
+                    "type": "string"
+                }
+            }
+        },
+        "requestmodels.BulkEnableModelsRequest": {
+            "type": "object",
+            "required": [
+                "enable"
+            ],
+            "properties": {
+                "enable": {
+                    "description": "Required: true to enable, false to disable",
+                    "type": "boolean"
+                },
+                "except_models": {
+                    "description": "List of model keys to exclude",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "provider_id": {
+                    "description": "Optional: filter by provider",
+                    "type": "string",
+                    "minLength": 1
+                }
+            }
+        },
+        "requestmodels.BulkToggleCatalogsRequest": {
+            "type": "object",
+            "required": [
+                "enable"
+            ],
+            "properties": {
+                "catalog_ids": {
+                    "description": "Optional: specific catalog public IDs. If empty, applies to all catalogs",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "enable": {
+                    "description": "Required: true to enable, false to disable",
+                    "type": "boolean"
+                },
+                "except_models": {
+                    "description": "List of model keys to exclude from the operation",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                }
+            }
+        },
+        "requestmodels.UpdateModelCatalogRequest": {
+            "type": "object",
+            "properties": {
+                "architecture": {
+                    "$ref": "#/definitions/model.Architecture"
+                },
+                "extras": {
+                    "type": "object",
+                    "additionalProperties": {}
+                },
+                "is_moderated": {
+                    "type": "boolean"
+                },
+                "notes": {
+                    "type": "string"
+                },
+                "supported_parameters": {
+                    "$ref": "#/definitions/model.SupportedParameters"
+                },
+                "tags": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                }
+            }
+        },
+        "requestmodels.UpdateProviderModelRequest": {
+            "type": "object",
+            "properties": {
+                "active": {
+                    "type": "boolean"
+                },
+                "display_name": {
+                    "type": "string"
+                },
+                "family": {
+                    "type": "string"
+                },
+                "pricing": {
+                    "$ref": "#/definitions/model.Pricing"
+                },
+                "supports_audio": {
+                    "type": "boolean"
+                },
+                "supports_embeddings": {
+                    "type": "boolean"
+                },
+                "supports_images": {
+                    "type": "boolean"
+                },
+                "supports_reasoning": {
+                    "type": "boolean"
+                },
+                "supports_video": {
+                    "type": "boolean"
+                },
+                "token_limits": {
+                    "$ref": "#/definitions/model.TokenLimits"
+                }
+            }
+        },
+        "requestmodels.UpdateProviderRequest": {
+            "type": "object",
+            "properties": {
+                "active": {
+                    "type": "boolean"
+                },
+                "api_key": {
+                    "type": "string"
+                },
+                "base_url": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "responses.ErrorResponse": {
+            "type": "object",
+            "properties": {
+                "code": {
+                    "description": "UUID from PlatformError",
+                    "type": "string"
+                },
+                "error": {
+                    "type": "string"
+                },
+                "message": {
+                    "type": "string"
+                },
+                "request_id": {
+                    "type": "string"
+                }
+            }
         }
     },
     "securityDefinitions": {
diff --git a/services/llm-api/docs/swagger/swagger.yaml b/services/llm-api/docs/swagger/swagger.yaml
new file mode 100644
index 00000000..5ede38bc
--- /dev/null
+++ b/services/llm-api/docs/swagger/swagger.yaml
@@ -0,0 +1,3423 @@
+basePath: /
+definitions:
+  chatrequests.ChatCompletionRequest:
+    properties:
+      chat_template_kwargs:
+        additionalProperties: {}
+        description: |-
+          ChatTemplateKwargs provides a way to add non-standard parameters to the request body.
+          Additional kwargs to pass to the template renderer. Will be accessible by the chat template.
+          Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
+          https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
+        type: object
+      conversation:
+        allOf:
+        - $ref: '#/definitions/chatrequests.ConversationReference'
+        description: |-
+          Conversation can be either a string (conversation ID) or a conversation object
+          Items from this conversation are prepended to Messages for this response request.
+          Input items and output items from this response are automatically added to this conversation after completion.
+      frequency_penalty:
+        type: number
+      function_call:
+        description: 'Deprecated: use ToolChoice instead.'
+      functions:
+        description: 'Deprecated: use Tools instead.'
+        items:
+          $ref: '#/definitions/openai.FunctionDefinition'
+        type: array
+      guided_choice:
+        description: |-
+          GuidedChoice is a vLLM-specific extension that restricts the model's output
+          to one of the predefined string choices provided in this field. This feature
+          is used to constrain the model's responses to a controlled set of options,
+          ensuring predictable and consistent outputs in scenarios where specific
+          choices are required.
+        items:
+          type: string
+        type: array
+      logit_bias:
+        additionalProperties:
+          type: integer
+        description: |-
+          LogitBias keys must be token ID strings (specified by their token ID in the tokenizer), not word strings.
+          incorrect: `"logit_bias":{"You": 6}`, correct: `"logit_bias":{"1639": 6}`
+          refs: https://platform.openai.com/docs/api-reference/chat/create#chat/create-logit_bias
+        type: object
+      logprobs:
+        description: |-
+          LogProbs indicates whether to return log probabilities of the output tokens or not.
+          If true, returns the log probabilities of each output token returned in the content of message.
+          This option is currently not available on the gpt-4-vision-preview model.
+        type: boolean
+      max_completion_tokens:
+        description: |-
+          MaxCompletionTokens An upper bound for the number of tokens that can be generated for a completion,
+          including visible output tokens and reasoning tokens https://platform.openai.com/docs/guides/reasoning
+        type: integer
+      max_tokens:
+        description: |-
+          MaxTokens The maximum number of tokens that can be generated in the chat completion.
+          This value can be used to control costs for text generated via API.
+          Deprecated: use MaxCompletionTokens. Not compatible with o1-series models.
+          refs: https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens
+        type: integer
+      messages:
+        items:
+          $ref: '#/definitions/openai.ChatCompletionMessage'
+        type: array
+      metadata:
+        additionalProperties:
+          type: string
+        description: Metadata to store with the completion.
+        type: object
+      model:
+        type: string
+      "n":
+        type: integer
+      parallel_tool_calls:
+        description: 'Disable the default behavior of parallel tool calls by setting
+          it to false.'
+      prediction:
+        allOf:
+        - $ref: '#/definitions/openai.Prediction'
+        description: Configuration for a predicted output.
+      presence_penalty:
+        type: number
+      reasoning_effort:
+        description: Controls effort on reasoning for reasoning models. It can be
+          set to "low", "medium", or "high".
+        type: string
+      response_format:
+        $ref: '#/definitions/openai.ChatCompletionResponseFormat'
+      safety_identifier:
+        description: |-
+          A stable identifier used to help detect users of your application that may be violating OpenAI's usage policies.
+          The IDs should be a string that uniquely identifies each user.
+          We recommend hashing their username or email address, in order to avoid sending us any identifying information.
+          https://platform.openai.com/docs/api-reference/chat/create#chat_create-safety_identifier
+        type: string
+      seed:
+        type: integer
+      service_tier:
+        allOf:
+        - $ref: '#/definitions/openai.ServiceTier'
+        description: Specifies the latency tier to use for processing the request.
+      stop:
+        items:
+          type: string
+        type: array
+      store:
+        description: Store controls whether the latest input and generated response
+          should be persisted
+        type: boolean
+      store_reasoning:
+        description: StoreReasoning controls whether reasoning content (if present)
+          should also be persisted
+        type: boolean
+      stream:
+        type: boolean
+      stream_options:
+        allOf:
+        - $ref: '#/definitions/openai.StreamOptions'
+        description: 'Options for streaming response. Only set this when you set stream:
+          true.'
+      temperature:
+        type: number
+      tool_choice:
+        description: This can be either a string or a ToolChoice object.
+      tools:
+        items:
+          $ref: '#/definitions/openai.Tool'
+        type: array
+      top_logprobs:
+        description: |-
+          TopLogProbs is an integer between 0 and 5 specifying the number of most likely tokens to return at each
+          token position, each with an associated log probability.
+          logprobs must be set to true if this parameter is used.
+        type: integer
+      top_p:
+        type: number
+      user:
+        type: string
+      verbosity:
+        description: |-
+          Verbosity determines how many output tokens are generated. Lowering the number of
+          tokens reduces overall latency. It can be set to "low", "medium", or "high".
+          Note: This field is only confirmed to work with gpt-5, gpt-5-mini and gpt-5-nano.
+          Also, it is not in the API reference of chat completion at the time of writing,
+          though it is supported by the API.
+        type: string
+    type: object
+  chatrequests.ConversationReference:
+    type: object
+  chatresponses.ChatCompletionResponse:
+    properties:
+      choices:
+        items:
+          $ref: '#/definitions/openai.ChatCompletionChoice'
+        type: array
+      conversation:
+        $ref: '#/definitions/chatresponses.ConversationContext'
+      created:
+        type: integer
+      id:
+        type: string
+      model:
+        type: string
+      object:
+        type: string
+      prompt_filter_results:
+        items:
+          $ref: '#/definitions/openai.PromptFilterResult'
+        type: array
+      service_tier:
+        $ref: '#/definitions/openai.ServiceTier'
+      system_fingerprint:
+        type: string
+      usage:
+        $ref: '#/definitions/openai.Usage'
+    type: object
+  chatresponses.ConversationContext:
+    properties:
+      id:
+        description: The unique ID of the conversation
+        type: string
+      title:
+        description: The title of the conversation (optional)
+        type: string
+    type: object
+  conversation.Annotation:
+    properties:
+      bounding_box:
+        allOf:
+        - $ref: '#/definitions/conversation.BBox'
+        description: Bounding box for image/PDF annotations
+      confidence:
+        description: Citation confidence score (0.0-1.0)
+        type: number
+      container_id:
+        description: Document container reference
+        type: string
+      end_index:
+        description: End position in text
+        type: integer
+      file_id:
+        description: For file citations
+        type: string
+      filename:
+        description: File name for citations
+        type: string
+      index:
+        description: Citation index
+        type: integer
+      page_number:
+        description: Page reference for documents
+        type: integer
+      quote:
+        description: Actual quoted text from source
+        type: string
+      start_index:
+        description: Start position in text
+        type: integer
+      text:
+        description: Display text
+        type: string
+      type:
+        description: '"file_citation", "url_citation", "file_path", etc.'
+        type: string
+      url:
+        description: For URL citations
+        type: string
+    type: object
+  conversation.AudioContent:
+    properties:
+      data:
+        description: Base64 encoded audio data
+        type: string
+      format:
+        description: 'Audio format: mp3, wav, pcm16, etc.'
+        type: string
+      id:
+        type: string
+      transcript:
+        description: Text transcription of audio
+        type: string
+    type: object
+  conversation.BBox:
+    properties:
+      height:
+        type: number
+      width:
+        type: number
+      x:
+        type: number
+      "y":
+        type: number
+    type: object
+  conversation.CodeContent:
+    properties:
+      code:
+        description: Code content
+        type: string
+      error:
+        description: Execution error
+        type: string
+      execution_id:
+        description: Execution session ID
+        type: string
+      exit_code:
+        description: Process exit code
+        type: integer
+      language:
+        description: Programming language
+        type: string
+      metadata:
+        additionalProperties: {}
+        description: Additional metadata
+        type: object
+      output:
+        description: Execution output
+        type: string
+    type: object
+  conversation.ComputerAction:
+    properties:
+      action:
+        description: 'Action type: "click", "type", "key", "scroll", "move", etc.'
+        type: string
+      coordinates:
+        allOf:
+        - $ref: '#/definitions/conversation.Coordinates'
+        description: Screen coordinates for mouse actions
+      key:
+        description: Key for keyboard actions
+        type: string
+      metadata:
+        additionalProperties: {}
+        description: Additional action metadata
+        type: object
+      scroll_delta:
+        allOf:
+        - $ref: '#/definitions/conversation.ScrollDelta'
+        description: Scroll amount
+      text:
+        description: Text for typing actions
+        type: string
+    type: object
+  conversation.Content:
+    properties:
+      audio:
+        allOf:
+        - $ref: '#/definitions/conversation.AudioContent'
+        description: Audio content for speech
+      code:
+        allOf:
+        - $ref: '#/definitions/conversation.CodeContent'
+        description: Code block with execution metadata
+      computer_action:
+        allOf:
+        - $ref: '#/definitions/conversation.ComputerAction'
+        description: Computer interaction details
+      computer_screenshot:
+        allOf:
+        - $ref: '#/definitions/conversation.ScreenshotContent'
+        description: Screenshot from computer use
+      file:
+        allOf:
+        - $ref: '#/definitions/conversation.FileContent'
+        description: File content
+      finish_reason:
+        description: Finish reason
+        type: string
+      function_call:
+        allOf:
+        - $ref: '#/definitions/conversation.FunctionCall'
+        description: Function call content (deprecated, use tool_calls)
+      function_call_output:
+        allOf:
+        - $ref: '#/definitions/conversation.FunctionCallOut'
+        description: Function call output
+      image:
+        allOf:
+        - $ref: '#/definitions/conversation.ImageContent'
+        description: Image content
+      input_audio:
+        allOf:
+        - $ref: '#/definitions/conversation.InputAudio'
+        description: User audio input
+      input_text:
+        description: User input text (simple)
+        type: string
+      output_text:
+        allOf:
+        - $ref: '#/definitions/conversation.OutputText'
+        description: AI output text (with annotations)
+      reasoning_content:
+        description: AI reasoning content
+        type: string
+      refusal:
+        description: Model refusal message
+        type: string
+      summary_text:
+        description: Summary content
+        type: string
+      text:
+        allOf:
+        - $ref: '#/definitions/conversation.Text'
+        description: Generic text content
+      thinking:
+        description: Internal reasoning (o1 models)
+        type: string
+      tool_call_id:
+        description: Tool call ID (for tool responses)
+        type: string
+      tool_calls:
+        description: Tool calls (for assistant messages)
+        items:
+          $ref: '#/definitions/conversation.ToolCall'
+        type: array
+      type:
+        type: string
+    type: object
+  conversation.Coordinates:
+    properties:
+      x:
+        type: integer
+      "y":
+        type: integer
+    type: object
+  conversation.FileContent:
+    properties:
+      file_id:
+        type: string
+      mime_type:
+        type: string
+      name:
+        type: string
+      size:
+        type: integer
+    type: object
+  conversation.FunctionCall:
+    properties:
+      arguments:
+        description: JSON-encoded arguments
+        type: string
+      id:
+        description: Call ID
+        type: string
+      name:
+        description: Function name
+        type: string
+    type: object
+  conversation.FunctionCallOut:
+    properties:
+      call_id:
+        description: ID of the function call this responds to
+        type: string
+      output:
+        description: String output from the function
+        type: string
+    type: object
+  conversation.ImageContent:
+    properties:
+      detail:
+        description: '"low", "high", "auto"'
+        type: string
+      file_id:
+        type: string
+      url:
+        type: string
+    type: object
+  conversation.IncompleteDetails:
+    properties:
+      error:
+        description: Error message if applicable
+        type: string
+      reason:
+        description: '"max_tokens", "content_filter", "tool_calls", etc.'
+        type: string
+    type: object
+  conversation.InputAudio:
+    properties:
+      data:
+        description: Base64 encoded audio data
+        type: string
+      format:
+        description: 'Audio format: mp3, wav, pcm16, etc.'
+        type: string
+      transcript:
+        description: Optional text transcription
+        type: string
+    type: object
+  conversation.Item:
+    properties:
+      branch:
+        description: Branch identifier (MAIN, EDIT_1, etc.)
+        type: string
+      completed_at:
+        type: string
+      content:
+        items:
+          $ref: '#/definitions/conversation.Content'
+        type: array
+      created_at:
+        type: string
+      id:
+        type: string
+      incomplete_at:
+        type: string
+      incomplete_details:
+        $ref: '#/definitions/conversation.IncompleteDetails'
+      object:
+        description: Always "conversation.item" for OpenAI compatibility
+        type: string
+      rated_at:
+        description: When rating was given
+        type: string
+      rating:
+        allOf:
+        - $ref: '#/definitions/conversation.ItemRating'
+        description: User feedback/rating
+      rating_comment:
+        description: Optional comment with rating
+        type: string
+      role:
+        $ref: '#/definitions/conversation.ItemRole'
+      sequence_number:
+        description: Order within branch
+        type: integer
+      status:
+        $ref: '#/definitions/conversation.ItemStatus'
+      type:
+        $ref: '#/definitions/conversation.ItemType'
+    type: object
+  conversation.ItemRating:
+    enum:
+    - like
+    - unlike
+    type: string
+    x-enum-comments:
+      ItemRatingLike: Positive feedback (like)
+      ItemRatingUnlike: Negative feedback (unlike)
+    x-enum-varnames:
+    - ItemRatingLike
+    - ItemRatingUnlike
+  conversation.ItemRole:
+    enum:
+    - system
+    - user
+    - assistant
+    - tool
+    - developer
+    - critic
+    - discriminator
+    - unknown
+    type: string
+    x-enum-comments:
+      ItemRoleCritic: For critique/evaluation workflows
+      ItemRoleDeveloper: System-level instructions (OpenAI replacement for system)
+      ItemRoleDiscriminator: For adversarial/validation workflows
+      ItemRoleUnknown: Fallback for unrecognized roles
+    x-enum-varnames:
+    - ItemRoleSystem
+    - ItemRoleUser
+    - ItemRoleAssistant
+    - ItemRoleTool
+    - ItemRoleDeveloper
+    - ItemRoleCritic
+    - ItemRoleDiscriminator
+    - ItemRoleUnknown
+  conversation.ItemStatus:
+    enum:
+    - incomplete
+    - in_progress
+    - completed
+    - failed
+    - cancelled
+    - searching
+    - generating
+    - calling
+    - streaming
+    - rate_limited
+    type: string
+    x-enum-comments:
+      ItemStatusCalling: Function/tool call in progress
+      ItemStatusCancelled: Cancelled by user or system
+      ItemStatusCompleted: Successfully finished
+      ItemStatusFailed: Failed with error
+      ItemStatusGenerating: Image generation in progress
+      ItemStatusInProgress: Currently processing
+      ItemStatusIncomplete: Not started or partially complete (OpenAI uses this instead
+        of "pending")
+      ItemStatusRateLimited: Rate limit hit
+      ItemStatusSearching: File/web search in progress
+      ItemStatusStreaming: Streaming response in progress
+    x-enum-varnames:
+    - ItemStatusIncomplete
+    - ItemStatusInProgress
+    - ItemStatusCompleted
+    - ItemStatusFailed
+    - ItemStatusCancelled
+    - ItemStatusSearching
+    - ItemStatusGenerating
+    - ItemStatusCalling
+    - ItemStatusStreaming
+    - ItemStatusRateLimited
+  conversation.ItemType:
+    enum:
+    - message
+    - function_call
+    - function_call_output
+    - reasoning
+    - file_search
+    - web_search
+    - code_interpreter
+    - computer_use
+    - custom_tool_call
+    - mcp_item
+    - image_generation
+    type: string
+    x-enum-comments:
+      ItemTypeCodeInterpreter: Code execution
+      ItemTypeComputerUse: Computer interaction
+      ItemTypeCustomToolCall: Custom tool invocations
+      ItemTypeFileSearch: RAG/retrieval operations
+      ItemTypeImageGeneration: DALL-E image generation
+      ItemTypeMCPItem: Model Context Protocol items
+      ItemTypeReasoning: For o1/reasoning models
+      ItemTypeWebSearch: Web browsing operations
+    x-enum-varnames:
+    - ItemTypeMessage
+    - ItemTypeFunctionCall
+    - ItemTypeFunctionCallOut
+    - ItemTypeReasoning
+    - ItemTypeFileSearch
+    - ItemTypeWebSearch
+    - ItemTypeCodeInterpreter
+    - ItemTypeComputerUse
+    - ItemTypeCustomToolCall
+    - ItemTypeMCPItem
+    - ItemTypeImageGeneration
+  conversation.LogProb:
+    properties:
+      bytes:
+        items:
+          type: integer
+        type: array
+      logprob:
+        type: number
+      token:
+        type: string
+      top_logprobs:
+        items:
+          $ref: '#/definitions/conversation.TopLogProb'
+        type: array
+    type: object
+  conversation.OutputText:
+    properties:
+      annotations:
+        description: Required for OpenAI compatibility
+        items:
+          $ref: '#/definitions/conversation.Annotation'
+        type: array
+      logprobs:
+        description: Token probabilities
+        items:
+          $ref: '#/definitions/conversation.LogProb'
+        type: array
+      text:
+        type: string
+    type: object
+  conversation.ScreenshotContent:
+    properties:
+      description:
+        description: Optional description
+        type: string
+      height:
+        description: Image height in pixels
+        type: integer
+      image_data:
+        description: Base64 encoded image data
+        type: string
+      image_url:
+        description: URL to screenshot image
+        type: string
+      timestamp:
+        description: Unix timestamp when screenshot was taken
+        type: integer
+      width:
+        description: Image width in pixels
+        type: integer
+    type: object
+  conversation.ScrollDelta:
+    properties:
+      x:
+        type: integer
+      "y":
+        type: integer
+    type: object
+  conversation.Text:
+    properties:
+      annotations:
+        items:
+          $ref: '#/definitions/conversation.Annotation'
+        type: array
+      text:
+        description: Changed from "value" to match OpenAI spec
+        type: string
+    type: object
+  conversation.ToolCall:
+    properties:
+      function:
+        $ref: '#/definitions/conversation.FunctionCall'
+      id:
+        type: string
+      type:
+        description: '"function", "file_search", "code_interpreter"'
+        type: string
+    type: object
+  conversation.TopLogProb:
+    properties:
+      bytes:
+        items:
+          type: integer
+        type: array
+      logprob:
+        type: number
+      token:
+        type: string
+    type: object
+  conversationrequests.CreateConversationRequest:
+    properties:
+      items:
+        items:
+          $ref: '#/definitions/conversation.Item'
+        type: array
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      project_id:
+        type: string
+      referrer:
+        type: string
+      title:
+        type: string
+    type: object
+  conversationrequests.CreateItemsRequest:
+    properties:
+      items:
+        items:
+          $ref: '#/definitions/conversation.Item'
+        type: array
+    required:
+    - items
+    type: object
+  conversationrequests.UpdateConversationRequest:
+    properties:
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      referrer:
+        type: string
+      title:
+        type: string
+    type: object
+  conversationresponses.ConversationDeletedResponse:
+    properties:
+      deleted:
+        type: boolean
+      id:
+        type: string
+      object:
+        type: string
+    type: object
+  conversationresponses.ConversationItemCreatedResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/conversation.Item'
+        type: array
+      first_id:
+        type: string
+      has_more:
+        type: boolean
+      last_id:
+        type: string
+      object:
+        type: string
+    type: object
+  conversationresponses.ConversationListResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/conversationresponses.ConversationResponse'
+        type: array
+      first_id:
+        type: string
+      has_more:
+        type: boolean
+      last_id:
+        type: string
+      object:
+        type: string
+      total:
+        type: integer
+    type: object
+  conversationresponses.ConversationResponse:
+    properties:
+      created_at:
+        type: integer
+      id:
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      object:
+        type: string
+      project_id:
+        type: string
+      referrer:
+        type: string
+      title:
+        type: string
+    type: object
+  conversationresponses.ItemListResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/conversation.Item'
+        type: array
+      first_id:
+        type: string
+      has_more:
+        type: boolean
+      last_id:
+        type: string
+      object:
+        type: string
+    type: object
+  conversationresponses.ItemResponse:
+    properties:
+      branch:
+        description: Branch identifier (MAIN, EDIT_1, etc.)
+        type: string
+      completed_at:
+        type: string
+      content:
+        items:
+          $ref: '#/definitions/conversation.Content'
+        type: array
+      created_at:
+        type: string
+      id:
+        type: string
+      incomplete_at:
+        type: string
+      incomplete_details:
+        $ref: '#/definitions/conversation.IncompleteDetails'
+      object:
+        description: Always "conversation.item" for OpenAI compatibility
+        type: string
+      rated_at:
+        description: When rating was given
+        type: string
+      rating:
+        allOf:
+        - $ref: '#/definitions/conversation.ItemRating'
+        description: User feedback/rating
+      rating_comment:
+        description: Optional comment with rating
+        type: string
+      role:
+        $ref: '#/definitions/conversation.ItemRole'
+      sequence_number:
+        description: Order within branch
+        type: integer
+      status:
+        $ref: '#/definitions/conversation.ItemStatus'
+      type:
+        $ref: '#/definitions/conversation.ItemType'
+    type: object
+  model.Architecture:
+    properties:
+      input_modalities:
+        items:
+          type: string
+        type: array
+      instruct_type:
+        description: nullable
+        type: string
+      modality:
+        description: '"text+image->text"'
+        type: string
+      output_modalities:
+        items:
+          type: string
+        type: array
+      tokenizer:
+        description: '"GPT" / "SentencePiece" / etc.'
+        type: string
+    type: object
+  model.ModelCatalogStatus:
+    enum:
+    - init
+    - filled
+    - updated
+    type: string
+    x-enum-comments:
+      ModelCatalogStatusFilled: may update from Provider like OpenRouter
+      ModelCatalogStatusInit: default status when creating entry
+      ModelCatalogStatusUpdated: manually updated by admin (cannot be auto-updated
+        anymore)
+    x-enum-varnames:
+    - ModelCatalogStatusInit
+    - ModelCatalogStatusFilled
+    - ModelCatalogStatusUpdated
+  model.PriceLine:
+    properties:
+      amount_micro_usd:
+        description: e.g., 15000 -> $0.0150
+        type: integer
+      currency:
+        description: '"USD" (fixed if you only bill in USD)'
+        type: string
+      unit:
+        $ref: '#/definitions/model.PriceUnit'
+    type: object
+  model.PriceUnit:
+    enum:
+    - per_1k_prompt_tokens
+    - per_1k_completion_tokens
+    - per_request
+    - per_image
+    - per_web_search
+    - per_internal_reasoning
+    type: string
+    x-enum-varnames:
+    - Per1KPromptTokens
+    - Per1KCompletionTokens
+    - PerRequest
+    - PerImage
+    - PerWebSearch
+    - PerInternalReasoning
+  model.Pricing:
+    properties:
+      lines:
+        description: 'flexible: add/remove units without schema churn'
+        items:
+          $ref: '#/definitions/model.PriceLine'
+        type: array
+    type: object
+  model.SupportedParameters:
+    properties:
+      default:
+        additionalProperties:
+          type: number
+        description: temperature/top_p/frequency_penalty, null allowed
+        type: object
+      names:
+        description: e.g., ["include_reasoning","max_tokens",...]
+        items:
+          type: string
+        type: array
+    type: object
+  model.TokenLimits:
+    properties:
+      context_length:
+        description: e.g., 400000
+        type: integer
+      max_completion_tokens:
+        description: e.g., 128000
+        type: integer
+    type: object
+  modelresponses.BulkOperationResponse:
+    properties:
+      failed_count:
+        type: integer
+      failed_models:
+        items:
+          type: string
+        type: array
+      skipped_count:
+        type: integer
+      total_checked:
+        type: integer
+      updated_count:
+        type: integer
+    type: object
+  modelresponses.ModelCatalogResponse:
+    properties:
+      active:
+        type: boolean
+      architecture:
+        $ref: '#/definitions/model.Architecture'
+      created_at:
+        type: integer
+      extras:
+        additionalProperties: {}
+        type: object
+      id:
+        type: string
+      is_moderated:
+        type: boolean
+      last_synced_at:
+        type: integer
+      notes:
+        type: string
+      status:
+        $ref: '#/definitions/model.ModelCatalogStatus'
+      supported_parameters:
+        $ref: '#/definitions/model.SupportedParameters'
+      tags:
+        items:
+          type: string
+        type: array
+      updated_at:
+        type: integer
+    type: object
+  modelresponses.ModelResponse:
+    properties:
+      created:
+        type: integer
+      id:
+        type: string
+      object:
+        type: string
+      owned_by:
+        type: string
+    type: object
+  modelresponses.ModelResponseList:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/modelresponses.ModelResponse'
+        type: array
+      object:
+        type: string
+    type: object
+  modelresponses.ModelResponseWithProvider:
+    properties:
+      created:
+        type: integer
+      id:
+        type: string
+      object:
+        type: string
+      owned_by:
+        type: string
+      provider_id:
+        type: string
+      provider_name:
+        type: string
+      provider_vendor:
+        type: string
+    type: object
+  modelresponses.ModelWithProviderResponseList:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/modelresponses.ModelResponseWithProvider'
+        type: array
+      object:
+        type: string
+    type: object
+  modelresponses.ProviderModelResponse:
+    properties:
+      active:
+        type: boolean
+      created_at:
+        type: integer
+      display_name:
+        type: string
+      family:
+        type: string
+      id:
+        type: string
+      model_catalog_id:
+        type: string
+      model_public_id:
+        type: string
+      pricing:
+        $ref: '#/definitions/model.Pricing'
+      provider_id:
+        type: string
+      provider_original_model_id:
+        type: string
+      provider_vendor:
+        type: string
+      supports_audio:
+        type: boolean
+      supports_embeddings:
+        type: boolean
+      supports_images:
+        type: boolean
+      supports_reasoning:
+        type: boolean
+      supports_video:
+        type: boolean
+      token_limits:
+        $ref: '#/definitions/model.TokenLimits'
+      updated_at:
+        type: integer
+    type: object
+  modelresponses.ProviderResponse:
+    properties:
+      active:
+        type: boolean
+      base_url:
+        type: string
+      id:
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      name:
+        type: string
+      vendor:
+        type: string
+    type: object
+  modelresponses.ProviderResponseList:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/modelresponses.ProviderResponse'
+        type: array
+      object:
+        type: string
+    type: object
+  modelresponses.ProviderWithModelCountResponse:
+    properties:
+      active:
+        type: boolean
+      base_url:
+        type: string
+      id:
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      model_active_count:
+        type: integer
+      model_count:
+        type: integer
+      name:
+        type: string
+      vendor:
+        type: string
+    type: object
+  modelresponses.ProviderWithModelsResponse:
+    properties:
+      active:
+        type: boolean
+      base_url:
+        type: string
+      id:
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      models:
+        items:
+          $ref: '#/definitions/modelresponses.ModelResponse'
+        type: array
+      name:
+        type: string
+      vendor:
+        type: string
+    type: object
+  openai.ChatCompletionChoice:
+    properties:
+      content_filter_results:
+        $ref: '#/definitions/openai.ContentFilterResults'
+      finish_reason:
+        allOf:
+        - $ref: '#/definitions/openai.FinishReason'
+        description: |-
+          FinishReason
+          stop: API returned complete message, or a message terminated by one of the stop sequences provided via the stop parameter
+          length: Incomplete model output due to max_tokens parameter or token limit
+          function_call: The model decided to call a function
+          tool_calls: The model decided to call one or more tools
+          content_filter: Omitted content due to a flag from our content filters
+          null: API response still in progress or incomplete
+      index:
+        type: integer
+      logprobs:
+        $ref: '#/definitions/openai.LogProbs'
+      message:
+        $ref: '#/definitions/openai.ChatCompletionMessage'
+    type: object
+  openai.ChatCompletionMessage:
+    properties:
+      content:
+        type: string
+      function_call:
+        $ref: '#/definitions/openai.FunctionCall'
+      multiContent:
+        items:
+          $ref: '#/definitions/openai.ChatMessagePart'
+        type: array
+      name:
+        description: |-
+          This property isn't in the official documentation, but it's in
+          the documentation for the official library for python:
+          - https://github.com/openai/openai-python/blob/main/chatml.md
+          - https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+        type: string
+      reasoning_content:
+        description: |-
+          This property is used for the "reasoning" feature supported by deepseek-reasoner,
+          which is not in the official documentation.
+          See the DeepSeek documentation:
+          - https://api-docs.deepseek.com/api/create-chat-completion#responses
+        type: string
+      refusal:
+        type: string
+      role:
+        type: string
+      tool_call_id:
+        description: For Role=tool prompts this should be set to the ID given in the
+          assistant's prior request to call a tool.
+        type: string
+      tool_calls:
+        description: For Role=assistant prompts this may be set to the tool calls
+          generated by the model, such as function calls.
+        items:
+          $ref: '#/definitions/openai.ToolCall'
+        type: array
+    type: object
+  openai.ChatCompletionResponseFormat:
+    properties:
+      json_schema:
+        $ref: '#/definitions/openai.ChatCompletionResponseFormatJSONSchema'
+      type:
+        $ref: '#/definitions/openai.ChatCompletionResponseFormatType'
+    type: object
+  openai.ChatCompletionResponseFormatJSONSchema:
+    properties:
+      description:
+        type: string
+      name:
+        type: string
+      schema: {}
+      strict:
+        type: boolean
+    type: object
+  openai.ChatCompletionResponseFormatType:
+    enum:
+    - json_object
+    - json_schema
+    - text
+    type: string
+    x-enum-varnames:
+    - ChatCompletionResponseFormatTypeJSONObject
+    - ChatCompletionResponseFormatTypeJSONSchema
+    - ChatCompletionResponseFormatTypeText
+  openai.ChatMessageImageURL:
+    properties:
+      detail:
+        $ref: '#/definitions/openai.ImageURLDetail'
+      url:
+        type: string
+    type: object
+  openai.ChatMessagePart:
+    properties:
+      image_url:
+        $ref: '#/definitions/openai.ChatMessageImageURL'
+      text:
+        type: string
+      type:
+        $ref: '#/definitions/openai.ChatMessagePartType'
+    type: object
+  openai.ChatMessagePartType:
+    enum:
+    - text
+    - image_url
+    type: string
+    x-enum-varnames:
+    - ChatMessagePartTypeText
+    - ChatMessagePartTypeImageURL
+  openai.CompletionTokensDetails:
+    properties:
+      accepted_prediction_tokens:
+        type: integer
+      audio_tokens:
+        type: integer
+      reasoning_tokens:
+        type: integer
+      rejected_prediction_tokens:
+        type: integer
+    type: object
+  openai.ContentFilterResults:
+    properties:
+      hate:
+        $ref: '#/definitions/openai.Hate'
+      jailbreak:
+        $ref: '#/definitions/openai.JailBreak'
+      profanity:
+        $ref: '#/definitions/openai.Profanity'
+      self_harm:
+        $ref: '#/definitions/openai.SelfHarm'
+      sexual:
+        $ref: '#/definitions/openai.Sexual'
+      violence:
+        $ref: '#/definitions/openai.Violence'
+    type: object
+  openai.FinishReason:
+    enum:
+    - stop
+    - length
+    - function_call
+    - tool_calls
+    - content_filter
+    - "null"
+    type: string
+    x-enum-varnames:
+    - FinishReasonStop
+    - FinishReasonLength
+    - FinishReasonFunctionCall
+    - FinishReasonToolCalls
+    - FinishReasonContentFilter
+    - FinishReasonNull
+  openai.FunctionCall:
+    properties:
+      arguments:
+        description: call function with arguments in JSON format
+        type: string
+      name:
+        type: string
+    type: object
+  openai.FunctionDefinition:
+    properties:
+      description:
+        type: string
+      name:
+        type: string
+      parameters:
+        description: |-
+          Parameters is an object describing the function.
+          You can pass json.RawMessage to describe the schema,
+          or you can pass in a struct which serializes to the proper JSON schema.
+          The jsonschema package is provided for convenience, but you should
+          consider another specialized library if you require more complex schemas.
+      strict:
+        type: boolean
+    type: object
+  openai.Hate:
+    properties:
+      filtered:
+        type: boolean
+      severity:
+        type: string
+    type: object
+  openai.ImageURLDetail:
+    enum:
+    - high
+    - low
+    - auto
+    type: string
+    x-enum-varnames:
+    - ImageURLDetailHigh
+    - ImageURLDetailLow
+    - ImageURLDetailAuto
+  openai.JailBreak:
+    properties:
+      detected:
+        type: boolean
+      filtered:
+        type: boolean
+    type: object
+  openai.LogProb:
+    properties:
+      bytes:
+        description: The field is omitted if it is null
+        items:
+          type: integer
+        type: array
+      logprob:
+        type: number
+      token:
+        type: string
+      top_logprobs:
+        description: |-
+          TopLogProbs is a list of the most likely tokens and their log probability, at this token position.
+          In rare cases, there may be fewer than the number of requested top_logprobs returned.
+        items:
+          $ref: '#/definitions/openai.TopLogProbs'
+        type: array
+    type: object
+  openai.LogProbs:
+    properties:
+      content:
+        description: Content is a list of message content tokens with log probability
+          information.
+        items:
+          $ref: '#/definitions/openai.LogProb'
+        type: array
+    type: object
+  openai.Prediction:
+    properties:
+      content:
+        type: string
+      type:
+        type: string
+    type: object
+  openai.Profanity:
+    properties:
+      detected:
+        type: boolean
+      filtered:
+        type: boolean
+    type: object
+  openai.PromptFilterResult:
+    properties:
+      content_filter_results:
+        $ref: '#/definitions/openai.ContentFilterResults'
+      index:
+        type: integer
+    type: object
+  openai.PromptTokensDetails:
+    properties:
+      audio_tokens:
+        type: integer
+      cached_tokens:
+        type: integer
+    type: object
+  openai.SelfHarm:
+    properties:
+      filtered:
+        type: boolean
+      severity:
+        type: string
+    type: object
+  openai.ServiceTier:
+    enum:
+    - auto
+    - default
+    - flex
+    - priority
+    type: string
+    x-enum-varnames:
+    - ServiceTierAuto
+    - ServiceTierDefault
+    - ServiceTierFlex
+    - ServiceTierPriority
+  openai.Sexual:
+    properties:
+      filtered:
+        type: boolean
+      severity:
+        type: string
+    type: object
+  openai.StreamOptions:
+    properties:
+      include_usage:
+        description: |-
+          If set, an additional chunk will be streamed before the data: [DONE] message.
+          The usage field on this chunk shows the token usage statistics for the entire request,
+          and the choices field will always be an empty array.
+          All other chunks will also include a usage field, but with a null value.
+        type: boolean
+    type: object
+  openai.Tool:
+    properties:
+      function:
+        $ref: '#/definitions/openai.FunctionDefinition'
+      type:
+        $ref: '#/definitions/openai.ToolType'
+    type: object
+  openai.ToolCall:
+    properties:
+      function:
+        $ref: '#/definitions/openai.FunctionCall'
+      id:
+        type: string
+      index:
+        description: Index is not nil only in chat completion chunk object
+        type: integer
+      type:
+        $ref: '#/definitions/openai.ToolType'
+    type: object
+  openai.ToolType:
+    enum:
+    - function
+    type: string
+    x-enum-varnames:
+    - ToolTypeFunction
+  openai.TopLogProbs:
+    properties:
+      bytes:
+        items:
+          type: integer
+        type: array
+      logprob:
+        type: number
+      token:
+        type: string
+    type: object
+  openai.Usage:
+    properties:
+      completion_tokens:
+        type: integer
+      completion_tokens_details:
+        $ref: '#/definitions/openai.CompletionTokensDetails'
+      prompt_tokens:
+        type: integer
+      prompt_tokens_details:
+        $ref: '#/definitions/openai.PromptTokensDetails'
+      total_tokens:
+        type: integer
+    type: object
+  openai.Violence:
+    properties:
+      filtered:
+        type: boolean
+      severity:
+        type: string
+    type: object
+  projectreq.CreateProjectRequest:
+    properties:
+      instruction:
+        type: string
+      name:
+        type: string
+    required:
+    - name
+    type: object
+  projectreq.UpdateProjectRequest:
+    properties:
+      instruction:
+        type: string
+      is_archived:
+        type: boolean
+      is_favorite:
+        type: boolean
+      name:
+        type: string
+    type: object
+  projectres.ProjectDeletedResponse:
+    properties:
+      deleted:
+        type: boolean
+      id:
+        type: string
+      object:
+        type: string
+    type: object
+  projectres.ProjectListResponse:
+    properties:
+      data:
+        items:
+          $ref: '#/definitions/projectres.ProjectResponse'
+        type: array
+      first_id:
+        type: string
+      has_more:
+        type: boolean
+      last_id:
+        type: string
+      next_cursor:
+        type: string
+      object:
+        type: string
+      total:
+        type: integer
+    type: object
+  projectres.ProjectResponse:
+    properties:
+      archived_at:
+        type: integer
+      created_at:
+        type: integer
+      id:
+        type: string
+      instruction:
+        type: string
+      is_archived:
+        type: boolean
+      is_favorite:
+        type: boolean
+      name:
+        type: string
+      object:
+        type: string
+      updated_at:
+        type: integer
+    type: object
+  requestmodels.AddProviderRequest:
+    properties:
+      active:
+        type: boolean
+      api_key:
+        type: string
+      base_url:
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      name:
+        type: string
+      vendor:
+        type: string
+    required:
+    - base_url
+    - name
+    - vendor
+    type: object
+  requestmodels.BulkEnableModelsRequest:
+    properties:
+      enable:
+        description: 'Required: true to enable, false to disable'
+        type: boolean
+      except_models:
+        description: List of model keys to exclude
+        items:
+          type: string
+        type: array
+      provider_id:
+        description: 'Optional: filter by provider'
+        minLength: 1
+        type: string
+    required:
+    - enable
+    type: object
+  requestmodels.BulkToggleCatalogsRequest:
+    properties:
+      catalog_ids:
+        description: 'Optional: specific catalog public IDs. If empty, applies to
+          all catalogs'
+        items:
+          type: string
+        type: array
+      enable:
+        description: 'Required: true to enable, false to disable'
+        type: boolean
+      except_models:
+        description: List of model keys to exclude from the operation
+        items:
+          type: string
+        type: array
+    required:
+    - enable
+    type: object
+  requestmodels.UpdateModelCatalogRequest:
+    properties:
+      architecture:
+        $ref: '#/definitions/model.Architecture'
+      extras:
+        additionalProperties: {}
+        type: object
+      is_moderated:
+        type: boolean
+      notes:
+        type: string
+      supported_parameters:
+        $ref: '#/definitions/model.SupportedParameters'
+      tags:
+        items:
+          type: string
+        type: array
+    type: object
+  requestmodels.UpdateProviderModelRequest:
+    properties:
+      active:
+        type: boolean
+      display_name:
+        type: string
+      family:
+        type: string
+      pricing:
+        $ref: '#/definitions/model.Pricing'
+      supports_audio:
+        type: boolean
+      supports_embeddings:
+        type: boolean
+      supports_images:
+        type: boolean
+      supports_reasoning:
+        type: boolean
+      supports_video:
+        type: boolean
+      token_limits:
+        $ref: '#/definitions/model.TokenLimits'
+    type: object
+  requestmodels.UpdateProviderRequest:
+    properties:
+      active:
+        type: boolean
+      api_key:
+        type: string
+      base_url:
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      name:
+        type: string
+    type: object
+  responses.ErrorResponse:
+    properties:
+      code:
+        description: UUID from PlatformError
+        type: string
+      error:
+        type: string
+      message:
+        type: string
+      request_id:
+        type: string
+    type: object
+info:
+  contact:
+    name: Jan Server Team
+    url: https://github.com/janhq/jan-server
+  description: OpenAI-compatible LLM API platform with enterprise authentication,
+    conversation management, and streaming support.
+  title: Jan Server LLM API
+  version: "2.0"
+paths:
+  /auth/api-keys:
+    get:
+      consumes:
+      - application/json
+      description: Returns all API keys created by the authenticated user. Key values
+        are not returned, only metadata.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: List of API keys with metadata
+          schema:
+            type: object
+        "401":
+          description: Unauthorized - invalid or expired token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List user's API keys
+      tags:
+      - Authentication API
+    post:
+      consumes:
+      - application/json
+      description: Creates a new API key for the authenticated user. API keys provide
+        programmatic access without requiring user credentials.
+      parameters:
+      - description: API key creation request with name and optional scopes
+        in: body
+        name: request
+        required: true
+        schema:
+          type: object
+      produces:
+      - application/json
+      responses:
+        "201":
+          description: API key created successfully with key value
+          schema:
+            type: object
+        "400":
+          description: Invalid request - missing required fields
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - invalid or expired token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Create API key
+      tags:
+      - Authentication API
+  /auth/api-keys/{id}:
+    delete:
+      consumes:
+      - application/json
+      description: Revokes and deletes an API key by ID. Deleted keys can no longer
+        be used for authentication.
+      parameters:
+      - description: API key ID
+        in: path
+        name: id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "204":
+          description: API key deleted successfully
+        "401":
+          description: Unauthorized - invalid or expired token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: API key not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Delete API key
+      tags:
+      - Authentication API
+  /auth/callback:
+    get:
+      consumes:
+      - application/json
+      description: Handles the OAuth2 callback from Keycloak, exchanges authorization
+        code for JWT tokens
+      parameters:
+      - description: Authorization code from Keycloak
+        in: query
+        name: code
+        required: true
+        type: string
+      - description: State parameter for CSRF protection
+        in: query
+        name: state
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: JWT tokens
+          schema:
+            properties:
+              access_token:
+                type: string
+              expires_in:
+                type: integer
+              refresh_token:
+                type: string
+              token_type:
+                type: string
+            type: object
+        "400":
+          description: Missing code or state
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Invalid state parameter
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Failed to exchange code for tokens
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: Handle Keycloak OAuth2 callback
+      tags:
+      - Authentication API
+  /auth/guest-login:
+    post:
+      consumes:
+      - application/json
+      description: Creates a temporary guest user account and returns JWT tokens.
+        Guest users have limited access and can be upgraded to full accounts later.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Guest user created with access and refresh tokens
+          schema:
+            type: object
+        "500":
+          description: Internal server error - failed to create guest user
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: Create guest user account
+      tags:
+      - Authentication API
+  /auth/login:
+    get:
+      consumes:
+      - application/json
+      description: Returns the Keycloak authorization URL for frontend to redirect
+        users. Supports OAuth2 authorization code flow with PKCE.
+      parameters:
+      - description: URL to redirect after successful login
+        in: query
+        name: redirect_url
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Authorization URL and state parameter
+          schema:
+            properties:
+              authorization_url:
+                type: string
+              state:
+                type: string
+            type: object
+        "500":
+          description: Failed to initiate login
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: Initiate Keycloak OAuth2 login
+      tags:
+      - Authentication API
+  /auth/logout:
+    get:
+      consumes:
+      - application/json
+      description: Revokes the current access token and clears authentication cookies.
+        After logout, the user must re-authenticate.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully logged out
+          schema:
+            type: object
+        "401":
+          description: Unauthorized - invalid token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Logout user
+      tags:
+      - Authentication API
+  /auth/me:
+    get:
+      consumes:
+      - application/json
+      description: Returns the authenticated user's profile information including
+        user ID, email, roles, and guest status.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: User profile information
+          schema:
+            type: object
+        "401":
+          description: Unauthorized - invalid or expired token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get current user information
+      tags:
+      - Authentication API
+  /auth/refresh-token:
+    post:
+      consumes:
+      - application/json
+      description: Exchanges a valid refresh token for a new access token. Refresh
+        token must be provided in Authorization header or refresh_token cookie.
+      parameters:
+      - description: Refresh token (can also be in Authorization header)
+        in: body
+        name: refresh_token
+        schema:
+          type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: New access token and refresh token
+          schema:
+            type: object
+        "401":
+          description: Unauthorized - invalid or expired refresh token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: Refresh access token
+      tags:
+      - Authentication API
+  /auth/revoke:
+    post:
+      consumes:
+      - application/json
+      description: Revokes a refresh token to invalidate it
+      parameters:
+      - description: Token to revoke
+        in: body
+        name: request
+        required: true
+        schema:
+          properties:
+            refresh_token:
+              type: string
+          type: object
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Token revoked successfully
+          schema:
+            properties:
+              message:
+                type: string
+            type: object
+        "400":
+          description: Invalid request body
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Keycloak OAuth is not configured
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: Revoke Keycloak refresh token
+      tags:
+      - Authentication API
+  /auth/upgrade:
+    post:
+      consumes:
+      - application/json
+      description: Converts a guest user account to a permanent account with email/password
+        credentials. Guest flag is removed and user gains full access.
+      parameters:
+      - description: Upgrade request with email and password
+        in: body
+        name: request
+        required: true
+        schema:
+          type: object
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Account upgraded successfully with new tokens
+          schema:
+            type: object
+        "400":
+          description: Invalid request - missing email or password
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - not a guest user or invalid token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Upgrade guest to permanent account
+      tags:
+      - Authentication API
+  /auth/validate:
+    post:
+      consumes:
+      - application/json
+      description: Validates an access token against Keycloak's userinfo endpoint
+      parameters:
+      - description: Bearer token
+        in: header
+        name: Authorization
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Token is valid with user information
+          schema:
+            properties:
+              user_info:
+                type: object
+              valid:
+                type: boolean
+            type: object
+        "401":
+          description: Invalid or expired token
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Keycloak OAuth is not configured
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: Validate Keycloak access token
+      tags:
+      - Authentication API
+  /auth/validate-api-key:
+    post:
+      consumes:
+      - application/json
+      description: Internal endpoint used by Kong API Gateway to validate API keys.
+        Not intended for direct client use.
+      parameters:
+      - description: API key validation request
+        in: body
+        name: request
+        required: true
+        schema:
+          type: object
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: API key is valid with user information
+          schema:
+            type: object
+        "401":
+          description: Invalid API key
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: Validate API key (Kong Plugin)
+      tags:
+      - Authentication API
+  /v1/admin/models/catalogs:
+    get:
+      description: Retrieves a paginated list of model catalogs with optional filtering
+        and searching
+      parameters:
+      - description: 'Number of records to return (default: 20, max: 100)'
+        in: query
+        name: limit
+        type: integer
+      - description: Number of records to skip for pagination
+        in: query
+        name: offset
+        type: integer
+      - description: 'Sort order: asc or desc (default: desc)'
+        in: query
+        name: order
+        type: string
+      - description: 'Filter by status: init, filled, updated'
+        in: query
+        name: status
+        type: string
+      - description: Filter by moderation status
+        in: query
+        name: is_moderated
+        type: boolean
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: List of model catalogs
+          schema:
+            $ref: '#/definitions/modelresponses.ModelCatalogResponse'
+        "400":
+          description: Invalid query parameters
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List all model catalogs
+      tags:
+      - Admin Model API
+  /v1/admin/models/catalogs/{model_public_id}:
+    get:
+      description: Retrieves detailed information about a model catalog entry by its
+        public ID (supports IDs with slashes)
+      parameters:
+      - description: Model Catalog Public ID (can contain slashes)
+        in: path
+        name: model_public_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Model catalog details
+          schema:
+            $ref: '#/definitions/modelresponses.ModelCatalogResponse'
+        "400":
+          description: Invalid request
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Model catalog not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get a model catalog entry
+      tags:
+      - Admin Model API
+    patch:
+      consumes:
+      - application/json
+      description: Updates metadata for a model catalog entry. Marks it as manually
+        updated to prevent auto-sync overwrites.
+      parameters:
+      - description: Model Catalog Public ID (can contain slashes)
+        in: path
+        name: model_public_id
+        required: true
+        type: string
+      - description: Update payload
+        in: body
+        name: payload
+        required: true
+        schema:
+          $ref: '#/definitions/requestmodels.UpdateModelCatalogRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Updated model catalog
+          schema:
+            $ref: '#/definitions/modelresponses.ModelCatalogResponse'
+        "400":
+          description: Invalid request payload
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Model catalog not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Update a model catalog entry
+      tags:
+      - Admin Model API
+  /v1/admin/models/catalogs/bulk-toggle:
+    post:
+      consumes:
+      - application/json
+      description: Enable or disable provider models for specific catalogs or ALL
+        catalogs, with optional exception list. Supports "enable/disable all except"
+        patterns globally or scoped to catalogs.
+      parameters:
+      - description: Bulk toggle request. If catalog_ids is empty, applies to ALL
+          catalogs. Use except_models to exclude specific models.
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/requestmodels.BulkToggleCatalogsRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Bulk operation result with counts and status
+          schema:
+            $ref: '#/definitions/modelresponses.BulkOperationResponse'
+        "400":
+          description: Invalid request - exceeds limits or validation error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: One or more catalog IDs not found (when catalog_ids provided)
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error during bulk operation
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Bulk enable/disable provider models by catalog IDs or all catalogs
+      tags:
+      - Admin Model API
+  /v1/admin/models/provider-models:
+    get:
+      description: Retrieves a paginated list of provider models with optional filtering
+      parameters:
+      - description: 'Number of records to return (default: 20, max: 100)'
+        in: query
+        name: limit
+        type: integer
+      - description: Number of records to skip for pagination
+        in: query
+        name: offset
+        type: integer
+      - description: 'Sort order: asc or desc (default: desc)'
+        in: query
+        name: order
+        type: string
+      - description: Filter by provider public ID
+        in: query
+        name: provider_id
+        type: string
+      - description: Filter by model key
+        in: query
+        name: model_key
+        type: string
+      - description: Filter by active status
+        in: query
+        name: active
+        type: boolean
+      - description: Filter by image support
+        in: query
+        name: supports_images
+        type: boolean
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: List of provider models
+          schema:
+            $ref: '#/definitions/modelresponses.ProviderModelResponse'
+        "400":
+          description: Invalid query parameters
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List all provider models
+      tags:
+      - Admin Model API
+  /v1/admin/models/provider-models/{provider_model_public_id}:
+    get:
+      description: Retrieves detailed information about a provider model by its public
+        ID
+      parameters:
+      - description: Provider Model Public ID
+        in: path
+        name: provider_model_public_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Provider model details
+          schema:
+            $ref: '#/definitions/modelresponses.ProviderModelResponse'
+        "400":
+          description: Invalid request
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Provider model not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get a provider model
+      tags:
+      - Admin Model API
+    patch:
+      consumes:
+      - application/json
+      description: Updates configuration for a provider model including pricing, limits,
+        and feature flags
+      parameters:
+      - description: Provider Model Public ID
+        in: path
+        name: provider_model_public_id
+        required: true
+        type: string
+      - description: Update payload
+        in: body
+        name: payload
+        required: true
+        schema:
+          $ref: '#/definitions/requestmodels.UpdateProviderModelRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Updated provider model
+          schema:
+            $ref: '#/definitions/modelresponses.ProviderModelResponse'
+        "400":
+          description: Invalid request payload
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Provider model not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Update a provider model
+      tags:
+      - Admin Model API
+  /v1/admin/models/provider-models/bulk-toggle:
+    post:
+      consumes:
+      - application/json
+      description: 'Enables or disables provider models with flexible patterns: enable
+        all, disable all, enable all except, or disable all except. Optionally filter
+        by provider.'
+      parameters:
+      - description: Bulk toggle payload with enable flag, optional provider filter,
+          and exception list
+        in: body
+        name: payload
+        required: true
+        schema:
+          $ref: '#/definitions/requestmodels.BulkEnableModelsRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Bulk operation result with counts and status
+          schema:
+            $ref: '#/definitions/modelresponses.BulkOperationResponse'
+        "400":
+          description: Invalid request payload
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Bulk enable or disable provider models
+      tags:
+      - Admin Model API
+  /v1/admin/providers:
+    get:
+      description: Retrieves all providers with their model counts
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: List of providers with model counts
+          schema:
+            items:
+              $ref: '#/definitions/modelresponses.ProviderWithModelCountResponse'
+            type: array
+        "500":
+          description: Failed to retrieve providers
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get all providers
+      tags:
+      - Admin Provider API
+    post:
+      consumes:
+      - application/json
+      description: Registers a new provider and synchronizes its available models.
+      parameters:
+      - description: Provider registration payload
+        in: body
+        name: payload
+        required: true
+        schema:
+          $ref: '#/definitions/requestmodels.AddProviderRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Registered provider with synced models
+          schema:
+            $ref: '#/definitions/modelresponses.ProviderWithModelsResponse'
+        "400":
+          description: Invalid request payload
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Failed to register provider
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Register a provider
+      tags:
+      - Admin Provider API
+  /v1/admin/providers/{provider_public_id}:
+    patch:
+      consumes:
+      - application/json
+      description: Updates an existing provider's configuration
+      parameters:
+      - description: Provider public ID
+        in: path
+        name: provider_public_id
+        required: true
+        type: string
+      - description: Provider update payload
+        in: body
+        name: payload
+        required: true
+        schema:
+          $ref: '#/definitions/requestmodels.UpdateProviderRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Updated provider
+          schema:
+            $ref: '#/definitions/modelresponses.ProviderResponse'
+        "400":
+          description: Invalid request payload
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Provider not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Failed to update provider
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Update a provider
+      tags:
+      - Admin Provider API
+  /v1/chat/completions:
+    post:
+      consumes:
+      - application/json
+      description: |-
+        Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes, with optional conversation persistence (see Storage Options below).
+
+        **Streaming Mode (stream=true):**
+        - Returns Server-Sent Events (SSE) with real-time streaming
+        - Streams completion chunks directly from the inference model
+        - Final event contains "[DONE]" marker
+
+        **Non-Streaming Mode (stream=false or omitted):**
+        - Returns single JSON response with complete completion
+        - Standard OpenAI ChatCompletionResponse format
+
+        **Storage Options:**
+        - `store=true`: Persist the latest input message and assistant response to the active conversation
+        - `store_reasoning=true`: Additionally persist reasoning content provided by the model
+        - When `store` is omitted or false, the conversation remains read-only
+
+        **Features:**
+        - Supports all OpenAI ChatCompletionRequest parameters
+        - Optional conversation context for conversation persistence
+        - User authentication required
+        - Direct inference model integration
+      parameters:
+      - description: Chat completion request with streaming options and optional conversation context
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/chatrequests.ChatCompletionRequest'
+      produces:
+      - application/json
+      - text/event-stream
+      responses:
+        "200":
+          description: 'Successful response - a single ChatCompletionResponse JSON
+            object when stream=false or omitted, or an SSE stream of data: {json}
+            events when stream=true'
+          schema:
+            type: string
+        "400":
+          description: Invalid request payload, empty messages, or inference failure
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Create a chat completion
+      tags:
+      - Chat Completions API
+  /v1/conversations:
+    get:
+      description: List conversations for the authenticated user with optional referrer
+        filtering.
+      parameters:
+      - description: Referrer filter
+        in: query
+        name: referrer
+        type: string
+      - description: Maximum number of conversations to return
+        in: query
+        name: limit
+        type: integer
+      - description: Return conversations created after the given numeric ID
+        in: query
+        name: after
+        type: string
+      - description: Sort order (asc or desc)
+        in: query
+        name: order
+        type: string
+      - description: Set to 'all' to list conversations across the workspace (requires
+          elevated permissions)
+        in: query
+        name: scope
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully retrieved conversations
+          schema:
+            $ref: '#/definitions/conversationresponses.ConversationListResponse'
+        "400":
+          description: Invalid request parameters
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List conversations
+      tags:
+      - Conversations API
+    post:
+      consumes:
+      - application/json
+      description: |-
+        Create a new conversation to store and retrieve conversation state across Response API calls
+
+        **Features:**
+        - Create conversation with optional metadata (max 16 key-value pairs)
+        - Add up to 20 initial items to the conversation
+        - Returns conversation ID with `conv_` prefix
+        - Supports OpenAI Conversations API format
+
+        **Metadata Constraints:**
+        - Maximum 16 key-value pairs
+        - Keys: max 64 characters
+        - Values: max 512 characters
+      parameters:
+      - description: Create conversation request with optional items and metadata
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/conversationrequests.CreateConversationRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully created conversation
+          schema:
+            $ref: '#/definitions/conversationresponses.ConversationResponse'
+        "400":
+          description: Invalid request - validation failed or too many items
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error - conversation creation failed
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Create a conversation
+      tags:
+      - Conversations API
+  /v1/conversations/{conv_public_id}:
+    delete:
+      description: |-
+        Delete a conversation (soft delete). Items in the conversation will not be deleted but will be inaccessible.
+
+        **Features:**
+        - Soft delete (conversation marked as deleted, not physically removed)
+        - Items remain in database but become inaccessible
+        - Automatic ownership verification
+        - Returns deletion confirmation with conversation ID
+
+        **Response:**
+        - `id`: Deleted conversation ID
+        - `object`: Always "conversation.deleted"
+        - `deleted`: Always true
+      parameters:
+      - description: 'Conversation ID (format: conv_xxxxx)'
+        in: path
+        name: conv_public_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully deleted conversation
+          schema:
+            $ref: '#/definitions/conversationresponses.ConversationDeletedResponse'
+        "400":
+          description: Invalid conversation ID format
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Conversation not found or access denied
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error - deletion failed
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Delete a conversation
+      tags:
+      - Conversations API
+    get:
+      description: |-
+        Retrieve a conversation by ID with ownership verification
+
+        **Features:**
+        - Retrieves conversation metadata including creation timestamp
+        - Automatic ownership verification (user can only access their own conversations)
+        - Returns OpenAI-compatible conversation object
+
+        **Response Fields:**
+        - `id`: Conversation ID with `conv_` prefix
+        - `object`: Always "conversation"
+        - `created_at`: Unix timestamp
+        - `metadata`: User-defined key-value pairs
+      parameters:
+      - description: 'Conversation ID (format: conv_xxxxx)'
+        in: path
+        name: conv_public_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully retrieved conversation
+          schema:
+            $ref: '#/definitions/conversationresponses.ConversationResponse'
+        "400":
+          description: Invalid conversation ID format
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Conversation not found or access denied
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get a conversation
+      tags:
+      - Conversations API
+    post:
+      consumes:
+      - application/json
+      description: |-
+        Update a conversation's metadata while preserving existing items
+
+        **Features:**
+        - Update metadata key-value pairs
+        - Replaces entire metadata object (not merged)
+        - Items remain unchanged
+        - Automatic ownership verification
+
+        **Metadata Constraints:**
+        - Maximum 16 key-value pairs
+        - Keys: max 64 characters
+        - Values: max 512 characters
+      parameters:
+      - description: 'Conversation ID (format: conv_xxxxx)'
+        in: path
+        name: conv_public_id
+        required: true
+        type: string
+      - description: Update conversation request with new metadata
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/conversationrequests.UpdateConversationRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully updated conversation
+          schema:
+            $ref: '#/definitions/conversationresponses.ConversationResponse'
+        "400":
+          description: Invalid request - validation failed or invalid metadata
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Conversation not found or access denied
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error - update failed
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Update a conversation
+      tags:
+      - Conversations API
+  /v1/conversations/{conv_public_id}/items:
+    get:
+      description: |-
+        List all items in a conversation with cursor-based pagination support
+
+        **Features:**
+        - Cursor-based pagination using item IDs
+        - Configurable page size (1-100 items, default 20)
+        - Sort order control (ascending or descending)
+        - Optional include parameter for additional fields
+        - Returns paginated list with navigation cursors
+
+        **Pagination:**
+        - Use `after` cursor from previous response for next page
+        - `has_more` indicates if more items are available
+        - `first_id` and `last_id` provide cursor references
+
+        **Query Parameters:**
+        - `limit`: Number of items (1-100, default 20)
+        - `order`: Sort order ("asc" or "desc", default "desc")
+        - `after`: Item ID cursor for pagination
+        - `include`: Additional fields to include (optional)
+      parameters:
+      - description: 'Conversation ID (format: conv_xxxxx)'
+        in: path
+        name: conv_public_id
+        required: true
+        type: string
+      - description: Item ID cursor to list items after (pagination)
+        in: query
+        name: after
+        type: string
+      - default: 20
+        description: Number of items to return (1-100)
+        in: query
+        maximum: 100
+        minimum: 1
+        name: limit
+        type: integer
+      - default: desc
+        description: 'Sort order: asc or desc'
+        enum:
+        - asc
+        - desc
+        in: query
+        name: order
+        type: string
+      - collectionFormat: csv
+        description: Additional fields to include in response
+        in: query
+        items:
+          type: string
+        name: include
+        type: array
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully retrieved items list
+          schema:
+            $ref: '#/definitions/conversationresponses.ItemListResponse'
+        "400":
+          description: Invalid request - invalid parameters or conversation ID
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Conversation not found or access denied
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error - listing failed
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List conversation items
+      tags:
+      - Conversations API
+    post:
+      consumes:
+      - application/json
+      description: |-
+        Add items to a conversation. You may add up to 20 items at a time.
+
+        **Features:**
+        - Bulk item creation (max 20 items per request)
+        - Automatic item ID generation with `msg_` prefix
+        - Items added to conversation's active branch (default: MAIN)
+        - Returns list of created items with generated IDs
+
+        **Item Types:**
+        - `message`: User or assistant messages
+        - `tool_call`: Tool/function call items
+        - `tool_response`: Tool/function response items
+        - Other OpenAI-compatible item types
+
+        **Constraints:**
+        - Maximum 20 items per request
+        - Each item must have valid type and content
+        - Items are immutable after creation
+      parameters:
+      - description: 'Conversation ID (format: conv_xxxxx)'
+        in: path
+        name: conv_public_id
+        required: true
+        type: string
+      - collectionFormat: csv
+        description: Additional fields to include in response
+        in: query
+        items:
+          type: string
+        name: include
+        type: array
+      - description: Create items request with array of items
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/conversationrequests.CreateItemsRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully created items
+          schema:
+            $ref: '#/definitions/conversationresponses.ConversationItemCreatedResponse'
+        "400":
+          description: Invalid request - too many items, invalid format, or validation
+            failed
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Conversation not found or access denied
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error - item creation failed
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Create conversation items
+      tags:
+      - Conversations API
+  /v1/conversations/{conv_public_id}/items/{item_id}:
+    delete:
+      description: |-
+        Delete an item from a conversation. The item is permanently removed (this is not a soft delete).
+
+        **Features:**
+        - Remove specific item from conversation
+        - Automatic ownership verification
+        - Returns updated conversation object after deletion
+        - Items are permanently removed (not soft delete)
+
+        **Important:**
+        - Deleting an item may affect conversation flow
+        - Item IDs are not reused after deletion
+        - Other items in conversation remain unchanged
+        - Consider creating a new branch instead of deleting items
+
+        **Response:**
+        Returns the conversation object (not the deleted item)
+      parameters:
+      - description: 'Conversation ID (format: conv_xxxxx)'
+        in: path
+        name: conv_public_id
+        required: true
+        type: string
+      - description: 'Item ID to delete (format: msg_xxxxx)'
+        in: path
+        name: item_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully deleted item, returns conversation
+          schema:
+            $ref: '#/definitions/conversationresponses.ConversationResponse'
+        "400":
+          description: Invalid conversation ID or item ID format
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Conversation or item not found, or access denied
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error - deletion failed
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Delete a conversation item
+      tags:
+      - Conversations API
+    get:
+      description: |-
+        Retrieve a single item from a conversation by item ID
+
+        **Features:**
+        - Retrieve specific item by ID
+        - Returns complete item with all content
+        - Automatic ownership verification via conversation
+        - Optional include parameter for additional fields
+
+        **Response Fields:**
+        - `id`: Item ID with `msg_` prefix
+        - `type`: Item type (message, tool_call, etc.)
+        - `role`: Role for message items (user, assistant)
+        - `content`: Item content array
+        - `status`: Item status (completed, incomplete, etc.)
+        - `created_at`: Unix timestamp
+      parameters:
+      - description: 'Conversation ID (format: conv_xxxxx)'
+        in: path
+        name: conv_public_id
+        required: true
+        type: string
+      - description: 'Item ID (format: msg_xxxxx)'
+        in: path
+        name: item_id
+        required: true
+        type: string
+      - collectionFormat: csv
+        description: Additional fields to include in response
+        in: query
+        items:
+          type: string
+        name: include
+        type: array
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Successfully retrieved item
+          schema:
+            $ref: '#/definitions/conversationresponses.ItemResponse'
+        "400":
+          description: Invalid conversation ID or item ID format
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized - missing or invalid authentication
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Conversation or item not found, or access denied
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get a conversation item
+      tags:
+      - Conversations API
+  /v1/healthz:
+    get:
+      description: Returns the health status of the API server. Used by orchestrators
+        and monitoring systems.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Health status OK
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      summary: Health check endpoint
+      tags:
+      - Server API
+  /v1/models:
+    get:
+      consumes:
+      - application/json
+      description: Retrieves a list of available models that can be used for chat
+        completions or other tasks. Returns either a simple model list or a detailed
+        list with provider metadata, depending on the X-PROVIDER-DATA header.
+      parameters:
+      - description: Set to 'true' to include provider metadata in response
+        enum:
+        - "true"
+        - "false"
+        in: header
+        name: X-PROVIDER-DATA
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: List of models with provider metadata (when X-PROVIDER-DATA=true)
+          schema:
+            $ref: '#/definitions/modelresponses.ModelWithProviderResponseList'
+        "404":
+          description: Models or providers not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Failed to retrieve models
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List available models
+      tags:
+      - Chat Completions API
+  /v1/models/catalogs/{model_public_id}:
+    get:
+      description: Retrieves detailed information about a model catalog entry by its
+        public ID (supports IDs with slashes like openrouter/nova-lite-v1)
+      parameters:
+      - description: Model Catalog Public ID (can contain slashes)
+        in: path
+        name: model_public_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Model catalog details
+          schema:
+            $ref: '#/definitions/modelresponses.ModelCatalogResponse'
+        "400":
+          description: Invalid request
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Model catalog not found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get a model catalog entry
+      tags:
+      - Model API
+  /v1/models/providers:
+    get:
+      consumes:
+      - application/json
+      description: Retrieves a list of available model providers that can be used
+        for inference.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: List of providers
+          schema:
+            $ref: '#/definitions/modelresponses.ProviderResponseList'
+        "500":
+          description: Failed to retrieve providers
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List model providers
+      tags:
+      - Model API
+  /v1/projects:
+    get:
+      description: List all projects for the authenticated user
+      parameters:
+      - description: Maximum number of projects to return
+        in: query
+        name: limit
+        type: integer
+      - description: Return projects after the given numeric ID
+        in: query
+        name: after
+        type: string
+      - description: Sort order (asc or desc)
+        in: query
+        name: order
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/projectres.ProjectListResponse'
+        "401":
+          description: Unauthorized
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal Server Error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: List projects
+      tags:
+      - Projects API
+    post:
+      consumes:
+      - application/json
+      description: Create a new project for grouping conversations
+      parameters:
+      - description: Create project request
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/projectreq.CreateProjectRequest'
+      produces:
+      - application/json
+      responses:
+        "201":
+          description: Created
+          schema:
+            $ref: '#/definitions/projectres.ProjectResponse'
+        "400":
+          description: Bad Request
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal Server Error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Create project
+      tags:
+      - Projects API
+  /v1/projects/{project_id}:
+    delete:
+      description: Soft-delete a project
+      parameters:
+      - description: Project ID
+        in: path
+        name: project_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/projectres.ProjectDeletedResponse'
+        "401":
+          description: Unauthorized
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Not Found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal Server Error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Delete project
+      tags:
+      - Projects API
+    get:
+      description: Get a single project by ID
+      parameters:
+      - description: Project ID
+        in: path
+        name: project_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/projectres.ProjectResponse'
+        "401":
+          description: Unauthorized
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Not Found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal Server Error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Get project
+      tags:
+      - Projects API
+    patch:
+      consumes:
+      - application/json
+      description: Update project name, instruction, or archived status
+      parameters:
+      - description: Project ID
+        in: path
+        name: project_id
+        required: true
+        type: string
+      - description: Update request
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/projectreq.UpdateProjectRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/projectres.ProjectResponse'
+        "400":
+          description: Bad Request
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "401":
+          description: Unauthorized
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "404":
+          description: Not Found
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal Server Error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      security:
+      - BearerAuth: []
+      summary: Update project
+      tags:
+      - Projects API
+  /v1/readyz:
+    get:
+      description: Returns the readiness status of the API server. Indicates if the
+        service is ready to accept traffic.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Readiness status ready
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      summary: Readiness check endpoint
+      tags:
+      - Server API
+  /v1/version:
+    get:
+      description: Returns the current build version of the API server and environment
+        reload timestamp.
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: Version information including version number and environment
+            reload timestamp
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      summary: Get API build version
+      tags:
+      - Server API
+securityDefinitions:
+  BearerAuth:
+    description: Type "Bearer" followed by a space and the JWT token.
+    in: header
+    name: Authorization
+    type: apiKey
+swagger: "2.0"
diff --git a/services/llm-api/go.mod b/services/llm-api/go.mod
new file mode 100644
index 00000000..3cd17b4f
--- /dev/null
+++ b/services/llm-api/go.mod
@@ -0,0 +1,115 @@
+module jan-server/services/llm-api
+
+go 1.25.0
+
+require (
+	github.com/MicahParks/keyfunc/v2 v2.1.0
+	github.com/caarlos0/env/v10 v10.0.0
+	github.com/gin-gonic/gin v1.10.0
+	github.com/golang-jwt/jwt/v5 v5.3.0
+	github.com/golang-migrate/migrate/v4 v4.17.1
+	github.com/google/uuid v1.6.0
+	github.com/google/wire v0.7.0
+	github.com/mileusna/crontab v1.2.0
+	github.com/rs/zerolog v1.31.0
+	github.com/sashabaranov/go-openai v1.41.2
+	github.com/shopspring/decimal v1.4.0
+	github.com/swaggo/files v1.0.1
+	github.com/swaggo/gin-swagger v1.6.0
+	github.com/swaggo/swag v1.8.12
+	go.opentelemetry.io/otel v1.24.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0
+	go.opentelemetry.io/otel/sdk v1.24.0
+	go.opentelemetry.io/otel/sdk/metric v1.24.0
+	go.opentelemetry.io/otel/trace v1.24.0
+	golang.org/x/sync v0.18.0
+	gopkg.in/yaml.v3 v3.0.1
+	gorm.io/datatypes v1.2.5
+	gorm.io/driver/postgres v1.5.7
+	gorm.io/gen v0.3.27
+	gorm.io/gorm v1.26.0
+	gorm.io/plugin/dbresolver v1.6.2
+	resty.dev/v3 v3.0.0-beta.3
+)
+
+require (
+	filippo.io/edwards25519 v1.1.0 // indirect
+	github.com/KyleBanks/depth v1.2.1 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
+	github.com/bytedance/sonic v1.11.6 // indirect
+	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
+	github.com/cloudwego/base64x v0.1.4 // indirect
+	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d // indirect
+	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
+	github.com/ghodss/yaml v1.0.0 // indirect
+	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-openapi/jsonpointer v0.19.5 // indirect
+	github.com/go-openapi/jsonreference v0.19.6 // indirect
+	github.com/go-openapi/spec v0.20.4 // indirect
+	github.com/go-openapi/swag v0.19.15 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.20.0 // indirect
+	github.com/go-sql-driver/mysql v1.8.1 // indirect
+	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/google/subcommands v1.2.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect
+	github.com/hashicorp/errwrap v1.1.0 // indirect
+	github.com/hashicorp/go-multierror v1.1.1 // indirect
+	github.com/jackc/pgpassfile v1.0.0 // indirect
+	github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 // indirect
+	github.com/jackc/pgx/v5 v5.5.5 // indirect
+	github.com/jackc/puddle/v2 v2.2.1 // indirect
+	github.com/jinzhu/inflection v1.0.0 // indirect
+	github.com/jinzhu/now v1.1.5 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/lib/pq v1.10.9 // indirect
+	github.com/mailru/easyjson v0.7.6 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/rogpeppe/go-internal v1.13.1 // indirect
+	github.com/russross/blackfriday/v2 v2.0.1 // indirect
+	github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
+	github.com/stretchr/testify v1.10.0 // indirect
+	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
+	github.com/ugorji/go/codec v1.2.12 // indirect
+	github.com/urfave/cli/v2 v2.3.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 // indirect
+	go.opentelemetry.io/otel/metric v1.38.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.1.0 // indirect
+	go.uber.org/atomic v1.7.0 // indirect
+	golang.org/x/arch v0.8.0 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/mod v0.29.0 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	golang.org/x/tools v0.38.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240102182953-50ed04b92917 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect
+	google.golang.org/grpc v1.61.1 // indirect
+	google.golang.org/protobuf v1.34.1 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gorm.io/driver/mysql v1.5.7 // indirect
+	gorm.io/hints v1.1.0 // indirect
+)
+
+replace go.opentelemetry.io/otel => go.opentelemetry.io/otel v1.24.0
+
+replace go.opentelemetry.io/otel/metric => go.opentelemetry.io/otel/metric v1.24.0
+
+replace go.opentelemetry.io/otel/trace => go.opentelemetry.io/otel/trace v1.24.0
diff --git a/services/llm-api/go.sum b/services/llm-api/go.sum
new file mode 100644
index 00000000..eed6ec04
--- /dev/null
+++ b/services/llm-api/go.sum
@@ -0,0 +1,343 @@
+filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
+filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/MicahParks/keyfunc/v2 v2.1.0 h1:6ZXKb9Rp6qp1bDbJefnG7cTH8yMN1IC/4nf+GVjO99k=
+github.com/MicahParks/keyfunc/v2 v2.1.0/go.mod h1:rW42fi+xgLJ2FRRXAfNx9ZA8WpD4OeE/yHVMteCkw9k=
+github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
+github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
+github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
+github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
+github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
+github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
+github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
+github.com/caarlos0/env/v10 v10.0.0 h1:yIHUBZGsyqCnpTkbjk8asUlx6RFhhEs+h7TOBdgdzXA=
+github.com/caarlos0/env/v10 v10.0.0/go.mod h1:ZfulV76NvVPw3tm591U4SwL3Xx9ldzBP9aGxzeN7G18=
+github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
+github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
+github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
+github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
+github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
+github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dhui/dktest v0.4.1 h1:/w+IWuDXVymg3IrRJCHHOkMK10m9aNVMOyD0X12YVTg=
+github.com/dhui/dktest v0.4.1/go.mod h1:DdOqcUpL7vgyP4GlF3X3w7HbSlz8cEQzwewPveYEQbA=
+github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8=
+github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
+github.com/docker/docker v24.0.9+incompatible h1:HPGzNmwfLZWdxHqK9/II92pyi1EpYKsAqcl4G0Of9v0=
+github.com/docker/docker v24.0.9+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=
+github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
+github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
+github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
+github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
+github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
+github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
+github.com/gin-contrib/gzip v0.0.6 h1:NjcunTcGAj5CO1gn4N8jHOSIeRFHIbn51z6K+xaN4d4=
+github.com/gin-contrib/gzip v0.0.6/go.mod h1:QOJlmV2xmayAjkNS2Y8NQsMneuRShOU/kjovCXNuzzk=
+github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
+github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
+github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
+github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
+github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
+github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
+github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
+github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
+github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
+github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
+github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
+github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
+github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
+github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
+github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
+github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
+github.com/golang-migrate/migrate/v4 v4.17.1 h1:4zQ6iqL6t6AiItphxJctQb3cFqWiSpMnX7wLTPnnYO4=
+github.com/golang-migrate/migrate/v4 v4.17.1/go.mod h1:m8hinFyWBn0SA4QKHuKh175Pm9wjmxj3S2Mia7dbXzM=
+github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA=
+github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
+github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A=
+github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
+github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/subcommands v1.2.0 h1:vWQspBTo2nEqTUFita5/KeEWlUL8kQObDFbub/EN9oE=
+github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
+github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 h1:Wqo399gCIufwto+VfwCSvsnfGpF/w5E9CNxSwbpD6No=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0/go.mod h1:qmOFXW2epJhM0qSnUUYpldc7gVz2KMQwJ/QYCDIa7XU=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
+github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
+github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA=
+github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
+github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
+github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
+github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
+github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
+github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
+github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-sqlite3 v1.14.8/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
+github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwpU1Y=
+github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
+github.com/microsoft/go-mssqldb v1.7.2 h1:CHkFJiObW7ItKTJfHo1QX7QBBD1iV+mn1eOyRP3b/PA=
+github.com/microsoft/go-mssqldb v1.7.2/go.mod h1:kOvZKUdrhhFQmxLZqbwUV0rHkNkZpthMITIb2Ko1IoA=
+github.com/mileusna/crontab v1.2.0 h1:x9ZmE2A4p6CDqMEGQ+GbqsNtnmbdmWMQYShdQu8LvrU=
+github.com/mileusna/crontab v1.2.0/go.mod h1:dbns64w/u3tUnGZGf8pAa76ZqOfeBX4olW4U1ZwExmc=
+github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
+github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
+github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
+github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
+github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM=
+github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
+github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
+github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
+github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
+github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
+github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
+github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k=
+github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME=
+github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
+github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg=
+github.com/swaggo/gin-swagger v1.6.0 h1:y8sxvQ3E20/RCyrXeFfg60r6H0Z+SwpTjMYsMm+zy8M=
+github.com/swaggo/gin-swagger v1.6.0/go.mod h1:BG00cCEy294xtVpyIAHG6+e2Qzj/xKlRdOqDkvq0uzo=
+github.com/swaggo/swag v1.8.12 h1:pctzkNPu0AlQP2royqX3apjKCQonAnf7KGoxeO4y64w=
+github.com/swaggo/swag v1.8.12/go.mod h1:lNfm6Gg+oAq3zRJQNEMBE66LIJKM44mxFqhEEgy2its=
+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
+github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
+github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo=
+go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0 h1:mM8nKi6/iFQ0iqst80wDHU2ge198Ye/TfN0WBS5U24Y=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.24.0/go.mod h1:0PrIIzDteLSmNyxqcGYRL4mDIo8OTuBAOI/Bn1URxac=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 h1:t6wl9SPayj+c7lEIFgm4ooDBZVb01IhLB4InpomhRw8=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0/go.mod h1:iSDOcsnSA5INXzZtwaBPrKp/lWu/V14Dd+llD0oI2EA=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0 h1:Xw8U6u2f8DK2XAkGRFV7BBLENgnTGX9i4rQRxJf+/vs=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0/go.mod h1:6KW1Fm6R/s6Z3PGXwSJN2K4eT6wQB3vXX6CVnYX9NmM=
+go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI=
+go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco=
+go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw=
+go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg=
+go.opentelemetry.io/otel/sdk/metric v1.24.0 h1:yyMQrPzF+k88/DbH7o4FMAs80puqd+9osbiBrJrz/w8=
+go.opentelemetry.io/otel/sdk/metric v1.24.0/go.mod h1:I6Y5FjH6rvEnTTAYQz3Mmv2kl6Ek5IIrmwTLqMrrOE0=
+go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI=
+go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU=
+go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI=
+go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY=
+go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
+go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
+golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
+golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/genproto v0.0.0-20231212172506-995d672761c0 h1:YJ5pD9rF8o9Qtta0Cmy9rdBwkSjrTCT6XTiUQVOtIos=
+google.golang.org/genproto v0.0.0-20231212172506-995d672761c0/go.mod h1:l/k7rMz0vFTBPy+tFSGvXEd3z+BcoG1k7EHbqm+YBsY=
+google.golang.org/genproto/googleapis/api v0.0.0-20240102182953-50ed04b92917 h1:rcS6EyEaoCO52hQDupoSfrxI3R6C2Tq741is7X8OvnM=
+google.golang.org/genproto/googleapis/api v0.0.0-20240102182953-50ed04b92917/go.mod h1:CmlNWB9lSezaYELKS5Ym1r44VrrbPUa7JTvw+6MbpJ0=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 h1:6G8oQ016D88m1xAKljMlBOOGWDZkes4kMhgGFlf8WcQ=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917/go.mod h1:xtjpI3tXFPP051KaWnhvxkiubL/6dJ18vLVf7q2pTOU=
+google.golang.org/grpc v1.61.1 h1:kLAiWrZs7YeDM6MumDe7m3y4aM6wacLzM1Y/wiLP9XY=
+google.golang.org/grpc v1.61.1/go.mod h1:VUbo7IFqmF1QtCAstipjG0GIoq49KvMe9+h1jFLBNJs=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
+google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorm.io/datatypes v1.2.5 h1:9UogU3jkydFVW1bIVVeoYsTpLRgwDVW3rHfJG6/Ek9I=
+gorm.io/datatypes v1.2.5/go.mod h1:I5FUdlKpLb5PMqeMQhm30CQ6jXP8Rj89xkTeCSAaAD4=
+gorm.io/driver/mysql v1.5.7 h1:MndhOPYOfEp2rHKgkZIhJ16eVUIRf2HmzgoPmh7FCWo=
+gorm.io/driver/mysql v1.5.7/go.mod h1:sEtPWMiqiN1N1cMXoXmBbd8C6/l+TESwriotuRRpkDM=
+gorm.io/driver/postgres v1.5.7 h1:8ptbNJTDbEmhdr62uReG5BGkdQyeasu/FZHxI0IMGnM=
+gorm.io/driver/postgres v1.5.7/go.mod h1:3e019WlBaYI5o5LIdNV+LyxCMNtLOQETBXL2h4chKpA=
+gorm.io/driver/sqlite v1.1.6/go.mod h1:W8LmC/6UvVbHKah0+QOC7Ja66EaZXHwUTjgXY8YNWX8=
+gorm.io/driver/sqlite v1.4.3 h1:HBBcZSDnWi5BW3B3rwvVTc510KGkBkexlOg0QrmLUuU=
+gorm.io/driver/sqlite v1.4.3/go.mod h1:0Aq3iPO+v9ZKbcdiz8gLWRw5VOPcBOPUQJFLq5e2ecI=
+gorm.io/driver/sqlserver v1.5.4 h1:xA+Y1KDNspv79q43bPyjDMUgHoYHLhXYmdFcYPobg8g=
+gorm.io/driver/sqlserver v1.5.4/go.mod h1:+frZ/qYmuna11zHPlh5oc2O6ZA/lS88Keb0XSH1Zh/g=
+gorm.io/gen v0.3.27 h1:ziocAFLpE7e0g4Rum69pGfB9S6DweTxK8gAun7cU8as=
+gorm.io/gen v0.3.27/go.mod h1:9zquz2xD1f3Eb/eHq4oLn2z6vDVvQlCY5S3uMBLv4EA=
+gorm.io/gorm v1.21.15/go.mod h1:F+OptMscr0P2F2qU97WT1WimdH9GaQPoDW7AYd5i2Y0=
+gorm.io/gorm v1.22.2/go.mod h1:F+OptMscr0P2F2qU97WT1WimdH9GaQPoDW7AYd5i2Y0=
+gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
+gorm.io/gorm v1.26.0 h1:9lqQVPG5aNNS6AyHdRiwScAVnXHg/L/Srzx55G5fOgs=
+gorm.io/gorm v1.26.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE=
+gorm.io/hints v1.1.0 h1:Lp4z3rxREufSdxn4qmkK3TLDltrM10FLTHiuqwDPvXw=
+gorm.io/hints v1.1.0/go.mod h1:lKQ0JjySsPBj3uslFzY3JhYDtqEwzm+G1hv8rWujB6Y=
+gorm.io/plugin/dbresolver v1.6.2 h1:F4b85TenghUeITqe3+epPSUtHH7RIk3fXr5l83DF8Pc=
+gorm.io/plugin/dbresolver v1.6.2/go.mod h1:tctw63jdrOezFR9HmrKnPkmig3m5Edem9fdxk9bQSzM=
+nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
+resty.dev/v3 v3.0.0-beta.3 h1:3kEwzEgCnnS6Ob4Emlk94t+I/gClyoah7SnNi67lt+E=
+resty.dev/v3 v3.0.0-beta.3/go.mod h1:OgkqiPvTDtOuV4MGZuUDhwOpkY8enjOsjjMzeOHefy4=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/services/llm-api/internal/config/config.go b/services/llm-api/internal/config/config.go
new file mode 100644
index 00000000..db26b3fb
--- /dev/null
+++ b/services/llm-api/internal/config/config.go
@@ -0,0 +1,268 @@
+package config
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/caarlos0/env/v10"
+)
+
+// globalConfig is the package-level singleton kept for backwards compatibility
+// with the envs package. Load stores the parsed Config here on success, and
+// GetGlobal / GetEnvReloadedAt read it. It is nil until Load has succeeded.
+// NOTE(review): access is not synchronized in this file — confirm Load is only
+// called during start-up, before concurrent readers exist.
+var globalConfig *Config
+
+// Config holds all environment-backed configuration for llm-api.
+//
+// Fields are populated by Load from the environment variables named in the
+// `env` struct tags; `envDefault` supplies the value used when a variable is
+// not set. Load additionally normalizes and validates several fields after
+// parsing; those adjustments are noted on the fields below.
+type Config struct {
+	// HTTP Server
+	HTTPPort    int `env:"HTTP_PORT" envDefault:"8080"`
+	MetricsPort int `env:"METRICS_PORT" envDefault:"9091"`
+
+	// Database - read/write split. The write DSN is required (no default);
+	// the read replica DSN is optional and GetDatabaseReadDSN falls back to
+	// the write DSN when it is empty.
+	DBPostgresqlWriteDSN string `env:"DB_POSTGRESQL_WRITE_DSN,notEmpty"`
+	DBPostgresqlRead1DSN string `env:"DB_POSTGRESQL_READ1_DSN"` // Optional read replica
+
+	// Keycloak / Auth
+	//
+	// Load requires at least one of JWKSURL / OIDCDiscoveryURL, validates both
+	// (when set) and KeycloakBaseURL as request URIs, and replaces a negative
+	// AuthClockSkew with its absolute value.
+	KeycloakBaseURL     string        `env:"KEYCLOAK_BASE_URL,notEmpty"`
+	KeycloakPublicURL   string        `env:"KEYCLOAK_PUBLIC_URL"` // Browser-accessible URL (defaults to KeycloakBaseURL)
+	KeycloakRealm       string        `env:"KEYCLOAK_REALM" envDefault:"jan"`
+	BackendClientID     string        `env:"BACKEND_CLIENT_ID,notEmpty"`
+	BackendClientSecret string        `env:"BACKEND_CLIENT_SECRET,notEmpty"`
+	Client              string        `env:"CLIENT,notEmpty"`
+	OAuthRedirectURI    string        `env:"OAUTH_REDIRECT_URI,notEmpty"`
+	GuestRole           string        `env:"GUEST_ROLE" envDefault:"guest"`
+	KeycloakAdminUser   string        `env:"KEYCLOAK_ADMIN"`
+	KeycloakAdminPass   string        `env:"KEYCLOAK_ADMIN_PASSWORD"`
+	KeycloakAdminRealm  string        `env:"KEYCLOAK_ADMIN_REALM" envDefault:"master"`
+	KeycloakAdminClient string        `env:"KEYCLOAK_ADMIN_CLIENT_ID" envDefault:"admin-cli"`
+	KeycloakAdminSecret string        `env:"KEYCLOAK_ADMIN_CLIENT_SECRET"`
+	JWKSURL             string        `env:"JWKS_URL"`
+	OIDCDiscoveryURL    string        `env:"OIDC_DISCOVERY_URL"`
+	Issuer              string        `env:"ISSUER,notEmpty"`
+	Account             string        `env:"ACCOUNT,notEmpty"`
+	RefreshJWKSInterval time.Duration `env:"JWKS_REFRESH_INTERVAL" envDefault:"5m"`
+	AuthClockSkew       time.Duration `env:"AUTH_CLOCK_SKEW" envDefault:"60s"`
+
+	// API Keys
+	//
+	// Load rejects a non-positive APIKeyDefaultTTL and an APIKeyMaxTTL smaller
+	// than APIKeyDefaultTTL, resets a non-positive APIKeyMaxPerUser to 5, falls
+	// back to "sk_live" for a blank APIKeyPrefix, and requires KongAdminURL to
+	// be a valid request URI.
+	// NOTE(review): APIKeySecret is a []byte — confirm how caarlos0/env v10
+	// parses this type (it may treat the value as a comma-separated list of
+	// numbers rather than raw bytes).
+	APIKeySecret     []byte        `env:"APIKEY_SECRET"`
+	APIKeyDefaultTTL time.Duration `env:"API_KEY_DEFAULT_TTL" envDefault:"2160h"` // 90 days
+	APIKeyMaxTTL     time.Duration `env:"API_KEY_MAX_TTL" envDefault:"2160h"`
+	APIKeyMaxPerUser int           `env:"API_KEY_MAX_PER_USER" envDefault:"5"`
+	APIKeyPrefix     string        `env:"API_KEY_PREFIX" envDefault:"sk_live"`
+	KongAdminURL     string        `env:"KONG_ADMIN_URL" envDefault:"http://kong:8001"`
+
+	// Model Provider
+	//
+	// ProviderBootstrap is not read from the environment (env:"-"); Load fills
+	// it from the provider config file when JanProviderConfigsEnabled is true.
+	// NOTE(review): ModelProviderSecret and JanDefaultNodeAPIKey ship with
+	// fixed, publicly visible defaults — confirm that non-development
+	// deployments always override them.
+	ModelProviderSecret       string                   `env:"MODEL_PROVIDER_SECRET" envDefault:"jan-model-provider-secret-2024"`
+	JanDefaultNodeSetup       bool                     `env:"JAN_DEFAULT_NODE_SETUP" envDefault:"true"`
+	JanDefaultNodeURL         string                   `env:"JAN_DEFAULT_NODE_URL" envDefault:"http://localhost:8101/v1"`
+	JanDefaultNodeAPIKey      string                   `env:"JAN_DEFAULT_NODE_API_KEY" envDefault:"changeme"`
+	JanProviderConfigsEnabled bool                     `env:"JAN_PROVIDER_CONFIGS" envDefault:"false"`
+	JanProviderConfigSet      string                   `env:"JAN_PROVIDER_CONFIG_SET" envDefault:"default"`
+	JanProviderConfigFile     string                   `env:"JAN_PROVIDER_CONFIGS_FILE"`
+	ProviderBootstrap         *ProviderBootstrapConfig `env:"-"`
+
+	// Model Sync
+	ModelSyncIntervalMinutes int  `env:"MODEL_SYNC_INTERVAL_MINUTES" envDefault:"60"`
+	ModelSyncEnabled         bool `env:"MODEL_SYNC_ENABLED" envDefault:"true"`
+
+	// Observability / Logging
+	//
+	// Load lower-cases LogLevel and LogFormat.
+	HTTPTimeout      time.Duration `env:"HTTP_TIMEOUT" envDefault:"30s"`
+	OTLPEndpoint     string        `env:"OTEL_EXPORTER_OTLP_ENDPOINT"`
+	OTLPHeaders      string        `env:"OTEL_EXPORTER_OTLP_HEADERS"`
+	ServiceName      string        `env:"SERVICE_NAME" envDefault:"llm-api"`
+	ServiceNamespace string        `env:"SERVICE_NAMESPACE" envDefault:"jan"`
+	Environment      string        `env:"ENVIRONMENT" envDefault:"development"`
+	LogLevel         string        `env:"LOG_LEVEL" envDefault:"info"`
+	LogFormat        string        `env:"LOG_FORMAT" envDefault:"console"`
+
+	// Features
+	AutoMigrate   bool `env:"AUTO_MIGRATE" envDefault:"true"`
+	EnableSwagger bool `env:"ENABLE_SWAGGER" envDefault:"true"`
+
+	// Media integration
+	MediaResolveURL     string        `env:"MEDIA_RESOLVE_URL" envDefault:"http://kong:8000/media/v1/media/resolve"`
+	MediaResolveTimeout time.Duration `env:"MEDIA_RESOLVE_TIMEOUT" envDefault:"5s"`
+
+	// Prompt Orchestration
+	PromptOrchestrationEnabled         bool   `env:"PROMPT_ORCHESTRATION_ENABLED" envDefault:"false"`
+	PromptOrchestrationEnableMemory    bool   `env:"PROMPT_ORCHESTRATION_MEMORY" envDefault:"false"`
+	PromptOrchestrationEnableTemplates bool   `env:"PROMPT_ORCHESTRATION_TEMPLATES" envDefault:"false"`
+	PromptOrchestrationEnableTools     bool   `env:"PROMPT_ORCHESTRATION_TOOLS" envDefault:"false"`
+	PromptOrchestrationDefaultPersona  string `env:"PROMPT_ORCHESTRATION_PERSONA"`
+
+	// Memory integration
+	MemoryEnabled bool          `env:"MEMORY_ENABLED" envDefault:"false"`
+	MemoryBaseURL string        `env:"MEMORY_BASE_URL" envDefault:"http://memory-tools:8090"`
+	MemoryTimeout time.Duration `env:"MEMORY_TIMEOUT" envDefault:"5s"`
+
+	// Internal
+	//
+	// EnvReloadedAt has no env tag; Load sets it to time.Now() once parsing and
+	// validation have finished.
+	EnvReloadedAt time.Time
+}
+
+// Load parses environment variables into a Config, applies defaults and
+// normalization, and validates the values the service cannot start without.
+//
+// On success the returned Config is also stored in the package-level
+// globalConfig so that GetGlobal and GetEnvReloadedAt keep working.
+//
+// An error is returned when:
+//   - env parsing fails (including missing notEmpty variables);
+//   - JAN_PROVIDER_CONFIGS is enabled and the provider file cannot be loaded,
+//     or the selected provider set is missing or empty;
+//   - neither JWKS_URL nor OIDC_DISCOVERY_URL is set;
+//   - JWKS_URL, OIDC_DISCOVERY_URL, KONG_ADMIN_URL or KEYCLOAK_BASE_URL is not
+//     a valid request URI;
+//   - API_KEY_DEFAULT_TTL is not positive or API_KEY_MAX_TTL is smaller than
+//     API_KEY_DEFAULT_TTL.
+func Load() (*Config, error) {
+	cfg := &Config{}
+	if err := env.Parse(cfg); err != nil {
+		return nil, fmt.Errorf("parse env: %w", err)
+	}
+
+	// A blank provider set name selects the "default" set.
+	cfg.JanProviderConfigSet = strings.TrimSpace(cfg.JanProviderConfigSet)
+	if cfg.JanProviderConfigSet == "" {
+		cfg.JanProviderConfigSet = "default"
+	}
+
+	// Default KeycloakPublicURL to KeycloakBaseURL if not set.
+	if cfg.KeycloakPublicURL == "" {
+		cfg.KeycloakPublicURL = cfg.KeycloakBaseURL
+	}
+
+	if cfg.JanProviderConfigsEnabled {
+		configFile := strings.TrimSpace(cfg.JanProviderConfigFile)
+		if configFile == "" {
+			configFile = DefaultProviderConfigFile
+		}
+		bootstrap, err := LoadProviderBootstrapConfig(configFile)
+		if err != nil {
+			return nil, fmt.Errorf("load provider configs: %w", err)
+		}
+		cfg.ProviderBootstrap = bootstrap
+		// Fail fast when the selected set would bootstrap nothing.
+		if len(bootstrap.ProvidersForSet(cfg.JanProviderConfigSet)) == 0 {
+			return nil, fmt.Errorf("provider config set %q is missing or empty in %s", cfg.JanProviderConfigSet, configFile)
+		}
+	}
+
+	// Token verification needs a key source: either a direct JWKS URL or a
+	// discovery document to resolve it from (see ResolveJWKSURL).
+	if cfg.JWKSURL == "" && cfg.OIDCDiscoveryURL == "" {
+		return nil, errors.New("either JWKS_URL or OIDC_DISCOVERY_URL must be provided")
+	}
+
+	if cfg.JWKSURL != "" {
+		if _, err := url.ParseRequestURI(cfg.JWKSURL); err != nil {
+			return nil, fmt.Errorf("invalid JWKS_URL: %w", err)
+		}
+	}
+
+	if cfg.OIDCDiscoveryURL != "" {
+		if _, err := url.ParseRequestURI(cfg.OIDCDiscoveryURL); err != nil {
+			return nil, fmt.Errorf("invalid OIDC_DISCOVERY_URL: %w", err)
+		}
+	}
+
+	// Normalize before validating so that the emptiness check and the URI
+	// check look at the same value, and so the stored value carries no
+	// surrounding whitespace.
+	cfg.KongAdminURL = strings.TrimSpace(cfg.KongAdminURL)
+	if cfg.KongAdminURL == "" {
+		return nil, errors.New("KONG_ADMIN_URL is required")
+	}
+	if _, err := url.ParseRequestURI(cfg.KongAdminURL); err != nil {
+		return nil, fmt.Errorf("invalid KONG_ADMIN_URL: %w", err)
+	}
+
+	if cfg.APIKeyDefaultTTL <= 0 {
+		return nil, errors.New("API_KEY_DEFAULT_TTL must be > 0")
+	}
+	if cfg.APIKeyMaxTTL < cfg.APIKeyDefaultTTL {
+		return nil, errors.New("API_KEY_MAX_TTL must be >= API_KEY_DEFAULT_TTL")
+	}
+	// Non-positive limits and blank prefixes fall back to the documented
+	// defaults instead of failing start-up.
+	if cfg.APIKeyMaxPerUser <= 0 {
+		cfg.APIKeyMaxPerUser = 5
+	}
+	cfg.APIKeyPrefix = strings.TrimSpace(cfg.APIKeyPrefix)
+	if cfg.APIKeyPrefix == "" {
+		cfg.APIKeyPrefix = "sk_live"
+	}
+
+	// Clock skew is a tolerance, so only its magnitude matters.
+	if cfg.AuthClockSkew < 0 {
+		cfg.AuthClockSkew = -cfg.AuthClockSkew
+	}
+
+	if _, err := url.ParseRequestURI(cfg.KeycloakBaseURL); err != nil {
+		return nil, fmt.Errorf("invalid KEYCLOAK_BASE_URL: %w", err)
+	}
+
+	cfg.LogLevel = strings.ToLower(cfg.LogLevel)
+	cfg.LogFormat = strings.ToLower(cfg.LogFormat)
+	cfg.EnvReloadedAt = time.Now()
+
+	// Update global singleton for backwards compatibility.
+	globalConfig = cfg
+
+	return cfg, nil
+}
+
+// ResolveJWKSURL returns the JWKS endpoint using either the explicit JWKS_URL or the OIDC discovery document.
+func (c *Config) ResolveJWKSURL(ctx context.Context) (string, error) {
+	// An explicitly configured JWKS_URL always wins; no request is made.
+	if explicit := c.JWKSURL; explicit != "" {
+		return explicit, nil
+	}
+
+	// Otherwise fetch the OIDC discovery document; ctx bounds the request.
+	discoveryReq, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, c.OIDCDiscoveryURL, nil)
+	if reqErr != nil {
+		return "", fmt.Errorf("oidc discovery request: %w", reqErr)
+	}
+
+	discoveryResp, doErr := http.DefaultClient.Do(discoveryReq)
+	if doErr != nil {
+		return "", fmt.Errorf("fetch oidc discovery: %w", doErr)
+	}
+	defer discoveryResp.Body.Close()
+
+	if status := discoveryResp.StatusCode; status != http.StatusOK {
+		return "", fmt.Errorf("oidc discovery unexpected status: %s", discoveryResp.Status)
+	}
+
+	// Only the jwks_uri member of the discovery document is of interest.
+	type discoveryDocument struct {
+		JWKSURL string `json:"jwks_uri"`
+	}
+	var discovery discoveryDocument
+	decodeErr := json.NewDecoder(discoveryResp.Body).Decode(&discovery)
+	if decodeErr != nil {
+		return "", fmt.Errorf("decode oidc discovery: %w", decodeErr)
+	}
+
+	if discovery.JWKSURL != "" {
+		return discovery.JWKSURL, nil
+	}
+	return "", errors.New("jwks_uri not found in discovery document")
+}
+
+// GetDatabaseWriteDSN returns the write database connection string, i.e. the
+// value read from DB_POSTGRESQL_WRITE_DSN. It is also what GetDatabaseReadDSN
+// falls back to when no read replica is configured.
+func (c *Config) GetDatabaseWriteDSN() string {
+	return c.DBPostgresqlWriteDSN
+}
+
+// GetDatabaseReadDSN returns the connection string to use for reads: the
+// replica DSN from DB_POSTGRESQL_READ1_DSN when one is configured, otherwise
+// the write DSN.
+func (c *Config) GetDatabaseReadDSN() string {
+	replicaDSN := c.DBPostgresqlRead1DSN
+	if replicaDSN == "" {
+		// No replica configured: reads go to the write database.
+		return c.GetDatabaseWriteDSN()
+	}
+	return replicaDSN
+}
+
+// GetGlobal returns the global config instance for backwards compatibility.
+// The result is nil until Load has completed successfully at least once.
+//
+// Deprecated: Use dependency injection with Load() instead.
+func GetGlobal() *Config {
+	return globalConfig
+}
+
+// GetEnvReloadedAt returns when the environment was last reloaded
+// Deprecated: Use GetGlobal().EnvReloadedAt instead
+func GetEnvReloadedAt() time.Time {
+	if globalConfig != nil {
+		return globalConfig.EnvReloadedAt
+	}
+	return time.Time{}
+}
+
+// ProviderBootstrapEntries returns the provider definitions of the active
+// set (JanProviderConfigSet). It is safe to call on a nil *Config and returns
+// nil when no provider bootstrap config has been loaded.
+func (c *Config) ProviderBootstrapEntries() []ProviderBootstrapEntry {
+	if c == nil {
+		return nil
+	}
+	bootstrap := c.ProviderBootstrap
+	if bootstrap == nil {
+		return nil
+	}
+	activeSet := c.JanProviderConfigSet
+	return bootstrap.ProvidersForSet(activeSet)
+}
+
+// Version identifies the running build; it defaults to "dev" and is what
+// IsDev inspects.
+// NOTE(review): presumably overridden at build time (e.g. via -ldflags -X) —
+// confirm against the build scripts.
+var Version = "dev"
+
+// IsDev reports whether Version starts with "dev", which is the case for the
+// default value of Version.
+func IsDev() bool {
+	return strings.HasPrefix(Version, "dev")
+}
diff --git a/services/llm-api/internal/config/provider_configs.go b/services/llm-api/internal/config/provider_configs.go
new file mode 100644
index 00000000..8bacb5b6
--- /dev/null
+++ b/services/llm-api/internal/config/provider_configs.go
@@ -0,0 +1,221 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+// DefaultProviderConfigFile is the provider config path Load uses when
+// JAN_PROVIDER_CONFIGS_FILE is blank. Being relative, it is resolved against
+// the working directory first and then under services/llm-api/ (see
+// LoadProviderBootstrapConfig).
+const DefaultProviderConfigFile = "config/providers.yml"
+
+// ProviderBootstrapEntry describes a provider that should be bootstrapped on startup.
+// Values are produced by normalizeProviderEntry from the YAML provider config.
+type ProviderBootstrapEntry struct {
+	// Name is the display name; when the YAML omits it, it defaults to
+	// "<VENDOR> Provider" with the vendor upper-cased.
+	Name string
+	// Vendor is the provider type, taken from the YAML `type` key or, failing
+	// that, `vendor`. It is always non-empty.
+	Vendor string
+	// BaseURL is the env-expanded provider URL, taken from `url` or
+	// `base_url`. It is always non-empty.
+	BaseURL string
+	// APIKey is the env-expanded credential from `api_key` or `key`; it may be
+	// empty.
+	APIKey string
+	// Active defaults to true when the YAML does not set `active`.
+	Active bool
+	// Metadata carries the cleaned YAML metadata plus the derived
+	// "description" (when present) and "auto_enable_new_models" keys.
+	Metadata map[string]string
+	// AutoEnableNewModels defaults to true when `auto_enable_new_models` is
+	// not set.
+	AutoEnableNewModels bool
+	// SyncModels defaults to true when `sync_models` is not set.
+	SyncModels bool
+}
+
+// ProviderBootstrapConfig maintains all configured provider sets.
+// It is built by LoadProviderBootstrapConfig and queried through
+// ProvidersForSet.
+type ProviderBootstrapConfig struct {
+	// sets maps a trimmed set name to its normalized provider entries, in the
+	// order they appear in the config file.
+	sets map[string][]ProviderBootstrapEntry
+}
+
+// ProvidersForSet returns the providers configured for the named set.
+//
+// The name is trimmed, and a blank name selects the "default" set. A nil
+// receiver, an unknown set, or an empty set all yield nil. The returned slice
+// is a fresh copy, so callers may reorder or truncate it freely; entries are
+// copied by value, which means their Metadata maps are still shared.
+func (c *ProviderBootstrapConfig) ProvidersForSet(name string) []ProviderBootstrapEntry {
+	if c == nil {
+		return nil
+	}
+
+	key := strings.TrimSpace(name)
+	if key == "" {
+		key = "default"
+	}
+
+	entries := c.sets[key]
+	count := len(entries)
+	if count == 0 {
+		return nil
+	}
+
+	clone := make([]ProviderBootstrapEntry, count)
+	copy(clone, entries)
+	return clone
+}
+
+// LoadProviderBootstrapConfig reads and parses the YAML provider config at
+// path and returns the normalized provider sets.
+//
+// Path resolution: the cleaned path is tried first. If it does not exist and
+// is relative, the same path under services/llm-api/ is tried as a fallback
+// (useful when the process is started from the repository root).
+//
+// An error is returned when path is blank, the file cannot be read or parsed,
+// the document defines no providers, any entry fails normalization (the error
+// is prefixed with "providers.<set>[<index>]"), or no set ends up with at
+// least one entry. Sets with a blank name or no entries are skipped.
+func LoadProviderBootstrapConfig(path string) (*ProviderBootstrapConfig, error) {
+	if strings.TrimSpace(path) == "" {
+		return nil, errors.New("provider config path is empty")
+	}
+
+	cleanPath := filepath.Clean(path)
+	data, err := os.ReadFile(cleanPath)
+	if err != nil {
+		// Only a missing relative file qualifies for the fallback location;
+		// every other failure is reported against the requested path.
+		if !errors.Is(err, os.ErrNotExist) || filepath.IsAbs(cleanPath) {
+			return nil, fmt.Errorf("read provider config %q: %w", cleanPath, err)
+		}
+		// filepath.Join already returns a cleaned path.
+		altPath := filepath.Join("services", "llm-api", cleanPath)
+		data, err = os.ReadFile(altPath)
+		if err != nil {
+			// Name both locations so the caller can see the path they
+			// configured, not just the derived fallback.
+			return nil, fmt.Errorf("read provider config %q (also tried %q): %w", altPath, cleanPath, err)
+		}
+		cleanPath = altPath
+	}
+
+	var doc providerConfigDocument
+	if err := yaml.Unmarshal(data, &doc); err != nil {
+		return nil, fmt.Errorf("parse provider config %q: %w", cleanPath, err)
+	}
+
+	if len(doc.Providers) == 0 {
+		return nil, fmt.Errorf("provider config %q has no providers defined", cleanPath)
+	}
+
+	result := &ProviderBootstrapConfig{
+		sets: make(map[string][]ProviderBootstrapEntry),
+	}
+
+	for rawSet, entries := range doc.Providers {
+		setName := strings.TrimSpace(rawSet)
+		if setName == "" || len(entries) == 0 {
+			continue
+		}
+		for idx, entry := range entries {
+			normalized, err := normalizeProviderEntry(entry)
+			if err != nil {
+				return nil, fmt.Errorf("providers.%s[%d]: %w", setName, idx, err)
+			}
+			result.sets[setName] = append(result.sets[setName], normalized)
+		}
+	}
+
+	if len(result.sets) == 0 {
+		return nil, fmt.Errorf("provider config %q has no valid provider entries", cleanPath)
+	}
+
+	return result, nil
+}
+
+// providerConfigDocument is the top-level shape of the provider YAML file:
+// a `providers` mapping from set name to the list of entries in that set.
+type providerConfigDocument struct {
+	Providers map[string][]providerConfigEntry `yaml:"providers"`
+}
+
+// providerConfigEntry is one raw provider entry as written in the YAML file,
+// before normalizeProviderEntry validates it and applies defaults.
+//
+// Several settings accept two spellings; the first non-blank one wins:
+// type/vendor, url/base_url and api_key/key. The boolean fields are pointers
+// so that "not set" can be told apart from an explicit false.
+type providerConfigEntry struct {
+	Name        string            `yaml:"name"`
+	Type        string            `yaml:"type"`
+	Vendor      string            `yaml:"vendor"`
+	URL         string            `yaml:"url"`
+	BaseURL     string            `yaml:"base_url"`
+	APIKey      string            `yaml:"api_key"`
+	Key         string            `yaml:"key"`
+	Active      *bool             `yaml:"active"`
+	Description string            `yaml:"description"`
+	Metadata    map[string]string `yaml:"metadata"`
+	AutoEnable  *bool             `yaml:"auto_enable_new_models"`
+	SyncModels  *bool             `yaml:"sync_models"`
+}
+
+// normalizeProviderEntry validates a raw YAML provider entry and converts it
+// into a ProviderBootstrapEntry with defaults applied.
+//
+// Rules:
+//   - the provider type (`type`, else `vendor`) and URL (`url`, else
+//     `base_url`) are required; a missing one yields an error;
+//   - URL, name, API key, description and metadata values have environment
+//     references expanded with os.ExpandEnv and are then trimmed, so a value
+//     that expands to nothing is treated as absent;
+//   - a missing name defaults to "<VENDOR> Provider";
+//   - active, auto_enable_new_models and sync_models default to true;
+//   - the returned Metadata always contains "auto_enable_new_models" and, when
+//     a description is given, "description".
+//
+// Note that os.ExpandEnv also rewrites any literal "$" in these values.
+func normalizeProviderEntry(entry providerConfigEntry) (ProviderBootstrapEntry, error) {
+	vendor := strings.TrimSpace(firstNonEmpty(entry.Type, entry.Vendor))
+	if vendor == "" {
+		return ProviderBootstrapEntry{}, errors.New("provider type is required")
+	}
+
+	baseURL := strings.TrimSpace(os.ExpandEnv(firstNonEmpty(entry.URL, entry.BaseURL)))
+	if baseURL == "" {
+		return ProviderBootstrapEntry{}, errors.New("provider url is required")
+	}
+
+	// Expand before trimming and defaulting so that a name made solely of an
+	// unset variable falls back to the generated name instead of ending up
+	// empty.
+	name := strings.TrimSpace(os.ExpandEnv(entry.Name))
+	if name == "" {
+		name = fmt.Sprintf("%s Provider", strings.ToUpper(vendor))
+	}
+
+	// Same expand-then-trim order as the URL, description and metadata.
+	apiKey := strings.TrimSpace(os.ExpandEnv(firstNonEmpty(entry.APIKey, entry.Key)))
+
+	// Unset booleans default to true.
+	active := true
+	if entry.Active != nil {
+		active = *entry.Active
+	}
+
+	autoEnable := true
+	if entry.AutoEnable != nil {
+		autoEnable = *entry.AutoEnable
+	}
+
+	syncModels := true
+	if entry.SyncModels != nil {
+		syncModels = *entry.SyncModels
+	}
+
+	// The map is never empty on return because auto_enable_new_models is
+	// always recorded.
+	metadata := ensureStringMap(cloneStringMap(entry.Metadata))
+	if desc := strings.TrimSpace(os.ExpandEnv(entry.Description)); desc != "" {
+		metadata["description"] = desc
+	}
+	metadata["auto_enable_new_models"] = strconv.FormatBool(autoEnable)
+
+	return ProviderBootstrapEntry{
+		Name:                name,
+		Vendor:              vendor,
+		BaseURL:             baseURL,
+		APIKey:              apiKey,
+		Active:              active,
+		Metadata:            metadata,
+		AutoEnableNewModels: autoEnable,
+		SyncModels:          syncModels,
+	}, nil
+}
+
+// firstNonEmpty returns the first argument that is not blank (empty or
+// whitespace only). The value is returned as given, without trimming. When
+// every argument is blank, or none is passed, it returns "".
+func firstNonEmpty(values ...string) string {
+	for i := range values {
+		candidate := values[i]
+		if strings.TrimSpace(candidate) == "" {
+			continue
+		}
+		return candidate
+	}
+	return ""
+}
+
+// cloneStringMap returns a cleaned copy of in: keys are trimmed, values are
+// env-expanded and trimmed, and pairs whose key or value ends up empty are
+// dropped. It returns nil when in is empty or nothing survives cleaning.
+func cloneStringMap(in map[string]string) map[string]string {
+	if len(in) == 0 {
+		return nil
+	}
+
+	cleaned := make(map[string]string, len(in))
+	for rawKey, rawVal := range in {
+		k := strings.TrimSpace(rawKey)
+		if k == "" {
+			continue
+		}
+		v := strings.TrimSpace(os.ExpandEnv(rawVal))
+		if v == "" {
+			continue
+		}
+		cleaned[k] = v
+	}
+
+	if len(cleaned) > 0 {
+		return cleaned
+	}
+	return nil
+}
+
+// ensureStringMap returns in unchanged when it is non-nil, and a new empty
+// map otherwise, so the result can always be written to.
+func ensureStringMap(in map[string]string) map[string]string {
+	if in != nil {
+		return in
+	}
+	return map[string]string{}
+}
diff --git a/services/llm-api/internal/domain/apikey/api_key.go b/services/llm-api/internal/domain/apikey/api_key.go
new file mode 100644
index 00000000..4b083ad3
--- /dev/null
+++ b/services/llm-api/internal/domain/apikey/api_key.go
@@ -0,0 +1,31 @@
+package apikey
+
+import (
+	"context"
+	"time"
+)
+
+// APIKey represents persistent metadata for an API key.
+// NOTE(review): the field notes below are inferred from names only — confirm
+// against the repository implementation and the service that fills them in.
+type APIKey struct {
+	ID     string
+	UserID uint
+	Name   string
+	// Prefix / Suffix: presumably the visible leading and trailing parts of
+	// the key, kept for display — TODO confirm.
+	Prefix string
+	Suffix string
+	// Hash: presumably a digest of the full key used for lookup (see
+	// Repository.FindByHash) — TODO confirm algorithm and encoding.
+	Hash      string
+	ExpiresAt time.Time
+	// RevokedAt / LastUsedAt are pointers; nil presumably means "never".
+	RevokedAt  *time.Time
+	LastUsedAt *time.Time
+	CreatedAt  time.Time
+	UpdatedAt  time.Time
+}
+
+// Repository defines storage operations for API keys.
+// NOTE(review): this file only declares the contract; not-found behaviour and
+// what counts as "active" are decided by the implementation — confirm there.
+type Repository interface {
+	// Create persists key and returns the stored record.
+	Create(ctx context.Context, key *APIKey) (*APIKey, error)
+	// ListByUser returns the keys belonging to userID.
+	ListByUser(ctx context.Context, userID uint) ([]APIKey, error)
+	// FindByID looks a key up by its ID.
+	FindByID(ctx context.Context, id string) (*APIKey, error)
+	// FindByHash looks a key up by its stored hash.
+	FindByHash(ctx context.Context, hash string) (*APIKey, error)
+	// CountActiveByUser returns how many active keys userID has; presumably
+	// "active" excludes revoked and expired keys — TODO confirm.
+	CountActiveByUser(ctx context.Context, userID uint) (int64, error)
+	// MarkRevoked records revokedAt as the revocation time of the key with
+	// the given id.
+	MarkRevoked(ctx context.Context, id string, revokedAt time.Time) error
+}
diff --git a/services/llm-api/internal/domain/apikey/service.go b/services/llm-api/internal/domain/apikey/service.go
new file mode 100644
index 00000000..4f08ceb6
--- /dev/null
+++ b/services/llm-api/internal/domain/apikey/service.go
@@ -0,0 +1,301 @@
+package apikey
+
+import (
+	"context"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/domain/user"
+	"jan-server/services/llm-api/internal/infrastructure/keycloak"
+)
+
+// ErrLimitExceeded indicates the user hit the maximum number of active API keys.
+var ErrLimitExceeded = errors.New("api key limit exceeded")
+
+// ErrNotFound indicates the API key does not exist or does not belong to user.
+var ErrNotFound = errors.New("api key not found")
+
+// Service orchestrates API key lifecycle operations.
+type Service struct {
+	repo       Repository       // API key persistence
+	userRepo   user.Repository  // resolves a key's owner during validation
+	keycloak   *keycloak.Client // may be nil; Keycloak calls are skipped then
+	logger     zerolog.Logger   // tagged with component=api-key-service by NewService
+	defaultTTL time.Duration    // lifetime used when no positive TTL is requested
+	maxTTL     time.Duration    // upper bound for requested TTLs
+	maxPerUser int              // active-key limit per user; values <= 0 disable it
+	keyPrefix  string           // prefix of generated keys; blank falls back to "sk"
+}
+
+// Config configures the Service.
+type Config struct {
+	DefaultTTL time.Duration // lifetime applied when the caller requests no positive TTL
+	MaxTTL     time.Duration // ceiling for caller-requested TTLs
+	MaxPerUser int           // maximum active keys per user; <= 0 means unlimited
+	KeyPrefix  string        // prepended to generated keys; blank falls back to "sk"
+}
+
+// NewService wires the repositories, the optional Keycloak client and the cfg limits into a Service.
+func NewService(repo Repository, userRepo user.Repository, keycloakClient *keycloak.Client, cfg Config, logger zerolog.Logger) *Service {
+	svc := &Service{
+		repo:     repo,
+		userRepo: userRepo,
+		keycloak: keycloakClient,
+	}
+	// Every log line from this service carries the component tag.
+	svc.logger = logger.With().Str("component", "api-key-service").Logger()
+	svc.defaultTTL, svc.maxTTL = cfg.DefaultTTL, cfg.MaxTTL
+	svc.maxPerUser, svc.keyPrefix = cfg.MaxPerUser, cfg.KeyPrefix
+	return svc
+}
+
+// CreateKey generates a new API key for usr, persists its metadata, and returns the
+// stored record with the raw key. It returns ErrLimitExceeded at the per-user cap.
+func (s *Service) CreateKey(ctx context.Context, usr *user.User, name string, requestedTTL time.Duration) (*APIKey, string, error) {
+	if usr == nil || usr.ID == 0 {
+		return nil, "", fmt.Errorf("user is required")
+	}
+	name = strings.TrimSpace(name)
+	if name == "" {
+		return nil, "", fmt.Errorf("name is required")
+	}
+
+	count, err := s.repo.CountActiveByUser(ctx, usr.ID)
+	if err != nil {
+		return nil, "", err
+	}
+	if s.maxPerUser > 0 && count >= int64(s.maxPerUser) {
+		return nil, "", ErrLimitExceeded
+	}
+
+	// A positive request replaces the default (including a request equal to maxTTL).
+	ttl := s.defaultTTL
+	if requestedTTL > 0 {
+		ttl = requestedTTL
+		if s.maxTTL > 0 && ttl > s.maxTTL {
+			ttl = s.maxTTL // only a configured (positive) cap shortens the request
+		}
+	}
+	now := time.Now() // single clock reading so ExpiresAt/CreatedAt/UpdatedAt agree
+
+	rawKey, err := s.generateKeySecret()
+	if err != nil {
+		return nil, "", err
+	}
+	displaySuffix := ""
+	if len(rawKey) >= 4 {
+		displaySuffix = rawKey[len(rawKey)-4:]
+	}
+	keyHash := hashKey(rawKey) // only the hash is persisted; the raw key goes back to the caller
+
+	record := &APIKey{
+		ID:        uuid.NewString(),
+		UserID:    usr.ID,
+		Name:      name,
+		Prefix:    s.keyPrefix,
+		Suffix:    displaySuffix,
+		Hash:      keyHash,
+		ExpiresAt: now.Add(ttl),
+		CreatedAt: now,
+		UpdatedAt: now,
+	}
+
+	persisted, err := s.repo.Create(ctx, record)
+	if err != nil {
+		return nil, "", err
+	}
+
+	// Store the hash in Keycloak user attributes; best effort, since the database record is already persisted.
+	if s.keycloak != nil {
+		if err := s.keycloak.StoreAPIKeyHash(ctx, usr.Subject, record.ID, keyHash); err != nil {
+			s.logger.Warn().Err(err).Str("user_id", usr.Subject).Msg("failed to store api key in keycloak")
+		}
+	}
+
+	return persisted, rawKey, nil
+}
+
+// ListKeys fetches every API key record the repository holds for userID.
+func (s *Service) ListKeys(ctx context.Context, userID uint) ([]APIKey, error) {
+	keys, listErr := s.repo.ListByUser(ctx, userID)
+	if listErr == nil {
+		return keys, nil
+	}
+	return nil, listErr
+}
+
+// RevokeKey marks usr's key as revoked in the database, then best-effort removes its hash from Keycloak.
+func (s *Service) RevokeKey(ctx context.Context, usr *user.User, keyID string) error {
+	if usr == nil {
+		return fmt.Errorf("user is required")
+	}
+
+	// A key owned by someone else is reported exactly like a missing key.
+	found, lookupErr := s.repo.FindByID(ctx, keyID)
+	switch {
+	case lookupErr != nil:
+		return lookupErr
+	case found == nil || found.UserID != usr.ID:
+		return ErrNotFound
+	}
+
+	if revokeErr := s.repo.MarkRevoked(ctx, found.ID, time.Now()); revokeErr != nil {
+		return fmt.Errorf("mark revoked: %w", revokeErr)
+	}
+
+	// Keycloak cleanup only logs on failure; the revocation above already stands.
+	if s.keycloak == nil {
+		return nil
+	}
+	if usr.Subject == "" {
+		s.logger.Warn().
+			Str("key_id", found.ID).Uint("user_id", usr.ID).
+			Msg("api key revoked but user subject missing; unable to remove from keycloak")
+		return nil
+	}
+	if removeErr := s.keycloak.RemoveAPIKeyHash(ctx, usr.Subject, found.ID); removeErr != nil {
+		s.logger.Warn().
+			Err(removeErr).
+			Str("key_id", found.ID).
+			Str("user_subject", usr.Subject).
+			Msg("failed to remove api key hash from keycloak")
+	}
+	return nil
+}
+
+// ValidateAPIKey resolves a raw API key to its owner. It (1) looks the key up by hash,
+// (2) rejects revoked or expired keys, (3) loads the owning user, (4) when Keycloak is
+// configured and the user has a subject, requires the Keycloak user to be enabled, and
+// (5) returns the user info (with Keycloak names and roles when step 4 ran). A nil
+// username or a nil Keycloak user is handled without dereferencing a nil pointer.
+func (s *Service) ValidateAPIKey(ctx context.Context, apiKey string) (*keycloak.APIKeyUserInfo, error) {
+	// Step 1: Fast database lookup
+	keyHash := hashKey(apiKey)
+
+	key, err := s.repo.FindByHash(ctx, keyHash)
+	if err != nil {
+		return nil, fmt.Errorf("find api key: %w", err)
+	}
+	if key == nil {
+		s.logger.Debug().Str("key_hash_prefix", keyHash[:8]+"...").Msg("api key not found in database")
+		return nil, errors.New("invalid api key")
+	}
+
+	// Step 2: Check if revoked or expired (fast database checks)
+	if key.RevokedAt != nil {
+		s.logger.Debug().
+			Str("key_id", key.ID).
+			Time("revoked_at", *key.RevokedAt).
+			Msg("api key has been revoked")
+		return nil, errors.New("api key revoked")
+	}
+
+	if time.Now().After(key.ExpiresAt) {
+		s.logger.Debug().
+			Str("key_id", key.ID).
+			Time("expired_at", key.ExpiresAt).
+			Msg("api key has expired")
+		return nil, errors.New("api key expired")
+	}
+
+	// Step 3: Load user from database
+	usr, err := s.userRepo.FindByID(ctx, key.UserID)
+	if err != nil {
+		return nil, fmt.Errorf("find user: %w", err)
+	}
+	if usr == nil {
+		s.logger.Warn().
+			Uint("user_id", key.UserID).
+			Str("key_id", key.ID).
+			Msg("api key references non-existent user")
+		return nil, errors.New("user not found")
+	}
+
+	// Step 4: Double-check user status in Keycloak
+	// This ensures the user is still enabled and exists in Keycloak
+	if s.keycloak != nil && usr.Subject != "" {
+		keycloakUser, err := s.keycloak.GetUserBySubject(ctx, usr.Subject)
+		if err != nil {
+			s.logger.Error().
+				Err(err).
+				Str("subject", usr.Subject).
+				Str("key_id", key.ID).
+				Msg("failed to verify user in keycloak")
+			return nil, fmt.Errorf("verify user status: %w", err)
+		}
+
+		// Verify user is enabled in Keycloak; a nil result is treated as not enabled (fail closed)
+		if keycloakUser == nil || !keycloakUser.Enabled {
+			s.logger.Warn().
+				Str("subject", usr.Subject).
+				Str("username", ptrToString(usr.Username)).
+				Str("key_id", key.ID).
+				Msg("user is disabled in keycloak")
+			return nil, errors.New("user account is disabled")
+		}
+
+		// Step 5: All checks passed - return user info with Keycloak roles
+		s.logger.Debug().
+			Str("key_id", key.ID).
+			Str("user_id", fmt.Sprintf("%d", usr.ID)).
+			Str("username", ptrToString(usr.Username)).
+			Msg("api key validated successfully")
+
+		return &keycloak.APIKeyUserInfo{
+			UserID:    fmt.Sprintf("%d", usr.ID),
+			Subject:   usr.Subject,
+			Username:  ptrToString(usr.Username),
+			Email:     ptrToString(usr.Email),
+			FirstName: keycloakUser.FirstName,
+			LastName:  keycloakUser.LastName,
+			Roles:     keycloakUser.Roles,
+		}, nil
+	}
+
+	// If Keycloak is not configured or user has no subject, return basic user info
+	s.logger.Debug().
+		Str("key_id", key.ID).
+		Str("user_id", fmt.Sprintf("%d", usr.ID)).
+		Msg("api key validated successfully (no keycloak verification)")
+
+	return &keycloak.APIKeyUserInfo{
+		UserID:   fmt.Sprintf("%d", usr.ID),
+		Subject:  usr.Subject,
+		Username: ptrToString(usr.Username),
+		Email:    ptrToString(usr.Email),
+		Roles:    []string{},
+	}, nil
+}
+
+func (s *Service) generateKeySecret() (string, error) { // "<prefix>_<48 hex chars>"
+	entropy := make([]byte, 24)
+	if _, readErr := rand.Read(entropy); readErr != nil {
+		return "", fmt.Errorf("generate key: %w", readErr)
+	}
+	// Fall back to "sk" when the configured prefix is blank.
+	label := strings.TrimSpace(s.keyPrefix)
+	if label == "" {
+		label = "sk"
+	}
+	return label + "_" + hex.EncodeToString(entropy), nil
+}
+
+// hashKey returns the lowercase hex SHA-256 digest of key.
+func hashKey(key string) string {
+	return fmt.Sprintf("%x", sha256.Sum256([]byte(key)))
+}
+
+func ptrToString(s *string) string { // nil-safe dereference; nil yields ""
+	if s != nil {
+		return *s
+	}
+	return ""
+}
diff --git a/services/llm-api/internal/domain/conversation/conversation.go b/services/llm-api/internal/domain/conversation/conversation.go
new file mode 100644
index 00000000..f70de21c
--- /dev/null
+++ b/services/llm-api/internal/domain/conversation/conversation.go
@@ -0,0 +1,303 @@
+package conversation
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/query"
+)
+
+// ===============================================
+// Conversation Types
+// ===============================================
+
+type ConversationStatus string
+
+const (
+	ConversationStatusActive   ConversationStatus = "active"
+	ConversationStatusArchived ConversationStatus = "archived"
+	ConversationStatusDeleted  ConversationStatus = "deleted"
+)
+
+// Branch names identify a specific flow/path in a conversation.
+// They are used to support editing items while maintaining conversation history.
+const (
+	BranchMain = "MAIN" // Default main conversation flow
+)
+
+// Branch names for edited conversations follow pattern: "EDIT_1", "EDIT_2", etc.
+// Or custom names for specific purposes
+
+// ===============================================
+// Conversation Structure
+// ===============================================
+
+type Conversation struct {
+	ID              uint                      `json:"-"`
+	PublicID        string                    `json:"id"`     // OpenAI-compatible string ID like "conv_abc123"
+	Object          string                    `json:"object"` // Always "conversation" for OpenAI compatibility
+	Title           *string                   `json:"title,omitempty"`
+	UserID          uint                      `json:"-"`
+	ProjectID       *uint                     `json:"-"` // Optional project grouping
+	ProjectPublicID *string                   `json:"-"` // Public ID of the project
+	Status          ConversationStatus        `json:"status"`
+	Items           []Item                    `json:"items,omitempty"`           // Legacy: items without branch (defaults to MAIN)
+	Branches        map[string][]Item         `json:"branches,omitempty"`        // Branched items organized by branch name
+	ActiveBranch    string                    `json:"active_branch,omitempty"`   // Currently active branch (default: "MAIN")
+	BranchMetadata  map[string]BranchMetadata `json:"branch_metadata,omitempty"` // Metadata about each branch
+	Metadata        map[string]string         `json:"metadata,omitempty"`
+	Referrer        *string                   `json:"referrer,omitempty"`
+	IsPrivate       bool                      `json:"is_private"`
+
+	// Project instruction inheritance
+	InstructionVersion           int     `json:"instruction_version"`                      // Version of project instruction when conversation was created
+	EffectiveInstructionSnapshot *string `json:"effective_instruction_snapshot,omitempty"` // Snapshot of merged instruction for reproducibility
+
+	CreatedAt time.Time `json:"created_at"` // Creation time. NOTE(review): time.Time JSON-encodes as an RFC 3339 string by default, not a Unix timestamp - confirm how responses serialize it
+	UpdatedAt time.Time `json:"updated_at"` // Last update time. NOTE(review): same encoding caveat as CreatedAt - confirm
+}
+
+// BranchMetadata contains information about a conversation branch
+type BranchMetadata struct {
+	Name             string     `json:"name"`                          // Branch identifier (MAIN, EDIT_1, etc.)
+	Description      *string    `json:"description,omitempty"`         // Optional description of this branch
+	ParentBranch     *string    `json:"parent_branch,omitempty"`       // Branch this was forked from
+	ForkedAt         *time.Time `json:"forked_at,omitempty"`           // When this branch was created
+	ForkedFromItemID *string    `json:"forked_from_item_id,omitempty"` // Item ID where fork occurred
+	ItemCount        int        `json:"item_count"`                    // Number of items in this branch
+	CreatedAt        time.Time  `json:"created_at"`                    // Branch creation time
+	UpdatedAt        time.Time  `json:"updated_at"`                    // Last update time
+}
+
+// ===============================================
+// Conversation Repository
+// ===============================================
+
+type ConversationFilter struct {
+	ID        *uint
+	PublicID  *string
+	UserID    *uint
+	ProjectID *uint
+	Referrer  *string
+}
+
+type ConversationRepository interface {
+	Create(ctx context.Context, conversation *Conversation) error
+	FindByFilter(ctx context.Context, filter ConversationFilter, pagination *query.Pagination) ([]*Conversation, error)
+	Count(ctx context.Context, filter ConversationFilter) (int64, error)
+	FindByID(ctx context.Context, id uint) (*Conversation, error)
+	FindByPublicID(ctx context.Context, publicID string) (*Conversation, error)
+	Update(ctx context.Context, conversation *Conversation) error
+	Delete(ctx context.Context, id uint) error
+
+	// Item operations (legacy - assumes MAIN branch)
+	AddItem(ctx context.Context, conversationID uint, item *Item) error
+	SearchItems(ctx context.Context, conversationID uint, query string) ([]*Item, error) // TODO: Implement search functionality
+	BulkAddItems(ctx context.Context, conversationID uint, items []*Item) error
+	GetItemByID(ctx context.Context, conversationID uint, itemID uint) (*Item, error)
+	GetItemByPublicID(ctx context.Context, conversationID uint, publicID string) (*Item, error)
+	DeleteItem(ctx context.Context, conversationID uint, itemID uint) error
+	CountItems(ctx context.Context, conversationID uint, branchName string) (int, error)
+
+	// Branch operations - TODO: Implement branching UI and endpoints
+	CreateBranch(ctx context.Context, conversationID uint, branchName string, metadata *BranchMetadata) error
+	GetBranch(ctx context.Context, conversationID uint, branchName string) (*BranchMetadata, error)
+	ListBranches(ctx context.Context, conversationID uint) ([]*BranchMetadata, error)
+	DeleteBranch(ctx context.Context, conversationID uint, branchName string) error
+	SetActiveBranch(ctx context.Context, conversationID uint, branchName string) error
+
+	// Branch item operations
+	AddItemToBranch(ctx context.Context, conversationID uint, branchName string, item *Item) error
+	GetBranchItems(ctx context.Context, conversationID uint, branchName string, pagination *query.Pagination) ([]*Item, error)
+	BulkAddItemsToBranch(ctx context.Context, conversationID uint, branchName string, items []*Item) error
+
+	// Fork operation - creates a new branch from an existing branch at a specific item
+	// TODO: Implement forking functionality for conversation editing
+	ForkBranch(ctx context.Context, conversationID uint, sourceBranch, newBranch string, fromItemID string, description *string) error
+
+	// Item rating operations - TODO: Implement item rating/feedback system
+	RateItem(ctx context.Context, conversationID uint, itemID string, rating ItemRating, comment *string) error
+	GetItemRating(ctx context.Context, conversationID uint, itemID string) (*ItemRating, error)
+	RemoveItemRating(ctx context.Context, conversationID uint, itemID string) error
+}
+
+// ===============================================
+// Conversation Factory Functions
+// ===============================================
+
+// NewConversation creates a new conversation without a project by delegating to NewConversationWithProject with a nil projectID
+func NewConversation(publicID string, userID uint, title *string, metadata map[string]string) *Conversation {
+	return NewConversationWithProject(publicID, userID, title, metadata, nil)
+}
+
+// NewConversationWithProject builds an active, non-private conversation whose active branch is
+// MAIN. projectID may be nil for a conversation outside any project, and a nil metadata map is
+// replaced by an empty one. CreatedAt and UpdatedAt share a single timestamp with the MAIN
+// branch metadata entry.
+func NewConversationWithProject(publicID string, userID uint, title *string, metadata map[string]string, projectID *uint) *Conversation {
+	createdAt := time.Now()
+
+	if metadata == nil {
+		metadata = map[string]string{}
+	}
+
+	// MAIN starts out empty and unforked.
+	mainBranch := BranchMetadata{
+		Name:             BranchMain,
+		Description:      nil,
+		ParentBranch:     nil,
+		ForkedAt:         nil,
+		ForkedFromItemID: nil,
+		ItemCount:        0,
+		CreatedAt:        createdAt,
+		UpdatedAt:        createdAt,
+	}
+
+	return &Conversation{
+		PublicID:                     publicID,
+		Object:                       "conversation",
+		Title:                        title,
+		UserID:                       userID,
+		ProjectID:                    projectID,
+		Status:                       ConversationStatusActive,
+		ActiveBranch:                 BranchMain,
+		Branches:                     make(map[string][]Item),
+		BranchMetadata:               map[string]BranchMetadata{BranchMain: mainBranch},
+		Metadata:                     metadata,
+		IsPrivate:                    false,
+		InstructionVersion:           1,
+		EffectiveInstructionSnapshot: nil,
+		CreatedAt:                    createdAt,
+		UpdatedAt:                    createdAt,
+	}
+}
+
+// GetActiveBranchItems returns the items stored under ActiveBranch, or the legacy Items slice when that branch has no entry
+// TODO: Currently unused - will be needed when implementing conversation branching UI
+func (c *Conversation) GetActiveBranchItems() []Item {
+	// Reading from a nil map is safe in Go and simply reports "not found".
+	branchItems, found := c.Branches[c.ActiveBranch]
+	if !found {
+		// Fallback to legacy Items field
+		return c.Items
+	}
+	return branchItems
+}
+
+// GetBranchItems returns branchName's items; a missing MAIN falls back to legacy Items, any other missing branch to an empty slice
+func (c *Conversation) GetBranchItems(branchName string) []Item {
+	// Reading from a nil map is safe in Go and simply reports "not found".
+	branchItems, found := c.Branches[branchName]
+	switch {
+	case found:
+		return branchItems
+	case branchName == BranchMain:
+		return c.Items
+	default:
+		return []Item{}
+	}
+}
+
+// AddItemToActiveBranch appends item to ActiveBranch and refreshes that branch's metadata entry when one exists
+// TODO: Currently unused - will be needed when implementing conversation branching UI
+func (c *Conversation) AddItemToActiveBranch(item Item) {
+	if c.Branches == nil {
+		c.Branches = map[string][]Item{}
+	}
+	active := c.ActiveBranch
+	existing := c.Branches[active]
+	// The stored copy carries its branch name and zero-based position.
+	item.Branch = active
+	item.SequenceNumber = len(existing)
+	c.Branches[active] = append(existing, item)
+
+	// Branches without a metadata entry are left untouched (a nil map reads as "missing").
+	meta, tracked := c.BranchMetadata[active]
+	if !tracked {
+		return
+	}
+	meta.ItemCount++
+	meta.UpdatedAt = time.Now()
+	c.BranchMetadata[active] = meta
+}
+
+// SwitchBranch makes branchName the active branch, rejecting names missing from BranchMetadata
+// TODO: Currently unused - will be needed when implementing conversation branching UI
+func (c *Conversation) SwitchBranch(branchName string) error {
+	// A nil BranchMetadata map disables the existence check altogether, so any
+	// name is accepted in that case.
+	_, known := c.BranchMetadata[branchName]
+	if c.BranchMetadata != nil && !known {
+		return fmt.Errorf("branch not found: %s", branchName)
+	}
+	c.ActiveBranch = branchName
+	return nil
+}
+
+// CreateBranch forks sourceBranch into newBranchName. Items up to and including fromItemID are
+// copied; an empty fromItemID that matches no item produces an empty branch.
+// TODO: Currently unused - will be needed when implementing conversation branching UI
+func (c *Conversation) CreateBranch(newBranchName, sourceBranch, fromItemID string, description *string) error {
+	// Lazily create both maps so the writes below cannot hit a nil map.
+	if c.Branches == nil {
+		c.Branches = map[string][]Item{}
+	}
+	if c.BranchMetadata == nil {
+		c.BranchMetadata = map[string]BranchMetadata{}
+	}
+
+	// Branch names must be unique within the conversation.
+	if _, taken := c.BranchMetadata[newBranchName]; taken {
+		return fmt.Errorf("branch already exists: %s", newBranchName)
+	}
+
+	// Locate the fork point: the first source item whose PublicID equals fromItemID.
+	source := c.GetBranchItems(sourceBranch)
+	forkAt := -1
+	for idx := range source {
+		if source[idx].PublicID == fromItemID {
+			forkAt = idx
+			break
+		}
+	}
+
+	// Only a non-empty fromItemID is required to exist.
+	if forkAt < 0 && fromItemID != "" {
+		return fmt.Errorf("item not found: %s", fromItemID)
+	}
+
+	// Copy the prefix of the source branch, retagging every copy with the new
+	// branch name and its position.
+	var copied []Item
+	if forkAt >= 0 {
+		copied = append(copied, source[:forkAt+1]...)
+		for idx := range copied {
+			copied[idx].Branch = newBranchName
+			copied[idx].SequenceNumber = idx
+		}
+	}
+
+	c.Branches[newBranchName] = copied
+
+	// Record where and when the fork happened.
+	forkedAt := time.Now()
+	c.BranchMetadata[newBranchName] = BranchMetadata{
+		Name:             newBranchName,
+		Description:      description,
+		ParentBranch:     &sourceBranch,
+		ForkedAt:         &forkedAt,
+		ForkedFromItemID: &fromItemID,
+		ItemCount:        len(copied),
+		CreatedAt:        forkedAt,
+		UpdatedAt:        forkedAt,
+	}
+
+	return nil
+}
+
+// GenerateEditBranchName returns "EDIT_<conversationID>_<unix-nanoseconds>"; nanosecond resolution avoids same-second name collisions
+// TODO: Currently unused - will be needed when implementing conversation branching UI
+func GenerateEditBranchName(conversationID uint) string {
+	return fmt.Sprintf("EDIT_%d_%d", conversationID, time.Now().UnixNano())
+}
diff --git a/services/llm-api/internal/domain/conversation/conversation_service.go b/services/llm-api/internal/domain/conversation/conversation_service.go
new file mode 100644
index 00000000..81a9db4e
--- /dev/null
+++ b/services/llm-api/internal/domain/conversation/conversation_service.go
@@ -0,0 +1,304 @@
+package conversation
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/utils/idgen"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ConversationService handles business logic for conversations
+type ConversationService struct {
+	repo      ConversationRepository // storage for conversations and their items
+	validator *ConversationValidator // checks conversations and public IDs before repository calls
+}
+
+// NewConversationService builds a service over repo that validates with the default rules
+func NewConversationService(repo ConversationRepository) *ConversationService {
+	svc := new(ConversationService)
+	svc.repo = repo
+	svc.validator = NewConversationValidator(nil) // nil selects the default config
+	return svc
+}
+
+// ===============================================
+// Core CRUD Operations
+// ===============================================
+
+// CreateConversation validates conv and stores it through the repository (core function)
+func (s *ConversationService) CreateConversation(ctx context.Context, conv *Conversation) (*Conversation, error) {
+	// Reject invalid input before touching storage.
+	validationErr := s.validator.ValidateConversation(conv)
+	if validationErr != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "conversation validation failed", validationErr, "")
+	}
+
+	createErr := s.repo.Create(ctx, conv)
+	if createErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, createErr, "failed to create conversation")
+	}
+	return conv, nil
+}
+
+// GetConversationByPublicIDAndUserID retrieves a conversation by public ID and validates ownership (core function).
+// A conversation that is missing or owned by another user yields the same not-found error.
+func (s *ConversationService) GetConversationByPublicIDAndUserID(ctx context.Context, publicID string, userID uint) (*Conversation, error) {
+	// Validate conversation ID format
+	if err := s.validator.ValidateConversationID(publicID); err != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "invalid conversation ID", err, "")
+	}
+
+	conversation, err := s.repo.FindByPublicID(ctx, publicID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "conversation not found")
+	}
+
+	// Verify existence and ownership; the nil check guards against a repository that returns (nil, nil)
+	if conversation == nil || conversation.UserID != userID {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeNotFound, "conversation not found", nil, "")
+	}
+
+	return conversation, nil
+}
+
+// UpdateConversation re-validates conv and saves it through the repository (core function)
+func (s *ConversationService) UpdateConversation(ctx context.Context, conv *Conversation) (*Conversation, error) {
+	// Changes must still satisfy the conversation rules before they are stored.
+	validationErr := s.validator.ValidateConversation(conv)
+	if validationErr != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "conversation validation failed", validationErr, "")
+	}
+
+	updateErr := s.repo.Update(ctx, conv)
+	if updateErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, updateErr, "failed to update conversation")
+	}
+	return conv, nil
+}
+
+// DeleteConversation removes conv through the repository's Delete (core function)
+func (s *ConversationService) DeleteConversation(ctx context.Context, conv *Conversation) error {
+	deleteErr := s.repo.Delete(ctx, conv.ID)
+	if deleteErr == nil {
+		return nil
+	}
+	return platformerrors.AsError(ctx, platformerrors.LayerDomain, deleteErr, "failed to delete conversation")
+}
+
+// FindConversationsByFilter returns the conversations matching filter under pagination, plus the total match count
+func (s *ConversationService) FindConversationsByFilter(ctx context.Context, filter ConversationFilter, pagination *query.Pagination) ([]*Conversation, int64, error) {
+	// Matching conversations, with pagination handed to the repository
+	page, listErr := s.repo.FindByFilter(ctx, filter, pagination)
+	if listErr != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerDomain, listErr, "failed to list conversations")
+	}
+
+	// The count query receives only the filter, not the pagination
+	matched, countErr := s.repo.Count(ctx, filter)
+	if countErr != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerDomain, countErr, "failed to count conversations")
+	}
+
+	return page, matched, nil
+}
+
+// ===============================================
+// Business Logic Operations (High-level)
+// ===============================================
+
+// CreateConversationInput represents the input for creating a conversation
+type CreateConversationInput struct {
+	UserID          uint
+	Title           *string
+	Metadata        map[string]string
+	Referrer        *string
+	ProjectID       *uint
+	ProjectPublicID *string
+}
+
+// UpdateConversationInput represents the input for updating a conversation; UpdateConversationWithInput leaves nil fields unchanged
+type UpdateConversationInput struct {
+	Title    *string
+	Metadata map[string]string // when non-nil, replaces the stored metadata entirely (not merged)
+	Referrer *string
+}
+
+// CreateConversationWithInput builds a conversation from input under a freshly generated public ID and creates it
+func (s *ConversationService) CreateConversationWithInput(ctx context.Context, input CreateConversationInput) (*Conversation, error) {
+	// Generate public ID
+	newID, idErr := idgen.GenerateSecureID("conv", 16)
+	if idErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, idErr, "failed to generate conversation ID")
+	}
+
+	// Assemble the entity, then copy over the referrer and project public ID.
+	draft := NewConversationWithProject(newID, input.UserID, input.Title, input.Metadata, input.ProjectID)
+	draft.ProjectPublicID = input.ProjectPublicID
+	draft.Referrer = input.Referrer
+
+	// Validation and persistence happen in the core function.
+	return s.CreateConversation(ctx, draft)
+}
+
+// UpdateConversationWithInput applies the non-nil fields of input to the conversation identified
+// by publicID, provided it belongs to userID, and saves the result.
+func (s *ConversationService) UpdateConversationWithInput(ctx context.Context, userID uint, publicID string, input UpdateConversationInput) (*Conversation, error) {
+	// Ownership is enforced by the lookup itself.
+	target, err := s.GetConversationByPublicIDAndUserID(ctx, publicID, userID)
+	if err != nil {
+		return nil, err
+	}
+
+	// Nil fields mean "leave unchanged".
+	if newTitle := input.Title; newTitle != nil {
+		target.Title = newTitle
+	}
+
+	if newMetadata := input.Metadata; newMetadata != nil {
+		// Metadata is replaced wholesale rather than merged key by key.
+		target.Metadata = newMetadata
+	}
+
+	if newReferrer := input.Referrer; newReferrer != nil {
+		target.Referrer = newReferrer
+	}
+
+	// Validation and persistence happen in the core function.
+	return s.UpdateConversation(ctx, target)
+}
+
+// DeleteConversationByID deletes the conversation with publicID after confirming it belongs to userID
+func (s *ConversationService) DeleteConversationByID(ctx context.Context, userID uint, publicID string) error {
+	// The lookup fails for conversations the user does not own.
+	target, lookupErr := s.GetConversationByPublicIDAndUserID(ctx, publicID, userID)
+	if lookupErr != nil {
+		return lookupErr
+	}
+
+	// Hand off to the core delete.
+	return s.DeleteConversation(ctx, target)
+}
+
+// ===============================================
+// Item Management Methods
+// ===============================================
+
+// AddItemsToConversation adds items to a conversation branch and returns them with IDs and sequence numbers assigned.
+// An empty branchName means MAIN; other branches are rejected for now. The items slice is modified in place.
+func (s *ConversationService) AddItemsToConversation(ctx context.Context, conv *Conversation, branchName string, items []Item) ([]Item, error) {
+	if len(items) == 0 {
+		return []Item{}, nil
+	}
+
+	// An empty branch name means MAIN; beyond that, only MAIN is supported for now
+	if branchName == "" {
+		branchName = BranchMain
+	}
+	if branchName != BranchMain {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeNotFound, fmt.Sprintf("branch not found: %s", branchName), nil, "")
+	}
+
+	// Get current item count to determine starting sequence number
+	currentCount, err := s.repo.CountItems(ctx, conv.ID, branchName)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to get item count")
+	}
+
+	// Generate IDs and assign sequence numbers for items
+	itemPtrs := make([]*Item, len(items))
+	for i := range items {
+		if items[i].PublicID == "" {
+			publicID, err := idgen.GenerateSecureID("msg", 16)
+			if err != nil {
+				return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to generate item ID")
+			}
+			items[i].PublicID = publicID
+		}
+		items[i].Object = "conversation.item"
+		items[i].ConversationID = conv.ID
+		items[i].Branch = branchName
+		items[i].SequenceNumber = currentCount + i + 1 // 1-based, continuing after the existing items
+		itemPtrs[i] = &items[i]
+	}
+
+	// Add items to repository
+	if branchName == BranchMain {
+		if err := s.repo.BulkAddItems(ctx, conv.ID, itemPtrs); err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to add items")
+		}
+	} else {
+		if err := s.repo.BulkAddItemsToBranch(ctx, conv.ID, branchName, itemPtrs); err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to add items to branch")
+		}
+	}
+
+	// Update conversation's updated_at timestamp; fall back to now when the last item has no CreatedAt
+	conv.UpdatedAt = items[len(items)-1].CreatedAt
+	if conv.UpdatedAt.IsZero() {
+		conv.UpdatedAt = time.Now()
+	}
+	_ = s.repo.Update(ctx, conv) // best effort: the items are already stored, so a failure here is ignored
+	return items, nil
+}
+
+// GetConversationItems returns a branch's items under pagination, as values instead of pointers
+func (s *ConversationService) GetConversationItems(ctx context.Context, conv *Conversation, branchName string, pagination *query.Pagination) ([]Item, error) {
+	// Pagination is handed to the repository rather than applied here.
+	itemPtrs, fetchErr := s.repo.GetBranchItems(ctx, conv.ID, branchName, pagination)
+	if fetchErr == nil {
+		return convertItemPtrsToItems(itemPtrs), nil
+	}
+
+	return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, fetchErr, "failed to get items")
+}
+
+// GetConversationItem looks up one item of conv by its public ID
+func (s *ConversationService) GetConversationItem(ctx context.Context, conv *Conversation, itemPublicID string) (*Item, error) {
+	// The repository lookup is scoped to this conversation's numeric ID.
+	found, lookupErr := s.repo.GetItemByPublicID(ctx, conv.ID, itemPublicID)
+	if lookupErr == nil {
+		return found, nil
+	}
+
+	return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, lookupErr, "item not found")
+}
+
+// DeleteConversationItem deletes the item with the given public ID from conv and bumps conv's UpdatedAt
+func (s *ConversationService) DeleteConversationItem(ctx context.Context, conv *Conversation, itemPublicID string) error {
+	// Get the item to find its numeric ID
+	item, err := s.repo.GetItemByPublicID(ctx, conv.ID, itemPublicID)
+	if err != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to get item for deletion")
+	}
+	if item == nil { // guard against a repository that reports a miss as (nil, nil)
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeNotFound, "item not found", nil, "")
+	}
+
+	// Delete the item from the database
+	if err := s.repo.DeleteItem(ctx, conv.ID, item.ID); err != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to delete item")
+	}
+
+	// Update conversation timestamp; best effort, a failure here does not fail the deletion
+	conv.UpdatedAt = time.Now()
+	_ = s.repo.Update(ctx, conv)
+
+	return nil
+}
+
+// ===============================================
+// Helper Functions
+// ===============================================
+
+// convertItemPtrsToItems copies each pointed-to Item into a value slice; nil entries become zero-value Items
+func convertItemPtrsToItems(itemPtrs []*Item) []Item {
+	values := make([]Item, len(itemPtrs))
+	for idx := range itemPtrs {
+		if src := itemPtrs[idx]; src != nil {
+			values[idx] = *src
+		}
+	}
+	return values
+}
diff --git a/services/llm-api/internal/domain/conversation/conversation_validation.go b/services/llm-api/internal/domain/conversation/conversation_validation.go
new file mode 100644
index 00000000..617720f1
--- /dev/null
+++ b/services/llm-api/internal/domain/conversation/conversation_validation.go
@@ -0,0 +1,235 @@
+package conversation
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"unicode/utf8"
+
+	"jan-server/services/llm-api/internal/utils/idgen"
+)
+
+// ===============================================
+// Conversation Validation
+// ===============================================
+
+// ConversationValidationConfig holds conversation-level validation rules
+type ConversationValidationConfig struct {
+	MaxTitleLength          int // Max title length, counted in runes
+	MaxMetadataKeys         int // Max number of metadata entries
+	MaxMetadataKeyLength    int // Max metadata key length, counted in bytes
+	MaxMetadataValueLength  int // Max metadata value length, counted in runes
+	MaxItemsPerConversation int // TODO: Implement validation for maximum items in a conversation
+	MaxReferrerLength       int // Max referrer length in runes, measured after trimming whitespace
+}
+
+// DefaultConversationValidationConfig returns a new config holding the OpenAI-aligned default limits
+func DefaultConversationValidationConfig() *ConversationValidationConfig {
+	defaults := new(ConversationValidationConfig)
+	defaults.MaxTitleLength = 256           // OpenAI default
+	defaults.MaxMetadataKeys = 16           // OpenAI default
+	defaults.MaxMetadataKeyLength = 64      // OpenAI default
+	defaults.MaxMetadataValueLength = 512   // OpenAI default
+	defaults.MaxItemsPerConversation = 1000 // Reasonable conversation size limit
+	defaults.MaxReferrerLength = 64
+	return defaults
+}
+
+// ConversationValidator handles conversation-level validation
+type ConversationValidator struct {
+	config             *ConversationValidationConfig // limits enforced by the validate* methods
+	metadataKeyPattern *regexp.Regexp                // regexp every metadata key must match
+}
+
+// NewConversationValidator builds a validator; a nil config selects the defaults
+func NewConversationValidator(config *ConversationValidationConfig) *ConversationValidator {
+	validator := &ConversationValidator{
+		config:             config,
+		metadataKeyPattern: regexp.MustCompile(`^[a-zA-Z0-9_]+$`),
+	}
+	if validator.config == nil {
+		validator.config = DefaultConversationValidationConfig()
+	}
+	return validator
+}
+
+// ValidateConversation validates each populated field of conv, returning the
+// first failure wrapped with the field's name; unset fields are skipped.
+func (v *ConversationValidator) ValidateConversation(conv *Conversation) error {
+	if conv == nil {
+		return fmt.Errorf("conversation cannot be nil")
+	}
+
+	// Public ID: skipped while it is still empty.
+	if id := conv.PublicID; id != "" {
+		if idErr := v.ValidateConversationID(id); idErr != nil {
+			return fmt.Errorf("invalid conversation ID: %w", idErr)
+		}
+	}
+
+	// Title: optional pointer field.
+	if title := conv.Title; title != nil {
+		if titleErr := v.validateTitle(*title); titleErr != nil {
+			return fmt.Errorf("invalid title: %w", titleErr)
+		}
+	}
+
+	// Metadata: validateMetadata itself returns nil for a nil map.
+	if metaErr := v.validateMetadata(conv.Metadata); metaErr != nil {
+		return fmt.Errorf("invalid metadata: %w", metaErr)
+	}
+
+	// Referrer: optional pointer field.
+	if ref := conv.Referrer; ref != nil {
+		if refErr := v.validateReferrer(*ref); refErr != nil {
+			return fmt.Errorf("invalid referrer: %w", refErr)
+		}
+	}
+
+	// Status: skipped while it is still empty.
+	if status := conv.Status; status != "" {
+		if statusErr := v.validateStatus(status); statusErr != nil {
+			return fmt.Errorf("invalid status: %w", statusErr)
+		}
+	}
+
+	return nil
+}
+
+// validateReferrer validates the referrer value (internal use only).
+// Surrounding whitespace is trimmed first; what remains must be non-empty,
+// at most MaxReferrerLength runes long and free of NUL bytes.
+func (v *ConversationValidator) validateReferrer(referrer string) error {
+	trimmed := strings.TrimSpace(referrer)
+
+	switch {
+	case trimmed == "":
+		return fmt.Errorf("referrer cannot be empty")
+	case utf8.RuneCountInString(trimmed) > v.config.MaxReferrerLength:
+		return fmt.Errorf("referrer cannot exceed %d characters", v.config.MaxReferrerLength)
+	case strings.Contains(trimmed, "\x00"):
+		return fmt.Errorf("referrer cannot contain null bytes")
+	default:
+		return nil
+	}
+}
+
+// ValidateConversationID validates conversation ID format. Checks run in order:
+// non-empty, "conv_" prefix, then idgen.ValidateIDFormat.
+func (v *ConversationValidator) ValidateConversationID(id string) error {
+	switch {
+	case id == "":
+		return fmt.Errorf("conversation ID cannot be empty")
+
+	case !strings.HasPrefix(id, "conv_"):
+		// Must start with "conv_" prefix
+		return fmt.Errorf("conversation ID must start with 'conv_' prefix")
+
+	case !idgen.ValidateIDFormat(id, "conv"):
+		// Remaining format rules are delegated to idgen
+		return fmt.Errorf("invalid conversation ID format")
+	}
+
+	return nil
+}
+
+// validateTitle validates a conversation title (internal use only).
+// An empty title is accepted; otherwise it must fit within MaxTitleLength
+// runes, contain something other than whitespace and hold no NUL bytes.
+func (v *ConversationValidator) validateTitle(title string) error {
+	// The title is optional, so "" is always valid.
+	if len(title) == 0 {
+		return nil
+	}
+
+	// Length is measured in runes, not bytes.
+	if runes := utf8.RuneCountInString(title); runes > v.config.MaxTitleLength {
+		return fmt.Errorf("title cannot exceed %d characters (got %d)", v.config.MaxTitleLength, runes)
+	}
+
+	// Reject titles that consist solely of whitespace.
+	if len(strings.TrimSpace(title)) == 0 {
+		return fmt.Errorf("title cannot be only whitespace")
+	}
+
+	// NUL bytes are rejected (security).
+	if strings.Contains(title, "\x00") {
+		return fmt.Errorf("title cannot contain null bytes")
+	}
+
+	return nil
+}
+
+// validateMetadata validates conversation metadata (internal use only).
+// A nil map passes; otherwise the entry count is capped at MaxMetadataKeys
+// and each key and value is validated in turn.
+func (v *ConversationValidator) validateMetadata(metadata map[string]string) error {
+	if metadata == nil {
+		return nil
+	}
+
+	if count := len(metadata); count > v.config.MaxMetadataKeys {
+		return fmt.Errorf("metadata cannot have more than %d keys (got %d)", v.config.MaxMetadataKeys, count)
+	}
+
+	// Map iteration order is unspecified, so which bad entry is reported may vary.
+	for k, val := range metadata {
+		if keyErr := v.validateMetadataKey(k); keyErr != nil {
+			return fmt.Errorf("invalid metadata key '%s': %w", k, keyErr)
+		}
+		if valErr := v.validateMetadataValue(k, val); valErr != nil {
+			return fmt.Errorf("invalid metadata value for key '%s': %w", k, valErr)
+		}
+	}
+
+	return nil
+}
+
+// validateStatus validates conversation status (internal use only); only the
+// active, archived and deleted statuses are accepted.
+func (v *ConversationValidator) validateStatus(status ConversationStatus) error {
+	if status == ConversationStatusActive || status == ConversationStatusArchived ||
+		status == ConversationStatusDeleted {
+		return nil
+	}
+	return fmt.Errorf("invalid conversation status: %s (must be active, archived, or deleted)", status)
+}
+
+// Private helper methods
+
+// validateMetadataKey checks one key: non-empty, at most MaxMetadataKeyLength
+// bytes, matching metadataKeyPattern and not starting with an underscore.
+func (v *ConversationValidator) validateMetadataKey(key string) error {
+	size := len(key) // OpenAI uses byte length for keys
+
+	switch {
+	case size == 0:
+		return fmt.Errorf("metadata key cannot be empty")
+
+	case size > v.config.MaxMetadataKeyLength:
+		return fmt.Errorf("metadata key cannot exceed %d bytes (got %d)", v.config.MaxMetadataKeyLength, size)
+
+	case !v.metadataKeyPattern.MatchString(key):
+		return fmt.Errorf("metadata key must contain only alphanumeric characters and underscores")
+
+	case key[0] == '_':
+		// A leading underscore is reserved for system metadata.
+		return fmt.Errorf("metadata key cannot start with underscore (reserved for system use)")
+	}
+
+	return nil
+}
+
+// validateMetadataValue checks one value: at most MaxMetadataValueLength runes
+// and no NUL bytes. The key argument is accepted but not used.
+func (v *ConversationValidator) validateMetadataValue(key, value string) error {
+	if n := utf8.RuneCountInString(value); n > v.config.MaxMetadataValueLength {
+		return fmt.Errorf("metadata value cannot exceed %d characters (got %d)", v.config.MaxMetadataValueLength, n)
+	}
+
+	// NUL bytes are rejected (security).
+	if hasNUL := strings.Contains(value, "\x00"); hasNUL {
+		return fmt.Errorf("metadata value cannot contain null bytes")
+	}
+	return nil
+}
diff --git a/services/llm-api/internal/domain/conversation/item.go b/services/llm-api/internal/domain/conversation/item.go
new file mode 100644
index 00000000..d3b51d76
--- /dev/null
+++ b/services/llm-api/internal/domain/conversation/item.go
@@ -0,0 +1,806 @@
+package conversation
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/query"
+)
+
+// ===============================================
+// Item Types and Enums
+// ===============================================
+
+// @Enum(message, function_call, function_call_output, reasoning, file_search, web_search, code_interpreter, computer_use, custom_tool_call, mcp_item, image_generation)
+type ItemType string
+
+const (
+	ItemTypeMessage         ItemType = "message"
+	ItemTypeFunctionCall    ItemType = "function_call"
+	ItemTypeFunctionCallOut ItemType = "function_call_output"
+	ItemTypeReasoning       ItemType = "reasoning"        // For o1/reasoning models
+	ItemTypeFileSearch      ItemType = "file_search"      // RAG/retrieval operations
+	ItemTypeWebSearch       ItemType = "web_search"       // Web browsing operations
+	ItemTypeCodeInterpreter ItemType = "code_interpreter" // Code execution
+	ItemTypeComputerUse     ItemType = "computer_use"     // Computer interaction
+	ItemTypeCustomToolCall  ItemType = "custom_tool_call" // Custom tool invocations
+	ItemTypeMCPItem         ItemType = "mcp_item"         // Model Context Protocol items
+	ItemTypeImageGeneration ItemType = "image_generation" // DALL-E image generation
+)
+
+// ValidateItemType reports whether input is one of the ItemType constants.
+func ValidateItemType(input string) bool {
+	switch ItemType(input) { // a matching case has an empty body and just leaves the switch
+	case ItemTypeMessage, ItemTypeFunctionCall, ItemTypeFunctionCallOut, ItemTypeReasoning:
+	case ItemTypeFileSearch, ItemTypeWebSearch, ItemTypeCodeInterpreter, ItemTypeComputerUse:
+	case ItemTypeCustomToolCall, ItemTypeMCPItem, ItemTypeImageGeneration:
+	default:
+		return false
+	}
+	return true
+}
+
+// @Enum(system, user, assistant, tool, developer, critic, discriminator, unknown)
+type ItemRole string
+
+const (
+	ItemRoleSystem        ItemRole = "system"
+	ItemRoleUser          ItemRole = "user"
+	ItemRoleAssistant     ItemRole = "assistant"
+	ItemRoleTool          ItemRole = "tool"
+	ItemRoleDeveloper     ItemRole = "developer"     // System-level instructions (OpenAI replacement for system)
+	ItemRoleCritic        ItemRole = "critic"        // For critique/evaluation workflows
+	ItemRoleDiscriminator ItemRole = "discriminator" // For adversarial/validation workflows
+	ItemRoleUnknown       ItemRole = "unknown"       // Fallback for unrecognized roles
+)
+
+// ValidateItemRole reports whether input is one of the ItemRole constants
+// (ItemRoleUnknown included).
+func ValidateItemRole(input string) bool {
+	role := ItemRole(input)
+	return role == ItemRoleSystem || role == ItemRoleUser ||
+		role == ItemRoleAssistant || role == ItemRoleTool ||
+		role == ItemRoleDeveloper || role == ItemRoleCritic ||
+		role == ItemRoleDiscriminator || role == ItemRoleUnknown
+}
+
+// @Enum(incomplete, in_progress, completed, failed, cancelled, searching, generating, calling, streaming, rate_limited)
+type ItemStatus string
+
+const (
+	ItemStatusIncomplete  ItemStatus = "incomplete"   // Not started or partially complete (OpenAI uses this instead of "pending")
+	ItemStatusInProgress  ItemStatus = "in_progress"  // Currently processing
+	ItemStatusCompleted   ItemStatus = "completed"    // Successfully finished
+	ItemStatusFailed      ItemStatus = "failed"       // Failed with error
+	ItemStatusCancelled   ItemStatus = "cancelled"    // Cancelled by user or system
+	ItemStatusSearching   ItemStatus = "searching"    // File/web search in progress
+	ItemStatusGenerating  ItemStatus = "generating"   // Image generation in progress
+	ItemStatusCalling     ItemStatus = "calling"      // Function/tool call in progress
+	ItemStatusStreaming   ItemStatus = "streaming"    // Streaming response in progress
+	ItemStatusRateLimited ItemStatus = "rate_limited" // Rate limit hit
+)
+
+// ValidateItemStatus reports whether input is one of the ItemStatus constants.
+func ValidateItemStatus(input string) bool {
+	switch ItemStatus(input) { // a matching case has an empty body and just leaves the switch
+	case ItemStatusIncomplete, ItemStatusInProgress, ItemStatusCompleted, ItemStatusFailed:
+	case ItemStatusCancelled, ItemStatusSearching, ItemStatusGenerating, ItemStatusCalling:
+	case ItemStatusStreaming, ItemStatusRateLimited:
+	default:
+		return false
+	}
+	return true
+}
+
+// ToItemStatusPtr returns a pointer to a copy of s, for filling optional *ItemStatus fields
+func ToItemStatusPtr(s ItemStatus) *ItemStatus {
+	return &s
+}
+
+// ItemStatusToStringPtr converts *ItemStatus to a newly allocated *string; nil stays nil
+func ItemStatusToStringPtr(s *ItemStatus) *string {
+	if s != nil {
+		text := string(*s)
+		return &text
+	}
+	return nil
+}
+
+// ===============================================
+// Item Structures
+// ===============================================
+
+// BaseItem contains common fields for all item types
+type BaseItem struct {
+	ID                uint               `json:"-"`
+	ConversationID    uint               `json:"-"`
+	PublicID          string             `json:"id"`
+	Object            string             `json:"object"` // Always "conversation.item"
+	Type              ItemType           `json:"type"`
+	Status            *ItemStatus        `json:"status,omitempty"`
+	IncompleteAt      *time.Time         `json:"incomplete_at,omitempty"`
+	IncompleteDetails *IncompleteDetails `json:"incomplete_details,omitempty"`
+	CompletedAt       *time.Time         `json:"completed_at,omitempty"`
+	CreatedAt         time.Time          `json:"created_at"`
+}
+
+// MessageItem represents a message in the conversation
+type MessageItem struct {
+	BaseItem
+	Role    ItemRole  `json:"role"`
+	Content []Content `json:"content"`
+}
+
+// FunctionCallItem represents a function/tool call
+type FunctionCallItem struct {
+	BaseItem
+	CallID    string  `json:"call_id"`
+	Name      string  `json:"name"`
+	Arguments string  `json:"arguments"`
+	ToolType  *string `json:"tool_type,omitempty"` // "function", "file_search", "code_interpreter", etc.
+}
+
+// FunctionCallOutputItem represents the output of a function call
+type FunctionCallOutputItem struct {
+	BaseItem
+	CallID string `json:"call_id"`
+	Output string `json:"output"`
+}
+
+// ReasoningItem represents internal reasoning from models like o1
+type ReasoningItem struct {
+	BaseItem
+	Summary  string    `json:"summary"`
+	Thinking []Content `json:"thinking,omitempty"` // Internal reasoning steps
+}
+
+// FileSearchItem represents a file search operation
+type FileSearchItem struct {
+	BaseItem
+	Query    string             `json:"query"`
+	FileIDs  []string           `json:"file_ids,omitempty"`
+	Results  []FileSearchResult `json:"results,omitempty"`
+	Metadata map[string]string  `json:"metadata,omitempty"`
+}
+
+// FileSearchResult represents a single file search result
+type FileSearchResult struct {
+	FileID      string       `json:"file_id"`
+	Filename    string       `json:"filename"`
+	Score       float64      `json:"score"`
+	Content     string       `json:"content"`
+	PageNumber  *int         `json:"page_number,omitempty"`
+	Annotations []Annotation `json:"annotations,omitempty"`
+}
+
+// WebSearchItem represents a web search operation
+type WebSearchItem struct {
+	BaseItem
+	Query   string            `json:"query"`
+	Results []WebSearchResult `json:"results,omitempty"`
+}
+
+// WebSearchResult represents a single web search result
+type WebSearchResult struct {
+	Title   string   `json:"title"`
+	URL     string   `json:"url"`
+	Snippet string   `json:"snippet"`
+	Score   *float64 `json:"score,omitempty"`
+}
+
+// CodeInterpreterItem represents code execution
+type CodeInterpreterItem struct {
+	BaseItem
+	Language string         `json:"language"`
+	Code     string         `json:"code"`
+	Output   *string        `json:"output,omitempty"`
+	Error    *string        `json:"error,omitempty"`
+	ExitCode *int           `json:"exit_code,omitempty"`
+	Files    []string       `json:"files,omitempty"` // Generated file IDs
+	Metadata map[string]any `json:"metadata,omitempty"`
+}
+
+// ComputerUseItem represents computer interaction
+type ComputerUseItem struct {
+	BaseItem
+	Action     ComputerAction     `json:"action"`
+	Screenshot *ScreenshotContent `json:"screenshot,omitempty"`
+	Result     *string            `json:"result,omitempty"`
+	Error      *string            `json:"error,omitempty"`
+}
+
+// CustomToolCallItem represents a custom tool invocation
+type CustomToolCallItem struct {
+	BaseItem
+	ToolID   string         `json:"tool_id"`
+	ToolName string         `json:"tool_name"`
+	Input    map[string]any `json:"input"`
+	Output   map[string]any `json:"output,omitempty"`
+}
+
+// MCPItem represents a Model Context Protocol item
+type MCPItem struct {
+	BaseItem
+	Protocol string         `json:"protocol"`
+	Action   string         `json:"action"`
+	Data     map[string]any `json:"data"`
+}
+
+// ImageGenerationItem represents image generation (DALL-E)
+type ImageGenerationItem struct {
+	BaseItem
+	Prompt        string   `json:"prompt"`
+	Model         *string  `json:"model,omitempty"`
+	Size          *string  `json:"size,omitempty"`    // "256x256", "512x512", "1024x1024", etc.
+	Quality       *string  `json:"quality,omitempty"` // "standard", "hd"
+	Style         *string  `json:"style,omitempty"`   // "vivid", "natural"
+	ImageURLs     []string `json:"image_urls,omitempty"`
+	RevisedPrompt *string  `json:"revised_prompt,omitempty"`
+}
+
+// Item is the legacy/generic item structure for backward compatibility
+// New code should use the type-specific item structs above
+type Item struct {
+	ID                uint               `json:"-"`
+	ConversationID    uint               `json:"-"`
+	PublicID          string             `json:"id"`
+	Object            string             `json:"object"`                    // Always "conversation.item" for OpenAI compatibility
+	Branch            string             `json:"branch,omitempty"`          // Branch identifier (MAIN, EDIT_1, etc.)
+	SequenceNumber    int                `json:"sequence_number,omitempty"` // Order within branch
+	Type              ItemType           `json:"type"`
+	Role              *ItemRole          `json:"role,omitempty"`
+	Content           []Content          `json:"content,omitempty"`
+	Status            *ItemStatus        `json:"status,omitempty"`
+	IncompleteAt      *time.Time         `json:"incomplete_at,omitempty"`
+	IncompleteDetails *IncompleteDetails `json:"incomplete_details,omitempty"`
+	CompletedAt       *time.Time         `json:"completed_at,omitempty"`
+	ResponseID        *uint              `json:"-"`
+
+	// User feedback/rating
+	Rating        *ItemRating `json:"rating,omitempty"`         // Like/unlike rating
+	RatedAt       *time.Time  `json:"rated_at,omitempty"`       // When rating was given
+	RatingComment *string     `json:"rating_comment,omitempty"` // Optional comment with rating
+
+	CreatedAt time.Time `json:"created_at"`
+}
+
+// ===============================================
+// Rating Support
+// ===============================================
+
+// ItemRating represents like/unlike feedback on an item
+type ItemRating string
+
+const (
+	ItemRatingLike   ItemRating = "like"   // Positive feedback (like)
+	ItemRatingUnlike ItemRating = "unlike" // Negative feedback (unlike)
+)
+
+// Validate reports whether r is ItemRatingLike or ItemRatingUnlike
+func (r ItemRating) Validate() bool {
+	return r == ItemRatingLike || r == ItemRatingUnlike
+}
+
+// String returns the rating's underlying string value unchanged
+func (r ItemRating) String() string {
+	return string(r)
+}
+
+// ToItemRatingPtr returns a pointer to a copy of r, for filling optional *ItemRating fields
+func ToItemRatingPtr(r ItemRating) *ItemRating {
+	return &r
+}
+
+// ParseItemRating converts s to an *ItemRating, or returns an error when s is not a valid rating
+func ParseItemRating(s string) (*ItemRating, error) {
+	if parsed := ItemRating(s); parsed.Validate() {
+		return &parsed, nil
+	}
+
+	return nil, fmt.Errorf("invalid rating: must be 'like' or 'unlike'")
+}
+
+// ===============================================
+// Content Structures
+// ===============================================
+
+type Content struct {
+	Type               string             `json:"type"`
+	FinishReason       *string            `json:"finish_reason,omitempty"`        // Finish reason
+	Text               *Text              `json:"text,omitempty"`                 // Generic text content
+	InputText          *string            `json:"input_text,omitempty"`           // User input text (simple)
+	OutputText         *OutputText        `json:"output_text,omitempty"`          // AI output text (with annotations)
+	ReasoningContent   *string            `json:"reasoning_content,omitempty"`    // AI reasoning content
+	Refusal            *string            `json:"refusal,omitempty"`              // Model refusal message
+	SummaryText        *string            `json:"summary_text,omitempty"`         // Summary content
+	Thinking           *string            `json:"thinking,omitempty"`             // Internal reasoning (o1 models)
+	Image              *ImageContent      `json:"image,omitempty"`                // Image content
+	File               *FileContent       `json:"file,omitempty"`                 // File content
+	Audio              *AudioContent      `json:"audio,omitempty"`                // Audio content for speech
+	InputAudio         *InputAudio        `json:"input_audio,omitempty"`          // User audio input
+	Code               *CodeContent       `json:"code,omitempty"`                 // Code block with execution metadata
+	ComputerScreenshot *ScreenshotContent `json:"computer_screenshot,omitempty"`  // Screenshot from computer use
+	ComputerAction     *ComputerAction    `json:"computer_action,omitempty"`      // Computer interaction details
+	FunctionCall       *FunctionCall      `json:"function_call,omitempty"`        // Function call content (deprecated, use tool_calls)
+	FunctionCallOut    *FunctionCallOut   `json:"function_call_output,omitempty"` // Function call output
+	ToolCalls          []ToolCall         `json:"tool_calls,omitempty"`           // Tool calls (for assistant messages)
+	ToolCallID         *string            `json:"tool_call_id,omitempty"`         // Tool call ID (for tool responses)
+}
+
+// Text content - matches OpenAI's text content format
+type Text struct {
+	Text        string       `json:"text"` // Changed from "value" to match OpenAI spec
+	Annotations []Annotation `json:"annotations,omitempty"`
+}
+
+type OutputText struct {
+	Text        string       `json:"text"`
+	Annotations []Annotation `json:"annotations"`        // Required for OpenAI compatibility
+	LogProbs    []LogProb    `json:"logprobs,omitempty"` // Token probabilities
+}
+
+// Image content for multimodal support
+type ImageContent struct {
+	URL    string `json:"url,omitempty"`
+	FileID string `json:"file_id,omitempty"`
+	Detail string `json:"detail,omitempty"` // "low", "high", "auto"
+}
+
+// File content for attachments
+type FileContent struct {
+	FileID   string `json:"file_id"`
+	Name     string `json:"name,omitempty"`
+	MimeType string `json:"mime_type,omitempty"`
+	Size     int64  `json:"size,omitempty"`
+}
+
+// Audio content for speech output
+type AudioContent struct {
+	ID         string  `json:"id,omitempty"`
+	Transcript *string `json:"transcript,omitempty"` // Text transcription of audio
+	Data       *string `json:"data,omitempty"`       // Base64 encoded audio data
+	Format     *string `json:"format,omitempty"`     // Audio format: mp3, wav, pcm16, etc.
+}
+
+// InputAudio for user audio input
+type InputAudio struct {
+	Data       string  `json:"data"`                 // Base64 encoded audio data
+	Format     string  `json:"format"`               // Audio format: mp3, wav, pcm16, etc.
+	Transcript *string `json:"transcript,omitempty"` // Optional text transcription
+}
+
+// CodeContent represents code with execution metadata
+type CodeContent struct {
+	Language    string         `json:"language"`               // Programming language
+	Code        string         `json:"code"`                   // Code content
+	ExecutionID *string        `json:"execution_id,omitempty"` // Execution session ID
+	Output      *string        `json:"output,omitempty"`       // Execution output
+	Error       *string        `json:"error,omitempty"`        // Execution error
+	ExitCode    *int           `json:"exit_code,omitempty"`    // Process exit code
+	Metadata    map[string]any `json:"metadata,omitempty"`     // Additional metadata
+}
+
+// ScreenshotContent represents a screenshot from computer use
+type ScreenshotContent struct {
+	ImageURL    string  `json:"image_url"`             // URL to screenshot image
+	ImageData   *string `json:"image_data,omitempty"`  // Base64 encoded image data
+	Width       int     `json:"width"`                 // Image width in pixels
+	Height      int     `json:"height"`                // Image height in pixels
+	Timestamp   int64   `json:"timestamp"`             // Unix timestamp when screenshot was taken
+	Description *string `json:"description,omitempty"` // Optional description
+}
+
+// ComputerAction represents a computer interaction action
+type ComputerAction struct {
+	Action      string         `json:"action"`                 // Action type: "click", "type", "key", "scroll", "move", etc.
+	Coordinates *Coordinates   `json:"coordinates,omitempty"`  // Screen coordinates for mouse actions
+	Text        *string        `json:"text,omitempty"`         // Text for typing actions
+	Key         *string        `json:"key,omitempty"`          // Key for keyboard actions
+	ScrollDelta *ScrollDelta   `json:"scroll_delta,omitempty"` // Scroll amount
+	Metadata    map[string]any `json:"metadata,omitempty"`     // Additional action metadata
+}
+
+// Coordinates represents screen coordinates
+type Coordinates struct {
+	X int `json:"x"`
+	Y int `json:"y"`
+}
+
+// ScrollDelta represents scroll amount
+type ScrollDelta struct {
+	X int `json:"x"`
+	Y int `json:"y"`
+}
+
+// FunctionCall represents a function/tool call
+type FunctionCall struct {
+	ID        string `json:"id,omitempty"`        // Call ID
+	Name      string `json:"name"`                // Function name
+	Arguments string `json:"arguments,omitempty"` // JSON-encoded arguments
+}
+
+// FunctionCallOut represents the output of a function call
+type FunctionCallOut struct {
+	CallID string `json:"call_id"` // ID of the function call this responds to
+	Output string `json:"output"`  // String output from the function
+}
+
+// ToolCall represents a tool invocation (superset of function calls)
+type ToolCall struct {
+	ID       string       `json:"id"`
+	Type     string       `json:"type"` // "function", "file_search", "code_interpreter"
+	Function FunctionCall `json:"function,omitempty"`
+}
+
+type Annotation struct {
+	Type        string   `json:"type"`                   // "file_citation", "url_citation", "file_path", etc.
+	Text        string   `json:"text,omitempty"`         // Display text
+	FileID      string   `json:"file_id,omitempty"`      // For file citations
+	Filename    *string  `json:"filename,omitempty"`     // File name for citations
+	ContainerID *string  `json:"container_id,omitempty"` // Document container reference
+	URL         string   `json:"url,omitempty"`          // For URL citations
+	Quote       *string  `json:"quote,omitempty"`        // Actual quoted text from source
+	PageNumber  *int     `json:"page_number,omitempty"`  // Page reference for documents
+	BoundingBox *BBox    `json:"bounding_box,omitempty"` // Bounding box for image/PDF annotations
+	Confidence  *float64 `json:"confidence,omitempty"`   // Citation confidence score (0.0-1.0)
+	StartIndex  int      `json:"start_index"`            // Start position in text
+	EndIndex    int      `json:"end_index"`              // End position in text
+	Index       int      `json:"index,omitempty"`        // Citation index
+}
+
+// BBox represents a bounding box for spatial annotations
+type BBox struct {
+	X      float64 `json:"x"`
+	Y      float64 `json:"y"`
+	Width  float64 `json:"width"`
+	Height float64 `json:"height"`
+}
+
+// Log probability for AI responses
+type LogProb struct {
+	Token       string       `json:"token"`
+	LogProb     float64      `json:"logprob"`
+	Bytes       []int        `json:"bytes,omitempty"`
+	TopLogProbs []TopLogProb `json:"top_logprobs,omitempty"`
+}
+
+type TopLogProb struct {
+	Token   string  `json:"token"`
+	LogProb float64 `json:"logprob"`
+	Bytes   []int   `json:"bytes,omitempty"`
+}
+
+type IncompleteDetails struct {
+	Reason string  `json:"reason"`          // "max_tokens", "content_filter", "tool_calls", etc.
+	Error  *string `json:"error,omitempty"` // Error message if applicable
+}
+
+// ===============================================
+// Item Repository
+// ===============================================
+
+type ItemFilter struct {
+	ID             *uint
+	PublicID       *string
+	ConversationID *uint
+	Role           *ItemRole
+	ResponseID     *uint
+}
+
+type ItemRepository interface {
+	Create(ctx context.Context, item *Item) error
+	FindByID(ctx context.Context, id uint) (*Item, error)
+	FindByPublicID(ctx context.Context, publicID string) (*Item, error) // Find by OpenAI-compatible string ID
+	FindByConversationID(ctx context.Context, conversationID uint) ([]*Item, error)
+	Search(ctx context.Context, conversationID uint, query string) ([]*Item, error)
+	Delete(ctx context.Context, id uint) error
+	BulkCreate(ctx context.Context, items []*Item) error
+	CountByConversation(ctx context.Context, conversationID uint) (int64, error)
+	ExistsByIDAndConversation(ctx context.Context, itemID uint, conversationID uint) (bool, error)
+	FindByFilter(ctx context.Context, filter ItemFilter, pagination *query.Pagination) ([]*Item, error)
+	Count(ctx context.Context, filter ItemFilter) (int64, error)
+}
+
+// ===============================================
+// Item Factory Functions
+// ===============================================
+
+// NewItem creates a legacy generic Item stamped with the current time; Role is always set
+func NewItem(publicID string, itemType ItemType, role ItemRole, content []Content, conversationID uint, responseID *uint) *Item {
+	item := new(Item)
+	item.PublicID = publicID
+	item.Object = "conversation.item"
+	item.Type = itemType
+	item.Role = &role
+	item.Content = content
+	item.ConversationID = conversationID
+	item.ResponseID = responseID
+	item.CreatedAt = time.Now()
+	return item
+}
+
+// NewMessageItem builds a MessageItem (type ItemTypeMessage) for the given
+// conversation; CreatedAt is set to the current time and Status is left nil.
+func NewMessageItem(publicID string, role ItemRole, content []Content, conversationID uint) *MessageItem {
+	msg := new(MessageItem)
+	msg.ConversationID = conversationID
+	msg.PublicID = publicID
+	msg.Object = "conversation.item"
+	msg.Type = ItemTypeMessage
+	msg.CreatedAt = time.Now()
+
+	msg.Role = role
+	msg.Content = content
+	return msg
+}
+
+// NewFunctionCallItem builds a FunctionCallItem (type ItemTypeFunctionCall)
+// whose initial status is ItemStatusCalling; CreatedAt is the current time.
+func NewFunctionCallItem(publicID string, callID string, name string, arguments string, conversationID uint) *FunctionCallItem {
+	call := new(FunctionCallItem)
+	call.ConversationID = conversationID
+	call.PublicID = publicID
+	call.Object = "conversation.item"
+	call.Type = ItemTypeFunctionCall
+	call.Status = ToItemStatusPtr(ItemStatusCalling)
+	call.CreatedAt = time.Now()
+
+	call.CallID = callID
+	call.Name = name
+	call.Arguments = arguments
+	return call
+}
+
+// NewFunctionCallOutputItem builds a FunctionCallOutputItem (type
+// ItemTypeFunctionCallOut) that is created already in ItemStatusCompleted.
+func NewFunctionCallOutputItem(publicID string, callID string, output string, conversationID uint) *FunctionCallOutputItem {
+	result := new(FunctionCallOutputItem)
+	result.ConversationID = conversationID
+	result.PublicID = publicID
+	result.Object = "conversation.item"
+	result.Type = ItemTypeFunctionCallOut
+	result.Status = ToItemStatusPtr(ItemStatusCompleted)
+	result.CreatedAt = time.Now()
+
+	result.CallID = callID
+	result.Output = output
+	return result
+}
+
+// NewReasoningItem builds a ReasoningItem (type ItemTypeReasoning) for the
+// given conversation; CreatedAt is the current time and Status is left nil.
+func NewReasoningItem(publicID string, summary string, thinking []Content, conversationID uint) *ReasoningItem {
+	reasoning := new(ReasoningItem)
+	reasoning.ConversationID = conversationID
+	reasoning.PublicID = publicID
+	reasoning.Object = "conversation.item"
+	reasoning.Type = ItemTypeReasoning
+	reasoning.CreatedAt = time.Now()
+
+	reasoning.Summary = summary
+	reasoning.Thinking = thinking
+	return reasoning
+}
+
+// NewFileSearchItem builds a FileSearchItem (type ItemTypeFileSearch) whose
+// initial status is ItemStatusSearching; Results and Metadata start empty.
+func NewFileSearchItem(publicID string, query string, fileIDs []string, conversationID uint) *FileSearchItem {
+	search := new(FileSearchItem)
+	search.ConversationID = conversationID
+	search.PublicID = publicID
+	search.Object = "conversation.item"
+	search.Type = ItemTypeFileSearch
+	search.Status = ToItemStatusPtr(ItemStatusSearching)
+	search.CreatedAt = time.Now()
+
+	search.Query = query
+	search.FileIDs = fileIDs
+	return search
+}
+
+// NewWebSearchItem builds a WebSearchItem (type ItemTypeWebSearch) whose
+// initial status is ItemStatusSearching; Results start empty.
+func NewWebSearchItem(publicID string, query string, conversationID uint) *WebSearchItem {
+	search := new(WebSearchItem)
+	search.ConversationID = conversationID
+	search.PublicID = publicID
+	search.Object = "conversation.item"
+	search.Type = ItemTypeWebSearch
+	search.Status = ToItemStatusPtr(ItemStatusSearching)
+	search.CreatedAt = time.Now()
+
+	search.Query = query
+	return search
+}
+
+// NewCodeInterpreterItem builds a code interpreter item for the given
+// language and source code. The item starts in status ItemStatusInProgress
+// with CreatedAt set to the current time.
+func NewCodeInterpreterItem(publicID string, language string, code string, conversationID uint) *CodeInterpreterItem {
+	base := BaseItem{
+		ConversationID: conversationID,
+		PublicID:       publicID,
+		Object:         "conversation.item",
+		Type:           ItemTypeCodeInterpreter,
+		Status:         ToItemStatusPtr(ItemStatusInProgress),
+		CreatedAt:      time.Now(),
+	}
+
+	return &CodeInterpreterItem{BaseItem: base, Language: language, Code: code}
+}
+
+// NewComputerUseItem builds a computer use item wrapping the given action.
+// The item starts in status ItemStatusInProgress with CreatedAt set to the
+// current time.
+func NewComputerUseItem(publicID string, action ComputerAction, conversationID uint) *ComputerUseItem {
+	base := BaseItem{
+		ConversationID: conversationID,
+		PublicID:       publicID,
+		Object:         "conversation.item",
+		Type:           ItemTypeComputerUse,
+		Status:         ToItemStatusPtr(ItemStatusInProgress),
+		CreatedAt:      time.Now(),
+	}
+
+	return &ComputerUseItem{BaseItem: base, Action: action}
+}
+
+// NewCustomToolCallItem builds a custom tool call item for the tool
+// identified by toolID/toolName with the supplied input map. The item starts
+// in status ItemStatusCalling with CreatedAt set to the current time.
+func NewCustomToolCallItem(publicID string, toolID string, toolName string, input map[string]any, conversationID uint) *CustomToolCallItem {
+	base := BaseItem{
+		ConversationID: conversationID,
+		PublicID:       publicID,
+		Object:         "conversation.item",
+		Type:           ItemTypeCustomToolCall,
+		Status:         ToItemStatusPtr(ItemStatusCalling),
+		CreatedAt:      time.Now(),
+	}
+
+	return &CustomToolCallItem{
+		BaseItem: base,
+		ToolID:   toolID,
+		ToolName: toolName,
+		Input:    input,
+	}
+}
+
+// NewMCPItem builds a Model Context Protocol item from the given protocol,
+// action and data map. No Status is assigned; CreatedAt is set to the current
+// time.
+func NewMCPItem(publicID string, protocol string, action string, data map[string]any, conversationID uint) *MCPItem {
+	base := BaseItem{
+		ConversationID: conversationID,
+		PublicID:       publicID,
+		Object:         "conversation.item",
+		Type:           ItemTypeMCPItem,
+		CreatedAt:      time.Now(),
+	}
+
+	return &MCPItem{
+		BaseItem: base,
+		Protocol: protocol,
+		Action:   action,
+		Data:     data,
+	}
+}
+
+// NewImageGenerationItem builds an image generation item for the given
+// prompt. The item starts in status ItemStatusGenerating with CreatedAt set
+// to the current time.
+func NewImageGenerationItem(publicID string, prompt string, conversationID uint) *ImageGenerationItem {
+	base := BaseItem{
+		ConversationID: conversationID,
+		PublicID:       publicID,
+		Object:         "conversation.item",
+		Type:           ItemTypeImageGeneration,
+		Status:         ToItemStatusPtr(ItemStatusGenerating),
+		CreatedAt:      time.Now(),
+	}
+
+	return &ImageGenerationItem{BaseItem: base, Prompt: prompt}
+}
+
+// ===============================================
+// Content Factory Functions
+// ===============================================
+
+// NewTextContent wraps text in a Content block of type "text".
+func NewTextContent(text string) Content {
+	payload := &Text{Text: text}
+	return Content{Type: "text", Text: payload}
+}
+
+// NewInputTextContent creates a new input text content (for user messages)
+func NewInputTextContent(text string) Content {
+	return Content{
+		Type:      "input_text",
+		InputText: &text,
+	}
+}
+
+// NewOutputTextContent wraps text and its annotations in a Content block of
+// type "output_text". The annotations slice is stored as passed.
+func NewOutputTextContent(text string, annotations []Annotation) Content {
+	payload := &OutputText{Text: text, Annotations: annotations}
+	return Content{Type: "output_text", OutputText: payload}
+}
+
+// NewImageContent wraps an image reference (URL and/or file ID, plus detail
+// level) in a Content block of type "image". No validation is done here.
+func NewImageContent(url, fileID, detail string) Content {
+	payload := &ImageContent{URL: url, FileID: fileID, Detail: detail}
+	return Content{Type: "image", Image: payload}
+}
+
+// NewAudioContent wraps an audio reference in a Content block of type
+// "audio". transcript and format are optional and stored as given (may be nil).
+func NewAudioContent(id string, transcript *string, format *string) Content {
+	payload := &AudioContent{ID: id, Transcript: transcript, Format: format}
+	return Content{Type: "audio", Audio: payload}
+}
+
+// NewRefusalContent creates a refusal content (when model refuses to answer)
+func NewRefusalContent(refusalMessage string) Content {
+	return Content{
+		Type:    "refusal",
+		Refusal: &refusalMessage,
+	}
+}
+
+// NewThinkingContent creates thinking content (for o1 reasoning models)
+func NewThinkingContent(thinking string) Content {
+	return Content{
+		Type:     "thinking",
+		Thinking: &thinking,
+	}
+}
+
+// NewCodeContent wraps source code and its execution metadata in a Content
+// block of type "code". output and exitCode are optional and stored as given.
+func NewCodeContent(language string, code string, output *string, exitCode *int) Content {
+	payload := &CodeContent{
+		Language: language,
+		Code:     code,
+		Output:   output,
+		ExitCode: exitCode,
+	}
+	return Content{Type: "code", Code: payload}
+}
+
+// NewComputerScreenshotContent wraps a screenshot reference and its
+// dimensions in a Content block of type "computer_screenshot". Timestamp is
+// set to the current Unix time in seconds.
+func NewComputerScreenshotContent(imageURL string, width int, height int) Content {
+	shot := &ScreenshotContent{
+		ImageURL:  imageURL,
+		Width:     width,
+		Height:    height,
+		Timestamp: time.Now().Unix(),
+	}
+	return Content{Type: "computer_screenshot", ComputerScreenshot: shot}
+}
+
+// NewComputerActionContent wraps a computer action (action name plus
+// optional coordinates and text) in a Content block of type "computer_action".
+func NewComputerActionContent(action string, coords *Coordinates, text *string) Content {
+	return Content{
+		Type: "computer_action",
+		ComputerAction: &ComputerAction{
+			Action:      action,
+			Coordinates: coords,
+			Text:        text,
+		},
+	}
+}
diff --git a/services/llm-api/internal/domain/conversation/item_validation.go b/services/llm-api/internal/domain/conversation/item_validation.go
new file mode 100644
index 00000000..f90b66c8
--- /dev/null
+++ b/services/llm-api/internal/domain/conversation/item_validation.go
@@ -0,0 +1,633 @@
+package conversation
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"unicode/utf8"
+
+	"jan-server/services/llm-api/internal/utils/idgen"
+)
+
+// ===============================================
+// Item Validation
+// ===============================================
+
+// ItemValidationConfig holds item-level validation rules.
+//
+// Limits enforced by ItemValidator in this file:
+//   - MaxContentBlocks:     content blocks per item (ValidateContentArray)
+//   - MaxTextContentLength: runes per text/input_text/output_text/refusal value
+//   - MaxCodeLength:        runes per code block
+//   - MaxThinkingLength:    runes per thinking block
+//   - MaxFileSize:          bytes per file content block
+//   - MaxAnnotations:       annotations per text/output_text block
+//   - MaxItemsPerBatch:     items per batch (ValidateBatchSize)
+//
+// MaxReasoningLength, MaxAudioSize, MaxImageSize and MaxToolCalls are carried
+// in the config but are not referenced by any validator method in this file.
+type ItemValidationConfig struct {
+	MaxContentBlocks     int
+	MaxTextContentLength int
+	MaxCodeLength        int
+	MaxReasoningLength   int
+	MaxThinkingLength    int
+	MaxAudioSize         int64
+	MaxImageSize         int64
+	MaxFileSize          int64
+	MaxToolCalls         int
+	MaxAnnotations       int
+	MaxItemsPerBatch     int
+}
+
+// DefaultItemValidationConfig returns a freshly allocated config populated
+// with the OpenAI-aligned default limits.
+func DefaultItemValidationConfig() *ItemValidationConfig {
+	defaults := new(ItemValidationConfig)
+
+	// Count limits.
+	defaults.MaxContentBlocks = 100 // OpenAI supports multiple content blocks
+	defaults.MaxToolCalls = 16      // max tool calls per message
+	defaults.MaxAnnotations = 100   // max annotations per content block
+	defaults.MaxItemsPerBatch = 100 // max items per batch operation
+
+	// Length limits, in characters.
+	defaults.MaxTextContentLength = 100000 // ~100K chars for text content
+	defaults.MaxCodeLength = 50000         // code blocks up to 50K chars
+	defaults.MaxReasoningLength = 100000   // reasoning content up to 100K chars
+	defaults.MaxThinkingLength = 50000     // thinking content up to 50K chars
+
+	// Size limits, in bytes.
+	defaults.MaxAudioSize = 25 << 20 // 25MB for audio
+	defaults.MaxImageSize = 20 << 20 // 20MB for images
+	defaults.MaxFileSize = 512 << 20 // 512MB for files
+
+	return defaults
+}
+
+// ItemValidator handles item-level validation against an ItemValidationConfig.
+// Construct it with NewItemValidator so the config and patterns are populated.
+type ItemValidator struct {
+	// config holds the limits applied by the validation methods.
+	config     *ItemValidationConfig
+	// itemIDRx is compiled by NewItemValidator but is not read by any method
+	// in this file; ValidateItemID delegates to idgen.ValidateIDFormat instead.
+	itemIDRx   *regexp.Regexp
+	// urlPattern matches the accepted URL prefixes for image and annotation URLs.
+	urlPattern *regexp.Regexp
+}
+
+// NewItemValidator builds an ItemValidator. A nil config is replaced with
+// DefaultItemValidationConfig(); a non-nil config is stored by reference.
+func NewItemValidator(config *ItemValidationConfig) *ItemValidator {
+	effective := config
+	if effective == nil {
+		effective = DefaultItemValidationConfig()
+	}
+
+	validator := &ItemValidator{config: effective}
+	validator.itemIDRx = regexp.MustCompile(`^msg_[a-zA-Z0-9]{16,}$`)
+	// Accepted URL prefixes: http(s)://, data: and file://.
+	validator.urlPattern = regexp.MustCompile(`^https?://|^data:|^file://`)
+	return validator
+}
+
+// ValidateItem checks an Item's public ID, type, role, status and content in
+// that order and returns the first failure wrapped with the failing field.
+// PublicID, Role, Status and Content are only checked when they are set.
+func (v *ItemValidator) ValidateItem(item Item) error {
+	// Public ID is optional; an empty one is not checked.
+	if id := item.PublicID; id != "" {
+		if idErr := v.ValidateItemID(id); idErr != nil {
+			return fmt.Errorf("invalid item ID: %w", idErr)
+		}
+	}
+
+	// Type is always checked.
+	if typeErr := v.ValidateItemType(item.Type); typeErr != nil {
+		return fmt.Errorf("invalid item type: %w", typeErr)
+	}
+
+	// Role and status are pointers; nil means "not provided".
+	if role := item.Role; role != nil {
+		if roleErr := v.ValidateItemRole(*role); roleErr != nil {
+			return fmt.Errorf("invalid item role: %w", roleErr)
+		}
+	}
+	if status := item.Status; status != nil {
+		if statusErr := v.ValidateItemStatus(*status); statusErr != nil {
+			return fmt.Errorf("invalid item status: %w", statusErr)
+		}
+	}
+
+	// Content is only checked when at least one block is present.
+	if blocks := item.Content; len(blocks) > 0 {
+		if contentErr := v.ValidateContentArray(blocks); contentErr != nil {
+			return fmt.Errorf("invalid content: %w", contentErr)
+		}
+	}
+
+	return nil
+}
+
+// ValidateBaseItem checks the fields of a BaseItem: public ID (when
+// non-empty), type (always) and status (when non-nil). The first failure is
+// returned, wrapped with the failing field.
+func (v *ItemValidator) ValidateBaseItem(item BaseItem) error {
+	if id := item.PublicID; id != "" {
+		if idErr := v.ValidateItemID(id); idErr != nil {
+			return fmt.Errorf("invalid item ID: %w", idErr)
+		}
+	}
+
+	if typeErr := v.ValidateItemType(item.Type); typeErr != nil {
+		return fmt.Errorf("invalid item type: %w", typeErr)
+	}
+
+	status := item.Status
+	if status == nil {
+		// No status supplied; nothing further to check.
+		return nil
+	}
+	if statusErr := v.ValidateItemStatus(*status); statusErr != nil {
+		return fmt.Errorf("invalid item status: %w", statusErr)
+	}
+
+	return nil
+}
+
+// ValidateMessageItem checks a MessageItem: it must be non-nil, its embedded
+// base fields and role must be valid, and it must carry at least one valid
+// content block.
+func (v *ItemValidator) ValidateMessageItem(item *MessageItem) error {
+	if item == nil {
+		return fmt.Errorf("message item cannot be nil")
+	}
+
+	// Base-item errors are returned as-is (already wrapped by ValidateBaseItem).
+	baseErr := v.ValidateBaseItem(item.BaseItem)
+	if baseErr != nil {
+		return baseErr
+	}
+
+	roleErr := v.ValidateItemRole(item.Role)
+	if roleErr != nil {
+		return fmt.Errorf("invalid item role: %w", roleErr)
+	}
+
+	// Unlike generic items, a message may not be empty.
+	blocks := item.Content
+	if len(blocks) == 0 {
+		return fmt.Errorf("message item must have at least one content block")
+	}
+
+	contentErr := v.ValidateContentArray(blocks)
+	if contentErr != nil {
+		return fmt.Errorf("invalid content: %w", contentErr)
+	}
+
+	return nil
+}
+
+// ValidateItemID checks that id is non-empty, starts with the "msg_" prefix
+// and passes idgen.ValidateIDFormat for the "msg" prefix.
+func (v *ItemValidator) ValidateItemID(id string) error {
+	// Cases are evaluated top to bottom; the first failing rule wins.
+	switch {
+	case id == "":
+		return fmt.Errorf("item ID cannot be empty")
+	case !strings.HasPrefix(id, "msg_"):
+		return fmt.Errorf("item ID must start with 'msg_' prefix")
+	case !idgen.ValidateIDFormat(id, "msg"):
+		// Domain-specific format check.
+		return fmt.Errorf("invalid item ID format")
+	default:
+		return nil
+	}
+}
+
+// ValidateItemType reports an error when itemType is rejected by the
+// package-level ValidateItemType check.
+func (v *ItemValidator) ValidateItemType(itemType ItemType) error {
+	if ValidateItemType(string(itemType)) {
+		return nil
+	}
+	return fmt.Errorf("invalid item type: %s", itemType)
+}
+
+// ValidateItemRole reports an error when role is rejected by the
+// package-level ValidateItemRole check.
+func (v *ItemValidator) ValidateItemRole(role ItemRole) error {
+	if ValidateItemRole(string(role)) {
+		return nil
+	}
+	return fmt.Errorf("invalid item role: %s", role)
+}
+
+// ValidateItemStatus reports an error when status is rejected by the
+// package-level ValidateItemStatus check.
+func (v *ItemValidator) ValidateItemStatus(status ItemStatus) error {
+	if ValidateItemStatus(string(status)) {
+		return nil
+	}
+	return fmt.Errorf("invalid item status: %s", status)
+}
+
+// ValidateContentArray checks a slice of content blocks: the slice may be
+// empty, may not exceed MaxContentBlocks, and every block must pass
+// ValidateContent. The error names the index of the first invalid block.
+func (v *ItemValidator) ValidateContentArray(content []Content) error {
+	count := len(content)
+	switch {
+	case count == 0:
+		// Empty content is allowed for some item types.
+		return nil
+	case count > v.config.MaxContentBlocks:
+		return fmt.Errorf("content array cannot exceed %d blocks (got %d)", v.config.MaxContentBlocks, count)
+	}
+
+	for idx := range content {
+		if blockErr := v.ValidateContent(content[idx]); blockErr != nil {
+			return fmt.Errorf("invalid content block at index %d: %w", idx, blockErr)
+		}
+	}
+
+	return nil
+}
+
+// ValidateContent checks a single content block. The block's Type selects
+// which payload field must be present; a missing payload is an error,
+// otherwise the payload is handed to the matching type-specific validator.
+// Empty and unrecognised types are rejected.
+func (v *ItemValidator) ValidateContent(content Content) error {
+	switch content.Type {
+	case "":
+		return fmt.Errorf("content type cannot be empty")
+
+	case "text":
+		if content.Text == nil {
+			return fmt.Errorf("text content type requires text field")
+		}
+		return v.validateTextContent(content.Text)
+
+	case "input_text":
+		if content.InputText == nil {
+			return fmt.Errorf("input_text content type requires input_text field")
+		}
+		return v.validateSimpleText(*content.InputText, "input_text")
+
+	case "output_text":
+		if content.OutputText == nil {
+			return fmt.Errorf("output_text content type requires output_text field")
+		}
+		return v.validateOutputText(content.OutputText)
+
+	case "image":
+		if content.Image == nil {
+			return fmt.Errorf("image content type requires image field")
+		}
+		return v.validateImageContent(content.Image)
+
+	case "file":
+		if content.File == nil {
+			return fmt.Errorf("file content type requires file field")
+		}
+		return v.validateFileContent(content.File)
+
+	case "audio":
+		if content.Audio == nil {
+			return fmt.Errorf("audio content type requires audio field")
+		}
+		return v.validateAudioContent(content.Audio)
+
+	case "input_audio":
+		if content.InputAudio == nil {
+			return fmt.Errorf("input_audio content type requires input_audio field")
+		}
+		return v.validateInputAudioContent(content.InputAudio)
+
+	case "refusal":
+		if content.Refusal == nil {
+			return fmt.Errorf("refusal content type requires refusal field")
+		}
+		return v.validateSimpleText(*content.Refusal, "refusal")
+
+	case "thinking":
+		if content.Thinking == nil {
+			return fmt.Errorf("thinking content type requires thinking field")
+		}
+		return v.validateThinkingContent(*content.Thinking)
+
+	case "code":
+		if content.Code == nil {
+			return fmt.Errorf("code content type requires code field")
+		}
+		return v.validateCodeContent(content.Code)
+
+	case "computer_screenshot":
+		if content.ComputerScreenshot == nil {
+			return fmt.Errorf("computer_screenshot content type requires computer_screenshot field")
+		}
+		return v.validateScreenshotContent(content.ComputerScreenshot)
+
+	case "computer_action":
+		if content.ComputerAction == nil {
+			return fmt.Errorf("computer_action content type requires computer_action field")
+		}
+		return v.validateComputerAction(content.ComputerAction)
+
+	default:
+		return fmt.Errorf("unsupported content type: %s", content.Type)
+	}
+}
+
+// ValidateBatchSize ensures a batch operation is within limits.
+//
+// itemCount must be between 1 and MaxItemsPerBatch inclusive. Zero is
+// reported as an empty batch; negative counts are rejected explicitly so a
+// bad length computation upstream cannot slip past the upper-bound check.
+func (v *ItemValidator) ValidateBatchSize(itemCount int) error {
+	if itemCount < 0 {
+		return fmt.Errorf("batch size cannot be negative (got %d)", itemCount)
+	}
+
+	if itemCount == 0 {
+		return fmt.Errorf("batch cannot be empty")
+	}
+
+	if itemCount > v.config.MaxItemsPerBatch {
+		return fmt.Errorf("cannot process more than %d items in a single batch (got %d)", v.config.MaxItemsPerBatch, itemCount)
+	}
+
+	return nil
+}
+
+// ===============================================
+// Private Validation Methods
+// ===============================================
+
+// validateTextContent checks a "text" payload: it must be non-nil, its text
+// must pass validateSimpleText, and its annotations must be within
+// MaxAnnotations and individually valid.
+func (v *ItemValidator) validateTextContent(text *Text) error {
+	if text == nil {
+		return fmt.Errorf("text content cannot be nil")
+	}
+
+	if textErr := v.validateSimpleText(text.Text, "text"); textErr != nil {
+		return textErr
+	}
+
+	if count := len(text.Annotations); count > v.config.MaxAnnotations {
+		return fmt.Errorf("text cannot have more than %d annotations (got %d)", v.config.MaxAnnotations, count)
+	}
+
+	for idx := range text.Annotations {
+		if annErr := v.validateAnnotation(text.Annotations[idx]); annErr != nil {
+			return fmt.Errorf("invalid annotation at index %d: %w", idx, annErr)
+		}
+	}
+
+	return nil
+}
+
+// validateOutputText checks an "output_text" payload: it must be non-nil, its
+// text must pass validateSimpleText, and its annotations must be within
+// MaxAnnotations and individually valid. An empty annotation list is accepted.
+func (v *ItemValidator) validateOutputText(output *OutputText) error {
+	if output == nil {
+		return fmt.Errorf("output text content cannot be nil")
+	}
+
+	if textErr := v.validateSimpleText(output.Text, "output_text"); textErr != nil {
+		return textErr
+	}
+
+	if count := len(output.Annotations); count > v.config.MaxAnnotations {
+		return fmt.Errorf("output text cannot have more than %d annotations (got %d)", v.config.MaxAnnotations, count)
+	}
+
+	for idx := range output.Annotations {
+		if annErr := v.validateAnnotation(output.Annotations[idx]); annErr != nil {
+			return fmt.Errorf("invalid annotation at index %d: %w", idx, annErr)
+		}
+	}
+
+	return nil
+}
+
+// validateSimpleText checks a plain string value. It must be non-empty, at
+// most MaxTextContentLength runes long, and free of NUL bytes. fieldName is
+// used only to label the error message.
+func (v *ItemValidator) validateSimpleText(text, fieldName string) error {
+	if len(text) == 0 {
+		return fmt.Errorf("%s cannot be empty", fieldName)
+	}
+
+	// Length is measured in runes, not bytes.
+	if runes := utf8.RuneCountInString(text); runes > v.config.MaxTextContentLength {
+		return fmt.Errorf("%s cannot exceed %d characters (got %d)", fieldName, v.config.MaxTextContentLength, runes)
+	}
+
+	// Reject embedded null bytes (security).
+	if strings.IndexByte(text, 0) >= 0 {
+		return fmt.Errorf("%s cannot contain null bytes", fieldName)
+	}
+
+	return nil
+}
+
+// validateThinkingContent checks a "thinking" string. It must be non-empty,
+// at most MaxThinkingLength runes long, and free of NUL bytes.
+//
+// The NUL-byte rejection mirrors validateSimpleText, which guards the sibling
+// string-valued content kinds (input_text, refusal); without it thinking
+// content was the one plain-string kind that could carry embedded NULs.
+func (v *ItemValidator) validateThinkingContent(thinking string) error {
+	if thinking == "" {
+		return fmt.Errorf("thinking content cannot be empty")
+	}
+
+	// Length is measured in runes, not bytes.
+	length := utf8.RuneCountInString(thinking)
+	if length > v.config.MaxThinkingLength {
+		return fmt.Errorf("thinking content cannot exceed %d characters (got %d)", v.config.MaxThinkingLength, length)
+	}
+
+	// Check for null bytes (security), consistent with validateSimpleText.
+	if strings.Contains(thinking, "\x00") {
+		return fmt.Errorf("thinking content cannot contain null bytes")
+	}
+
+	return nil
+}
+
+// validateImageContent checks an "image" payload. Exactly one of URL and
+// FileID must be set; a URL must match the validator's accepted prefixes;
+// Detail, when set, must be "low", "high" or "auto".
+func (v *ItemValidator) validateImageContent(image *ImageContent) error {
+	if image == nil {
+		return fmt.Errorf("image content cannot be nil")
+	}
+
+	hasURL := image.URL != ""
+	hasFile := image.FileID != ""
+	switch {
+	case !hasURL && !hasFile:
+		return fmt.Errorf("image content must have either url or file_id")
+	case hasURL && hasFile:
+		return fmt.Errorf("image content cannot have both url and file_id")
+	case hasURL && !v.urlPattern.MatchString(image.URL):
+		return fmt.Errorf("invalid image URL format (must start with http://, https://, data:, or file://)")
+	}
+
+	switch image.Detail {
+	case "", "low", "high", "auto":
+		// Unset or a recognised detail level.
+		return nil
+	default:
+		return fmt.Errorf("invalid image detail level: %s (must be low, high, or auto)", image.Detail)
+	}
+}
+
+// validateFileContent checks a "file" payload: it must be non-nil, carry a
+// FileID, and have a Size between 0 and MaxFileSize inclusive.
+func (v *ItemValidator) validateFileContent(file *FileContent) error {
+	// Cases are evaluated top to bottom, so the nil check guards the rest.
+	switch {
+	case file == nil:
+		return fmt.Errorf("file content cannot be nil")
+	case file.FileID == "":
+		return fmt.Errorf("file content must have a file_id")
+	case file.Size < 0:
+		return fmt.Errorf("file size cannot be negative")
+	case file.Size > v.config.MaxFileSize:
+		return fmt.Errorf("file size cannot exceed %d bytes (got %d)", v.config.MaxFileSize, file.Size)
+	}
+
+	return nil
+}
+
+// validateAudioContent checks an "audio" payload: it must be non-nil and
+// carry an ID. Format is optional; when present it must be one of
+// mp3, wav, opus, flac or pcm16.
+//
+// The format check is a switch rather than a loop over a slice: the previous
+// loop variable was named fmt and shadowed the fmt package inside the loop.
+func (v *ItemValidator) validateAudioContent(audio *AudioContent) error {
+	if audio == nil {
+		return fmt.Errorf("audio content cannot be nil")
+	}
+
+	// Must have ID
+	if audio.ID == "" {
+		return fmt.Errorf("audio content must have an id")
+	}
+
+	// Validate format if present
+	if audio.Format != nil {
+		switch *audio.Format {
+		case "mp3", "wav", "opus", "flac", "pcm16":
+			// Supported format.
+		default:
+			return fmt.Errorf("invalid audio format: %s", *audio.Format)
+		}
+	}
+
+	return nil
+}
+
+// validateInputAudioContent checks an "input_audio" payload: it must be
+// non-nil, carry non-empty Data, and declare a Format that is one of
+// mp3, wav, opus, flac or pcm16.
+//
+// The format check is a switch rather than a loop over a slice: the previous
+// loop variable was named fmt and shadowed the fmt package inside the loop.
+func (v *ItemValidator) validateInputAudioContent(audio *InputAudio) error {
+	if audio == nil {
+		return fmt.Errorf("input audio content cannot be nil")
+	}
+
+	if audio.Data == "" {
+		return fmt.Errorf("input audio must have data")
+	}
+
+	switch audio.Format {
+	case "":
+		return fmt.Errorf("input audio must have format")
+	case "mp3", "wav", "opus", "flac", "pcm16":
+		return nil
+	default:
+		return fmt.Errorf("invalid audio format: %s", audio.Format)
+	}
+}
+
+// validateCodeContent checks a "code" payload: it must be non-nil, name a
+// language, and contain non-empty code of at most MaxCodeLength runes.
+func (v *ItemValidator) validateCodeContent(code *CodeContent) error {
+	switch {
+	case code == nil:
+		return fmt.Errorf("code content cannot be nil")
+	case code.Language == "":
+		return fmt.Errorf("code content must have a language")
+	case code.Code == "":
+		return fmt.Errorf("code content must have code")
+	}
+
+	// Length is measured in runes, not bytes.
+	if runes := utf8.RuneCountInString(code.Code); runes > v.config.MaxCodeLength {
+		return fmt.Errorf("code content cannot exceed %d characters (got %d)", v.config.MaxCodeLength, runes)
+	}
+
+	return nil
+}
+
+// validateScreenshotContent checks a "computer_screenshot" payload: it must
+// be non-nil, carry an ImageURL or ImageData, and have positive dimensions.
+func (v *ItemValidator) validateScreenshotContent(screenshot *ScreenshotContent) error {
+	// Cases are evaluated top to bottom, so the nil check guards the rest.
+	switch {
+	case screenshot == nil:
+		return fmt.Errorf("screenshot content cannot be nil")
+	case screenshot.ImageURL == "" && screenshot.ImageData == nil:
+		return fmt.Errorf("screenshot must have either image_url or image_data")
+	case screenshot.Width <= 0:
+		return fmt.Errorf("screenshot width must be positive")
+	case screenshot.Height <= 0:
+		return fmt.Errorf("screenshot height must be positive")
+	}
+
+	return nil
+}
+
+// validateComputerAction checks a "computer_action" payload. The action must
+// be one of click, type, key, scroll, move, drag or screenshot, and must
+// carry the fields that action needs: coordinates for click/move/drag, text
+// for type, key for key, scroll_delta for scroll.
+func (v *ItemValidator) validateComputerAction(action *ComputerAction) error {
+	if action == nil {
+		return fmt.Errorf("computer action cannot be nil")
+	}
+
+	switch action.Action {
+	case "":
+		return fmt.Errorf("computer action must have an action type")
+
+	case "click", "move", "drag":
+		if action.Coordinates == nil {
+			return fmt.Errorf("action '%s' requires coordinates", action.Action)
+		}
+
+	case "type":
+		if action.Text == nil || *action.Text == "" {
+			return fmt.Errorf("action 'type' requires text")
+		}
+
+	case "key":
+		if action.Key == nil || *action.Key == "" {
+			return fmt.Errorf("action 'key' requires key")
+		}
+
+	case "scroll":
+		if action.ScrollDelta == nil {
+			return fmt.Errorf("action 'scroll' requires scroll_delta")
+		}
+
+	case "screenshot":
+		// Valid action with no required fields.
+
+	default:
+		return fmt.Errorf("invalid action type: %s", action.Action)
+	}
+
+	return nil
+}
+
+// validateAnnotation checks a single annotation. Its type must be one of
+// file_citation, url_citation, file_path, quote or highlight. A file_citation
+// needs a file_id; a url_citation needs a url matching the accepted prefixes.
+func (v *ItemValidator) validateAnnotation(annotation Annotation) error {
+	switch annotation.Type {
+	case "":
+		return fmt.Errorf("annotation must have a type")
+
+	case "file_citation":
+		if annotation.FileID == "" {
+			return fmt.Errorf("file_citation annotation requires file_id")
+		}
+
+	case "url_citation":
+		if annotation.URL == "" {
+			return fmt.Errorf("url_citation annotation requires url")
+		}
+		if !v.urlPattern.MatchString(annotation.URL) {
+			return fmt.Errorf("invalid url format in annotation")
+		}
+
+	case "file_path", "quote", "highlight":
+		// Valid types with no required fields.
+
+	default:
+		return fmt.Errorf("invalid annotation type: %s", annotation.Type)
+	}
+
+	return nil
+}
diff --git a/services/llm-api/internal/domain/model.go b/services/llm-api/internal/domain/model.go
new file mode 100644
index 00000000..841a9012
--- /dev/null
+++ b/services/llm-api/internal/domain/model.go
@@ -0,0 +1,15 @@
+package domain
+
+import "time"
+
+// Model captures public metadata for a provider-backed model.
+// Every field is serialised to JSON under its snake_case name.
+type Model struct {
+	ID           string    `json:"id"`
+	Provider     string    `json:"provider"`
+	DisplayName  string    `json:"display_name"`
+	Family       string    `json:"family"`
+	Capabilities []string  `json:"capabilities"`
+	Active       bool      `json:"active"`
+	CreatedAt    time.Time `json:"created_at"`
+	UpdatedAt    time.Time `json:"updated_at"`
+}
diff --git a/services/llm-api/internal/domain/model/model_catalog.go b/services/llm-api/internal/domain/model/model_catalog.go
new file mode 100644
index 00000000..ad92ddcd
--- /dev/null
+++ b/services/llm-api/internal/domain/model/model_catalog.go
@@ -0,0 +1,71 @@
+package model
+
+import (
+	"context"
+	"time"
+
+	decimal "github.com/shopspring/decimal"
+	"jan-server/services/llm-api/internal/domain/query"
+)
+
+// SupportedParameters lists the request parameters a model accepts together
+// with optional default values for some of them.
+type SupportedParameters struct {
+	Names   []string                    `json:"names"`   // e.g., ["include_reasoning","max_tokens",...]
+	Default map[string]*decimal.Decimal `json:"default"` // temperature/top_p/frequency_penalty, null allowed
+}
+
+// Architecture holds model architecture metadata: the overall modality
+// string, the separate input/output modality lists, the tokenizer name and an
+// optional instruct type.
+type Architecture struct {
+	Modality         string   `json:"modality"` // "text+image->text"
+	InputModalities  []string `json:"input_modalities"`
+	OutputModalities []string `json:"output_modalities"`
+	Tokenizer        string   `json:"tokenizer"`     // "GPT" / "SentencePiece" / etc.
+	InstructType     *string  `json:"instruct_type"` // nullable
+}
+
+// ModelCatalogStatus records how a catalog entry's data was last populated.
+type ModelCatalogStatus string
+
+// Known ModelCatalogStatus values.
+const (
+	ModelCatalogStatusInit    ModelCatalogStatus = "init"    // default status when creating an entry
+	ModelCatalogStatusFilled  ModelCatalogStatus = "filled"  // may be updated from a provider such as OpenRouter
+	ModelCatalogStatusUpdated ModelCatalogStatus = "updated" // manually updated by an admin (can no longer be auto-updated)
+)
+
+// ModelCatalog is a catalog entry describing a model: its supported
+// parameters, architecture, descriptive tags/notes, moderation and active
+// flags, free-form extras, and bookkeeping (status and timestamps).
+// Pointer-typed fields are optional and may be nil.
+type ModelCatalog struct {
+	ID                  uint                `json:"id"`
+	PublicID            string              `json:"public_id"`
+	SupportedParameters SupportedParameters `json:"supported_parameters"`
+	Architecture        Architecture        `json:"architecture"`
+	Tags                []string            `json:"tags,omitempty"`
+	Notes               *string             `json:"notes,omitempty"`
+	IsModerated         *bool               `json:"is_moderated,omitempty"`
+	Active              *bool               `json:"active,omitempty"`
+	Extras              map[string]any      `json:"extras,omitempty"`
+	Status              ModelCatalogStatus  `json:"status"`
+	// NOTE(review): LastSyncedAt is the only field without a json tag, so
+	// encoding/json emits it under the Go field name "LastSyncedAt" rather than
+	// a snake_case key like its siblings — confirm whether that is intended.
+	LastSyncedAt        *time.Time
+	CreatedAt           time.Time `json:"created_at"`
+	UpdatedAt           time.Time `json:"updated_at"`
+}
+
+// ModelCatalogFilter carries optional criteria for selecting ModelCatalog
+// entries. Every field is a pointer; presumably a nil field means "do not
+// filter on this attribute" — confirm against the repository implementation.
+type ModelCatalogFilter struct {
+	IDs              *[]uint
+	PublicID         *string
+	IsModerated      *bool
+	Active           *bool
+	Status           *ModelCatalogStatus
+	LastSyncedAfter  *time.Time
+	LastSyncedBefore *time.Time
+}
+
+// ModelCatalogRepository is the persistence contract for ModelCatalog
+// entries. It covers single-entry create/update/delete and lookups by ID or
+// public ID, filtered listing and counting, a filtered bulk change of the
+// Active flag, and batch lookups by multiple IDs or public IDs.
+// Implementations are defined outside this file.
+type ModelCatalogRepository interface {
+	Create(ctx context.Context, catalog *ModelCatalog) error
+	Update(ctx context.Context, catalog *ModelCatalog) error
+	DeleteByID(ctx context.Context, id uint) error
+	FindByID(ctx context.Context, id uint) (*ModelCatalog, error)
+	FindByPublicID(ctx context.Context, publicID string) (*ModelCatalog, error)
+	FindByFilter(ctx context.Context, filter ModelCatalogFilter, p *query.Pagination) ([]*ModelCatalog, error)
+	Count(ctx context.Context, filter ModelCatalogFilter) (int64, error)
+	// BatchUpdateActive sets Active on entries selected by filter; the int64
+	// result is presumably the number of rows affected — confirm with the
+	// implementation.
+	BatchUpdateActive(ctx context.Context, filter ModelCatalogFilter, active bool) (int64, error)
+	// Batch methods for optimization
+	FindByIDs(ctx context.Context, ids []uint) ([]*ModelCatalog, error)
+	FindByPublicIDs(ctx context.Context, publicIDs []string) ([]*ModelCatalog, error)
+}
diff --git a/services/llm-api/internal/domain/model/model_catalog_service.go b/services/llm-api/internal/domain/model/model_catalog_service.go
new file mode 100644
index 00000000..30114369
--- /dev/null
+++ b/services/llm-api/internal/domain/model/model_catalog_service.go
@@ -0,0 +1,291 @@
+package model
+
+import (
+	"context"
+	"time"
+
+	decimal "github.com/shopspring/decimal"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/utils/httpclients/chat"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+	"jan-server/services/llm-api/internal/utils/ptr"
+)
+
+type ModelCatalogService struct {
+	modelCatalogRepo ModelCatalogRepository
+}
+
+func NewModelCatalogService(modelCatalogRepo ModelCatalogRepository) *ModelCatalogService {
+	return &ModelCatalogService{
+		modelCatalogRepo: modelCatalogRepo,
+	}
+}
+
+// UpsertCatalog creates the catalog row for a provider model, or replaces a stored
+// non-filled row with a freshly built filled one; the bool reports row creation.
+func (s *ModelCatalogService) UpsertCatalog(ctx context.Context, provider *Provider, model chat.Model) (*ModelCatalog, bool, error) {
+	providerKind := ProviderCustom
+	if provider != nil {
+		providerKind = provider.Kind
+	}
+	publicID := catalogPublicID(providerKind, model)
+	if publicID == "" {
+		return nil, false, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "model identifier missing", nil, "3934616c-8447-4ba8-809e-9b3c3924c32d")
+	}
+
+	// NotFound just means there is no stored row; other lookup errors abort.
+	stored, findErr := s.modelCatalogRepo.FindByPublicID(ctx, publicID)
+	if findErr != nil {
+		if !platformerrors.IsErrorType(findErr, platformerrors.ErrorTypeNotFound) {
+			return nil, false, platformerrors.AsError(ctx, platformerrors.LayerDomain, findErr, "failed to find existing model catalog")
+		}
+		stored = nil
+	}
+
+	fresh := buildModelCatalogFromModel(provider, model)
+	fresh.PublicID = publicID
+	syncedAt := time.Now().UTC()
+	fresh.LastSyncedAt = &syncedAt
+
+	if stored == nil {
+		if err := s.modelCatalogRepo.Create(ctx, fresh); err != nil {
+			return nil, false, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to create model catalog")
+		}
+		return fresh, true, nil
+	}
+
+	// A stored row is only replaced when it is not yet filled and the fresh one is.
+	if stored.Status == ModelCatalogStatusFilled || fresh.Status != ModelCatalogStatusFilled {
+		return stored, false, nil
+	}
+	fresh.ID = stored.ID
+	fresh.CreatedAt = stored.CreatedAt
+	fresh.Active = stored.Active // keep the stored Active flag so manually disabled catalogs stay disabled
+	if err := s.modelCatalogRepo.Update(ctx, fresh); err != nil {
+		return nil, false, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to update model catalog")
+	}
+	return fresh, false, nil
+}
+
+// FindByID returns the catalog with the given numeric ID.
+// A zero ID is rejected with a validation error before the repository is queried.
+func (s *ModelCatalogService) FindByID(ctx context.Context, id uint) (*ModelCatalog, error) {
+	if id == 0 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "model catalog ID is required", nil, "bfa98c70-387e-445c-a541-d1d07f722f67")
+	}
+	found, lookupErr := s.modelCatalogRepo.FindByID(ctx, id)
+	if lookupErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, lookupErr, "failed to find model catalog by ID")
+	}
+	return found, nil
+}
+
+// FindByPublicID returns the catalog with the given public ID.
+// An empty public ID is rejected with a validation error before the repository is queried.
+func (s *ModelCatalogService) FindByPublicID(ctx context.Context, publicID string) (*ModelCatalog, error) {
+	if publicID == "" {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "model catalog public ID is required", nil, "c7539cbf-157d-49c3-8b04-adc572a496f7")
+	}
+	found, lookupErr := s.modelCatalogRepo.FindByPublicID(ctx, publicID)
+	if lookupErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, lookupErr, "failed to find model catalog by public ID")
+	}
+	return found, nil
+}
+
+// FindByPublicIDs loads the catalogs for publicIDs and indexes them by public ID.
+// An empty input yields an empty map without touching the repository.
+func (s *ModelCatalogService) FindByPublicIDs(ctx context.Context, publicIDs []string) (map[string]*ModelCatalog, error) {
+	if len(publicIDs) == 0 {
+		return map[string]*ModelCatalog{}, nil
+	}
+
+	rows, err := s.modelCatalogRepo.FindByPublicIDs(ctx, publicIDs)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to find model catalogs by public IDs")
+	}
+	byPublicID := make(map[string]*ModelCatalog, len(rows))
+	for i := range rows {
+		byPublicID[rows[i].PublicID] = rows[i]
+	}
+	return byPublicID, nil
+}
+
+// FindByIDs loads the catalogs for ids and indexes them by numeric ID.
+// An empty input yields an empty map without touching the repository.
+func (s *ModelCatalogService) FindByIDs(ctx context.Context, ids []uint) (map[uint]*ModelCatalog, error) {
+	if len(ids) == 0 {
+		return map[uint]*ModelCatalog{}, nil
+	}
+
+	rows, err := s.modelCatalogRepo.FindByIDs(ctx, ids)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to find model catalogs by IDs")
+	}
+	byID := make(map[uint]*ModelCatalog, len(rows))
+	for i := range rows {
+		byID[rows[i].ID] = rows[i]
+	}
+	return byID, nil
+}
+
+// Update persists a non-nil catalog through the repository and returns that same pointer.
+func (s *ModelCatalogService) Update(ctx context.Context, catalog *ModelCatalog) (*ModelCatalog, error) {
+	if catalog == nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "catalog cannot be nil", nil, "d2305a92-e294-4429-838f-963438264abe")
+	}
+	updateErr := s.modelCatalogRepo.Update(ctx, catalog)
+	if updateErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, updateErr, "failed to update model catalog")
+	}
+	return catalog, nil
+}
+
+func (s *ModelCatalogService) FindByFilter(ctx context.Context, filter ModelCatalogFilter, pagination *query.Pagination) ([]*ModelCatalog, error) {
+	matches, queryErr := s.modelCatalogRepo.FindByFilter(ctx, filter, pagination) // pagination is forwarded untouched
+	if queryErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, queryErr, "failed to find model catalogs")
+	}
+	return matches, nil
+}
+
+func (s *ModelCatalogService) Count(ctx context.Context, filter ModelCatalogFilter) (int64, error) {
+	total, countErr := s.modelCatalogRepo.Count(ctx, filter) // number of catalogs matching filter
+	if countErr != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerDomain, countErr, "failed to count model catalogs")
+	}
+	return total, nil
+}
+
+// BatchUpdateActive sets the Active flag of every catalog matching filter.
+//
+// Parameters:
+//   - ctx: request context forwarded to the repository.
+//   - filter: selects the catalogs to update.
+//   - active: the value written to each matching catalog's Active flag.
+//
+// Returns the number of rows the repository reports as affected, or 0 and a
+// domain-layer error when the repository update fails.
+//
+// The update is a single repository call: the catalogs are not re-read
+// afterwards, because nothing here consumes the post-update rows and a
+// follow-up unpaginated query would only add load.
+func (s *ModelCatalogService) BatchUpdateActive(ctx context.Context, filter ModelCatalogFilter, active bool) (int64, error) {
+	rowsAffected, err := s.modelCatalogRepo.BatchUpdateActive(ctx, filter, active)
+	if err != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to batch update active status")
+	}
+	return rowsAffected, nil
+}
+
+// catalogPublicID derives the catalog's public ID from a provider model by
+// normalizing its CanonicalSlug, or its ID when the slug is empty.
+func catalogPublicID(kind ProviderKind, model chat.Model) string {
+	if slugKey := model.CanonicalSlug; slugKey != "" {
+		return NormalizeModelKey(kind, slugKey)
+	}
+	// No canonical slug: fall back to the provider's model ID.
+	return NormalizeModelKey(kind, model.ID)
+}
+// buildModelCatalogFromModel maps a provider model (mainly its Raw payload) onto a new ModelCatalog; identifiers and timestamps are not set here.
+func buildModelCatalogFromModel(provider *Provider, model chat.Model) *ModelCatalog {
+	kind := ProviderCustom
+	if provider != nil {
+		kind = provider.Kind
+	}
+	// Only OpenRouter models start with the Filled status; every other kind starts as Init.
+	status := ModelCatalogStatusInit
+	if kind == ProviderOpenRouter {
+		status = ModelCatalogStatusFilled
+	}
+	// A non-empty raw "description" string becomes the catalog notes.
+	var notes *string
+	if desc, ok := getString(model.Raw, "description"); ok && desc != "" {
+		notes = ptr.ToString(desc)
+	}
+	// Parameter names that are always advertised, whatever the payload lists.
+	defaultParameterNames := []string{
+		"temperature",
+		"max_tokens",
+		"top_p",
+		"frequency_penalty",
+		"presence_penalty",
+		"stop",
+		"stream",
+		"n",
+		"response_format",
+	}
+	// Tool parameters are advertised only when the provider reports tool support.
+	toolSupport := provider != nil && provider.SupportsTools()
+	if toolSupport {
+		defaultParameterNames = append(defaultParameterNames, "tools", "tool_choice")
+	}
+	// Without tool support, tool parameters are also stripped from the payload's own list.
+	supportedNames := extractStringSlice(model.Raw["supported_parameters"])
+	if !toolSupport && len(supportedNames) > 0 {
+		filtered := make([]string, 0, len(supportedNames))
+		for _, name := range supportedNames {
+			if name == "tools" || name == "tool_choice" {
+				continue
+			}
+			filtered = append(filtered, name)
+		}
+		supportedNames = filtered
+	}
+	nameSet := make(map[string]struct{}, len(supportedNames)+len(defaultParameterNames))
+	for _, name := range supportedNames {
+		nameSet[name] = struct{}{}
+	}
+	for _, name := range defaultParameterNames {
+		if _, exists := nameSet[name]; !exists {
+			supportedNames = append(supportedNames, name)
+			nameSet[name] = struct{}{}
+		}
+	}
+	// Payload defaults are kept; top_p and temperature get fallbacks (1 and 0.7) when their keys are absent.
+	defaultParameters := extractDefaultParameters(model.Raw["default_parameters"])
+	if _, exists := defaultParameters["top_p"]; !exists {
+		if val, err := decimal.NewFromString("1"); err == nil {
+			defaultParameters["top_p"] = &val
+		}
+	}
+	if _, exists := defaultParameters["temperature"]; !exists {
+		if val, err := decimal.NewFromString("0.7"); err == nil {
+			defaultParameters["temperature"] = &val
+		}
+	}
+
+	supportedParameters := SupportedParameters{
+		Names:   supportedNames,
+		Default: defaultParameters,
+	}
+	// Architecture fields are copied only when the payload carries an "architecture" object.
+	architecture := Architecture{}
+	if archMap, ok := model.Raw["architecture"].(map[string]any); ok {
+		architecture.Modality, _ = getString(archMap, "modality")
+		architecture.InputModalities = extractStringSlice(archMap["input_modalities"])
+		architecture.OutputModalities = extractStringSlice(archMap["output_modalities"])
+		architecture.Tokenizer, _ = getString(archMap, "tokenizer")
+		if instructType, ok := getString(archMap, "instruct_type"); ok && instructType != "" {
+			architecture.InstructType = ptr.ToString(instructType)
+		}
+	}
+	// IsModerated stays nil unless top_provider.is_moderated is present as a bool.
+	var isModerated *bool
+	if topProvider, ok := model.Raw["top_provider"].(map[string]any); ok {
+		if moderated, ok := topProvider["is_moderated"].(bool); ok {
+			isModerated = ptr.ToBool(moderated)
+		}
+	}
+	// Extras keeps a shallow copy of the whole raw payload.
+	extras := copyMap(model.Raw)
+
+	return &ModelCatalog{
+		SupportedParameters: supportedParameters,
+		Architecture:        architecture,
+		Notes:               notes,
+		IsModerated:         isModerated,
+		Extras:              extras,
+		Status:              status,
+	}
+}
diff --git a/services/llm-api/internal/domain/model/model_llmkey.go b/services/llm-api/internal/domain/model/model_llmkey.go
new file mode 100644
index 00000000..141d8f70
--- /dev/null
+++ b/services/llm-api/internal/domain/model/model_llmkey.go
@@ -0,0 +1,350 @@
+package model
+
+import (
+	"regexp"
+	"strings"
+)
+
+// NormalizeModelKey returns a canonical "<vendor>/<model>" key.
+// It infers the vendor from the raw name and provider kind; a blank raw yields "".
+//
+// Examples:
+//
+//	NormalizeModelKey(ProviderOpenRouter, "anthropic/claude-3.5-sonnet") => "anthropic/claude-3.5-sonnet"
+//	NormalizeModelKey(ProviderAWSBedrock, "anthropic.claude-3-5-sonnet-20240620-v1:0") => "anthropic/claude-3-5-sonnet-20240620-v1"
+//	NormalizeModelKey(ProviderOllama, "llama3:8b-instruct") => "meta/llama3-8b-instruct"
+//	NormalizeModelKey(ProviderVercelAI, "openai:gpt-4o-mini") => "openai/gpt-4o-mini"
+//	NormalizeModelKey(ProviderGoogle, "models/gemini-1.5-flash-001") => "google/gemini-1.5-flash-001"
+//	NormalizeModelKey(ProviderGroq, "mixtral-8x7b-32768") => "mistral/mixtral-8x7b-32768"
+//
+// Additional test cases:
+//
+//	# Aggregator with unknown owner - falls back to provider vendor
+//	NormalizeModelKey(ProviderJan, "aibrix/jan-v1-4b") => "jan/jan-v1-4b"
+//	NormalizeModelKey(ProviderJan, "jan-v1-4b") => "jan/jan-v1-4b"
+//
+//	# Aggregator with known vendor - preserves vendor
+//	NormalizeModelKey(ProviderOpenRouter, "meta-llama/Llama-3-8B-Instruct") => "meta/llama-3-8b-instruct"
+//	NormalizeModelKey(ProviderReplicate, "anthropic/claude-3.5-sonnet") => "anthropic/claude-3.5-sonnet"
+//
+//	# Version handling (unknown owner on an aggregator falls back to the provider vendor)
+//	NormalizeModelKey(ProviderReplicate, "owner/model:v1.0") => "replicate/model-v1.0"
+//
+//	# Family inference
+//	NormalizeModelKey(ProviderGroq, "llama-3.1-70b-versatile") => "meta/llama-3.1-70b-versatile"
+//	NormalizeModelKey(ProviderCustom, "mixtral-8x7b") => "mistral/mixtral-8x7b"
+//
+//	# Special prefixes
+//	NormalizeModelKey(ProviderGoogle, "models/gemini-pro") => "google/gemini-pro"
+//	NormalizeModelKey(ProviderAWSBedrock, "meta.llama3-70b-instruct-v1:0") => "meta/llama3-70b-instruct-v1"
+//
+//	# Colon-separated vendor:model and Ollama family:tag patterns
+//	NormalizeModelKey(ProviderVercelAI, "anthropic:claude-3-opus") => "anthropic/claude-3-opus"
+//	NormalizeModelKey(ProviderOllama, "qwen2:7b") => "qwen/qwen2-7b"
+func NormalizeModelKey(pk ProviderKind, raw string) string {
+	raw = strings.TrimSpace(raw)
+	if raw == "" {
+		return ""
+	}
+
+	// 1) Try cross-provider patterns first (highest priority)
+	// Pattern: "vendor:model" where vendor is a known vendor. Ollama "family:tag"
+	// names whose family is also a vendor key (qwen2, mistral) are handled below.
+	if v, m, ok := splitPair(raw, ":"); ok && likelyVendor(v) && !(pk == ProviderOllama && vendorFromFamilyPrefix(v) != "unknown") {
+		return joinKM(slug(v), slug(m))
+	}
+
+	// Pattern: Google's "models/model-name" format
+	if pk == ProviderGoogle && strings.HasPrefix(strings.ToLower(raw), "models/") {
+		name := stripModelsPrefix(raw) // the prefix match is case-insensitive, so trim by length
+		return joinKM("google", slug(name))
+	}
+
+	// Pattern: AWS Bedrock "vendor.model-name:version" format
+	if pk == ProviderAWSBedrock {
+		return normalizeBedrockModel(raw)
+	}
+
+	// Pattern: "owner/model[:version]" - common for aggregators and repos
+	// For aggregators, use provider as fallback vendor for unknown owners
+	fallbackVendor := ""
+	switch pk {
+	case ProviderJan, ProviderOpenRouter, ProviderTogetherAI, ProviderVercelAI, ProviderDeepInfra, ProviderReplicate, ProviderHuggingFace:
+		fallbackVendor = getProviderVendorName(pk)
+	}
+	if vendor, model, ok := parseOwnerModelPair(raw, fallbackVendor); ok {
+		return joinKM(vendor, model)
+	}
+
+	// Pattern: Ollama "family:tag" format
+	if pk == ProviderOllama && strings.Contains(raw, ":") {
+		return normalizeOllamaModel(raw)
+	}
+
+	// 2) Provider-specific defaults (no owner prefix detected)
+	return normalizeByProvider(pk, raw)
+}
+
+// normalizeBedrockModel converts AWS Bedrock "vendor.model:version" IDs to "vendor/model";
+// IDs without a dot fall back to family inference on the untouched input.
+func normalizeBedrockModel(raw string) string {
+	unversioned := strings.SplitN(raw, ":", 2)[0] // drop the ":0"-style version suffix
+	dot := strings.Index(unversioned, ".")
+	if dot < 0 {
+		return inferFromFamily(raw)
+	}
+	// The owner before the first dot may itself be a family alias, so remap it.
+	vendorSlug, modelSlug := slug(unversioned[:dot]), slug(unversioned[dot+1:])
+	return joinKM(remapVendorFromFamily(vendorSlug, modelSlug), modelSlug)
+}
+
+// normalizeOllamaModel turns Ollama's "family:tag" into "<vendor>/<family>-<tag>",
+// with the vendor inferred from the family prefix.
+func normalizeOllamaModel(raw string) string {
+	// splitPair yields two empty halves when raw has no ":".
+	family, tag, _ := splitPair(raw, ":")
+	return joinKM(vendorFromFamilyPrefix(family), slug(family+"-"+tag))
+}
+
+// normalizeByProvider builds the key for a bare model name (no owner prefix),
+// choosing the vendor from the provider kind or, failing that, the model family.
+func normalizeByProvider(pk ProviderKind, raw string) string {
+	// ownVendor is set for providers whose bare names map to one fixed vendor.
+	ownVendor := ""
+	switch pk {
+	case ProviderOpenAI, ProviderAzureOpenAI:
+		ownVendor = "openai"
+	case ProviderAnthropic:
+		ownVendor = "anthropic"
+	case ProviderMistral:
+		ownVendor = "mistral"
+	case ProviderCohere:
+		ownVendor = "cohere"
+	case ProviderGoogle:
+		return joinKM("google", slug(stripModelsPrefix(raw)))
+	case ProviderJan, ProviderOpenRouter, ProviderTogetherAI, ProviderVercelAI, ProviderDeepInfra, ProviderReplicate, ProviderHuggingFace:
+		// Aggregators: the provider's own name stands in as vendor for bare names.
+		return joinKM(getProviderVendorName(pk), slug(raw))
+	case ProviderPerplexity:
+		// Perplexity's own models are recognized by a "pplx" prefix or "sonar" anywhere.
+		lowered := strings.ToLower(raw)
+		if strings.HasPrefix(lowered, "pplx") || strings.Contains(lowered, "sonar") {
+			ownVendor = "perplexity"
+		}
+	}
+
+	if ownVendor != "" {
+		return joinKM(ownVendor, slug(raw))
+	}
+	// Groq, custom and unlisted providers host many families: infer from the name.
+	return inferFromFamily(raw)
+}
+
+// ---- helpers ----
+
+// knownVendors is the single source of truth for all recognized AI model vendors.
+// This prevents duplication and makes it easier to add new vendors.
+var knownVendors = map[string]bool{
+	"openai":     true,
+	"anthropic":  true,
+	"gemini":     true,
+	"google":     true,
+	"mistral":    true,
+	"mistralai":  true,
+	"meta":       true,
+	"meta-llama": true,
+	"cohere":     true,
+	"qwen":       true,
+	"qwen2":      true,
+	"qwen2.5":    true,
+	"qwen3":      true,
+	"tii":        true,
+	"tiiuae":     true,
+	"databricks": true,
+	"aws":        true,
+	"azure":      true,
+	"perplexity": true,
+	"microsoft":  true,
+	"deepmind":   true,
+	"01-ai":      true,
+	"zhipu":      true,
+}
+
+// parseOwnerModelPair splits "owner/model[:version]" at the first "/" into normalized
+// vendor and model slugs. ok is false (with empty strings) when there is no "/",
+// either side is empty, or the owner contains a space. An owner that is neither
+// remapped nor a known vendor is replaced by fallbackVendor when one is given.
+func parseOwnerModelPair(raw string, fallbackVendor string) (vendor, model string, ok bool) {
+	slash := strings.Index(raw, "/")
+	if slash <= 0 || slash == len(raw)-1 {
+		return "", "", false
+	}
+	owner, name := raw[:slash], raw[slash+1:]
+	if strings.Contains(owner, " ") {
+		return "", "", false
+	}
+
+	// Version tags become part of the model slug: every ":" turns into "-".
+	ownerSlug := slug(owner)
+	modelSlug := slug(strings.ReplaceAll(name, ":", "-"))
+	vendor = remapVendorFromFamily(ownerSlug, modelSlug)
+	unrecognized := vendor == ownerSlug && !isKnownVendor(vendor)
+	if unrecognized && fallbackVendor != "" {
+		vendor = fallbackVendor
+	}
+	return vendor, modelSlug, true
+}
+
+var nonAlnumDashDot = regexp.MustCompile(`[^a-z0-9\-\.:\/]`) // matches everything except a-z, 0-9, "-", ".", ":" and "/"
+// slug lower-cases and trims input, turns "_" and whitespace runs into "-", then drops every character nonAlnumDashDot matches.
+func slug(input string) string {
+	output := strings.TrimSpace(strings.ToLower(input))
+	output = strings.ReplaceAll(output, "_", "-")
+	output = strings.Join(strings.Fields(output), "-")
+	output = nonAlnumDashDot.ReplaceAllString(output, "")
+	// Dots, colons and slashes survive here; owner aliases such as "meta-llama" are not collapsed by slug.
+	return output
+}
+
+// joinKM returns "<vendor>/<model>" with edge slashes trimmed from both parts; an empty vendor becomes "unknown".
+func joinKM(vendor, model string) string {
+	v := strings.Trim(vendor, "/")
+	if v == "" {
+		v = "unknown"
+	}
+	return strings.Join([]string{v, strings.Trim(model, "/")}, "/")
+}
+
+// splitPair cuts s at the first sep; ok is false (and both halves empty) when sep is absent.
+func splitPair(s, sep string) (string, string, bool) {
+	if i := strings.Index(s, sep); i >= 0 {
+		return s[:i], s[i+len(sep):], true // skip the whole separator, not just one byte
+	}
+	return "", "", false
+}
+
+// likelyVendor reports whether s (case-insensitively) names a vendor that can
+// prefix a "vendor:model" key: a short list of common vendors is checked first,
+// and any other name is accepted if isKnownVendor recognizes it.
+func likelyVendor(s string) bool {
+	lowered := strings.ToLower(s)
+	for _, common := range []string{"openai", "anthropic", "gemini", "google", "mistral", "meta", "cohere"} {
+		if lowered == common {
+			return true
+		}
+	}
+	return isKnownVendor(lowered)
+}
+
+// isKnownVendor reports whether s, lower-cased, is registered as true in knownVendors.
+func isKnownVendor(s string) bool {
+	return knownVendors[strings.ToLower(s)]
+}
+
+// getProviderVendorName returns the vendor label used when an aggregator
+// provider itself stands in as the vendor. Kinds without a dedicated label
+// fall back to the lower-cased kind string.
+func getProviderVendorName(pk ProviderKind) string {
+	// Some labels differ from the kind string (e.g. ProviderTogetherAI -> "together").
+	labels := map[ProviderKind]string{
+		ProviderJan:         "jan",
+		ProviderOpenRouter:  "openrouter",
+		ProviderTogetherAI:  "together",
+		ProviderVercelAI:    "vercel",
+		ProviderDeepInfra:   "deepinfra",
+		ProviderReplicate:   "replicate",
+		ProviderHuggingFace: "huggingface",
+	}
+
+	if label, listed := labels[pk]; listed {
+		return label
+	}
+	return strings.ToLower(string(pk))
+}
+
+func stripModelsPrefix(s string) string {
+	if lowered := strings.ToLower(s); !strings.HasPrefix(lowered, "models/") { // case-insensitive match
+		return s
+	}
+	return s[len("models/"):] // cut by length so the rest keeps its original casing
+}
+
+// vendorFromFamilyPrefix maps a model family hint to its vendor by prefix
+// (e.g. "llama3" -> "meta"), case-insensitively; "unknown" when nothing matches.
+func vendorFromFamilyPrefix(modelBase string) string {
+	families := []struct{ prefix, vendor string }{
+		{"llama", "meta"},
+		{"gemma", "google"},
+		{"mixtral", "mistral"},
+		{"mistral", "mistral"},
+		{"qwen", "qwen"},
+		{"phi", "microsoft"},
+		{"yi", "01-ai"},
+		{"glm", "zhipu"},
+		{"chatglm", "zhipu"},
+	}
+	lowered := strings.ToLower(modelBase)
+	for _, family := range families {
+		if strings.HasPrefix(lowered, family.prefix) {
+			return family.vendor
+		}
+	}
+	return "unknown"
+}
+
+// remapVendorFromFamily collapses known owner aliases to their brand vendor (e.g. "meta-llama" -> "meta");
+// other owners are replaced by the vendor inferred from the model's family prefix, or kept when none is found.
+func remapVendorFromFamily(owner, model string) string {
+	aliases := map[string]string{
+		"meta-llama": "meta",
+		"meta":       "meta",
+		"google":     "google",
+		"deepmind":   "google",
+		"mistralai":  "mistral",
+		"mistral":    "mistral",
+		"anthropic":  "anthropic",
+		"qwen":       "qwen",
+		"qwen2":      "qwen",
+		"qwen2.5":    "qwen",
+		"qwen3":      "qwen",
+		"tiiuae":     "tii",
+		"tii":        "tii",
+		"openai":     "openai",
+		"cohere":     "cohere",
+	}
+	if brand, aliased := aliases[owner]; aliased {
+		return brand
+	}
+	if inferred := vendorFromFamilyPrefix(model); inferred != "unknown" {
+		return inferred
+	}
+	return owner
+}
+
+// inferFromFamily is the last-resort normalizer for bare model names: it slugs
+// the name and guesses the vendor from recognizable family markers in it.
+func inferFromFamily(raw string) string {
+	name := strings.ToLower(raw)
+	// "name:tag" (ollama-like): the first ":" is turned into "-" here.
+	if colon := strings.Index(name, ":"); colon >= 0 {
+		name = name[:colon] + "-" + name[colon+1:]
+	}
+	model := slug(strings.ReplaceAll(stripModelsPrefix(name), "/", "-"))
+
+	// Marker checks, highest priority first.
+	switch {
+	case strings.HasPrefix(model, "gemini"), strings.HasPrefix(model, "google"):
+		return joinKM("google", model)
+	case strings.Contains(model, "gpt"), strings.Contains(model, "o1"):
+		return joinKM("openai", model)
+	case strings.Contains(model, "claude"):
+		return joinKM("anthropic", model)
+	}
+
+	// Otherwise the first token (up to "-", "_" or ".") is treated as the family.
+	family := model
+	if cut := strings.IndexAny(model, "-_."); cut > 0 {
+		family = model[:cut]
+	}
+	return joinKM(vendorFromFamilyPrefix(family), model)
+}
diff --git a/services/llm-api/internal/domain/model/model_utils.go b/services/llm-api/internal/domain/model/model_utils.go
new file mode 100644
index 00000000..c439729f
--- /dev/null
+++ b/services/llm-api/internal/domain/model/model_utils.go
@@ -0,0 +1,131 @@
+package model
+
+import (
+	"strings"
+
+	"github.com/shopspring/decimal"
+)
+
+// extractDefaultParameters (shared by model_catalog and provider_model) converts a raw parameter object to
+// decimals: nil/blank-string values stay nil entries, unparsable strings and non-float types are skipped.
+func extractDefaultParameters(value any) map[string]*decimal.Decimal {
+	parsed := map[string]*decimal.Decimal{}
+	rawParams, isMap := value.(map[string]any)
+	if !isMap {
+		return parsed
+	}
+	for name, rawValue := range rawParams {
+		var dec decimal.Decimal
+		switch typed := rawValue.(type) {
+		case nil:
+			parsed[name] = nil
+			continue
+		case string:
+			if strings.TrimSpace(typed) == "" {
+				parsed[name] = nil
+				continue
+			}
+			var err error
+			if dec, err = decimal.NewFromString(typed); err != nil {
+				continue // unparsable strings are dropped, not stored as nil
+			}
+		case float64:
+			dec = decimal.NewFromFloat(typed)
+		case float32:
+			dec = decimal.NewFromFloat32(typed)
+		default:
+			continue // unsupported types are ignored
+		}
+		parsed[name] = &dec
+	}
+	return parsed
+}
+
+func extractStringSlice(value any) []string {
+	list := []string{}
+	switch arr := value.(type) {
+	case []any:
+		for _, item := range arr {
+			if str, ok := item.(string); ok {
+				list = append(list, strings.TrimSpace(str))
+			}
+		}
+	case []string:
+		for _, item := range arr {
+			list = append(list, strings.TrimSpace(item))
+		}
+	}
+	return list
+}
+
+func extractStringSliceFromMap(raw map[string]any, path ...string) []string {
+	current := any(raw)
+	for _, key := range path {
+		m, ok := current.(map[string]any)
+		if !ok {
+			return nil
+		}
+		current = m[key]
+	}
+	return extractStringSlice(current)
+}
+
+// getString returns the trimmed string stored under key. The bool is false
+// when the key is missing, holds a non-string value, or the map itself is nil;
+// in those cases the returned string is "".
+func getString(raw map[string]any, key string) (string, bool) {
+	// Reading a nil map is safe and yields nil, which fails the assertion.
+	text, isString := raw[key].(string)
+	if !isString {
+		return "", false
+	}
+	return strings.TrimSpace(text), true
+}
+
+func copyMap(source map[string]any) map[string]any {
+	if source == nil {
+		return nil
+	}
+	dest := make(map[string]any, len(source))
+	for k, v := range source {
+		dest[k] = v
+	}
+	return dest
+}
+
+// floatFromAny converts float64/float32/int/int64 values and decimal strings to float64; the bool is false otherwise.
+func floatFromAny(value any) (float64, bool) {
+	switch typed := value.(type) {
+	case string:
+		parsed, err := decimal.NewFromString(typed)
+		if strings.TrimSpace(typed) == "" || err != nil {
+			return 0, false
+		}
+		return parsed.InexactFloat64(), true
+	case float64:
+		return typed, true
+	case float32:
+		return float64(typed), true
+	case int:
+		return float64(typed), true
+	case int64:
+		return float64(typed), true
+	}
+	return 0, false
+}
+
+func containsString(list []string, target string) bool {
+	wanted := strings.ToLower(target) // both sides are lower-cased before comparing
+	for i := 0; i < len(list); i++ {
+		if strings.ToLower(list[i]) == wanted {
+			return true
+		}
+	}
+	return false
+}
+
+// normalizeURL trims surrounding whitespace from baseURL and then removes
+// every trailing "/".
+func normalizeURL(baseURL string) string {
+	return strings.TrimRight(strings.TrimSpace(baseURL), "/")
+}
diff --git a/services/llm-api/internal/domain/model/provider.go b/services/llm-api/internal/domain/model/provider.go
new file mode 100644
index 00000000..eb0fff81
--- /dev/null
+++ b/services/llm-api/internal/domain/model/provider.go
@@ -0,0 +1,204 @@
+package model
+
+import (
+	"context"
+	"encoding/json"
+	"strings"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/query"
+)
+
+type ProviderKind string
+
+const (
+	ProviderJan         ProviderKind = "jan"
+	ProviderOpenAI      ProviderKind = "openai"
+	ProviderOpenRouter  ProviderKind = "openrouter"
+	ProviderAnthropic   ProviderKind = "anthropic"
+	ProviderGoogle      ProviderKind = "google"
+	ProviderMistral     ProviderKind = "mistral"
+	ProviderGroq        ProviderKind = "groq"
+	ProviderCohere      ProviderKind = "cohere"
+	ProviderOllama      ProviderKind = "ollama"
+	ProviderReplicate   ProviderKind = "replicate"
+	ProviderAzureOpenAI ProviderKind = "azure_openai"
+	ProviderAWSBedrock  ProviderKind = "aws_bedrock"
+	ProviderPerplexity  ProviderKind = "perplexity"
+	ProviderTogetherAI  ProviderKind = "togetherai"
+	ProviderHuggingFace ProviderKind = "huggingface"
+	ProviderVercelAI    ProviderKind = "vercel_ai"
+	ProviderDeepInfra   ProviderKind = "deepinfra"
+	ProviderCustom      ProviderKind = "custom" // for any customer-provided API
+)
+
+type Provider struct {
+	ID              uint         `json:"id"`
+	PublicID        string       `json:"public_id"`
+	DisplayName     string       `json:"display_name"`
+	Kind            ProviderKind `json:"kind"`
+	BaseURL         string       `json:"base_url"`               // e.g., https://api.openai.com/v1
+	EncryptedAPIKey string       `json:"-"`                      // encrypted at rest, decrypted in memory when needed
+	APIKeyHint      *string      `json:"api_key_hint,omitempty"` // last4 or source name, not the secret
+	IsModerated     bool         `json:"is_moderated"`           // whether provider enforces moderation upstream
+	Active          bool
+	Metadata        map[string]string `json:"metadata,omitempty"` // supports: image_input, file_attachment, description, etc.
+	LastSyncedAt    *time.Time
+	CreatedAt       time.Time
+	UpdatedAt       time.Time
+}
+
+// Metadata keys for provider capabilities
+const (
+	MetadataKeyImageInput       = "image_input"            // JSON string with ImageInputCapability
+	MetadataKeyFileAttachment   = "file_attachment"        // JSON string with FileAttachmentCapability
+	MetadataKeyDescription      = "description"            // Human-readable description
+	MetadataKeyEnvironment      = "environment"            // e.g., "production", "staging", "local"
+	MetadataKeyAutoEnableModels = "auto_enable_new_models" // "true" to auto-enable new models
+	MetadataKeyToolSupport      = "tool_support"           // "true" if provider supports tools/tool_choice
+)
+
+// ImageInputCapability describes how a provider supports image input
+type ImageInputCapability struct {
+	Supported bool   `json:"supported"`
+	URL       bool   `json:"url"`    // Supports image URLs (https://)
+	Base64    bool   `json:"base64"` // Supports base64-encoded images
+	Schema    string `json:"schema"` // Description of the schema/format
+}
+
+// FileAttachmentCapability describes how a provider supports file attachments
+type FileAttachmentCapability struct {
+	Supported  bool   `json:"supported"`
+	URL        bool   `json:"url"`         // Supports file URLs (https://)
+	Base64     bool   `json:"base64"`      // Supports base64-encoded files
+	FileUpload bool   `json:"file_upload"` // Supports file upload API (file_id references)
+	Schema     string `json:"schema"`      // Description of the schema/format
+}
+
+// GetImageInputCapability decodes the image-input capability stored under
+// MetadataKeyImageInput in the provider metadata.
+//
+// The stored value is interpreted as follows:
+//   - missing, empty, "false" or "0": unsupported
+//   - "true" or "1": supported for both URL and base64 input
+//   - anything else: decoded as JSON; undecodable values count as unsupported
+//
+// The returned error is always nil.
+func (p *Provider) GetImageInputCapability() (*ImageInputCapability, error) {
+	unsupported := &ImageInputCapability{Supported: false}
+
+	// Indexing a nil Metadata map yields "", which lands in the first case.
+	switch stored := p.Metadata[MetadataKeyImageInput]; stored {
+	case "", "false", "0":
+		return unsupported, nil
+	case "true", "1":
+		// Plain boolean strings are kept for backward compatibility.
+		return &ImageInputCapability{Supported: true, URL: true, Base64: true}, nil
+	default:
+		// Structured form: a JSON-encoded ImageInputCapability.
+		var decoded ImageInputCapability
+		if err := json.Unmarshal([]byte(stored), &decoded); err != nil {
+			return unsupported, nil
+		}
+		return &decoded, nil
+	}
+}
+
+// GetFileAttachmentCapability decodes the file-attachment capability stored
+// under MetadataKeyFileAttachment in the provider metadata.
+//
+// The stored value is interpreted as follows:
+//   - missing, empty, "false" or "0": unsupported
+//   - "true" or "1": supported via URL, base64 and file upload
+//   - anything else: decoded as JSON; undecodable values count as unsupported
+//
+// The returned error is always nil.
+func (p *Provider) GetFileAttachmentCapability() (*FileAttachmentCapability, error) {
+	unsupported := &FileAttachmentCapability{Supported: false}
+
+	// Indexing a nil Metadata map yields "", which lands in the first case.
+	switch stored := p.Metadata[MetadataKeyFileAttachment]; stored {
+	case "", "false", "0":
+		return unsupported, nil
+	case "true", "1":
+		// Plain boolean strings are kept for backward compatibility.
+		return &FileAttachmentCapability{Supported: true, URL: true, Base64: true, FileUpload: true}, nil
+	default:
+		// Structured form: a JSON-encoded FileAttachmentCapability.
+		var decoded FileAttachmentCapability
+		if err := json.Unmarshal([]byte(stored), &decoded); err != nil {
+			return unsupported, nil
+		}
+		return &decoded, nil
+	}
+}
+
+// SupportsImageInput reports whether the metadata marks image input as supported.
+func (p *Provider) SupportsImageInput() bool {
+	capability, _ := p.GetImageInputCapability() // the error result is discarded
+	return capability != nil && capability.Supported
+}
+
+// SupportsFileAttachment reports whether the metadata marks file attachments as supported.
+func (p *Provider) SupportsFileAttachment() bool {
+	capability, _ := p.GetFileAttachmentCapability() // the error result is discarded
+	return capability != nil && capability.Supported
+}
+
+// GetDescription returns the value stored under MetadataKeyDescription in the
+// provider metadata, or "" when there is none.
+func (p *Provider) GetDescription() string {
+	// Indexing a nil map is safe in Go and yields the zero value, so no
+	// explicit nil check on Metadata is needed.
+	return p.Metadata[MetadataKeyDescription]
+}
+
+// GetEnvironment returns the value stored under MetadataKeyEnvironment in the
+// provider metadata (e.g., "production", "staging"), or "" when there is none.
+func (p *Provider) GetEnvironment() string {
+	// Indexing a nil map is safe in Go and yields the zero value, so no
+	// explicit nil check on Metadata is needed.
+	return p.Metadata[MetadataKeyEnvironment]
+}
+
+// SupportsTools reports whether the provider metadata enables tool calling.
+// The tool_support value is lower-cased and trimmed; "1", "true", "yes" and
+// "on" count as enabled. A nil provider or missing metadata means no support.
+func (p *Provider) SupportsTools() bool {
+	if p == nil {
+		return false
+	}
+	// A nil Metadata map reads as "", which is not an enabling value.
+	flag := strings.ToLower(p.Metadata[MetadataKeyToolSupport])
+	flag = strings.TrimSpace(flag)
+	enabled := flag == "1" || flag == "true" || flag == "yes" || flag == "on"
+	return enabled
+}
+
+// ProviderFilter defines optional conditions for querying providers.
+type ProviderFilter struct {
+	IDs              *[]uint
+	PublicID         *string
+	Kind             *ProviderKind
+	Active           *bool
+	IsModerated      *bool
+	LastSyncedAfter  *time.Time
+	LastSyncedBefore *time.Time
+}
+
+type AccessibleModels struct {
+	Providers      []*Provider      `json:"providers"`
+	ProviderModels []*ProviderModel `json:"provider_models"`
+}
+
+// ProviderRepository abstracts persistence for provider aggregate roots.
+type ProviderRepository interface {
+	Create(ctx context.Context, provider *Provider) error
+	Update(ctx context.Context, provider *Provider) error
+	DeleteByID(ctx context.Context, id uint) error
+	FindByID(ctx context.Context, id uint) (*Provider, error)
+	FindByPublicID(ctx context.Context, publicID string) (*Provider, error)
+	FindByFilter(ctx context.Context, filter ProviderFilter, p *query.Pagination) ([]*Provider, error)
+	Count(ctx context.Context, filter ProviderFilter) (int64, error)
+	FindByIDs(ctx context.Context, ids []uint) ([]*Provider, error)
+}
diff --git a/services/llm-api/internal/domain/model/provider_capabilities.go b/services/llm-api/internal/domain/model/provider_capabilities.go
new file mode 100644
index 00000000..714e0d6c
--- /dev/null
+++ b/services/llm-api/internal/domain/model/provider_capabilities.go
@@ -0,0 +1,203 @@
+package model
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"sync"
+
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+)
+
+// ProviderCapabilitiesDefaults holds the default image-input and file-attachment capabilities of one provider kind.
+type ProviderCapabilitiesDefaults struct {
+	ImageInput     ImageInputCapability     `json:"image_input"` // defaults for image input
+	FileAttachment FileAttachmentCapability `json:"file_attachment"` // defaults for file attachments
+}
+
+var (
+	defaultCapabilities     map[string]ProviderCapabilitiesDefaults // keyed by string(ProviderKind); accessed under defaultCapabilitiesMux
+	defaultCapabilitiesMux  sync.RWMutex // guards defaultCapabilities
+	defaultCapabilitiesOnce sync.Once // makes LoadDefaultCapabilities run its loading logic a single time
+)
+
+// LoadDefaultCapabilities loads provider capability defaults once per process (later calls are no-ops) and always returns nil.
+func LoadDefaultCapabilities(configPath string) error {
+	defaultCapabilitiesOnce.Do(func() {
+		// Recover from panics (e.g. the logger not being ready) so initialization never crashes the caller
+		defer func() {
+			if r := recover(); r != nil {
+				// On panic, fall back to hardcoded defaults unless a value was already stored
+				defaultCapabilitiesMux.Lock()
+				if defaultCapabilities == nil {
+					defaultCapabilities = getHardcodedDefaults()
+				}
+				defaultCapabilitiesMux.Unlock()
+			}
+		}()
+
+		// Use the provided path when one is given
+		yamlPath := configPath
+		if yamlPath == "" {
+			// Default to config/providers_metadata_default.yml (a relative path)
+			yamlPath = filepath.Join("config", "providers_metadata_default.yml")
+		}
+
+		data, err := os.ReadFile(yamlPath)
+		if err != nil {
+			// Use hardcoded fallbacks if the file cannot be read
+			defaultCapabilitiesMux.Lock()
+			defaultCapabilities = getHardcodedDefaults()
+			defaultCapabilitiesMux.Unlock()
+
+			// Log a warning; a panic in the logger is absorbed by the deferred recover above
+			log := logger.GetLogger()
+			log.Warn().
+				Str("path", yamlPath).
+				Err(err).
+				Msg("Could not load provider capabilities defaults, using hardcoded fallbacks")
+			return
+		}
+
+		var defs map[string]ProviderCapabilitiesDefaults
+		if err := json.Unmarshal(data, &defs); err != nil {
+			// Use hardcoded fallbacks if parse fails. NOTE(review): the .yml file is decoded with encoding/json, so only JSON-compatible content parses — confirm intent
+			defaultCapabilitiesMux.Lock()
+			defaultCapabilities = getHardcodedDefaults()
+			defaultCapabilitiesMux.Unlock()
+
+			// Log a warning; a panic in the logger is absorbed by the deferred recover above
+			log := logger.GetLogger()
+			log.Warn().
+				Str("path", yamlPath).
+				Err(err).
+				Msg("Could not parse provider capabilities defaults, using hardcoded fallbacks")
+			return
+		}
+
+		defaultCapabilitiesMux.Lock()
+		defaultCapabilities = defs
+		defaultCapabilitiesMux.Unlock()
+
+		// Log success together with the number of provider entries that were loaded
+		log := logger.GetLogger()
+		log.Info().
+			Str("path", yamlPath).
+			Int("providers", len(defs)).
+			Msg("Loaded provider capabilities defaults")
+	})
+
+	return nil
+}
+
+// GetDefaultCapabilities returns the default capabilities for a provider kind
+func GetDefaultCapabilities(kind ProviderKind) ProviderCapabilitiesDefaults {
+	// Ensure defaults are loaded
+	LoadDefaultCapabilities("")
+
+	defaultCapabilitiesMux.RLock()
+	defer defaultCapabilitiesMux.RUnlock()
+
+	if caps, exists := defaultCapabilities[string(kind)]; exists {
+		return caps
+	}
+
+	// Return empty/unsupported capabilities for unknown providers
+	return ProviderCapabilitiesDefaults{
+		ImageInput: ImageInputCapability{
+			Supported: false,
+		},
+		FileAttachment: FileAttachmentCapability{
+			Supported: false,
+		},
+	}
+}
+
+// getHardcodedDefaults returns the built-in capability table, keyed by provider kind string, used when the defaults file cannot be read or parsed
+func getHardcodedDefaults() map[string]ProviderCapabilitiesDefaults {
+	return map[string]ProviderCapabilitiesDefaults{
+		"openai": {
+			ImageInput: ImageInputCapability{
+				Supported: true,
+				URL:       true,
+				Base64:    true,
+				Schema:    "messages[].content[].type='image_url'; image_url.url=https:// or data:image/...;base64,...",
+			},
+			FileAttachment: FileAttachmentCapability{
+				Supported:  true,
+				URL:        false,
+				Base64:     false,
+				FileUpload: true,
+				Schema:     "messages[].content[].type='input_file'; file_id from Files API upload",
+			},
+		},
+		"azure_openai": {
+			ImageInput: ImageInputCapability{
+				Supported: true,
+				URL:       true,
+				Base64:    true,
+				Schema:    "identical to OpenAI vision; supports https:// and data:image/...;base64,...",
+			},
+			FileAttachment: FileAttachmentCapability{
+				Supported:  true,
+				URL:        false,
+				Base64:     false,
+				FileUpload: true,
+				Schema:     "Files uploaded to Azure resource → reference with file_id",
+			},
+		},
+		"google": {
+			ImageInput: ImageInputCapability{
+				Supported: true,
+				URL:       false,
+				Base64:    true,
+				Schema:    "messages[].parts[].inline_data={mime_type,data} or file_data={file_uri,mime_type}",
+			},
+			FileAttachment: FileAttachmentCapability{
+				Supported:  true,
+				URL:        false,
+				Base64:     true,
+				FileUpload: true,
+				Schema:     "Gemini: inline_data for small files; file_data.file_uri for uploaded files",
+			},
+		},
+		"anthropic": {
+			ImageInput: ImageInputCapability{
+				Supported: true,
+				URL:       true,
+				Base64:    true,
+				Schema:    "messages[].content[].type='image'; image_url=https:// or data:image/...;base64,... or file_id",
+			},
+			FileAttachment: FileAttachmentCapability{
+				Supported:  true,
+				URL:        true,
+				Base64:     true,
+				FileUpload: true,
+				Schema:     "messages[].content[].type='input_file'; supports url, inline base64, or file_id",
+			},
+		},
+		"ollama": {
+			ImageInput: ImageInputCapability{
+				Supported: true,
+				URL:       true,
+				Base64:    true,
+				Schema:    "local path or data URI; multimodal models like llava accept both",
+			},
+			FileAttachment: FileAttachmentCapability{
+				Supported:  true,
+				URL:        true,
+				Base64:     true,
+				FileUpload: false,
+				Schema:     "file={path or base64}; handled locally by Ollama server",
+			},
+		},
+		"custom": { // custom providers advertise no image or file support by default
+			ImageInput: ImageInputCapability{
+				Supported: false,
+			},
+			FileAttachment: FileAttachmentCapability{
+				Supported: false,
+			},
+		},
+	}
+}
diff --git a/services/llm-api/internal/domain/model/provider_model.go b/services/llm-api/internal/domain/model/provider_model.go
new file mode 100644
index 00000000..ae2445cf
--- /dev/null
+++ b/services/llm-api/internal/domain/model/provider_model.go
@@ -0,0 +1,91 @@
+package model
+
+import (
+	"context"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/query"
+)
+
+type MicroUSD int64 // MicroUSD is a monetary amount in millionths of a US dollar (15000 is $0.0150).
+
+type PriceUnit string // PriceUnit identifies what a PriceLine amount is charged per.
+
+const (
+	Per1KPromptTokens     PriceUnit = "per_1k_prompt_tokens"
+	Per1KCompletionTokens PriceUnit = "per_1k_completion_tokens"
+	PerRequest            PriceUnit = "per_request"
+	PerImage              PriceUnit = "per_image"
+	PerWebSearch          PriceUnit = "per_web_search"
+	PerInternalReasoning  PriceUnit = "per_internal_reasoning"
+)
+
+// PriceLine is a single price line item: an amount charged per unit (e.g., prompt token price).
+type PriceLine struct {
+	Unit     PriceUnit `json:"unit"`
+	Amount   MicroUSD  `json:"amount_micro_usd"` // e.g., 15000 -> $0.0150
+	Currency string    `json:"currency"`         // "USD" (fixed if you only bill in USD); extractPricing always writes "USD"
+}
+
+// Pricing groups the price lines of one model.
+type Pricing struct {
+	Lines []PriceLine `json:"lines"` // flexible: add/remove units without schema churn
+}
+
+// TokenLimits holds a model's context-length and completion token limits.
+type TokenLimits struct {
+	ContextLength       int `json:"context_length"`        // e.g., 400000
+	MaxCompletionTokens int `json:"max_completion_tokens"` // e.g., 128000
+}
+
+// ProviderModel describes a specific model under a provider.
+type ProviderModel struct {
+	ID                      uint         `json:"id"` // internal numeric ID
+	PublicID                string       `json:"public_id"` // public ID; new rows get an idgen ID with the "pmdl" prefix
+	ProviderID              uint         `json:"provider_id"` // ID of the owning Provider
+	Kind                    ProviderKind `json:"kind"` // copied from the owning provider's Kind
+	ModelCatalogID          *uint        `json:"model_catalog_id"` // nil when no catalog entry is linked
+	ModelPublicID           string       `json:"model_public_id"`            // Matches model_catalog.PublicID (canonical vendor/model format)
+	ProviderOriginalModelID string       `json:"provider_original_model_id"` // Original model ID from provider API (chat.Model.ID)
+	DisplayName             string       `json:"display_name"` // falls back to the raw model ID when the provider sends none
+	Pricing                 Pricing      `json:"pricing"`
+	TokenLimits             *TokenLimits `json:"token_limits,omitempty"` // override provider top caps
+	Family                  *string      `json:"family,omitempty"`       // e.g., "gpt-4o", "llama-3.1"
+	SupportsImages          bool         `json:"supports_images"` // set when architecture.input_modalities contains "image"
+	SupportsEmbeddings      bool         `json:"supports_embeddings"` // set when the model ID contains "embed" (case-insensitive)
+	SupportsReasoning       bool         `json:"supports_reasoning"` // set when supported_parameters contains "include_reasoning"
+	SupportsAudio           bool         `json:"supports_audio"`
+	SupportsVideo           bool         `json:"supports_video"`
+	Active                  bool         `json:"active"` // set at creation from the auto-enable option; later syncs leave it unchanged
+	CreatedAt               time.Time    `json:"created_at"`
+	UpdatedAt               time.Time    `json:"updated_at"`
+}
+
+// ProviderModelFilter defines optional conditions for querying provider models; every field is a nilable pointer.
+type ProviderModelFilter struct {
+	IDs                *[]uint
+	PublicID           *string
+	ProviderIDs        *[]uint // several owning providers at once
+	ProviderID         *uint // a single owning provider
+	ModelCatalogID     *uint
+	ModelPublicID      *string // canonical model key, as produced by NormalizeModelKey
+	ModelPublicIDs     *[]string
+	Active             *bool
+	SupportsImages     *bool
+	SupportsEmbeddings *bool
+	SupportsReasoning  *bool
+	SupportsAudio      *bool
+	SupportsVideo      *bool
+}
+
+// ProviderModelRepository abstracts persistence for provider models.
+type ProviderModelRepository interface {
+	Create(ctx context.Context, model *ProviderModel) error
+	Update(ctx context.Context, model *ProviderModel) error
+	DeleteByID(ctx context.Context, id uint) error
+	FindByID(ctx context.Context, id uint) (*ProviderModel, error) // lookup by internal numeric ID
+	FindByPublicID(ctx context.Context, publicID string) (*ProviderModel, error) // lookup by public string ID
+	FindByFilter(ctx context.Context, filter ProviderModelFilter, p *query.Pagination) ([]*ProviderModel, error) // the service passes p == nil for unpaginated queries
+	Count(ctx context.Context, filter ProviderModelFilter) (int64, error)
+	BatchUpdateActive(ctx context.Context, filter ProviderModelFilter, active bool) (int64, error) // the service treats the int64 as the affected-row count
+}
diff --git a/services/llm-api/internal/domain/model/provider_model_service.go b/services/llm-api/internal/domain/model/provider_model_service.go
new file mode 100644
index 00000000..06306320
--- /dev/null
+++ b/services/llm-api/internal/domain/model/provider_model_service.go
@@ -0,0 +1,339 @@
+package model
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/utils/httpclients/chat"
+	"jan-server/services/llm-api/internal/utils/idgen"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+	"jan-server/services/llm-api/internal/utils/ptr"
+)
+
+type ProviderModelService struct { // ProviderModelService implements provider-model use cases on top of two repositories.
+	providerModelRepo ProviderModelRepository // provider model persistence
+	modelCatalogRepo  ModelCatalogRepository // read by Update to check the linked catalog entry's Active flag
+}
+
+// NewProviderModelService wires a ProviderModelService to its two repositories.
+func NewProviderModelService(
+	providerModelRepo ProviderModelRepository,
+	modelCatalogRepo ModelCatalogRepository,
+) *ProviderModelService {
+	svc := new(ProviderModelService)
+	svc.providerModelRepo, svc.modelCatalogRepo = providerModelRepo, modelCatalogRepo
+	return svc
+}
+
+// ListActiveProviderByIDs queries provider models filtered by the given provider
+// IDs and Active=true. An empty ID list short-circuits to (nil, nil).
+func (s *ProviderModelService) ListActiveProviderByIDs(ctx context.Context, providerIDs []uint) ([]*ProviderModel, error) {
+	if len(providerIDs) == 0 {
+		return nil, nil
+	}
+
+	onlyActive := ProviderModelFilter{ProviderIDs: &providerIDs, Active: ptr.ToBool(true)}
+	return s.providerModelRepo.FindByFilter(ctx, onlyActive, nil)
+}
+
+// FindActiveByModelKey queries provider models filtered by the trimmed model
+// public ID and Active=true. A blank ID short-circuits to (nil, nil).
+func (s *ProviderModelService) FindActiveByModelKey(ctx context.Context, modelPublicID string) ([]*ProviderModel, error) {
+	trimmedKey := strings.TrimSpace(modelPublicID)
+	if trimmedKey == "" {
+		return nil, nil
+	}
+
+	criteria := ProviderModelFilter{ModelPublicID: &trimmedKey, Active: ptr.ToBool(true)}
+	return s.providerModelRepo.FindByFilter(ctx, criteria, nil)
+}
+
+// FindActiveByProviderIDsAndKey queries provider models filtered by the given
+// provider IDs, the trimmed model public ID and Active=true. A blank model ID
+// short-circuits to (nil, nil); the provider ID list is passed through as-is.
+func (s *ProviderModelService) FindActiveByProviderIDsAndKey(ctx context.Context, providerIDs []uint, modelPublicID string) ([]*ProviderModel, error) {
+	trimmedKey := strings.TrimSpace(modelPublicID)
+	if trimmedKey == "" {
+		return nil, nil
+	}
+
+	criteria := ProviderModelFilter{Active: ptr.ToBool(true)}
+	criteria.ProviderIDs, criteria.ModelPublicID = &providerIDs, &trimmedKey
+	return s.providerModelRepo.FindByFilter(ctx, criteria, nil)
+}
+
+func (s *ProviderModelService) UpsertProviderModel(ctx context.Context, provider *Provider, catalog *ModelCatalog, model chat.Model) (*ProviderModel, error) {
+	return s.UpsertProviderModelWithOptions(ctx, provider, catalog, model, false) // false: a newly created model starts inactive
+}
+
+// UpsertProviderModelWithOptions updates the provider's model matching the normalized model key, or creates it with Active=autoEnableNewModels.
+func (s *ProviderModelService) UpsertProviderModelWithOptions(ctx context.Context, provider *Provider, catalog *ModelCatalog, model chat.Model, autoEnableNewModels bool) (*ProviderModel, error) {
+	originalModelID := strings.TrimSpace(model.ID)
+	if originalModelID == "" {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "model identifier missing", nil, "aa85b5ad-cc4a-4b24-ae35-f260163768ff")
+	}
+
+	// Generate ModelPublicID using NormalizeModelKey which returns vendor/model format
+	kind := ProviderKind(provider.Kind)
+	modelPublicID := NormalizeModelKey(kind, originalModelID)
+
+	filter := ProviderModelFilter{
+		ProviderID:    ptr.ToUint(provider.ID),
+		ModelPublicID: &modelPublicID,
+	}
+	existing, err := s.providerModelRepo.FindByFilter(ctx, filter, &query.Pagination{Limit: ptr.ToInt(1)})
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to find existing provider model")
+	}
+
+	var catalogID *uint
+	if catalog != nil {
+		catalogID = &catalog.ID
+	}
+
+	if len(existing) > 0 {
+		pm := existing[0]
+		updateProviderModelFromRaw(pm, provider, catalogID, model)
+		if err := s.providerModelRepo.Update(ctx, pm); err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to update provider model")
+		}
+		return pm, nil
+	}
+
+	publicID, err := idgen.GenerateSecureID("pmdl", 16)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to generate provider model ID")
+	}
+
+	pm := buildProviderModelFromRaw(provider, catalogID, model)
+	pm.PublicID = publicID
+	pm.ModelPublicID = modelPublicID // persist exactly the key the lookup above used, so the next sync finds this row
+	pm.Active = autoEnableNewModels
+	if err := s.providerModelRepo.Create(ctx, pm); err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to create provider model")
+	}
+	return pm, nil
+}
+
+// FindByPublicID fetches a provider model by public ID; an empty ID is rejected with a validation error.
+func (s *ProviderModelService) FindByPublicID(ctx context.Context, publicID string) (*ProviderModel, error) {
+	if publicID != "" {
+		return s.providerModelRepo.FindByPublicID(ctx, publicID)
+	}
+	return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "provider model public ID is required", nil, "f7cdce27-bfed-48c2-a966-14549a666f6a")
+}
+
+// Update persists the provider model. A model flagged active whose linked catalog
+// entry is explicitly inactive is forced to inactive before saving.
+func (s *ProviderModelService) Update(ctx context.Context, providerModel *ProviderModel) (*ProviderModel, error) {
+	if providerModel == nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "provider model cannot be nil", nil, "45c19f50-e0d1-4745-b6c4-be6de6ce0ec0")
+	}
+
+	if catalogID := providerModel.ModelCatalogID; providerModel.Active && catalogID != nil {
+		entry, lookupErr := s.modelCatalogRepo.FindByID(ctx, *catalogID)
+		if lookupErr != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, lookupErr, "failed to check catalog status")
+		}
+
+		// Active is true on this path, so it stays true unless the catalog entry is explicitly inactive.
+		providerModel.Active = !(entry != nil && entry.Active != nil && !*entry.Active)
+	}
+
+	if saveErr := s.providerModelRepo.Update(ctx, providerModel); saveErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, saveErr, "failed to update provider model")
+	}
+	return providerModel, nil
+}
+
+func (s *ProviderModelService) FindByFilter(ctx context.Context, filter ProviderModelFilter) ([]*ProviderModel, error) {
+	models, err := s.providerModelRepo.FindByFilter(ctx, filter, nil)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to find provider models")
+	}
+	return models, nil
+}
+
+func (s *ProviderModelService) FindByFilterWithPagination(ctx context.Context, filter ProviderModelFilter, pagination *query.Pagination) ([]*ProviderModel, error) {
+	page, findErr := s.providerModelRepo.FindByFilter(ctx, filter, pagination) // pagination is forwarded untouched
+	if findErr == nil {
+		return page, nil
+	}
+	return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, findErr, "failed to find provider models") // wrapped as a domain-layer error
+}
+
+func (s *ProviderModelService) Count(ctx context.Context, filter ProviderModelFilter) (int64, error) {
+	total, countErr := s.providerModelRepo.Count(ctx, filter) // number of provider models matching filter
+	if countErr == nil {
+		return total, nil
+	}
+	return 0, platformerrors.AsError(ctx, platformerrors.LayerDomain, countErr, "failed to count provider models") // count is 0 on failure
+}
+
+// BatchUpdateActive sets the Active flag of every provider model matching
+// filter in a single repository call.
+//
+// Parameters:
+//   - ctx: request context forwarded to the repository.
+//   - filter: selects the provider models to update.
+//   - active: the value written to each matched model's Active flag.
+//
+// Returns the number of rows the repository reports as affected. A repository
+// failure is wrapped as a domain-layer error and reported with a count of 0.
+func (s *ProviderModelService) BatchUpdateActive(ctx context.Context, filter ProviderModelFilter, active bool) (int64, error) {
+	rowsAffected, err := s.providerModelRepo.BatchUpdateActive(ctx, filter, active)
+	if err != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to batch update active status")
+	}
+
+	// No follow-up query is issued here: nothing in this method consumes the
+	// affected models, so re-reading them (and discarding both the result and
+	// any error) would only cost an extra repository round trip.
+	return rowsAffected, nil
+}
+
+// buildProviderModelFromRaw maps a raw provider model listing onto a new
+// ProviderModel. The result is inactive and has no PublicID; both are left
+// for the caller to set. Capability flags are derived from the raw payload
+// and from the model ID.
+func buildProviderModelFromRaw(provider *Provider, catalogID *uint, model chat.Model) *ProviderModel {
+	kind := ProviderKind(provider.Kind)
+	inputModalities := extractStringSliceFromMap(model.Raw, "architecture", "input_modalities")
+	supportedParams := extractStringSlice(model.Raw["supported_parameters"])
+
+	pm := &ProviderModel{
+		ProviderID:              provider.ID,
+		Kind:                    kind,
+		ModelCatalogID:          catalogID,
+		ModelPublicID:           NormalizeModelKey(kind, model.ID), // canonical vendor/model key
+		ProviderOriginalModelID: model.ID,                          // identifier exactly as the provider reported it
+		DisplayName:             model.DisplayName,
+		Pricing:                 extractPricing(model.Raw["pricing"]),
+		TokenLimits:             extractTokenLimits(model.Raw),
+		Family:                  extractFamily(model.ID),
+		SupportsImages:          containsString(inputModalities, "image"),
+		SupportsEmbeddings:      strings.Contains(strings.ToLower(model.ID), "embed"),
+		SupportsReasoning:       containsString(supportedParams, "include_reasoning"),
+		Active:                  false, // inactive until the caller decides otherwise
+	}
+
+	// Fall back to the raw model ID when the provider supplies no display name.
+	if pm.DisplayName == "" {
+		pm.DisplayName = model.ID
+	}
+
+	return pm
+}
+
+// updateProviderModelFromRaw refreshes pm in place from a raw provider listing; Active, PublicID and the model keys are untouched.
+func updateProviderModelFromRaw(pm *ProviderModel, provider *Provider, catalogID *uint, model chat.Model) {
+	inputModalities := extractStringSliceFromMap(model.Raw, "architecture", "input_modalities")
+	supportedParams := extractStringSlice(model.Raw["supported_parameters"])
+	name := model.DisplayName
+	if name == "" {
+		name = model.ID // no display name supplied: fall back to the raw ID
+	}
+	pm.Kind, pm.ModelCatalogID, pm.DisplayName = ProviderKind(provider.Kind), catalogID, name
+	pm.Pricing, pm.TokenLimits = extractPricing(model.Raw["pricing"]), extractTokenLimits(model.Raw)
+	pm.Family = extractFamily(model.ID)
+	pm.SupportsImages = containsString(inputModalities, "image")
+	pm.SupportsEmbeddings = strings.Contains(strings.ToLower(model.ID), "embed")
+	pm.SupportsReasoning = containsString(supportedParams, "include_reasoning")
+	pm.UpdatedAt = time.Now().UTC() // Active is deliberately left as stored
+}
+
+// extractPricing converts a raw pricing map ({"lines": [{unit, amount}, ...]}) into Pricing, skipping malformed lines.
+func extractPricing(value any) Pricing {
+	pricing := Pricing{}
+	pricingMap, _ := value.(map[string]any) // nil map when value has another type; reading from it is safe
+	rawLines, _ := pricingMap["lines"].([]any) // nil (no iterations) when missing or of another type
+	for _, line := range rawLines {
+		lineMap, ok := line.(map[string]any)
+		if !ok {
+			continue
+		}
+		unitStr, _ := getString(lineMap, "unit")
+		amount, ok := floatFromAny(lineMap["amount"])
+		if !ok {
+			continue
+		}
+		// Round half away from zero; truncation would store 1.005 (1004999.9999999999 after scaling) as 1004999.
+		half := 0.5
+		if amount < 0 {
+			half = -half
+		}
+		pricing.Lines = append(pricing.Lines, PriceLine{
+			Unit:     PriceUnit(strings.ToLower(strings.TrimSpace(unitStr))),
+			Amount:   MicroUSD(int64(amount*1_000_000 + half)),
+			Currency: "USD",
+		})
+	}
+	return pricing
+}
+
+func extractTokenLimits(raw map[string]any) *TokenLimits {
+	if raw == nil { // nothing to read from
+		return nil
+	}
+	found := new(TokenLimits)
+	if v, ok := floatFromAny(raw["context_length"]); ok {
+		found.ContextLength = int(v)
+	}
+	if v, ok := floatFromAny(raw["max_completion_tokens"]); ok {
+		found.MaxCompletionTokens = int(v)
+	}
+	if *found == (TokenLimits{}) { // neither limit present (or both zero): report "no limits"
+		return nil
+	}
+	return found
+}
+
+// extractFamily tries the separators "/", "-" and ":" in that priority order and returns the trimmed
+// text before the first one found at an index greater than zero; nil when none qualifies.
+func extractFamily(modelID string) *string {
+	for _, sep := range [...]string{"/", "-", ":"} {
+		if cut := strings.Index(modelID, sep); cut > 0 {
+			return ptr.ToString(strings.TrimSpace(modelID[:cut]))
+		}
+	}
+	return nil
+}
+
+// FindModelCountsByProviderIDs counts provider models per provider ID with one
+// repository Count call each; the first failure aborts and is returned wrapped.
+func (s *ProviderModelService) FindModelCountsByProviderIDs(ctx context.Context, providerIDs []uint) (map[uint]int64, error) {
+	perProvider := make(map[uint]int64)
+
+	for i := range providerIDs {
+		id := providerIDs[i]
+		total, countErr := s.providerModelRepo.Count(ctx, ProviderModelFilter{ProviderID: &id})
+		if countErr != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, countErr, fmt.Sprintf("failed to count models for provider %d", id))
+		}
+		perProvider[id] = total
+	}
+
+	return perProvider, nil
+}
+
+// FindActiveModelCountsByProviderIDs counts, per provider ID, the provider
+// models filtered by Active=true, using one repository Count call per ID.
+// The first failure aborts the loop and is returned wrapped.
+func (s *ProviderModelService) FindActiveModelCountsByProviderIDs(ctx context.Context, providerIDs []uint) (map[uint]int64, error) {
+	perProvider := make(map[uint]int64)
+
+	for i := range providerIDs {
+		id, onlyActive := providerIDs[i], true
+		activeFilter := ProviderModelFilter{ProviderID: &id, Active: &onlyActive}
+		total, countErr := s.providerModelRepo.Count(ctx, activeFilter)
+		if countErr != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, countErr, fmt.Sprintf("failed to count active models for provider %d", id))
+		}
+		perProvider[id] = total
+	}
+
+	return perProvider, nil
+}
diff --git a/services/llm-api/internal/domain/model/provider_service.go b/services/llm-api/internal/domain/model/provider_service.go
new file mode 100644
index 00000000..09ff33a3
--- /dev/null
+++ b/services/llm-api/internal/domain/model/provider_service.go
@@ -0,0 +1,409 @@
+package model
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/url"
+	"strings"
+	"time"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/internal/utils/crypto"
+	"jan-server/services/llm-api/internal/utils/httpclients/chat"
+	"jan-server/services/llm-api/internal/utils/idgen"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+	"jan-server/services/llm-api/internal/utils/ptr"
+)
+
+type ProviderService struct { // ProviderService implements provider registration, lookup, update and model sync.
+	providerRepo         ProviderRepository
+	providerModelService *ProviderModelService
+	modelCatalogService  *ModelCatalogService
+	modelProviderSecret  string // Encryption secret for provider API keys. NOTE(review): NewProviderService never sets it; RegisterProvider/UpdateProvider read config.GetGlobal().ModelProviderSecret instead
+}
+
+// NewProviderService builds a ProviderService from its repository and collaborating services; modelProviderSecret stays empty.
+func NewProviderService(
+	providerRepo ProviderRepository,
+	providerModelService *ProviderModelService,
+	modelCatalogService *ModelCatalogService,
+) *ProviderService {
+	svc := new(ProviderService)
+	svc.providerRepo = providerRepo
+	svc.providerModelService, svc.modelCatalogService = providerModelService, modelCatalogService
+	return svc
+}
+
+type RegisterProviderInput struct { // RegisterProviderInput carries the caller-supplied fields for RegisterProvider.
+	Name     string // required; stored trimmed as the provider's DisplayName
+	Vendor   string // mapped to a ProviderKind by ProviderKindFromVendor
+	BaseURL  string // required; must pass url.ParseRequestURI
+	APIKey   string // optional plain-text key; stored encrypted
+	Metadata map[string]string // sanitized, then completed with default capabilities
+	Active   bool
+}
+
+type UpdateProviderInput struct { // UpdateProviderInput lists optional changes; UpdateProvider skips nil fields.
+	Name     *string
+	BaseURL  *string
+	APIKey   *string // a blank key clears the stored key and hint
+	Metadata *map[string]string // replaces the provider's metadata (after sanitizing and adding defaults)
+	Active   *bool
+}
+
+type UpsertProviderInput struct { // UpsertProviderInput carries the fields UpsertProvider uses to update or register a provider.
+	Name     string // matched against existing providers' DisplayName
+	Vendor   string // only used when a new provider is registered
+	BaseURL  string
+	APIKey   string
+	Metadata map[string]string
+	Active   bool
+}
+
+func (s *ProviderService) RegisterProvider(ctx context.Context, input RegisterProviderInput) (*Provider, error) {
+	name := strings.TrimSpace(input.Name)
+	if name == "" {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "provider name is required", nil, "c86f2bc3-5ea3-41d3-b450-e86adb33352c")
+	}
+
+	baseURL := strings.TrimSpace(input.BaseURL)
+	if baseURL == "" {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "base_url is required", nil, "c80a4867-6c8b-4adb-878d-41fe1b5e96ae")
+	}
+	if _, err := url.ParseRequestURI(baseURL); err != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, fmt.Sprintf("invalid base_url format: %v", err), nil, "9e944ba1-c849-4959-957f-cb3de40e2eb1")
+	}
+
+	kind := ProviderKindFromVendor(input.Vendor)
+
+	if kind != ProviderCustom {
+		filter := ProviderFilter{Kind: &kind}
+		count, err := s.providerRepo.Count(ctx, filter)
+		if err != nil {
+			return nil, err
+		}
+		if count > 0 {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeConflict, "provider kind already exists", nil, "ac1dfff6-c184-4572-b613-6f900c36443f")
+		}
+	}
+
+	publicID, err := idgen.GenerateSecureID("prov", 16)
+	if err != nil {
+		return nil, err
+	}
+
+	plainAPIKey := strings.TrimSpace(input.APIKey)
+	apiKeyHint := apiKeyHint(plainAPIKey)
+	var encryptedAPIKey string
+	if plainAPIKey != "" {
+		secret := strings.TrimSpace(config.GetGlobal().ModelProviderSecret)
+		if secret == "" {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeInternal, "model provider secret is not configured", nil, "9fd675bb-1471-4dd4-9160-16df36500595")
+		}
+		cipher, err := crypto.EncryptString(secret, plainAPIKey)
+		if err != nil {
+			return nil, err
+		}
+		encryptedAPIKey = cipher
+	}
+
+	metadata := sanitizeMetadata(input.Metadata)
+	metadata = setDefaultCapabilities(kind, metadata)
+
+	provider := &Provider{
+		PublicID:        publicID,
+		DisplayName:     name,
+		Kind:            kind,
+		BaseURL:         normalizeURL(baseURL),
+		EncryptedAPIKey: encryptedAPIKey,
+		APIKeyHint:      apiKeyHint,
+		IsModerated:     false,
+		Active:          input.Active,
+		Metadata:        metadata,
+	}
+
+	if err := s.providerRepo.Create(ctx, provider); err != nil {
+		return nil, err
+	}
+
+	return provider, nil
+}
+
+// FindProviderByVendor returns the first provider of the vendor's kind, or (nil, nil) when none exists.
+func (s *ProviderService) FindProviderByVendor(ctx context.Context, vendor string) (*Provider, error) {
+	kind := ProviderKindFromVendor(vendor)
+	matches, findErr := s.providerRepo.FindByFilter(ctx, ProviderFilter{Kind: &kind}, &query.Pagination{Limit: ptr.ToInt(1)})
+	switch {
+	case findErr != nil:
+		return nil, findErr
+	case len(matches) == 0:
+		return nil, nil
+	}
+	return matches[0], nil
+}
+
+func ProviderKindFromVendor(vendor string) ProviderKind {
+	switch strings.ToLower(strings.TrimSpace(vendor)) {
+	case "jan":
+		return ProviderJan
+	case "openrouter":
+		return ProviderOpenRouter
+	case "openai":
+		return ProviderOpenAI
+	case "anthropic":
+		return ProviderAnthropic
+	case "gemini", "google", "googleai":
+		return ProviderGoogle
+	case "mistral":
+		return ProviderMistral
+	case "groq":
+		return ProviderGroq
+	case "cohere":
+		return ProviderCohere
+	case "ollama":
+		return ProviderOllama
+	case "replicate":
+		return ProviderReplicate
+	case "azure_openai", "azure-openai":
+		return ProviderAzureOpenAI
+	case "aws_bedrock", "bedrock":
+		return ProviderAWSBedrock
+	case "perplexity":
+		return ProviderPerplexity
+	case "togetherai", "together":
+		return ProviderTogetherAI
+	case "huggingface":
+		return ProviderHuggingFace
+	case "vercel_ai", "vercel-ai", "vercel":
+		return ProviderVercelAI
+	case "deepinfra":
+		return ProviderDeepInfra
+	default:
+		return ProviderCustom
+	}
+}
+
+// apiKeyHint returns the last 4 bytes of the trimmed key for display, or nil when the key is too short to hint safely.
+func apiKeyHint(apiKey string) *string {
+	key := strings.TrimSpace(apiKey)
+	if len(key) <= 4 { // a hint covering the whole key would disclose the secret itself
+		return nil
+	}
+	return ptr.ToString(key[len(key)-4:])
+}
+
+func (s *ProviderService) GetByID(ctx context.Context, providerId uint) (*Provider, error) {
+	found, findErr := s.providerRepo.FindByID(ctx, providerId) // lookup by internal numeric ID
+	if findErr == nil {
+		return found, nil
+	}
+	return nil, findErr // a provider is never returned together with an error
+}
+
+func (s *ProviderService) FindByPublicID(ctx context.Context, publicID string) (*Provider, error) {
+	return s.providerRepo.FindByPublicID(ctx, publicID) // straight pass-through: publicID is not validated here
+}
+
+// GetByIDs loads the providers with the given IDs and indexes them by ID.
+// An empty ID list returns an empty (non-nil) map without touching the repository.
+func (s *ProviderService) GetByIDs(ctx context.Context, ids []uint) (map[uint]*Provider, error) {
+	byID := map[uint]*Provider{}
+	if len(ids) == 0 {
+		return byID, nil
+	}
+
+	found, findErr := s.providerRepo.FindByIDs(ctx, ids)
+	if findErr != nil {
+		return nil, findErr
+	}
+	for i := range found {
+		byID[found[i].ID] = found[i]
+	}
+	return byID, nil
+}
+
+// FindAllProviders lists providers using an empty filter and no pagination.
+func (s *ProviderService) FindAllProviders(ctx context.Context) ([]*Provider, error) {
+	return s.providerRepo.FindByFilter(ctx, ProviderFilter{}, nil)
+}
+
+// FindAllActiveProviders lists providers filtered by Active=true, without pagination.
+func (s *ProviderService) FindAllActiveProviders(ctx context.Context) ([]*Provider, error) {
+	return s.providerRepo.FindByFilter(ctx, ProviderFilter{Active: ptr.ToBool(true)}, nil)
+}
+
+func (s *ProviderService) UpsertProvider(ctx context.Context, input UpsertProviderInput) (*Provider, error) {
+	// Check if provider exists by display name (since Name field doesn't exist in filter)
+	filter := ProviderFilter{}
+	allProviders, err := s.providerRepo.FindByFilter(ctx, filter, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	// Find existing provider by name
+	var existing *Provider
+	for _, p := range allProviders {
+		if p.DisplayName == input.Name {
+			existing = p
+			break
+		}
+	}
+
+	if existing != nil {
+		// Update existing provider
+		updateInput := UpdateProviderInput{
+			BaseURL:  &input.BaseURL,
+			APIKey:   &input.APIKey,
+			Metadata: &input.Metadata,
+			Active:   &input.Active,
+		}
+		return s.UpdateProvider(ctx, existing, updateInput)
+	}
+
+	// Register new provider
+	registerInput := RegisterProviderInput{
+		Name:     input.Name,
+		Vendor:   input.Vendor,
+		BaseURL:  input.BaseURL,
+		APIKey:   input.APIKey,
+		Metadata: input.Metadata,
+		Active:   input.Active,
+	}
+	return s.RegisterProvider(ctx, registerInput)
+}
+
+// UpdateProvider applies the non-nil fields of input to provider and persists it. All changes are staged on a
+// copy first, so a validation or encryption failure leaves the caller's provider unmodified.
+func (s *ProviderService) UpdateProvider(ctx context.Context, provider *Provider, input UpdateProviderInput) (*Provider, error) {
+	staged := *provider
+	if input.Name != nil {
+		name := strings.TrimSpace(*input.Name)
+		if name == "" {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "provider name is required", nil, "a5df830c-8084-4238-9e17-f44950764ca5")
+		}
+		staged.DisplayName = name
+	}
+	if input.BaseURL != nil {
+		baseURL := strings.TrimSpace(*input.BaseURL)
+		if baseURL == "" {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "base_url is required", nil, "302ffb5e-243f-4112-99ec-e4f9bfbc331a")
+		}
+		if _, err := url.ParseRequestURI(baseURL); err != nil {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, fmt.Sprintf("invalid base_url format: %v", err), nil, "0037a0ec-1342-49e9-8479-cda7db9d1ce8")
+		}
+		staged.BaseURL = normalizeURL(baseURL)
+	}
+	if input.APIKey != nil {
+		key := strings.TrimSpace(*input.APIKey)
+		if key == "" {
+			staged.EncryptedAPIKey, staged.APIKeyHint = "", nil // a blank key clears the stored credential
+		} else {
+			secret := strings.TrimSpace(config.GetGlobal().ModelProviderSecret)
+			if secret == "" {
+				return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeInternal, "model provider secret is not configured", nil, "b31c3083-4a15-4e86-baf9-35fc557cfa0a")
+			}
+			cipher, err := crypto.EncryptString(secret, key)
+			if err != nil {
+				return nil, err
+			}
+			staged.EncryptedAPIKey, staged.APIKeyHint = cipher, apiKeyHint(key)
+		}
+	}
+	if input.Metadata != nil {
+		// Metadata is replaced wholesale; defaults fill only the capability keys the caller omitted
+		staged.Metadata = setDefaultCapabilities(staged.Kind, sanitizeMetadata(*input.Metadata))
+	}
+	if input.Active != nil {
+		staged.Active = *input.Active
+	}
+	*provider = staged
+	if err := s.providerRepo.Update(ctx, provider); err != nil {
+		return nil, err
+	}
+	return provider, nil
+}
+
+// SyncProviderModelsWithOptions upserts the catalog entry and the provider model for each listed model, then
+// stamps provider.LastSyncedAt and saves the provider. Models that fail are logged and skipped; the only
+// error returned is a failure to save the provider.
+func (s *ProviderService) SyncProviderModelsWithOptions(ctx context.Context, provider *Provider, models []chat.Model, autoEnableNewModels bool) ([]*ProviderModel, error) {
+	logSkipped := func(cause error, modelID, format string) {
+		log := logger.GetLogger()
+		log.Error().Str("model_id", modelID).Str("provider", provider.DisplayName).Err(cause).
+			Msgf(format, modelID, provider.DisplayName)
+	}
+
+	synced := make([]*ProviderModel, 0, len(models))
+	for _, model := range models {
+		catalog, created, catalogErr := s.modelCatalogService.UpsertCatalog(ctx, provider, model)
+		if catalogErr != nil {
+			logSkipped(catalogErr, model.ID, "failed to upsert catalog for model '%s' from provider '%s'")
+			continue
+		}
+		// Auto-enable applies only when UpsertCatalog reports the entry as created.
+		enable := autoEnableNewModels && created
+		providerModel, upsertErr := s.providerModelService.UpsertProviderModelWithOptions(ctx, provider, catalog, model, enable)
+		if upsertErr != nil {
+			logSkipped(upsertErr, model.ID, "failed to upsert provider model for '%s' from provider '%s'")
+			continue
+		}
+		synced = append(synced, providerModel)
+	}
+
+	syncedAt := time.Now().UTC()
+	provider.LastSyncedAt = &syncedAt
+	if saveErr := s.providerRepo.Update(ctx, provider); saveErr != nil {
+		return nil, saveErr
+	}
+
+	return synced, nil
+}
+
+func sanitizeMetadata(metadata map[string]string) map[string]string {
+	if len(metadata) == 0 {
+		return nil
+	}
+	result := make(map[string]string, len(metadata))
+	for key, value := range metadata {
+		k := strings.TrimSpace(key)
+		v := strings.TrimSpace(value)
+		if k == "" || v == "" {
+			continue
+		}
+		result[k] = v
+	}
+	if len(result) == 0 {
+		return nil
+	}
+	return result
+}
+
+// setDefaultCapabilities fills in the image-input and file-attachment metadata entries with the
+// JSON-encoded defaults for kind, leaving entries that are already present untouched. A nil map is
+// replaced by a new one; a non-nil map is modified in place and returned.
+func setDefaultCapabilities(kind ProviderKind, metadata map[string]string) map[string]string {
+	if metadata == nil {
+		metadata = map[string]string{}
+	}
+	defaults := GetDefaultCapabilities(kind)
+	fallbacks := []struct {
+		key   string
+		value any
+	}{
+		{MetadataKeyImageInput, defaults.ImageInput},
+		{MetadataKeyFileAttachment, defaults.FileAttachment},
+	}
+	for _, fb := range fallbacks {
+		if _, present := metadata[fb.key]; present {
+			continue
+		}
+		// A default that cannot be encoded is skipped rather than reported.
+		if encoded, err := json.Marshal(fb.value); err == nil {
+			metadata[fb.key] = string(encoded)
+		}
+	}
+	return metadata
+}
diff --git a/services/llm-api/internal/domain/principal.go b/services/llm-api/internal/domain/principal.go
new file mode 100644
index 00000000..9a1c61da
--- /dev/null
+++ b/services/llm-api/internal/domain/principal.go
@@ -0,0 +1,34 @@
+package domain
+
+// AuthMethod names the mechanism a caller used to authenticate with the API.
+type AuthMethod string
+
+// Supported authentication mechanisms.
+const (
+	AuthMethodJWT    AuthMethod = "jwt"
+	AuthMethodAPIKey AuthMethod = "apikey"
+)
+
+// Principal is the normalized caller identity, independent of the auth mechanism.
+type Principal struct {
+	ID              string
+	AuthMethod      AuthMethod // mechanism used to authenticate (see the constants above)
+	Subject         string
+	Issuer          string
+	AuthorizedParty string
+	Audience        []string
+	Username        string
+	Email           string
+	Name            string
+	Scopes          []string // scopes held by the caller; searched by HasScope
+	Credentials     map[string]string
+}
+
+// HasScope reports whether scope is one of p.Scopes (exact string comparison).
+func (p Principal) HasScope(scope string) bool {
+	found := false
+	for i := 0; i < len(p.Scopes) && !found; i++ {
+		found = p.Scopes[i] == scope
+	}
+	return found
+}
diff --git a/services/llm-api/internal/domain/project/project.go b/services/llm-api/internal/domain/project/project.go
new file mode 100644
index 00000000..7c8a1969
--- /dev/null
+++ b/services/llm-api/internal/domain/project/project.go
@@ -0,0 +1,72 @@
+package project
+
+import (
+	"context"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/query"
+)
+
+// ===============================================
+// Project Types
+// ===============================================
+
+// Project represents a user's project that groups conversations and inherits instructions
+type Project struct {
+	ID          uint       `json:"-"` // Numeric ID; excluded from JSON output
+	PublicID    string     `json:"id"`     // OpenAI-compatible string ID like "proj_abc123"
+	Object      string     `json:"object"` // Always "project" for OpenAI compatibility
+	UserID      uint       `json:"-"`      // Internal user ID
+	Name        string     `json:"name"`
+	Instruction *string    `json:"instruction,omitempty"` // Optional persona/context text
+	Favorite    bool       `json:"favorite"` // false on creation (see NewProject)
+	ArchivedAt  *time.Time `json:"archived_at,omitempty"` // nil on creation; presumably set when archived — TODO confirm
+	DeletedAt   *time.Time `json:"deleted_at,omitempty"` // nil on creation; presumably the soft-delete time — TODO confirm
+	LastUsedAt  *time.Time `json:"last_used_at,omitempty"` // nil on creation
+	CreatedAt   time.Time  `json:"created_at"`
+	UpdatedAt   time.Time  `json:"updated_at"`
+}
+
+// ===============================================
+// Project Repository
+// ===============================================
+
+type ProjectFilter struct { // Criteria for querying projects; not referenced by ProjectRepository in this file
+	ID       *uint // NOTE(review): presumably a nil field means "do not filter on it" (all fields) — confirm
+	PublicID *string
+	UserID   *uint
+	Archived *bool
+	Search   *string // NOTE(review): what is searched is not visible here — confirm
+}
+
+type ProjectRepository interface { // Persistence contract consumed by ProjectService
+	Create(ctx context.Context, project *Project) error
+	GetByPublicID(ctx context.Context, publicID string) (*Project, error)
+	GetByPublicIDAndUserID(ctx context.Context, publicID string, userID uint) (*Project, error) // lookup by public ID and user ID; the service uses it before deleting
+	ListByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*Project, int64, error) // projects plus an int64 the service returns as the total
+	Update(ctx context.Context, project *Project) error
+	Delete(ctx context.Context, publicID string) error // takes no user ID; ProjectService.DeleteProject checks ownership first
+}
+
+// ===============================================
+// Project Factory
+// ===============================================
+
+// NewProject builds a Project in its initial state: Object is "project", Favorite is
+// false, and ArchivedAt, DeletedAt and LastUsedAt are nil. CreatedAt and UpdatedAt share
+// one timestamp taken in UTC so the value does not depend on the server's time zone.
+// The instruction pointer is stored as given (not copied).
+func NewProject(publicID string, userID uint, name string, instruction *string) *Project {
+	now := time.Now().UTC()
+
+	// Favorite, ArchivedAt, DeletedAt and LastUsedAt are intentionally left at their zero values.
+	return &Project{
+		PublicID:    publicID,
+		Object:      "project",
+		UserID:      userID,
+		Name:        name,
+		Instruction: instruction,
+		CreatedAt:   now,
+		UpdatedAt:   now,
+	}
+}
diff --git a/services/llm-api/internal/domain/project/project_service.go b/services/llm-api/internal/domain/project/project_service.go
new file mode 100644
index 00000000..9a82f68b
--- /dev/null
+++ b/services/llm-api/internal/domain/project/project_service.go
@@ -0,0 +1,97 @@
+package project
+
+import (
+	"context"
+
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ProjectService implements the project use cases on top of a ProjectRepository,
+// checking projects and project IDs with a ProjectValidator.
+type ProjectService struct {
+	repo      ProjectRepository
+	validator *ProjectValidator
+}
+
+// NewProjectService wires repo to a validator built with the default limits.
+func NewProjectService(repo ProjectRepository) *ProjectService {
+	svc := &ProjectService{repo: repo}
+	svc.validator = NewProjectValidator(nil) // nil selects DefaultProjectValidationConfig
+	return svc
+}
+
+// ===============================================
+// Core CRUD Operations
+// ===============================================
+
+// CreateProject validates proj and stores it through the repository.
+// A validation failure is returned as a domain-layer validation error; a repository
+// failure is wrapped with "failed to create project". On success proj itself is returned.
+func (s *ProjectService) CreateProject(ctx context.Context, proj *Project) (*Project, error) {
+	validationErr := s.validator.ValidateProject(proj)
+	if validationErr != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "project validation failed", validationErr, "")
+	}
+	createErr := s.repo.Create(ctx, proj)
+	if createErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, createErr, "failed to create project")
+	}
+	return proj, nil
+}
+
+// GetProjectByPublicIDAndUserID loads the project identified by publicID on behalf of userID.
+// A malformed ID yields a domain-layer validation error; any repository error is
+// wrapped with the message "project not found".
+func (s *ProjectService) GetProjectByPublicIDAndUserID(ctx context.Context, publicID string, userID uint) (*Project, error) {
+	// Reject malformed IDs before touching the repository.
+	if idErr := s.validator.ValidateProjectID(publicID); idErr != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "invalid project ID", idErr, "")
+	}
+
+	found, lookupErr := s.repo.GetByPublicIDAndUserID(ctx, publicID, userID)
+	if lookupErr == nil {
+		return found, nil
+	}
+	return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, lookupErr, "project not found")
+}
+
+// UpdateProject re-validates proj and persists it through the repository.
+// Validation failures become domain-layer validation errors; repository failures are
+// wrapped with "failed to update project". On success proj itself is returned.
+func (s *ProjectService) UpdateProject(ctx context.Context, proj *Project) (*Project, error) {
+	validationErr := s.validator.ValidateProject(proj)
+	if validationErr != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "project validation failed", validationErr, "")
+	}
+	updateErr := s.repo.Update(ctx, proj)
+	if updateErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, updateErr, "failed to update project")
+	}
+	return proj, nil
+}
+
+// DeleteProject removes the project publicID after confirming it belongs to userID.
+// The ownership lookup error is returned unchanged; a repository failure is wrapped with
+// "failed to delete project". Soft vs. hard deletion is up to the repository (noted as soft).
+func (s *ProjectService) DeleteProject(ctx context.Context, publicID string, userID uint) error {
+	if _, ownerErr := s.GetProjectByPublicIDAndUserID(ctx, publicID, userID); ownerErr != nil {
+		return ownerErr
+	}
+	deleteErr := s.repo.Delete(ctx, publicID)
+	if deleteErr == nil {
+		return nil
+	}
+	return platformerrors.AsError(ctx, platformerrors.LayerDomain, deleteErr, "failed to delete project")
+}
+
+// ListProjectsByUserID returns the projects owned by userID for the requested page,
+// together with the int64 count reported by the repository. Repository failures are
+// wrapped with "failed to list projects".
+func (s *ProjectService) ListProjectsByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*Project, int64, error) {
+	items, count, listErr := s.repo.ListByUserID(ctx, userID, pagination)
+	if listErr == nil {
+		return items, count, nil
+	}
+	return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerDomain, listErr, "failed to list projects")
+}
diff --git a/services/llm-api/internal/domain/project/project_validation.go b/services/llm-api/internal/domain/project/project_validation.go
new file mode 100644
index 00000000..42349e3a
--- /dev/null
+++ b/services/llm-api/internal/domain/project/project_validation.go
@@ -0,0 +1,161 @@
+package project
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"jan-server/services/llm-api/internal/utils/idgen"
+)
+
+// ===============================================
+// Project Validation
+// ===============================================
+
+// ProjectValidationConfig holds the limits enforced by ProjectValidator.
+// Both limits are counted in runes, after surrounding whitespace is trimmed.
+type ProjectValidationConfig struct {
+	MaxNameLength        int
+	MaxInstructionLength int
+}
+
+// DefaultProjectValidationConfig returns the default limits: 120-rune names, 32768-rune instructions.
+func DefaultProjectValidationConfig() *ProjectValidationConfig {
+	defaults := new(ProjectValidationConfig)
+	defaults.MaxNameLength, defaults.MaxInstructionLength = 120, 32768
+	return defaults
+}
+
+// ProjectValidator validates project IDs, names and instructions against a ProjectValidationConfig.
+type ProjectValidator struct {
+	config             *ProjectValidationConfig
+	invalidCharPattern *regexp.Regexp // ASCII control characters other than \t, \n and \r
+}
+
+// NewProjectValidator creates a validator for projects.
+// A nil config selects DefaultProjectValidationConfig.
+func NewProjectValidator(config *ProjectValidationConfig) *ProjectValidator {
+	validator := &ProjectValidator{
+		// Matches 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F; tab (0x09), newline (0x0A)
+		// and carriage return (0x0D) are deliberately left out of the class.
+		invalidCharPattern: regexp.MustCompile(`[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]`),
+		config:             config,
+	}
+	if validator.config == nil {
+		validator.config = DefaultProjectValidationConfig()
+	}
+	return validator
+}
+
+// ValidateProject performs full project validation.
+//
+// Checks run in order and the first failure is returned, wrapped with the name of
+// the offending field:
+//   - proj must not be nil;
+//   - PublicID is checked with ValidateProjectID only when it is non-empty;
+//   - Name is always checked;
+//   - Instruction is checked only when the pointer is non-nil.
+func (v *ProjectValidator) ValidateProject(proj *Project) error {
+	if proj == nil {
+		return fmt.Errorf("project cannot be nil")
+	}
+	if id := proj.PublicID; id != "" {
+		if idErr := v.ValidateProjectID(id); idErr != nil {
+			return fmt.Errorf("invalid project ID: %w", idErr)
+		}
+	}
+	if nameErr := v.validateName(proj.Name); nameErr != nil {
+		return fmt.Errorf("invalid name: %w", nameErr)
+	}
+	if text := proj.Instruction; text != nil {
+		if textErr := v.validateInstruction(*text); textErr != nil {
+			return fmt.Errorf("invalid instruction: %w", textErr)
+		}
+	}
+	return nil
+}
+
+// ValidateProjectID validates project ID format.
+// The ID must be non-empty, start with "proj_", and be accepted by
+// idgen.ValidateIDFormat for the "proj" prefix. Checks run in that order and the
+// first failing check is reported.
+func (v *ProjectValidator) ValidateProjectID(id string) error {
+	switch {
+	case id == "":
+		return fmt.Errorf("project ID cannot be empty")
+	case !strings.HasPrefix(id, "proj_"):
+		// Checked separately so the caller gets a prefix-specific message.
+		return fmt.Errorf("project ID must start with 'proj_' prefix")
+	case !idgen.ValidateIDFormat(id, "proj"):
+		// Remaining format rules are delegated to the shared ID helper.
+		return fmt.Errorf("invalid project ID format")
+	default:
+		return nil
+	}
+}
+
+// validateName validates project name (internal use only).
+//
+// The name is trimmed before checking. It must be non-empty, at most
+// config.MaxNameLength runes, and free of control and other non-printable characters.
+// unicode.IsPrint treats ASCII space as the only printable space and rejects format
+// characters, so Unicode space separators (e.g. U+00A0, U+3000) and the zero-width
+// non-joiner/joiner (U+200C/U+200D, needed by several scripts and emoji sequences)
+// are accepted explicitly; \n, \t and \r remain accepted as before.
+func (v *ProjectValidator) validateName(name string) error {
+	trimmedName := strings.TrimSpace(name)
+	if trimmedName == "" {
+		return fmt.Errorf("name cannot be empty or only whitespace")
+	}
+	if utf8.RuneCountInString(trimmedName) > v.config.MaxNameLength {
+		return fmt.Errorf("name exceeds maximum length of %d characters", v.config.MaxNameLength)
+	}
+	if v.invalidCharPattern.MatchString(trimmedName) {
+		return fmt.Errorf("name contains invalid control characters")
+	}
+	for _, r := range trimmedName {
+		allowed := unicode.IsPrint(r) || r == '\n' || r == '\t' || r == '\r' ||
+			unicode.Is(unicode.Zs, r) || r == '\u200c' || r == '\u200d'
+		if !allowed {
+			return fmt.Errorf("name contains unprintable characters")
+		}
+	}
+	return nil
+}
+
+// validateInstruction validates instruction text (internal use only).
+//
+// Blank (empty or whitespace-only) text is valid because the field is optional.
+// Otherwise the trimmed text must be at most config.MaxInstructionLength runes and
+// contain no control or other non-printable characters. \n, \t and \r are allowed, as
+// are Unicode space separators (e.g. U+00A0, U+3000) and the zero-width non-joiner/joiner
+// (U+200C/U+200D): unicode.IsPrint alone rejects them although ordinary text uses them.
+func (v *ProjectValidator) validateInstruction(instruction string) error {
+	trimmedInstruction := strings.TrimSpace(instruction)
+	if trimmedInstruction == "" {
+		return nil
+	}
+	if utf8.RuneCountInString(trimmedInstruction) > v.config.MaxInstructionLength {
+		return fmt.Errorf("instruction exceeds maximum length of %d characters", v.config.MaxInstructionLength)
+	}
+	for _, r := range trimmedInstruction {
+		allowed := unicode.IsPrint(r) || r == '\n' || r == '\t' || r == '\r' ||
+			unicode.Is(unicode.Zs, r) || r == '\u200c' || r == '\u200d'
+		if !allowed {
+			return fmt.Errorf("instruction contains unprintable characters")
+		}
+	}
+	return nil
+}
+
+// ValidateProjectName validates a project name independently of a Project value.
+// It applies exactly the checks ValidateProject applies to Project.Name and returns
+// the unwrapped error from that check.
+func (v *ProjectValidator) ValidateProjectName(name string) error { return v.validateName(name) }
+
+// ValidateProjectInstruction validates instruction text independently of a Project
+// value. It applies exactly the checks ValidateProject applies to a non-nil
+// Project.Instruction, so blank text is accepted.
+func (v *ProjectValidator) ValidateProjectInstruction(instruction string) error { return v.validateInstruction(instruction) }
diff --git a/services/llm-api/internal/domain/prompt/README.md b/services/llm-api/internal/domain/prompt/README.md
new file mode 100644
index 00000000..27eca4c9
--- /dev/null
+++ b/services/llm-api/internal/domain/prompt/README.md
@@ -0,0 +1,197 @@
+# Prompt Orchestration Processor
+
+## Overview
+
+The Prompt Orchestration Processor is a pipeline component within the LLM API service that dynamically composes and enhances prompts before they are sent to inference providers. It applies conditional modules based on context, user preferences, and conversation history.
+
+## Architecture
+
+```
+HTTP Request (POST /v1/chat/completions)
+    ↓
+Gin Handler
+    ↓
+Chat Handler
+    ↓
+Prompt Orchestration Processor ← YOU ARE HERE
+    - Check context & user preferences
+    - Apply conditional modules
+    - Assemble final prompts
+    ↓
+Inference Provider Client
+    ↓
+vLLM or Remote Provider
+```
+
+## Features
+
+### Conditional Modules
+
+The processor includes several built-in modules that are automatically applied based on context:
+
+#### 1. **Memory Module** (Optional)
+- **Purpose**: Injects user-specific memory/preferences into prompts
+- **Activation**: Enabled via `PROMPT_ORCHESTRATION_MEMORY=true`
+- **Example**: Adds "User prefers concise answers" to system prompt
+
+#### 2. **Code Assistant Module** (Always Active)
+- **Purpose**: Enhances prompts for code-related questions
+- **Activation**: Needs no configuration flag; applied automatically whenever code keywords are detected in the request (function, implement, debug, etc.)
+- **Adds**: Code formatting guidelines, best practices, error handling tips
+
+#### 3. **Chain-of-Thought Module** (Always Active)
+- **Purpose**: Encourages step-by-step reasoning for complex questions
+- **Activation**: Needs no configuration flag; applied automatically to questions that contain reasoning keywords (why, how, explain, analyze)
+- **Adds**: Instructions to break down problems and think systematically
+
+#### 4. **Tool Instructions Module** (Optional)
+- **Purpose**: Adds instructions for tool usage
+- **Activation**: Enabled via `PROMPT_ORCHESTRATION_TOOLS=true` and user preferences
+- **Adds**: Tool selection and usage guidelines
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `PROMPT_ORCHESTRATION_ENABLED` | `true` | Enable/disable the processor |
+| `PROMPT_ORCHESTRATION_MEMORY` | `false` | Enable memory injection |
+| `PROMPT_ORCHESTRATION_TEMPLATES` | `true` | Enable template-based prompts |
+| `PROMPT_ORCHESTRATION_TOOLS` | `false` | Enable tool usage instructions |
+| `PROMPT_ORCHESTRATION_PERSONA` | `helpful assistant` | Default assistant persona |
+
+### YAML Configuration
+
+In `config/defaults.yaml`:
+
+```yaml
+services:
+  llm_api:
+    prompt_orchestration:
+      enabled: true
+      enable_memory: false
+      enable_templates: true
+      enable_tools: false
+      default_persona: helpful assistant
+```
+
+## Implementation Details
+
+### Package Structure
+
+```
+services/llm-api/internal/domain/prompt/
+├── types.go          # Core interfaces and types
+├── modules.go        # Built-in module implementations
+├── processor.go      # Main processor implementation
+└── processor_test.go # Comprehensive tests
+```
+
+### Module Interface
+
+Each module implements the `Module` interface:
+
+```go
+type Module interface {
+    Name() string
+    ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool
+    Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error)
+}
+```
+
+### Processing Flow
+
+1. **Context Building**: Create a `prompt.Context` with user ID, conversation ID, preferences, and memory
+2. **Module Evaluation**: Each registered module checks if it should apply via `ShouldApply()`
+3. **Module Application**: Applicable modules modify messages via `Apply()`
+4. **Result**: Enhanced messages are passed to the inference provider
+
+## Usage Example
+
+The processor is automatically integrated into the chat completion flow:
+
+```go
+// In ChatHandler.CreateChatCompletion()
+promptCtx := &prompt.Context{
+    UserID:         userID,
+    ConversationID: conversationID,
+    Preferences:    make(map[string]interface{}),
+    Memory:         []string{}, // Load from user memory store
+}
+
+processedMessages, err := h.promptProcessor.Process(ctx, promptCtx, request.Messages)
+if err != nil {
+    // Log and continue with original messages
+    log.Warn().Err(err).Msg("prompt processing failed")
+} else {
+    request.Messages = processedMessages
+}
+```
+
+## Example Transformations
+
+### Before Processing
+```json
+{
+  "messages": [
+    {"role": "user", "content": "How do I implement binary search in Go?"}
+  ]
+}
+```
+
+### After Processing (Code Assistant + Memory modules applied)
+```json
+{
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant.\n\nUse the following personal memory for this user:\n- User prefers detailed code examples\n- User is learning Go\n\nWhen providing code assistance:\n1. Provide clear, well-commented code\n2. Explain your approach and reasoning\n3. Include error handling where appropriate\n4. Follow best practices and conventions\n5. Suggest testing approaches when relevant"
+    },
+    {"role": "user", "content": "How do I implement binary search in Go?"}
+  ]
+}
+```
+
+## Testing
+
+Run the test suite:
+
+```bash
+cd services/llm-api
+go test ./internal/domain/prompt/... -v
+```
+
+Tests cover:
+- Individual module behavior
+- Module conditional logic
+- Processor integration
+- Configuration handling
+
+## Observability
+
+The processor emits OpenTelemetry events:
+- `processing_prompts`: When processing starts
+- `prompts_processed`: When processing completes successfully
+
+Logs include:
+- Applied modules list
+- Processing errors (non-fatal)
+- Conversation and user context
+
+## Future Enhancements
+
+Potential additions to the processor:
+
+1. **Template Library**: Pre-built templates for common tasks (writing, analysis, translation)
+2. **User Memory Store**: Persistent storage for user preferences and memory
+3. **Dynamic Persona**: Adjust assistant personality based on context
+4. **Language Detection**: Automatically adapt to user's language
+5. **Safety Filters**: Add content moderation and safety rules
+6. **A/B Testing**: Compare different prompt strategies
+
+## Related Documentation
+
+- [Prompt Orchestration Design](../../../docs/todo/prompt-orchestration-todo.md)
+- [Data Flow Reference](../../../docs/architecture/data-flow.md)
+- [LLM API Documentation](../../../docs/api/llm-api/README.md)
diff --git a/services/llm-api/internal/domain/prompt/modules.go b/services/llm-api/internal/domain/prompt/modules.go
new file mode 100644
index 00000000..b44a1baf
--- /dev/null
+++ b/services/llm-api/internal/domain/prompt/modules.go
@@ -0,0 +1,810 @@
+package prompt
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	openai "github.com/sashabaranov/go-openai"
+
+	"jan-server/services/llm-api/internal/domain/usersettings"
+)
+
+const (
+	moduleMarkerFormat           = "[[prompt-module:%s]]" // tag appended to injected system content
+	projectInstructionModuleName = "project_instruction"
+	userProfileModuleName        = "user_profile"
+)
+
+// moduleMarker returns the lowercase marker tag for the module called name.
+func moduleMarker(name string) string { return fmt.Sprintf(moduleMarkerFormat, strings.ToLower(name)) }
+
+// hasMarker reports whether content contains marker, ignoring letter case.
+func hasMarker(content, marker string) bool {
+	return strings.Contains(strings.ToLower(content), strings.ToLower(marker))
+}
+
+// cloneMessage copies msg so that edits to the copy's MultiContent parts (including
+// each part's ImageURL), its ToolCalls slice or its FunctionCall do not touch msg. Other
+// fields are copied by plain assignment; empty slices and nil pointers are left as-is.
+func cloneMessage(msg openai.ChatCompletionMessage) openai.ChatCompletionMessage {
+	clone := msg
+
+	if n := len(msg.MultiContent); n > 0 {
+		parts := make([]openai.ChatMessagePart, n)
+		copy(parts, msg.MultiContent)
+		for i := range parts {
+			if src := parts[i].ImageURL; src != nil {
+				imageCopy := *src
+				parts[i].ImageURL = &imageCopy
+			}
+		}
+		clone.MultiContent = parts
+	}
+	if n := len(msg.ToolCalls); n > 0 {
+		clone.ToolCalls = append(make([]openai.ToolCall, 0, n), msg.ToolCalls...)
+	}
+	if msg.FunctionCall != nil {
+		callCopy := *msg.FunctionCall
+		clone.FunctionCall = &callCopy
+	}
+	return clone
+}
+
+// prependInstructionSystemMessage returns messages with a new system message holding
+// instruction placed first. The message ends with moduleName's marker so a later call
+// can detect the injection and skip it.
+//
+// When the trimmed instruction is empty, or a system message already carries the marker,
+// the input slice itself is returned; otherwise the result is a new slice of clones.
+func prependInstructionSystemMessage(messages []openai.ChatCompletionMessage, instruction, moduleName string) []openai.ChatCompletionMessage {
+	text := strings.TrimSpace(instruction)
+	if text == "" || hasModuleMarker(messages, moduleName) {
+		// Nothing to inject, or this module has already been injected.
+		return messages
+	}
+
+	// Content layout: instruction, optional priority notice, newline, marker.
+	segments := []string{text}
+	if moduleName == projectInstructionModuleName {
+		// Special handling for project instructions: explicitly state priority.
+		segments = append(segments,
+			"\n\n",
+			"Project priority: These project-specific instructions have the highest priority. ",
+			"If any user settings, persona, style preferences, or other guidance conflict with these project instructions, ",
+			"you must follow the project instructions.",
+		)
+	}
+	segments = append(segments, "\n", moduleMarker(moduleName))
+
+	systemMsg := openai.ChatCompletionMessage{
+		Role:    openai.ChatMessageRoleSystem,
+		Content: strings.Join(segments, ""),
+	}
+
+	// Clone the originals rather than sharing them with the caller.
+	out := make([]openai.ChatCompletionMessage, 0, len(messages)+1)
+	out = append(out, systemMsg)
+	for i := range messages {
+		out = append(out, cloneMessage(messages[i]))
+	}
+	return out
+}
+
+// PrependProjectInstruction injects the project instruction as the first system message.
+// It delegates to prependInstructionSystemMessage with the project_instruction module
+// name, so a blank or already-injected instruction leaves messages unchanged.
+func PrependProjectInstruction(messages []openai.ChatCompletionMessage, instruction string) []openai.ChatCompletionMessage {
+	return prependInstructionSystemMessage(messages, instruction, projectInstructionModuleName)
+}
+
+// appendSystemContent attaches "additional" instructions into a suitable system message
+// or creates a new system message if needed.
+//
+// Rules:
+//   - If the module marker already exists anywhere, we do nothing.
+//   - We NEVER modify the project_instruction system message for lower-priority modules.
+//   - We never set Content on a system message that carries MultiContent: go-openai
+//     refuses to marshal a message with both fields set (ErrContentFieldsMisused).
+//   - When we create a new system message, we insert it right after the project_instruction
+//     message (if present) so project instructions stay visually/topologically first.
+//   - defaultPersona is the full persona sentence (if non-empty), not just a label.
+func appendSystemContent(
+	messages []openai.ChatCompletionMessage,
+	additional, moduleName, defaultPersona string,
+) []openai.ChatCompletionMessage {
+	additional = strings.TrimSpace(additional)
+	marker := moduleMarker(moduleName)
+	projectMarker := moduleMarker(projectInstructionModuleName)
+
+	// If this module was already injected, don't inject it again.
+	if hasModuleMarker(messages, moduleName) {
+		return messages
+	}
+
+	result := make([]openai.ChatCompletionMessage, 0, len(messages)+1)
+	applied := false
+
+	for _, m := range messages {
+		msg := cloneMessage(m)
+		if msg.Role == openai.ChatMessageRoleSystem && !applied {
+			switch {
+			case hasMarker(msg.Content, projectMarker) && moduleName != projectInstructionModuleName:
+				// Keep project instructions clean and highest priority:
+				// do not append persona/user settings/etc. onto that message.
+			case msg.MultiContent != nil:
+				// Multi-part message: adding Content would make it fail to marshal, so
+				// leave it untouched and keep looking (or create a new message below).
+			case additional != "" && !hasMarker(msg.Content, marker):
+				var b strings.Builder
+				b.WriteString(strings.TrimSpace(msg.Content))
+				b.WriteString("\n\n")
+				b.WriteString(additional)
+				b.WriteString("\n")
+				b.WriteString(marker)
+				msg.Content = b.String()
+				applied = true
+			}
+		}
+		result = append(result, msg)
+	}
+
+	// If we successfully appended to an existing system message, we're done.
+	if applied {
+		return result
+	}
+
+	// If we have nothing to say and no persona, just return.
+	if additional == "" && strings.TrimSpace(defaultPersona) == "" {
+		return result
+	}
+
+	// Create a new system message with persona + additional content.
+	var builder strings.Builder
+	personaText := strings.TrimSpace(defaultPersona)
+	if personaText == "" {
+		personaText = "You are a helpful assistant."
+	}
+	builder.WriteString(personaText)
+	if additional != "" {
+		builder.WriteString("\n\n")
+		builder.WriteString(additional)
+	}
+	builder.WriteString("\n")
+	builder.WriteString(marker)
+
+	systemMsg := openai.ChatCompletionMessage{
+		Role:    openai.ChatMessageRoleSystem,
+		Content: builder.String(),
+	}
+
+	// Insert AFTER project_instruction system message if it exists at the front,
+	// so project instructions are clearly top priority.
+	insertIdx := 0
+	if len(result) > 0 && result[0].Role == openai.ChatMessageRoleSystem && hasMarker(result[0].Content, projectMarker) {
+		insertIdx = 1
+	}
+
+	// Insert at insertIdx
+	result = append(result, openai.ChatCompletionMessage{}) // grow slice
+	copy(result[insertIdx+1:], result[insertIdx:])
+	result[insertIdx] = systemMsg
+
+	return result
+}
+
+// hasModuleMarker reports whether any system-role message already carries moduleName's
+// marker in its Content. Messages with other roles are ignored.
+func hasModuleMarker(messages []openai.ChatCompletionMessage, moduleName string) bool {
+	tag, found := moduleMarker(moduleName), false
+	for i := 0; i < len(messages) && !found; i++ {
+		found = messages[i].Role == openai.ChatMessageRoleSystem && hasMarker(messages[i].Content, tag)
+	}
+	return found
+}
+
+// personaFromPreferences returns the trimmed "persona" preference. Strings and byte
+// slices are used directly and other values are rendered with fmt.Sprint. A nil map, a
+// missing key and a nil value (e.g. JSON null) all yield "".
+func personaFromPreferences(preferences map[string]interface{}) string {
+	// Indexing a nil map is safe; a missing key and an explicit nil both reach "case nil".
+	switch val := preferences["persona"].(type) {
+	case nil:
+		return "" // fmt.Sprint(nil) would otherwise produce the literal "<nil>"
+	case string:
+		return strings.TrimSpace(val)
+	case []byte:
+		return strings.TrimSpace(string(val))
+	default:
+		return strings.TrimSpace(fmt.Sprint(val))
+	}
+}
+
+// disabledModules parses the "disable_modules" preference into a set of lowercase
+// module names. The preference may be a comma-separated string, a []string, or a
+// []interface{} (non-string elements are ignored); any other type, a missing key or
+// a nil map yields an empty set. Names are trimmed and blank names are skipped.
+// The returned map is never nil.
+func disabledModules(preferences map[string]interface{}) map[string]struct{} {
+	// Step 1: normalize every accepted representation to a plain list of names.
+	// Indexing a nil map is safe in Go and yields nil, which matches no case below.
+	var names []string
+	switch raw := preferences["disable_modules"].(type) {
+	case string:
+		names = strings.Split(raw, ",")
+	case []string:
+		names = raw
+	case []interface{}:
+		for _, item := range raw {
+			if name, isString := item.(string); isString {
+				names = append(names, name)
+			}
+		}
+	}
+
+	// Step 2: trim, lowercase and collect the non-blank names.
+	set := make(map[string]struct{}, len(names))
+	for _, name := range names {
+		key := strings.ToLower(strings.TrimSpace(name))
+		if key == "" {
+			continue
+		}
+		set[key] = struct{}{}
+	}
+	return set
+}
+
+// isModuleDisabled reports whether moduleName (compared case-insensitively) is in the "disable_modules" preference.
+func isModuleDisabled(preferences map[string]interface{}, moduleName string) bool {
+	_, off := disabledModules(preferences)[strings.ToLower(moduleName)]
+	return off
+}
+
+// ProjectInstructionModule injects project-specific instructions at the start of the
+// conversation. It holds no state.
+type ProjectInstructionModule struct{}
+
+// NewProjectInstructionModule creates a new project instruction module.
+func NewProjectInstructionModule() *ProjectInstructionModule { return new(ProjectInstructionModule) }
+
+// Name returns the module identifier, "project_instruction"; it is also the key checked
+// against the "disable_modules" preference and the name embedded in the module's marker.
+func (m *ProjectInstructionModule) Name() string {
+	return projectInstructionModuleName
+}
+
+// ShouldApply reports whether a project instruction should be injected: ctx must be live, promptCtx
+// non-nil, the module not disabled, and ProjectInstruction non-blank. messages is not inspected.
+func (m *ProjectInstructionModule) ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	switch {
+	case ctx == nil || ctx.Err() != nil:
+		return false
+	case promptCtx == nil:
+		return false
+	case promptCtx.Preferences != nil && isModuleDisabled(promptCtx.Preferences, m.Name()):
+		return false
+	}
+	return strings.TrimSpace(promptCtx.ProjectInstruction) != ""
+}
+
+// Apply prepends the project instruction as a system message.
+// A cancelled ctx returns messages with ctx's error; a nil promptCtx or blank instruction is a no-op.
+func (m *ProjectInstructionModule) Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil {
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return messages, ctxErr
+		}
+	}
+	if promptCtx != nil && strings.TrimSpace(promptCtx.ProjectInstruction) != "" {
+		return PrependProjectInstruction(messages, promptCtx.ProjectInstruction), nil
+	}
+	return messages, nil
+}
+
+// PersonaModule ensures a consistent system prompt/persona is applied.
+type PersonaModule struct {
+	defaultPersona string // trimmed fallback persona, used when preferences supply none
+}
+
+// NewPersonaModule creates a persona module whose fallback persona is defaultPersona, trimmed.
+func NewPersonaModule(defaultPersona string) *PersonaModule {
+	module := new(PersonaModule)
+	module.defaultPersona = strings.TrimSpace(defaultPersona)
+	return module
+}
+
+// Name returns the module identifier, "persona".
+func (m *PersonaModule) Name() string { return "persona" }
+
+// resolvePersona returns the "persona" preference if set, else the module default, else "helpful assistant".
+func (m *PersonaModule) resolvePersona(promptCtx *Context) string {
+	if promptCtx != nil {
+		if fromPrefs := personaFromPreferences(promptCtx.Preferences); fromPrefs != "" {
+			return fromPrefs
+		}
+	}
+	if m.defaultPersona == "" {
+		return "helpful assistant"
+	}
+	return m.defaultPersona
+}
+
+// ShouldApply applies when a persona is available and module not disabled.
+// It also requires a live ctx and a non-nil promptCtx. The messages argument is
+// not inspected.
+func (m *PersonaModule) ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	contextUsable := ctx != nil && ctx.Err() == nil && promptCtx != nil
+	if !contextUsable {
+		return false
+	}
+	if prefs := promptCtx.Preferences; prefs != nil && isModuleDisabled(prefs, m.Name()) {
+		return false
+	}
+	// resolvePersona falls back to a built-in default, so the result is non-blank in practice.
+	return strings.TrimSpace(m.resolvePersona(promptCtx)) != ""
+}
+
+// Apply injects the persona as a system message via appendSystemContent.
+//
+// It returns messages with ctx's error when ctx is cancelled, and messages unchanged
+// when promptCtx is nil or no persona can be resolved. Otherwise the persona sentence
+// is passed as the defaultPersona argument with empty additional content, so
+// appendSystemContent creates a separate persona system message (unless the persona
+// marker is already present).
+func (m *PersonaModule) Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil {
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return messages, ctxErr
+		}
+	}
+	if promptCtx == nil {
+		return messages, nil
+	}
+	description := strings.TrimSpace(m.resolvePersona(promptCtx))
+	if description == "" {
+		return messages, nil
+	}
+
+	// Full persona sentence. Note that project/system instructions still win.
+	const personaTemplate = "You are a %s. Use this persona for tone and behavior, but never override explicit system or project instructions. " +
+		"If any user settings or persona preferences conflict with project instructions, always follow the project instructions."
+	return appendSystemContent(messages, "", m.Name(), fmt.Sprintf(personaTemplate, description)), nil
+}
+
+// UserProfileModule injects user profile personalization into the system prompt.
+// It holds no state.
+type UserProfileModule struct{}
+
+// NewUserProfileModule creates a new user profile module.
+func NewUserProfileModule() *UserProfileModule { return new(UserProfileModule) }
+
+// Name returns the module identifier, "user_profile"; ShouldApply checks the same name
+// against the "disable_modules" preference.
+func (m *UserProfileModule) Name() string {
+	return userProfileModuleName
+}
+
+// ShouldApply determines if user profile information should be injected. It requires a live ctx, a
+// promptCtx with a non-nil Profile, and that the module is not disabled; messages is not inspected.
+func (m *UserProfileModule) ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	if ctx == nil || ctx.Err() != nil || promptCtx == nil || promptCtx.Profile == nil {
+		return false
+	}
+	if prefs := promptCtx.Preferences; prefs != nil && isModuleDisabled(prefs, m.Name()) {
+		return false
+	}
+	// Any single personalization field is enough (original note: base style defaults to Friendly).
+	profile := promptCtx.Profile
+	if profile.BaseStyle != "" {
+		return true
+	}
+	for _, field := range []string{profile.CustomInstructions, profile.NickName, profile.Occupation, profile.MoreAboutYou} {
+		if strings.TrimSpace(field) != "" {
+			return true
+		}
+	}
+	return false
+}
+
+// baseStyleInstruction maps a base style to the sentence injected into the
+// prompt. Known styles get a fixed sentence, an unknown non-blank style is
+// echoed back verbatim, and a blank style yields the empty string.
+func baseStyleInstruction(style usersettings.BaseStyle) string {
+	if style == usersettings.BaseStyleConcise {
+		return "Use a concise style: brief, direct answers with minimal filler."
+	}
+	if style == usersettings.BaseStyleFriendly {
+		return "Use a friendly, warm, and encouraging tone while staying helpful."
+	}
+	if style == usersettings.BaseStyleProfessional {
+		return "Use a professional, clear, and structured tone appropriate for business settings."
+	}
+
+	// Unrecognised value: skip it when blank, otherwise pass it through.
+	if strings.TrimSpace(string(style)) == "" {
+		return ""
+	}
+	return fmt.Sprintf("Use the user's preferred style: %s.", style)
+}
+
+// Apply injects user profile guidance and persona instructions.
+func (m *UserProfileModule) Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil {
+		if err := ctx.Err(); err != nil {
+			return messages, err
+		}
+	}
+	if promptCtx == nil || promptCtx.Profile == nil {
+		return messages, nil
+	}
+
+	profile := promptCtx.Profile
+	var sections []string
+
+	// General note about precedence vs project instructions.
+	sections = append(sections,
+		"User-level settings are preferences for style and context. "+
+			"If they ever conflict with explicit project or system instructions, always follow the project or system instructions.")
+
+	if styleText := baseStyleInstruction(profile.BaseStyle); styleText != "" {
+		sections = append(sections, styleText)
+	}
+
+	if custom := strings.TrimSpace(profile.CustomInstructions); custom != "" {
+		sections = append(sections, fmt.Sprintf("Custom instructions from the user:\n%s", custom))
+	}
+
+	var details []string
+	if nick := strings.TrimSpace(profile.NickName); nick != "" {
+		details = append(details, fmt.Sprintf("Address the user as \"%s\".", nick))
+	}
+	if occupation := strings.TrimSpace(profile.Occupation); occupation != "" {
+		details = append(details, fmt.Sprintf("Occupation: %s.", occupation))
+	}
+	if more := strings.TrimSpace(profile.MoreAboutYou); more != "" {
+		details = append(details, fmt.Sprintf("About the user: %s.", more))
+	}
+	if len(details) > 0 {
+		var builder strings.Builder
+		builder.WriteString("User context:\n")
+		for _, detail := range details {
+			builder.WriteString("- ")
+			builder.WriteString(detail)
+			builder.WriteString("\n")
+		}
+		sections = append(sections, strings.TrimSpace(builder.String()))
+	}
+
+	instruction := strings.TrimSpace(strings.Join(sections, "\n\n"))
+	if instruction == "" {
+		return messages, nil
+	}
+
+	result := appendSystemContent(messages, instruction, m.Name(), "")
+	return result, nil
+}
+
+// WithDisabledModules returns a shallow copy of ctx whose Preferences carry
+// the given module disable list under the "disable_modules" key.
+//
+// The input is never modified: the Context struct is copied by value and its
+// Preferences map is cloned before the key is written, so callers that share a
+// Context (or its preferences map) do not observe the change. Other
+// preference values, and slice/pointer fields of Context, are shared with the
+// original. Any previous "disable_modules" value is replaced by disable.
+//
+// Because the result is a copy, fields written to it afterwards (for example
+// AppliedModules, which Process sets) are not reflected on ctx.
+//
+// A nil ctx yields a fresh Context containing only the disable list.
+func WithDisabledModules(ctx *Context, disable []string) *Context {
+	if ctx == nil {
+		return &Context{
+			Preferences: map[string]interface{}{
+				"disable_modules": disable,
+			},
+		}
+	}
+
+	// Copy the struct so the caller's Context keeps its own Preferences pointer.
+	clone := *ctx
+
+	// Clone the map: writing into the original would leak the disable list to
+	// every other holder of the same preferences.
+	prefs := make(map[string]interface{}, len(ctx.Preferences)+1)
+	for key, value := range ctx.Preferences {
+		prefs[key] = value
+	}
+	prefs["disable_modules"] = disable
+	clone.Preferences = prefs
+
+	return &clone
+}
+
+// MemoryModule adds user memory to system prompts.
+type MemoryModule struct {
+	// enabled gates the module: when false, ShouldApply always reports false.
+	enabled bool
+}
+
+// NewMemoryModule creates a new memory module. The enabled flag is stored
+// as-is and consulted by ShouldApply.
+func NewMemoryModule(enabled bool) *MemoryModule {
+	return &MemoryModule{enabled: enabled}
+}
+
+// Name returns the module identifier, "memory".
+func (m *MemoryModule) Name() string {
+	return "memory"
+}
+
+// ShouldApply reports whether memory should be injected: the module must be
+// enabled, ctx must be non-nil and not done, the module must not be disabled
+// via preferences, and promptCtx must carry at least one memory item.
+func (m *MemoryModule) ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	usable := ctx != nil && ctx.Err() == nil && m.enabled && promptCtx != nil
+	if !usable {
+		return false
+	}
+	if prefs := promptCtx.Preferences; prefs != nil && isModuleDisabled(prefs, m.Name()) {
+		return false
+	}
+	return len(promptCtx.Memory) != 0
+}
+
+// Apply adds memory to the system prompt.
+func (m *MemoryModule) Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil {
+		if err := ctx.Err(); err != nil {
+			return messages, err
+		}
+	}
+	if promptCtx == nil || len(promptCtx.Memory) == 0 {
+		return messages, nil
+	}
+
+	var builder strings.Builder
+	builder.WriteString("Use the following personal memory for this user when helpful, without overriding project or system instructions:\n")
+	for _, item := range promptCtx.Memory {
+		builder.WriteString("- ")
+		builder.WriteString(item)
+		builder.WriteString("\n")
+	}
+
+	result := appendSystemContent(messages, strings.TrimSpace(builder.String()), m.Name(), "")
+	return result, nil
+}
+
+// ToolInstructionsModule adds tool usage instructions.
+type ToolInstructionsModule struct {
+	// enabled gates the module: when false, ShouldApply always reports false.
+	enabled bool
+}
+
+// NewToolInstructionsModule creates a new tool instructions module. The
+// enabled flag is stored as-is and consulted by ShouldApply.
+func NewToolInstructionsModule(enabled bool) *ToolInstructionsModule {
+	return &ToolInstructionsModule{enabled: enabled}
+}
+
+// Name returns the module identifier, "tool_instructions".
+func (m *ToolInstructionsModule) Name() string {
+	return "tool_instructions"
+}
+
+// ShouldApply reports whether tool instructions should be added. It requires
+// a live, non-nil ctx, an enabled module, a prompt context in which the
+// module is not disabled, and a positive result from detectToolUsage.
+func (m *ToolInstructionsModule) ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	ready := ctx != nil && ctx.Err() == nil && m.enabled && promptCtx != nil
+	if !ready {
+		return false
+	}
+	if prefs := promptCtx.Preferences; prefs != nil && isModuleDisabled(prefs, m.Name()) {
+		return false
+	}
+	return detectToolUsage(promptCtx, messages)
+}
+
+// Apply adds tool instructions to the system prompt.
+//
+// The instruction text is a fixed three-sentence preamble, optionally followed
+// by an "Available tools" section taken from Preferences["tool_descriptions"].
+// That preference may be:
+//   - a string, appended on one line after "Available tools: ";
+//   - a []string, rendered as a bulleted list;
+//   - a []interface{}, rendered the same way using its string elements
+//     (this is the shape a JSON array takes when decoded into
+//     map[string]interface{}); non-string elements are ignored.
+//
+// Blank entries are skipped, and the list header is written only when at
+// least one entry survives. Messages are returned untouched when promptCtx is
+// nil or detectToolUsage reports no tool usage; a done ctx returns its error.
+func (m *ToolInstructionsModule) Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil {
+		if err := ctx.Err(); err != nil {
+			return messages, err
+		}
+	}
+	if promptCtx == nil || !detectToolUsage(promptCtx, messages) {
+		return messages, nil
+	}
+
+	var builder strings.Builder
+	builder.WriteString("You have access to various tools. Always choose the best tool for the task.\n")
+	builder.WriteString("When you need to search for information, use web search. When you need to execute code, use the code execution tool.\n")
+	builder.WriteString("Tool usage must respect project instructions and system-level constraints at all times.")
+
+	// writeList emits the bulleted section, deferring the header until the
+	// first non-blank item so an all-blank list produces no output at all.
+	writeList := func(items []string) {
+		headerWritten := false
+		for _, item := range items {
+			item = strings.TrimSpace(item)
+			if item == "" {
+				continue
+			}
+			if !headerWritten {
+				builder.WriteString("\nAvailable tools:\n")
+				headerWritten = true
+			}
+			builder.WriteString("- ")
+			builder.WriteString(item)
+			builder.WriteString("\n")
+		}
+	}
+
+	// Indexing a nil map is safe and yields nil, which matches no case below.
+	switch desc := promptCtx.Preferences["tool_descriptions"].(type) {
+	case string:
+		if trimmed := strings.TrimSpace(desc); trimmed != "" {
+			builder.WriteString("\nAvailable tools: ")
+			builder.WriteString(trimmed)
+		}
+	case []string:
+		writeList(desc)
+	case []interface{}:
+		items := make([]string, 0, len(desc))
+		for _, raw := range desc {
+			if text, ok := raw.(string); ok {
+				items = append(items, text)
+			}
+		}
+		writeList(items)
+	}
+
+	// TrimSpace drops the trailing newline left by the bulleted list.
+	result := appendSystemContent(messages, strings.TrimSpace(builder.String()), m.Name(), "")
+	return result, nil
+}
+
+// CodeAssistantModule adds code-specific instructions.
+// It carries no configuration; whether it fires is decided per request by
+// ShouldApply.
+type CodeAssistantModule struct{}
+
+// NewCodeAssistantModule creates a new code assistant module.
+func NewCodeAssistantModule() *CodeAssistantModule {
+	return &CodeAssistantModule{}
+}
+
+// Name returns the module identifier, "code_assistant".
+func (m *CodeAssistantModule) Name() string {
+	return "code_assistant"
+}
+
+// ShouldApply reports whether the most recent user message looks like a code
+// question. Only that one message is inspected (lower-cased before being
+// passed to isLikelyCodeQuery). A nil or done ctx, or a preference that
+// disables the module, yields false.
+func (m *CodeAssistantModule) ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	if ctx == nil || ctx.Err() != nil {
+		return false
+	}
+	if promptCtx != nil && promptCtx.Preferences != nil && isModuleDisabled(promptCtx.Preferences, m.Name()) {
+		return false
+	}
+
+	// Walk backwards and decide on the first user message encountered.
+	for idx := len(messages) - 1; idx >= 0; idx-- {
+		if messages[idx].Role != openai.ChatMessageRoleUser {
+			continue
+		}
+		return isLikelyCodeQuery(strings.ToLower(messages[idx].Content))
+	}
+	return false
+}
+
+// Apply adds code assistant instructions.
+func (m *CodeAssistantModule) Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil {
+		if err := ctx.Err(); err != nil {
+			return messages, err
+		}
+	}
+	if hasModuleMarker(messages, m.Name()) {
+		return messages, nil
+	}
+
+	var builder strings.Builder
+	builder.WriteString("When providing code assistance:\n")
+	builder.WriteString("1. Provide clear, well-commented code.\n")
+	builder.WriteString("2. Explain your approach and reasoning.\n")
+	builder.WriteString("3. Include error handling where appropriate.\n")
+	builder.WriteString("4. Follow best practices and conventions.\n")
+	builder.WriteString("5. Suggest testing approaches when relevant.\n")
+	builder.WriteString("6. Respect project instructions and user constraints; never violate them to simplify code.")
+
+	result := appendSystemContent(messages, builder.String(), m.Name(), "")
+	return result, nil
+}
+
+// ChainOfThoughtModule adds chain-of-thought reasoning instructions.
+// It carries no configuration; whether it fires is decided per request by
+// ShouldApply.
+type ChainOfThoughtModule struct{}
+
+// NewChainOfThoughtModule creates a new chain-of-thought module.
+func NewChainOfThoughtModule() *ChainOfThoughtModule {
+	return &ChainOfThoughtModule{}
+}
+
+// Name returns the module identifier, "chain_of_thought".
+func (m *ChainOfThoughtModule) Name() string {
+	return "chain_of_thought"
+}
+
+// ShouldApply reports whether the most recent user message is judged complex
+// by isComplexQuestion. Only that one message is inspected. A nil or done
+// ctx, or a preference that disables the module, yields false.
+func (m *ChainOfThoughtModule) ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	if ctx == nil || ctx.Err() != nil {
+		return false
+	}
+	if promptCtx != nil && promptCtx.Preferences != nil && isModuleDisabled(promptCtx.Preferences, m.Name()) {
+		return false
+	}
+
+	// Walk backwards and decide on the first user message encountered.
+	for idx := len(messages) - 1; idx >= 0; idx-- {
+		if messages[idx].Role != openai.ChatMessageRoleUser {
+			continue
+		}
+		return isComplexQuestion(messages[idx].Content)
+	}
+	return false
+}
+
+// Apply adds chain-of-thought instructions.
+func (m *ChainOfThoughtModule) Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil {
+		if err := ctx.Err(); err != nil {
+			return messages, err
+		}
+	}
+	if hasModuleMarker(messages, m.Name()) {
+		return messages, nil
+	}
+
+	var builder strings.Builder
+	builder.WriteString("For complex questions, think step-by-step:\n")
+	builder.WriteString("1. Break down the problem\n")
+	builder.WriteString("2. Analyze each component\n")
+	builder.WriteString("3. Consider different perspectives\n")
+	builder.WriteString("4. Synthesize your conclusion\n")
+	builder.WriteString("5. Provide a clear, structured answer")
+
+	result := appendSystemContent(messages, builder.String(), m.Name(), "")
+	return result, nil
+}
+
+// detectToolUsage reports whether tools are in play for this request: either
+// the "use_tools" preference is the boolean true, or any message has the tool
+// role, carries tool calls, or carries a function call.
+func detectToolUsage(promptCtx *Context, messages []openai.ChatCompletionMessage) bool {
+	if promptCtx != nil {
+		// Indexing a nil Preferences map is safe and simply fails the assertion.
+		if enabled, ok := promptCtx.Preferences["use_tools"].(bool); ok && enabled {
+			return true
+		}
+	}
+
+	for idx := range messages {
+		msg := &messages[idx]
+		if msg.Role == openai.ChatMessageRoleTool || len(msg.ToolCalls) > 0 || msg.FunctionCall != nil {
+			return true
+		}
+	}
+	return false
+}
+
+// isLikelyCodeQuery is a keyword heuristic that reports whether content looks
+// like a programming question.
+//
+// Matching is case-insensitive: content is lower-cased here, so callers need
+// not normalise it first (passing already lower-cased text is harmless).
+//
+// Decision order:
+//  1. empty content is never a code query;
+//  2. a Markdown code fence ("```") or any strong signal (language keywords,
+//     error vocabulary, data-format names) is sufficient on its own;
+//  3. the phrase "code of conduct" vetoes the weaker check below;
+//  4. otherwise both a code-related keyword and an action verb must appear.
+//
+// All checks are plain substring matches, so a keyword embedded in a longer
+// word also counts.
+func isLikelyCodeQuery(content string) bool {
+	if content == "" {
+		return false
+	}
+	// The keyword lists are lower-case; normalise so "Write a FUNCTION" matches.
+	content = strings.ToLower(content)
+
+	containsAny := func(needles []string) bool {
+		for _, needle := range needles {
+			if strings.Contains(content, needle) {
+				return true
+			}
+		}
+		return false
+	}
+
+	if strings.Contains(content, "```") {
+		return true
+	}
+	strongSignals := []string{"func ", "function(", "class ", "package ", "import ", "console.log", "panic(", "error ", "exception", "stack trace", "traceback", "sql", "json", "yaml", "schema"}
+	if containsAny(strongSignals) {
+		return true
+	}
+
+	// "code" in "code of conduct" would otherwise count as a code keyword.
+	if strings.Contains(content, "code of conduct") {
+		return false
+	}
+
+	codeKeywords := []string{"code", "function", "implement", "debug", "bug", "syntax", "compile", "script", "api", "snippet", "library"}
+	actionKeywords := []string{"write", "example", "implement", "show", "fix", "break down", "refactor", "debug", "troubleshoot"}
+	return containsAny(codeKeywords) && containsAny(actionKeywords)
+}
+
+// isComplexQuestion is a heuristic that reports whether content is likely to
+// benefit from step-by-step reasoning.
+//
+// It returns true when any of the following holds (case-insensitively):
+//   - the text contains "why" or "how" as a whole word ("show", "somehow" and
+//     "however" do not count);
+//   - the text contains one of the reasoning phrases "explain", "analyze",
+//     "compare", "evaluate", "what if" or "step by step" as a substring;
+//   - the text has at least 20 whitespace-separated words and a "?";
+//   - the text has at least 30 whitespace-separated words.
+//
+// Blank content is never complex.
+func isComplexQuestion(content string) bool {
+	if strings.TrimSpace(content) == "" {
+		return false
+	}
+	lower := strings.ToLower(content)
+
+	// "why"/"how" are short enough to hide inside unrelated words, so compare
+	// them against whole words. Words are runs of ASCII letters; everything
+	// else (spaces, punctuation, apostrophes) is a separator.
+	words := strings.FieldsFunc(lower, func(r rune) bool {
+		return r < 'a' || r > 'z'
+	})
+	for _, word := range words {
+		if word == "why" || word == "how" {
+			return true
+		}
+	}
+
+	// Longer stems and multi-word phrases keep substring matching so that
+	// inflected forms such as "explains" or "compared" still hit.
+	reasoningPhrases := []string{"explain", "analyze", "compare", "evaluate", "what if", "step by step"}
+	for _, phrase := range reasoningPhrases {
+		if strings.Contains(lower, phrase) {
+			return true
+		}
+	}
+
+	// Fall back to length: long messages, or moderately long questions.
+	wordCount := len(strings.Fields(content))
+	if wordCount >= 20 && strings.Contains(content, "?") {
+		return true
+	}
+	return wordCount >= 30
+}
diff --git a/services/llm-api/internal/domain/prompt/processor.go b/services/llm-api/internal/domain/prompt/processor.go
new file mode 100644
index 00000000..c321297b
--- /dev/null
+++ b/services/llm-api/internal/domain/prompt/processor.go
@@ -0,0 +1,166 @@
+package prompt
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/rs/zerolog"
+	openai "github.com/sashabaranov/go-openai"
+)
+
+// ProcessorImpl implements the Processor interface by running an ordered list
+// of prompt modules over the incoming messages.
+type ProcessorImpl struct {
+	// config is the configuration the processor was built with; Process
+	// consults config.Enabled on every call.
+	config  ProcessorConfig
+	// modules holds the registered modules; RegisterModule re-sorts it by
+	// ascending priority after every insertion.
+	modules []moduleEntry
+	// log is the logger handed to NewProcessor, tagged with the
+	// "prompt-processor" component.
+	log     zerolog.Logger
+}
+
+// moduleEntry pairs a registered module with the priority computed for it by
+// modulePriority at registration time. Lower priorities are applied first.
+type moduleEntry struct {
+	module   Module
+	priority int
+}
+
+func modulePriority(module Module) int {
+	switch module.(type) {
+	case *ProjectInstructionModule:
+		return -10
+	case *PersonaModule:
+		return 0
+	case *UserProfileModule:
+		return 5
+	case *MemoryModule:
+		return 10
+	case *ToolInstructionsModule:
+		return 20
+	case *CodeAssistantModule:
+		return 30
+	case *ChainOfThoughtModule:
+		return 40
+	default:
+		return 100
+	}
+}
+
+// NewProcessor builds a prompt processor from config.
+//
+// When config.Enabled is false the processor is returned with no modules
+// registered and its logger tagged mode=noop. Otherwise the built-in modules
+// are registered: project instructions and user profile always, the persona
+// module when DefaultPersona is non-blank, and the memory, tool and
+// template-based (code assistant, chain of thought) modules according to their
+// respective config flags.
+func NewProcessor(config ProcessorConfig, log zerolog.Logger) *ProcessorImpl {
+	p := &ProcessorImpl{
+		config:  config,
+		modules: make([]moduleEntry, 0),
+		log:     log.With().Str("component", "prompt-processor").Logger(),
+	}
+
+	if !config.Enabled {
+		p.log = p.log.With().Str("mode", "noop").Logger()
+		return p
+	}
+
+	// Collect the modules first, then register them in the same order.
+	builtins := []Module{NewProjectInstructionModule()}
+
+	// A base persona is only added when a default one is configured.
+	if strings.TrimSpace(config.DefaultPersona) != "" {
+		builtins = append(builtins, NewPersonaModule(config.DefaultPersona))
+	}
+
+	builtins = append(builtins, NewUserProfileModule())
+
+	if config.EnableMemory {
+		builtins = append(builtins, NewMemoryModule(true))
+	}
+	if config.EnableTools {
+		builtins = append(builtins, NewToolInstructionsModule(true))
+	}
+	// Template-based modules are conditional at request time as well.
+	if config.EnableTemplates {
+		builtins = append(builtins, NewCodeAssistantModule(), NewChainOfThoughtModule())
+	}
+
+	for _, mod := range builtins {
+		p.RegisterModule(mod)
+	}
+	return p
+}
+
+// RegisterModule adds a module to the processor and keeps the module list
+// ordered by ascending priority (see modulePriority).
+//
+// The sort is stable, so modules that share a priority (for example several
+// custom modules that all fall into the default bucket) keep the order in
+// which they were registered. A nil module is ignored with a warning rather
+// than being stored, since calling Name on it would panic.
+//
+// RegisterModule performs no locking.
+func (p *ProcessorImpl) RegisterModule(module Module) {
+	if module == nil {
+		p.log.Warn().Msg("ignoring nil prompt module")
+		return
+	}
+
+	entry := moduleEntry{
+		module:   module,
+		priority: modulePriority(module),
+	}
+	p.modules = append(p.modules, entry)
+
+	// SliceStable (not Slice) so equal priorities preserve registration order.
+	sort.SliceStable(p.modules, func(i, j int) bool {
+		return p.modules[i].priority < p.modules[j].priority
+	})
+	p.log.Debug().Str("module", module.Name()).Int("priority", entry.priority).Msg("registered prompt module")
+}
+
+// Process applies all relevant modules to the messages
+func (p *ProcessorImpl) Process(
+	ctx context.Context,
+	promptCtx *Context,
+	messages []openai.ChatCompletionMessage,
+) ([]openai.ChatCompletionMessage, error) {
+	if ctx != nil && ctx.Err() != nil {
+		return messages, ctx.Err()
+	}
+	if promptCtx == nil {
+		promptCtx = &Context{}
+	}
+	if !p.config.Enabled {
+		return messages, nil
+	}
+	if len(messages) == 0 {
+		return messages, nil
+	}
+
+	result := messages
+	appliedModules := make([]string, 0, len(p.modules))
+
+	for idx, entry := range p.modules {
+		if ctx != nil && ctx.Err() != nil {
+			p.log.Warn().Err(ctx.Err()).Msg("context cancelled during prompt processing")
+			return result, ctx.Err()
+		}
+
+		if isModuleDisabled(promptCtx.Preferences, entry.module.Name()) {
+			p.log.Debug().
+				Str("module", entry.module.Name()).
+				Str("conversation_id", promptCtx.ConversationID).
+				Msg("prompt module disabled via preferences")
+			continue
+		}
+
+		if entry.module.ShouldApply(ctx, promptCtx, result) {
+			before := result
+			var err error
+			result, err = entry.module.Apply(ctx, promptCtx, result)
+			if err != nil {
+				p.log.Error().
+					Err(err).
+					Str("module", entry.module.Name()).
+					Str("position", fmt.Sprintf("%d/%d", idx+1, len(p.modules))).
+					Msg("failed to apply prompt module")
+				return before, err
+			}
+			if result == nil {
+				return before, fmt.Errorf("module %s returned nil messages", entry.module.Name())
+			}
+			appliedModules = append(appliedModules, entry.module.Name())
+		}
+	}
+
+	if len(appliedModules) > 0 {
+		promptCtx.AppliedModules = append([]string(nil), appliedModules...)
+		p.log.Debug().
+			Strs("applied_modules", appliedModules).
+			Str("conversation_id", promptCtx.ConversationID).
+			Msg("applied prompt orchestration modules")
+	} else {
+		promptCtx.AppliedModules = nil
+	}
+
+	return result, nil
+}
diff --git a/services/llm-api/internal/domain/prompt/types.go b/services/llm-api/internal/domain/prompt/types.go
new file mode 100644
index 00000000..7b2c5a43
--- /dev/null
+++ b/services/llm-api/internal/domain/prompt/types.go
@@ -0,0 +1,48 @@
+package prompt
+
+import (
+	"context"
+
+	openai "github.com/sashabaranov/go-openai"
+
+	"jan-server/services/llm-api/internal/domain/usersettings"
+)
+
+// ProcessorConfig contains configuration for the prompt orchestration processor
+type ProcessorConfig struct {
+	// Enabled is the master switch: when false, NewProcessor registers no
+	// modules and Process returns its input unchanged.
+	Enabled         bool
+	// EnableMemory registers the memory module.
+	EnableMemory    bool
+	// EnableTemplates registers the code-assistant and chain-of-thought modules.
+	EnableTemplates bool
+	// EnableTools registers the tool-instructions module.
+	EnableTools     bool
+	// DefaultPersona, when non-blank, registers a persona module built from it.
+	DefaultPersona  string
+}
+
+// Context contains contextual information for prompt processing
+type Context struct {
+	UserID             uint
+	// ConversationID is attached to the processor's debug log entries.
+	ConversationID     string
+	Language           string
+	// Preferences is a free-form bag read by the modules. Keys used in this
+	// package include "disable_modules", "use_tools" and "tool_descriptions".
+	Preferences        map[string]interface{}
+	// Memory holds the memory items the memory module renders as a bulleted list.
+	Memory             []string
+	// NOTE(review): presumably consumed by the project-instruction module,
+	// which is not visible here; confirm.
+	ProjectInstruction string
+	// AppliedModules is written by Process: the names of the modules that
+	// ran, or nil when none did.
+	AppliedModules     []string
+	// Profile is the user's profile settings; the user-profile module does
+	// nothing when it is nil.
+	Profile            *usersettings.ProfileSettings
+}
+
+// Module represents a prompt module that can be applied.
+// The processor calls ShouldApply first and only calls Apply when it returns
+// true.
+type Module interface {
+	// Name returns the module identifier. The processor uses it for logging
+	// and for the per-request disable check.
+	Name() string
+
+	// ShouldApply determines if this module should be applied based on context
+	ShouldApply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) bool
+
+	// Apply modifies the messages array by adding or modifying prompts.
+	// The processor treats a non-nil error, or a nil slice with a nil error,
+	// as a failure.
+	Apply(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error)
+}
+
+// Processor orchestrates prompt composition by applying conditional modules
+type Processor interface {
+	// Process takes the messages of a request and applies all relevant
+	// modules, returning the resulting messages.
+	Process(ctx context.Context, promptCtx *Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, error)
+}
diff --git a/services/llm-api/internal/domain/provider.go b/services/llm-api/internal/domain/provider.go
new file mode 100644
index 00000000..4ce6c5f5
--- /dev/null
+++ b/services/llm-api/internal/domain/provider.go
@@ -0,0 +1,62 @@
+package domain
+
+import (
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/domain/apikey"
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/domain/project"
+	"jan-server/services/llm-api/internal/domain/prompt"
+	"jan-server/services/llm-api/internal/domain/user"
+	"jan-server/services/llm-api/internal/domain/usersettings"
+)
+
+// ServiceProvider provides all domain services.
+// It is a wire provider set: each entry is a constructor or config provider
+// that wire can use to build the dependency graph.
+var ServiceProvider = wire.NewSet(
+	// Conversation domain
+	conversation.NewConversationService,
+
+	// Project domain
+	project.NewProjectService,
+
+	// Model domain
+	model.NewProviderModelService,
+	model.NewModelCatalogService,
+	model.NewProviderService,
+
+	// User domain
+	user.NewService,
+
+	// User settings
+	usersettings.NewService,
+
+	// API keys (config provider first, then the service that consumes it)
+	ProvideAPIKeyConfig,
+	apikey.NewService,
+
+	// Prompt orchestration (config provider first, then the processor)
+	ProvidePromptProcessorConfig,
+	prompt.NewProcessor,
+)
+
+func ProvideAPIKeyConfig(cfg *config.Config) apikey.Config {
+	return apikey.Config{
+		DefaultTTL: cfg.APIKeyDefaultTTL,
+		MaxTTL:     cfg.APIKeyMaxTTL,
+		MaxPerUser: cfg.APIKeyMaxPerUser,
+		KeyPrefix:  cfg.APIKeyPrefix,
+	}
+}
+
+func ProvidePromptProcessorConfig(cfg *config.Config, log zerolog.Logger) prompt.ProcessorConfig {
+	return prompt.ProcessorConfig{
+		Enabled:         cfg.PromptOrchestrationEnabled,
+		EnableMemory:    cfg.PromptOrchestrationEnableMemory,
+		EnableTemplates: cfg.PromptOrchestrationEnableTemplates,
+		EnableTools:     cfg.PromptOrchestrationEnableTools,
+		DefaultPersona:  cfg.PromptOrchestrationDefaultPersona,
+	}
+}
diff --git a/services/llm-api/internal/domain/query/query.go b/services/llm-api/internal/domain/query/query.go
new file mode 100644
index 00000000..95440c07
--- /dev/null
+++ b/services/llm-api/internal/domain/query/query.go
@@ -0,0 +1,8 @@
+package query
+
+// Pagination carries paging parameters for list queries. The pointer fields
+// are optional; nil means the parameter was not supplied.
+// NOTE(review): exact semantics of After (cursor ID?) and the accepted Order
+// values are defined by the consumers of this type; confirm there.
+type Pagination struct {
+	Limit  *int
+	Offset *int
+	After  *uint
+	Order  string
+}
diff --git a/services/llm-api/internal/domain/user/user.go b/services/llm-api/internal/domain/user/user.go
new file mode 100644
index 00000000..07a9da62
--- /dev/null
+++ b/services/llm-api/internal/domain/user/user.go
@@ -0,0 +1,77 @@
+// Package user provides user domain models and behaviors.
+package user
+
+import (
+	"context"
+	"errors"
+	"time"
+)
+
+// User models an application user resolved from an external identity provider.
+// Issuer and Subject are the lookup pair (see Repository.FindByIssuerAndSubject);
+// the pointer fields are optional attributes.
+type User struct {
+	ID           uint
+	AuthProvider string
+	Issuer       string
+	Subject      string
+	Username     *string
+	Email        *string
+	Name         *string
+	Picture      *string
+	CreatedAt    time.Time
+	UpdatedAt    time.Time
+}
+
+// Identity encapsulates the externally provided identity attributes.
+// Issuer and Subject are required by EnsureUser; Provider may be empty, in
+// which case EnsureUser substitutes "keycloak".
+type Identity struct {
+	Provider string
+	Issuer   string
+	Subject  string
+	Username *string
+	Email    *string
+	Name     *string
+	Picture  *string
+}
+
+// Repository defines storage operations for users.
+type Repository interface {
+	// FindByIssuerAndSubject looks a user up by the external identity pair.
+	FindByIssuerAndSubject(ctx context.Context, issuer, subject string) (*User, error)
+	// FindByID looks a user up by its internal ID.
+	FindByID(ctx context.Context, id uint) (*User, error)
+	// Upsert stores the user and returns the persisted record.
+	Upsert(ctx context.Context, user *User) (*User, error)
+}
+
+// ErrInvalidIdentity indicates missing issuer or subject on the identity payload.
+var ErrInvalidIdentity = errors.New("invalid identity: issuer and subject are required")
+
+// Service persists and resolves users from external identities.
+type Service struct {
+	repo Repository
+}
+
+// NewService constructs a Service with required dependencies.
+// repo is stored as-is; it is not checked for nil.
+func NewService(repo Repository) *Service {
+	return &Service{repo: repo}
+}
+
+// EnsureUser persists the given identity and returns the internal user record.
+func (s *Service) EnsureUser(ctx context.Context, identity Identity) (*User, error) {
+	if identity.Issuer == "" || identity.Subject == "" {
+		return nil, ErrInvalidIdentity
+	}
+
+	authProvider := identity.Provider
+	if authProvider == "" {
+		authProvider = "keycloak"
+	}
+
+	user := &User{
+		AuthProvider: authProvider,
+		Issuer:       identity.Issuer,
+		Subject:      identity.Subject,
+		Username:     identity.Username,
+		Email:        identity.Email,
+		Name:         identity.Name,
+		Picture:      identity.Picture,
+	}
+
+	return s.repo.Upsert(ctx, user)
+}
diff --git a/services/llm-api/internal/domain/usersettings/user_settings.go b/services/llm-api/internal/domain/usersettings/user_settings.go
new file mode 100644
index 00000000..ab924e60
--- /dev/null
+++ b/services/llm-api/internal/domain/usersettings/user_settings.go
@@ -0,0 +1,229 @@
+// Package usersettings provides domain models for user preferences and settings.
+package usersettings
+
+import (
+	"context"
+	"encoding/json"
+	"time"
+)
+
+// UserSettings represents user preferences and feature toggles.
+// The three nested settings structs are tagged for storage as JSONB through
+// GORM's JSON serializer.
+type UserSettings struct {
+	ID     uint
+	UserID uint
+
+	// Memory Configuration stored as JSON
+	MemoryConfig MemoryConfig `gorm:"type:jsonb;serializer:json"`
+
+	// Profile Settings
+	ProfileSettings ProfileSettings `gorm:"type:jsonb;serializer:json"`
+
+	// Advanced Settings
+	AdvancedSettings AdvancedSettings `gorm:"type:jsonb;serializer:json"`
+
+	// Other Feature Toggles
+	EnableTrace bool
+	EnableTools bool
+
+	// Preferences - flexible JSON for future extensions
+	Preferences map[string]interface{}
+
+	CreatedAt time.Time
+	UpdatedAt time.Time
+}
+
+// MemoryConfig holds the per-user memory configuration. See
+// DefaultMemoryConfig for the default values.
+// NOTE(review): the precise effect of each toggle and limit is implemented by
+// the memory subsystem, which is not visible here; the field names are the
+// only description available in this file.
+type MemoryConfig struct {
+	Enabled          bool    `json:"enabled"`
+	ObserveEnabled   bool    `json:"observe_enabled"`
+	InjectUserCore   bool    `json:"inject_user_core"`
+	InjectSemantic   bool    `json:"inject_semantic"`
+	InjectEpisodic   bool    `json:"inject_episodic"`
+	MaxUserItems     int     `json:"max_user_items"`
+	MaxProjectItems  int     `json:"max_project_items"`
+	MaxEpisodicItems int     `json:"max_episodic_items"`
+	MinSimilarity    float32 `json:"min_similarity"`
+}
+
+// BaseStyle represents the conversation style preference.
+type BaseStyle string
+
+// The allowed base styles. Values are case-sensitive.
+const (
+	BaseStyleConcise      BaseStyle = "Concise"
+	BaseStyleFriendly     BaseStyle = "Friendly"
+	BaseStyleProfessional BaseStyle = "Professional"
+)
+
+// IsValid reports whether bs is exactly one of the declared base styles.
+func (bs BaseStyle) IsValid() bool {
+	switch bs {
+	case BaseStyleConcise, BaseStyleFriendly, BaseStyleProfessional:
+		return true
+	}
+	return false
+}
+
+// ProfileSettings stores user profile information.
+// It has custom JSON (un)marshalling: output always uses "nick_name", while
+// input also accepts the legacy "nickname" key.
+type ProfileSettings struct {
+	BaseStyle          BaseStyle `json:"base_style"`          // Conversation style: Concise, Friendly, or Professional
+	CustomInstructions string    `json:"custom_instructions"` // Additional behavior, style, and tone preferences
+	NickName           string    `json:"nick_name"`           // What should Jan call you? (alias: nickname)
+	Occupation         string    `json:"occupation"`          // User's occupation
+	MoreAboutYou       string    `json:"more_about_you"`      // Additional information about the user
+}
+
+// AdvancedSettings stores advanced feature toggles.
+// Both toggles default to off (see DefaultAdvancedSettings).
+type AdvancedSettings struct {
+	WebSearch   bool `json:"web_search"`   // Let Jan automatically search the web for answers
+	CodeEnabled bool `json:"code_enabled"` // Enable code execution features
+}
+
+// DefaultMemoryConfig returns default memory configuration
+func DefaultMemoryConfig() MemoryConfig {
+	return MemoryConfig{
+		Enabled:          true,
+		ObserveEnabled:   true, // Default ON - auto-learn from conversations
+		InjectUserCore:   true,
+		InjectSemantic:   true,
+		InjectEpisodic:   false,
+		MaxUserItems:     3,
+		MaxProjectItems:  5,
+		MaxEpisodicItems: 3,
+		MinSimilarity:    0.75,
+	}
+}
+
+// DefaultProfileSettings returns default profile settings
+func DefaultProfileSettings() ProfileSettings {
+	return ProfileSettings{
+		BaseStyle:          BaseStyleFriendly, // Default to Friendly style
+		CustomInstructions: "",
+		NickName:           "",
+		Occupation:         "",
+		MoreAboutYou:       "",
+	}
+}
+
+// DefaultAdvancedSettings returns default advanced settings
+func DefaultAdvancedSettings() AdvancedSettings {
+	return AdvancedSettings{
+		WebSearch:   false, // Default OFF for privacy
+		CodeEnabled: false, // Default OFF for security
+	}
+}
+
+// DefaultUserSettings returns a fresh settings record for userID populated
+// with the default memory, profile and advanced settings, tracing off, tools
+// on, and an empty (non-nil) preferences map.
+func DefaultUserSettings(userID uint) *UserSettings {
+	settings := new(UserSettings)
+	settings.UserID = userID
+	settings.MemoryConfig = DefaultMemoryConfig()
+	settings.ProfileSettings = DefaultProfileSettings()
+	settings.AdvancedSettings = DefaultAdvancedSettings()
+	settings.EnableTrace = false
+	settings.EnableTools = true
+	settings.Preferences = make(map[string]interface{})
+	return settings
+}
+
+// UpdateRequest represents fields that can be updated via API.
+// Every field is optional: a nil pointer (or nil map) means "leave the current
+// value alone". A non-nil nested struct replaces the stored one wholesale
+// (see UserSettings.Apply); it is not merged field by field.
+type UpdateRequest struct {
+	MemoryConfig     *MemoryConfig          `json:"memory_config,omitempty"`
+	ProfileSettings  *ProfileSettings       `json:"profile_settings,omitempty"`
+	AdvancedSettings *AdvancedSettings      `json:"advanced_settings,omitempty"`
+	EnableTrace      *bool                  `json:"enable_trace,omitempty"`
+	EnableTools      *bool                  `json:"enable_tools,omitempty"`
+	Preferences      map[string]interface{} `json:"preferences,omitempty"`
+}
+
+// Apply copies every field that is set on req onto s. Nil fields are skipped.
+// Nested structs are replaced as a whole, and a non-nil Preferences map
+// replaces the existing map (the map itself is shared, not copied).
+func (s *UserSettings) Apply(req UpdateRequest) {
+	if mem := req.MemoryConfig; mem != nil {
+		s.MemoryConfig = *mem
+	}
+	if profile := req.ProfileSettings; profile != nil {
+		s.ProfileSettings = *profile
+	}
+	if adv := req.AdvancedSettings; adv != nil {
+		s.AdvancedSettings = *adv
+	}
+	if trace := req.EnableTrace; trace != nil {
+		s.EnableTrace = *trace
+	}
+	if tools := req.EnableTools; tools != nil {
+		s.EnableTools = *tools
+	}
+	if prefs := req.Preferences; prefs != nil {
+		s.Preferences = prefs
+	}
+}
+
+// profileSettingsAlias has the same fields and JSON tags as ProfileSettings
+// but none of its methods, so encoding/decoding it does not recurse back into
+// ProfileSettings' custom MarshalJSON/UnmarshalJSON.
+type profileSettingsAlias ProfileSettings
+
+// MarshalJSON ensures we consistently emit nick_name while keeping the struct lean.
+// It encodes through the method-less alias, so only the tagged struct fields
+// are written; the legacy "nickname" key is never produced.
+func (p ProfileSettings) MarshalJSON() ([]byte, error) {
+	return json.Marshal(profileSettingsAlias(p))
+}
+
+// UnmarshalJSON decodes profile settings, accepting the legacy "nickname" key
+// as a fallback for "nick_name". When both are present, a non-empty
+// "nick_name" wins. On a decode error p is left untouched; otherwise p is
+// overwritten with the decoded value.
+func (p *ProfileSettings) UnmarshalJSON(data []byte) error {
+	// Decode via the method-less alias (to avoid recursion) plus the legacy key.
+	type wireFormat struct {
+		profileSettingsAlias
+		NicknameLegacy string `json:"nickname"`
+	}
+
+	var decoded wireFormat
+	err := json.Unmarshal(data, &decoded)
+	if err != nil {
+		return err
+	}
+
+	settings := ProfileSettings(decoded.profileSettingsAlias)
+	if settings.NickName == "" {
+		// Falls back to the legacy value; stays empty when that is empty too.
+		settings.NickName = decoded.NicknameLegacy
+	}
+	*p = settings
+
+	return nil
+}
+
+// Repository defines storage operations for user settings.
+type Repository interface {
+	// FindByUserID returns the settings for userID. The service treats a nil
+	// result with a nil error as "no settings stored yet".
+	FindByUserID(ctx context.Context, userID uint) (*UserSettings, error)
+	// Upsert stores settings and returns the persisted record.
+	Upsert(ctx context.Context, settings *UserSettings) (*UserSettings, error)
+	// Update persists changes to an existing settings record.
+	Update(ctx context.Context, settings *UserSettings) error
+}
+
+// Service manages user settings operations.
+type Service struct {
+	repo Repository
+}
+
+// NewService constructs a Service with required dependencies.
+// repo is stored as-is; it is not checked for nil.
+func NewService(repo Repository) *Service {
+	return &Service{repo: repo}
+}
+
+// GetOrCreateSettings returns the stored settings for userID, or upserts and
+// returns the defaults when the repository reports none (nil result, nil
+// error). Repository errors are returned unchanged.
+func (s *Service) GetOrCreateSettings(ctx context.Context, userID uint) (*UserSettings, error) {
+	existing, err := s.repo.FindByUserID(ctx, userID)
+	switch {
+	case err != nil:
+		return nil, err
+	case existing != nil:
+		return existing, nil
+	}
+
+	// Nothing stored yet: persist the defaults.
+	return s.repo.Upsert(ctx, DefaultUserSettings(userID))
+}
+
+// UpdateSettings loads (or creates) the settings for userID, applies the
+// non-nil fields of req, persists the result and returns it. Any error from
+// loading or persisting is returned unchanged with a nil result.
+func (s *Service) UpdateSettings(ctx context.Context, userID uint, req UpdateRequest) (*UserSettings, error) {
+	current, err := s.GetOrCreateSettings(ctx, userID)
+	if err != nil {
+		return nil, err
+	}
+
+	current.Apply(req)
+
+	err = s.repo.Update(ctx, current)
+	if err != nil {
+		return nil, err
+	}
+	return current, nil
+}
diff --git a/services/llm-api/internal/infrastructure/auth/jwt_keycloak.go b/services/llm-api/internal/infrastructure/auth/jwt_keycloak.go
new file mode 100644
index 00000000..2e07b408
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/auth/jwt_keycloak.go
@@ -0,0 +1,297 @@
+package auth
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"strings"
+	"sync/atomic"
+	"time"
+
+	"github.com/MicahParks/keyfunc/v2"
+	"github.com/golang-jwt/jwt/v5"
+	"github.com/rs/zerolog"
+)
+
+// PrincipalClaims represent the subset of JWT claims we care about.
+//
+// KeycloakValidator.Validate fills it from the token's map claims: Subject
+// from "sub", Issuer from "iss", Audience from "aud", Roles from
+// "realm_access.roles", Scopes from the space-separated "scope" claim,
+// TokenID from "jti" and AuthorizedParty from "azp". The time fields are the
+// zero time when the corresponding claim is absent or not numeric.
+type PrincipalClaims struct {
+	Subject           string
+	Issuer            string
+	Audience          []string
+	PreferredUsername string
+	Email             string
+	Name              string
+	Picture           string
+	Roles             []string
+	Scopes            []string
+	ExpiresAt         time.Time
+	IssuedAt          time.Time
+	NotBefore         time.Time
+	TokenID           string
+	AuthorizedParty   string
+}
+
+// KeycloakValidator validates JWT tokens against Keycloak JWKS.
+//
+// issuer, audience and authorizedParty are the expected "iss", "aud" and
+// "azp" values checked by Validate. refreshEvery is passed to keyfunc as the
+// JWKS refresh interval and clockSkew is the tolerance applied to the exp/nbf
+// checks. jwks holds the loaded key set; lastErr holds the most recent value
+// recorded by the JWKS refresh error handler (always wrapped in lastErrWrap).
+type KeycloakValidator struct {
+	issuer          string
+	audience        string
+	authorizedParty string
+	jwksURL         string
+	logger          zerolog.Logger
+	refreshEvery    time.Duration
+	clockSkew       time.Duration
+	jwks            atomic.Pointer[keyfunc.JWKS]
+	lastErr         atomic.Value // stores lastErrWrap
+}
+
+// lastErrWrap is a sentinel wrapper to avoid storing bare nil in atomic.Value.
+// A wrapper whose Err is nil means "no error recorded".
+type lastErrWrap struct{ Err error }
+
+// Retry policy for the initial JWKS fetch performed by initJWKS: the wait
+// starts at jwksInitialRetryInterval, doubles after each failed attempt up to
+// jwksInitialRetryMaxBackoff, and retrying stops once jwksInitialRetryTimeout
+// has elapsed.
+const (
+	jwksInitialRetryInterval   = time.Second
+	jwksInitialRetryMaxBackoff = 10 * time.Second
+	jwksInitialRetryTimeout    = 2 * time.Minute
+)
+
+// NewKeycloakValidator creates a validator for the given JWKS endpoint and
+// expected claim values, then performs the initial JWKS fetch.
+//
+// It returns an error when jwksURL is empty or when the initial fetch fails.
+func NewKeycloakValidator(
+	ctx context.Context,
+	jwksURL,
+	issuer,
+	audience,
+	authorizedParty string,
+	refreshEvery,
+	clockSkew time.Duration,
+	logger zerolog.Logger,
+) (*KeycloakValidator, error) {
+	if jwksURL == "" {
+		return nil, errors.New("jwks url is required")
+	}
+
+	v := new(KeycloakValidator)
+	v.jwksURL = jwksURL
+	v.issuer = issuer
+	v.audience = audience
+	v.authorizedParty = authorizedParty
+	v.refreshEvery = refreshEvery
+	v.clockSkew = clockSkew
+	v.logger = logger
+
+	// atomic.Value must never hold a bare nil, so seed it with an empty wrapper.
+	v.lastErr.Store(lastErrWrap{Err: nil})
+
+	initErr := v.initJWKS(ctx)
+	if initErr != nil {
+		return nil, initErr
+	}
+	return v, nil
+}
+
+// initJWKS performs the initial JWKS fetch, retrying with exponential backoff
+// until it succeeds, ctx is cancelled, or the retry deadline passes. On
+// success the key set is published to v.jwks and lastErr is cleared.
+//
+// NOTE(review): lastErr is written by RefreshErrorHandler and cleared only by
+// the successful initial fetch below; nothing visible here clears it after a
+// later successful background refresh, so Ready() may keep reporting false
+// after a single failed refresh — confirm against keyfunc's callback semantics.
+func (v *KeycloakValidator) initJWKS(ctx context.Context) error {
+	options := keyfunc.Options{
+		RefreshErrorHandler: func(err error) {
+			// Always store non-nil wrapper type
+			v.lastErr.Store(lastErrWrap{Err: err})
+			if err != nil {
+				v.logger.Error().Err(err).Msg("jwks refresh failed")
+			}
+		},
+		RefreshInterval:   v.refreshEvery,
+		RefreshUnknownKID: true,
+	}
+
+	if ctx != nil {
+		options.Ctx = ctx
+	}
+
+	// The retry window is jwksInitialRetryTimeout from now, shortened to the
+	// context's own deadline when that one is earlier.
+	backoff := jwksInitialRetryInterval
+	deadline := time.Now().Add(jwksInitialRetryTimeout)
+	if ctx != nil {
+		if ctxDeadline, ok := ctx.Deadline(); ok && ctxDeadline.Before(deadline) {
+			deadline = ctxDeadline
+		}
+	}
+
+	for attempt := 1; ; attempt++ {
+		jwks, err := keyfunc.Get(v.jwksURL, options)
+		if err == nil {
+			v.lastErr.Store(lastErrWrap{Err: nil})
+			v.jwks.Store(jwks)
+			return nil
+		}
+
+		v.logger.Warn().
+			Err(err).
+			Str("jwks_url", v.jwksURL).
+			Int("attempt", attempt).
+			Msg("initial jwks fetch failed, retrying")
+
+		// Wait for the current backoff, aborting early if the context ends.
+		if ctx != nil {
+			select {
+			case <-ctx.Done():
+				return fmt.Errorf("fetch jwks: %w", ctx.Err())
+			case <-time.After(backoff):
+			}
+		} else {
+			time.Sleep(backoff)
+		}
+
+		// The deadline is checked after the wait, so the last wait can overrun
+		// it by up to one backoff interval; the last fetch error is returned.
+		if time.Now().After(deadline) {
+			return fmt.Errorf("fetch jwks: %w", err)
+		}
+
+		// Double the backoff, capped at jwksInitialRetryMaxBackoff.
+		if next := backoff * 2; next <= jwksInitialRetryMaxBackoff {
+			backoff = next
+		} else {
+			backoff = jwksInitialRetryMaxBackoff
+		}
+	}
+}
+
+// Validate parses and validates the given JWT returning principal claims.
+//
+// The token must be RS256-signed with a key from the loaded JWKS. On top of
+// the parser's own validation the following checks are applied:
+//   - "iss" must equal the configured issuer;
+//   - "aud", when present, must be (or contain) the configured audience;
+//   - "sub" must be a non-empty string;
+//   - "azp" must equal the configured authorized party when both are non-empty;
+//   - "exp" and "nbf" are evaluated with the configured clock skew.
+//
+// An error is returned when the JWKS has not been loaded, when parsing or
+// signature verification fails, or when any of the checks above fails.
+//
+// NOTE(review): a token without an "aud" claim passes the audience check, and
+// a token without "azp" passes the authorized-party check — confirm that this
+// leniency is intended.
+func (v *KeycloakValidator) Validate(_ context.Context, rawToken string) (*PrincipalClaims, error) {
+	jwks := v.jwks.Load()
+	if jwks == nil {
+		return nil, errors.New("jwks not initialised")
+	}
+
+	// The parser validates exp/nbf on its own. Without WithLeeway it does so
+	// with zero tolerance and rejects the token before the skew-aware checks
+	// further down ever run, which made clockSkew ineffective.
+	parser := jwt.NewParser(
+		jwt.WithValidMethods([]string{"RS256"}),
+		jwt.WithLeeway(v.clockSkew),
+	)
+	token, err := parser.ParseWithClaims(rawToken, jwt.MapClaims{}, jwks.Keyfunc)
+	if err != nil {
+		return nil, fmt.Errorf("parse token: %w", err)
+	}
+
+	if !token.Valid {
+		return nil, errors.New("invalid token")
+	}
+
+	mapClaims, ok := token.Claims.(jwt.MapClaims)
+	if !ok {
+		return nil, errors.New("invalid claims")
+	}
+
+	iss, _ := mapClaims["iss"].(string)
+	if iss != v.issuer {
+		return nil, fmt.Errorf("issuer mismatch %s", iss)
+	}
+
+	// "aud" may be a single string or an array of strings.
+	var audiences []string
+	if audRaw, ok := mapClaims["aud"]; ok {
+		switch val := audRaw.(type) {
+		case string:
+			if val != v.audience {
+				return nil, fmt.Errorf("audience mismatch")
+			}
+			audiences = append(audiences, val)
+		case []interface{}:
+			found := false
+			for _, item := range val {
+				if s, ok := item.(string); ok {
+					if s == v.audience {
+						found = true
+					}
+					audiences = append(audiences, s)
+				}
+			}
+			if !found {
+				return nil, fmt.Errorf("audience mismatch")
+			}
+		default:
+			return nil, fmt.Errorf("aud claim unsupported type %T", val)
+		}
+	}
+
+	sub, _ := mapClaims["sub"].(string)
+	if sub == "" {
+		return nil, errors.New("sub claim missing")
+	}
+
+	preferredUsername, _ := mapClaims["preferred_username"].(string)
+	email, _ := mapClaims["email"].(string)
+	name, _ := mapClaims["name"].(string)
+	picture, _ := mapClaims["picture"].(string)
+	azp := claimString(mapClaims["azp"])
+	if v.authorizedParty != "" && azp != "" && azp != v.authorizedParty {
+		return nil, errors.New("authorized party mismatch")
+	}
+
+	var roles []string
+	if realmAccess, ok := mapClaims["realm_access"].(map[string]any); ok {
+		if rawRoles, ok := realmAccess["roles"].([]interface{}); ok {
+			for _, role := range rawRoles {
+				if s, ok := role.(string); ok {
+					roles = append(roles, s)
+				}
+			}
+		}
+	}
+
+	// Fields (rather than Split on a single space) avoids empty scope entries
+	// when the claim contains repeated or leading/trailing whitespace.
+	var scopes []string
+	if scopeStr, ok := mapClaims["scope"].(string); ok && scopeStr != "" {
+		scopes = strings.Fields(scopeStr)
+	}
+
+	expires := jwtNumericTime(mapClaims["exp"])
+	issued := jwtNumericTime(mapClaims["iat"])
+	notBefore := jwtNumericTime(mapClaims["nbf"])
+
+	// Skew-aware time checks, kept as a second line of defence behind the
+	// parser's own (now equally tolerant) validation.
+	now := time.Now().UTC()
+	if !expires.IsZero() && now.After(expires.Add(v.clockSkew)) {
+		return nil, errors.New("token expired")
+	}
+	if !notBefore.IsZero() && now.Add(v.clockSkew).Before(notBefore) {
+		return nil, errors.New("token not yet valid")
+	}
+
+	return &PrincipalClaims{
+		Subject:           sub,
+		Issuer:            iss,
+		PreferredUsername: preferredUsername,
+		Email:             email,
+		Name:              name,
+		Picture:           picture,
+		Roles:             roles,
+		Scopes:            scopes,
+		ExpiresAt:         expires,
+		IssuedAt:          issued,
+		NotBefore:         notBefore,
+		TokenID:           claimString(mapClaims["jti"]),
+		Audience:          audiences,
+		AuthorizedParty:   azp,
+	}, nil
+}
+
+// Ready reports whether a JWKS has been loaded and no error is currently
+// recorded in lastErr.
+func (v *KeycloakValidator) Ready() bool {
+	if v.jwks.Load() == nil {
+		return false
+	}
+
+	// A missing or foreign value in lastErr counts as "no error recorded".
+	recorded, isWrap := v.lastErr.Load().(lastErrWrap)
+	return !isWrap || recorded.Err == nil
+}
+
+// jwtNumericTime converts a numeric JWT claim (seconds since the Unix epoch)
+// into a UTC time. float64, int64 and json.Number values are accepted; any
+// other type, or a json.Number that is not a valid int64, yields the zero time.
+func jwtNumericTime(value any) time.Time {
+	var seconds int64
+
+	switch n := value.(type) {
+	case float64:
+		seconds = int64(n)
+	case int64:
+		seconds = n
+	case json.Number:
+		parsed, err := n.Int64()
+		if err != nil {
+			return time.Time{}
+		}
+		seconds = parsed
+	default:
+		return time.Time{}
+	}
+
+	return time.Unix(seconds, 0).UTC()
+}
+
+// claimString returns value as a string, or "" when it is not a string.
+func claimString(value any) string {
+	text, _ := value.(string)
+	return text
+}
diff --git a/services/llm-api/internal/infrastructure/crontab/crontab.go b/services/llm-api/internal/infrastructure/crontab/crontab.go
new file mode 100644
index 00000000..479ff357
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/crontab/crontab.go
@@ -0,0 +1,129 @@
+package crontab
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/infrastructure/inference"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"github.com/mileusna/crontab"
+)
+
+// Constants used by the crontab jobs: the provider-metadata key that controls
+// auto-enabling of newly synced models, the fallback sync interval, and the
+// per-run timeout of scheduled jobs.
+const (
+	MetadataAutoEnableNewModels = "auto_enable_new_models" // "true" or "false"
+	DefaultModelSyncInterval    = 1                        // in minutes
+	CronJobTimeout              = 10 * time.Minute         // Timeout for each cron job execution
+)
+
+// Crontab schedules the service's periodic jobs: it lists active providers
+// through providerService, fetches their models through inferenceProvider and
+// registers the recurring jobs on ctab.
+type Crontab struct {
+	ctab              *crontab.Crontab
+	providerService   *model.ProviderService
+	inferenceProvider *inference.InferenceProvider
+}
+
+// NewCrontab wires a Crontab with a fresh scheduler and the given services.
+func NewCrontab(
+	providerService *model.ProviderService,
+	inferenceProvider *inference.InferenceProvider,
+) *Crontab {
+	scheduler := new(Crontab)
+	scheduler.ctab = crontab.New()
+	scheduler.providerService = providerService
+	scheduler.inferenceProvider = inferenceProvider
+	return scheduler
+}
+
+// Run syncs all provider models once, registers the recurring jobs and then
+// blocks until ctx is done, at which point the scheduler is shut down.
+//
+// The model sync job is registered only when the global config enables it; a
+// non-positive interval falls back to DefaultModelSyncInterval. A config
+// reload job is always registered to run every minute. An error is returned
+// when a job cannot be added.
+func (c *Crontab) Run(ctx context.Context) error {
+	log := logger.GetLogger()
+
+	// Initial sync on server start, independent of the schedule.
+	c.syncAllProviderModels(ctx)
+
+	if cfg := config.GetGlobal(); cfg != nil && cfg.ModelSyncEnabled {
+		everyMinutes := cfg.ModelSyncIntervalMinutes
+		if everyMinutes <= 0 {
+			everyMinutes = DefaultModelSyncInterval
+		}
+
+		// Each scheduled run gets its own bounded context.
+		syncJob := func() {
+			jobCtx, cancel := context.WithTimeout(context.Background(), CronJobTimeout)
+			defer cancel()
+			c.syncAllProviderModels(jobCtx)
+		}
+
+		schedule := fmt.Sprintf("*/%d * * * *", everyMinutes)
+		if err := c.ctab.AddJob(schedule, syncJob); err != nil {
+			return platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to add model sync job")
+		}
+		log.Warn().Msgf("Model sync scheduled: every %d minute(s)", everyMinutes)
+	}
+
+	// Reload the configuration once per minute.
+	reloadJob := func() {
+		config.Load()
+	}
+	if err := c.ctab.AddJob("* * * * *", reloadJob); err != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to add env reload job")
+	}
+
+	<-ctx.Done()
+	c.ctab.Shutdown()
+	return nil
+}
+
+// syncAllProviderModels syncs the models of every active provider, running at
+// most maxConcurrentSyncs provider syncs at a time, and returns once all of
+// them have finished. A failure to list providers is logged and ends the run.
+func (c *Crontab) syncAllProviderModels(ctx context.Context) {
+	log := logger.GetLogger()
+
+	providers, err := c.providerService.FindAllActiveProviders(ctx)
+	if err != nil {
+		log.Error().Err(err).Msg("Failed to list providers for sync")
+		return
+	}
+	if len(providers) == 0 {
+		return
+	}
+
+	const maxConcurrentSyncs = 10
+	slots := make(chan struct{}, maxConcurrentSyncs)
+
+	var pending sync.WaitGroup
+	pending.Add(len(providers))
+	for _, provider := range providers {
+		go func(p *model.Provider) {
+			defer pending.Done()
+
+			// Take a slot before syncing and give it back afterwards.
+			slots <- struct{}{}
+			defer func() { <-slots }()
+
+			c.syncProviderModels(ctx, p)
+		}(provider)
+	}
+	pending.Wait()
+}
+
+// syncProviderModels fetches the provider's model list and hands it to the
+// provider service for syncing. New models are auto-enabled only when the
+// provider metadata sets MetadataAutoEnableNewModels to "true". Errors are
+// logged, not returned; an empty model list ends the call without syncing.
+func (c *Crontab) syncProviderModels(ctx context.Context, provider *model.Provider) {
+	log := logger.GetLogger()
+
+	fetched, fetchErr := c.inferenceProvider.ListModels(ctx, provider)
+	if fetchErr != nil {
+		log.Error().Err(fetchErr).Msg("Failed to fetch models from provider")
+		return
+	}
+	if len(fetched) == 0 {
+		return
+	}
+
+	autoEnable := false
+	if provider.Metadata != nil {
+		autoEnable = provider.Metadata[MetadataAutoEnableNewModels] == "true"
+	}
+
+	_, syncErr := c.providerService.SyncProviderModelsWithOptions(ctx, provider, fetched, autoEnable)
+	if syncErr != nil {
+		log.Error().Err(syncErr).Msg("Failed to sync provider models")
+		return
+	}
+
+	log.Info().Msgf("Synced %d models", len(fetched))
+}
diff --git a/services/llm-api/internal/infrastructure/database/database.go b/services/llm-api/internal/infrastructure/database/database.go
new file mode 100644
index 00000000..7bd2d1ef
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/database.go
@@ -0,0 +1,118 @@
+package database
+
+import (
+	"fmt"
+	"time"
+
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+
+	"gorm.io/driver/postgres"
+	"gorm.io/gorm"
+	gormlogger "gorm.io/gorm/logger"
+	"gorm.io/gorm/schema"
+)
+
+// SchemaRegistry collects the models registered through
+// RegisterSchemaForAutoMigrate; Migration auto-migrates each of them.
+var SchemaRegistry []interface{}
+
+// RegisterSchemaForAutoMigrate appends the given models, in order, to
+// SchemaRegistry.
+func RegisterSchemaForAutoMigrate(models ...interface{}) {
+	for _, registered := range models {
+		SchemaRegistry = append(SchemaRegistry, registered)
+	}
+}
+
+// DB is the package-level database handle; Connect assigns it on success.
+var DB *gorm.DB
+
+// Config holds database configuration.
+// Connect passes DatabaseURL to the postgres driver, applies MaxIdle, MaxOpen
+// and MaxLifetime to the connection pool, and uses LogLevel for the GORM logger.
+type Config struct {
+	DatabaseURL string
+	MaxIdle     int
+	MaxOpen     int
+	MaxLifetime time.Duration
+	LogLevel    gormlogger.LogLevel
+}
+
+// Connect opens a PostgreSQL connection through GORM using cfg, configures
+// the connection pool, stores the handle in the package-level DB variable and
+// returns it. Tables are prefixed with "llm_api.". Errors from opening the
+// connection or obtaining the underlying pool are returned to the caller.
+func Connect(cfg Config) (*gorm.DB, error) {
+	gormCfg := &gorm.Config{
+		NamingStrategy: schema.NamingStrategy{
+			TablePrefix:   "llm_api.",
+			SingularTable: false,
+		},
+		Logger: gormlogger.Default.LogMode(cfg.LogLevel),
+	}
+
+	conn, openErr := gorm.Open(postgres.Open(cfg.DatabaseURL), gormCfg)
+	if openErr != nil {
+		log := logger.GetLogger()
+		log.Error().
+			Str("error_code", "5c16fb53-d98c-4fc6-8bb4-9abd3c0b9e88").
+			Err(openErr).
+			Msg("unable to connect to database")
+		return nil, openErr
+	}
+
+	// Apply the pool limits to the underlying *sql.DB.
+	pool, poolErr := conn.DB()
+	if poolErr != nil {
+		return nil, poolErr
+	}
+	pool.SetMaxIdleConns(cfg.MaxIdle)
+	pool.SetMaxOpenConns(cfg.MaxOpen)
+	pool.SetConnMaxLifetime(cfg.MaxLifetime)
+
+	log := logger.GetLogger()
+	log.Info().Msg("Successfully connected to database")
+
+	DB = conn
+	return DB, nil
+}
+
+// NewDB creates a new database connection using DSN
+func NewDB(dsn string) (*gorm.DB, error) {
+	return Connect(Config{
+		DatabaseURL: dsn,
+		MaxIdle:     10,
+		MaxOpen:     25,
+		MaxLifetime: 1 * time.Hour,
+		LogLevel:    gormlogger.Silent,
+	})
+}
+
+// DatabaseMigration is the bookkeeping table model for schema migrations.
+// Migration uses the presence of its table to decide whether the schema has
+// already been initialised. Version is required and unique.
+type DatabaseMigration struct {
+	gorm.Model
+	Version string `gorm:"not null;uniqueIndex"`
+}
+
+// Migration prepares the database schema for the service.
+//
+// The schema name defaults to "llm_api"; when tablePrefix ends in '.', the
+// text before the dot is used instead. The schema is created if missing.
+// When the DatabaseMigration table is not present, the schema is dropped with
+// CASCADE, recreated, and the DatabaseMigration table plus every model in
+// SchemaRegistry are auto-migrated. When the table is present nothing further
+// is done. The first failing statement or migration is returned as an error.
+//
+// NOTE(review): the DROP SCHEMA ... CASCADE branch removes everything in the
+// schema whenever the migration table is missing — confirm this can never run
+// against a schema that holds data.
+// NOTE(review): schemaName is interpolated into the SQL unquoted; this assumes
+// tablePrefix comes from trusted configuration — verify against callers.
+func Migration(db *gorm.DB, tablePrefix string) error {
+	schemaName := "llm_api"
+	if tablePrefix != "" {
+		// Extract schema from table prefix (e.g., "llm_api." -> "llm_api")
+		if len(tablePrefix) > 0 && tablePrefix[len(tablePrefix)-1] == '.' {
+			schemaName = tablePrefix[:len(tablePrefix)-1]
+		}
+	}
+
+	if err := db.Exec(fmt.Sprintf("CREATE SCHEMA IF NOT EXISTS %s;", schemaName)).Error; err != nil {
+		return fmt.Errorf("failed to create schema: %w", err)
+	}
+
+	// A missing migration table is treated as "schema not initialised yet".
+	hasTable := db.Migrator().HasTable(&DatabaseMigration{})
+	if !hasTable {
+		// Start from an empty schema before creating any tables.
+		if err := db.Exec(fmt.Sprintf("DROP SCHEMA IF EXISTS %s CASCADE;", schemaName)).Error; err != nil {
+			return fmt.Errorf("failed to drop %s schema: %w", schemaName, err)
+		}
+		if err := db.Exec(fmt.Sprintf("CREATE SCHEMA %s;", schemaName)).Error; err != nil {
+			return fmt.Errorf("failed to create %s schema: %w", schemaName, err)
+		}
+		if err := db.AutoMigrate(&DatabaseMigration{}); err != nil {
+			return fmt.Errorf("failed to create 'database_migration' table: %w", err)
+		}
+		// Migrate every registered model, stopping at the first failure.
+		for _, model := range SchemaRegistry {
+			err := db.AutoMigrate(model)
+			if err != nil {
+				log := logger.GetLogger()
+				log.Error().
+					Str("error_code", "75333e43-8157-4f0a-8e34-aa34e6e7c285").
+					Err(err).
+					Msgf("failed to auto migrate schema: %T", model)
+				return err
+			}
+		}
+	}
+	return nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/api_key.go b/services/llm-api/internal/infrastructure/database/dbschema/api_key.go
new file mode 100644
index 00000000..aff741f8
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/api_key.go
@@ -0,0 +1,67 @@
+package dbschema
+
+import (
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/apikey"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+// init registers the APIKey schema for auto-migration at package load time.
+func init() {
+	database.RegisterSchemaForAutoMigrate(&APIKey{})
+}
+
+// APIKey represents persisted API key metadata.
+// It stores a Hash plus a Prefix and Suffix; RevokedAt and LastUsedAt are
+// nullable, ExpiresAt is required. It converts to and from the domain type
+// via EtoD and FromDomain.
+type APIKey struct {
+	ID         string    `gorm:"type:uuid;primaryKey"`
+	UserID     uint      `gorm:"not null;index"`
+	Name       string    `gorm:"type:varchar(128);not null"`
+	Prefix     string    `gorm:"type:varchar(32);not null"`
+	Suffix     string    `gorm:"type:varchar(8);not null"`
+	Hash       string    `gorm:"type:varchar(128);not null"`
+	ExpiresAt  time.Time `gorm:"not null;index"`
+	RevokedAt  *time.Time
+	LastUsedAt *time.Time
+	CreatedAt  time.Time
+	UpdatedAt  time.Time
+}
+
+// EtoD maps the persisted row onto its domain counterpart.
+// A nil receiver yields nil.
+func (k *APIKey) EtoD() *apikey.APIKey {
+	if k == nil {
+		return nil
+	}
+
+	domainKey := new(apikey.APIKey)
+	domainKey.ID = k.ID
+	domainKey.UserID = k.UserID
+	domainKey.Name = k.Name
+	domainKey.Prefix = k.Prefix
+	domainKey.Suffix = k.Suffix
+	domainKey.Hash = k.Hash
+	domainKey.ExpiresAt = k.ExpiresAt
+	domainKey.RevokedAt = k.RevokedAt
+	domainKey.LastUsedAt = k.LastUsedAt
+	domainKey.CreatedAt = k.CreatedAt
+	domainKey.UpdatedAt = k.UpdatedAt
+	return domainKey
+}
+
+// FromDomain maps a domain API key onto its persisted representation.
+// A nil input yields nil.
+func FromDomain(apiKey *apikey.APIKey) *APIKey {
+	if apiKey == nil {
+		return nil
+	}
+
+	row := new(APIKey)
+	row.ID = apiKey.ID
+	row.UserID = apiKey.UserID
+	row.Name = apiKey.Name
+	row.Prefix = apiKey.Prefix
+	row.Suffix = apiKey.Suffix
+	row.Hash = apiKey.Hash
+	row.ExpiresAt = apiKey.ExpiresAt
+	row.RevokedAt = apiKey.RevokedAt
+	row.LastUsedAt = apiKey.LastUsedAt
+	row.CreatedAt = apiKey.CreatedAt
+	row.UpdatedAt = apiKey.UpdatedAt
+	return row
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/conversation.go b/services/llm-api/internal/infrastructure/database/dbschema/conversation.go
new file mode 100644
index 00000000..6ff1bdec
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/conversation.go
@@ -0,0 +1,351 @@
+package dbschema
+
+import (
+	"database/sql/driver"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+// init registers the conversation schemas for auto-migration, in the order
+// Conversation, ConversationItem, ConversationBranch.
+func init() {
+	database.RegisterSchemaForAutoMigrate(
+		Conversation{},
+		ConversationItem{},
+		ConversationBranch{},
+	)
+}
+
+// Conversation represents the database schema for conversations.
+// It belongs to a user, may optionally be grouped under a project, and owns
+// its items and branch metadata through the ConversationID foreign key.
+type Conversation struct {
+	BaseModel
+	PublicID        string                          `gorm:"type:varchar(50);uniqueIndex;not null"`
+	Object          string                          `gorm:"type:varchar(50);not null;default:'conversation'"`
+	Title           *string                         `gorm:"type:varchar(256)"`
+	UserID          uint                            `gorm:"index:idx_conversation_user_referrer;index:idx_conversation_user_status;not null"`
+	User            User                            `gorm:"foreignKey:UserID"`
+	ProjectID       *uint                           `gorm:"index:idx_conversations_project_updated_at"`                 // Optional project grouping
+	ProjectPublicID *string                         `gorm:"type:varchar(64);index:idx_conversations_project_public_id"` // Public ID of the project
+	Status          conversation.ConversationStatus `gorm:"type:varchar(20);index:idx_conversation_user_status;not null;default:'active'"`
+	ActiveBranch    string                          `gorm:"type:varchar(50);not null;default:'MAIN'"` // Currently active branch
+	Referrer        *string                         `gorm:"type:varchar(100);index:idx_conversation_user_referrer"`
+	Metadata        JSONMap                         `gorm:"type:jsonb"`
+	IsPrivate       *bool                           `gorm:"default:false"`
+
+	// Project instruction inheritance
+	InstructionVersion           int     `gorm:"not null;default:1"` // Version of project instruction when conversation was created
+	EffectiveInstructionSnapshot *string `gorm:"type:text"`          // Snapshot of merged instruction for reproducibility
+
+	Items    []ConversationItem   `gorm:"foreignKey:ConversationID"`
+	Branches []ConversationBranch `gorm:"foreignKey:ConversationID"`
+}
+
+// ConversationBranch represents metadata about a conversation branch.
+// ConversationID and Name share the unique index idx_conversation_branch_name,
+// so a branch name is unique within its conversation.
+type ConversationBranch struct {
+	BaseModel
+	ConversationID   uint         `gorm:"uniqueIndex:idx_conversation_branch_name;not null"`
+	Conversation     Conversation `gorm:"foreignKey:ConversationID"`
+	Name             string       `gorm:"type:varchar(50);uniqueIndex:idx_conversation_branch_name;not null"` // Branch identifier (MAIN, EDIT_1, etc.)
+	Description      *string      `gorm:"type:text"`
+	ParentBranch     *string      `gorm:"type:varchar(50)"` // Branch this was forked from
+	ForkedAt         *time.Time   `gorm:"type:timestamp"`
+	ForkedFromItemID *string      `gorm:"type:varchar(50)"` // Item ID where fork occurred
+	ItemCount        int          `gorm:"default:0"`        // Cached count of items in this branch
+}
+
+// ConversationItem represents the database schema for conversation items.
+// Each item belongs to one conversation and one branch (default 'MAIN') and
+// is ordered within that branch by SequenceNumber. Content and
+// IncompleteDetails are stored as JSONB; Role, Status and Rating are stored
+// as plain strings and converted to their domain types in EtoD.
+type ConversationItem struct {
+	BaseModel
+	ConversationID    uint                  `gorm:"index:idx_item_conversation_branch;index:idx_item_conversation_sequence;not null"`
+	Conversation      Conversation          `gorm:"foreignKey:ConversationID"`
+	PublicID          string                `gorm:"type:varchar(50);uniqueIndex;not null"`
+	Object            string                `gorm:"type:varchar(50);not null;default:'conversation.item'"`
+	Branch            string                `gorm:"type:varchar(50);index:idx_item_conversation_branch;not null;default:'MAIN'"` // Branch identifier
+	SequenceNumber    int                   `gorm:"index:idx_item_conversation_sequence;not null"`                               // Order within branch
+	Type              conversation.ItemType `gorm:"type:varchar(50);not null"`
+	Role              *string               `gorm:"type:varchar(20)"` // Stored as string, converted to/from ItemRole
+	Content           JSONContent           `gorm:"type:jsonb"`       // Stores []Content as JSON
+	Status            *string               `gorm:"type:varchar(20)"` // Stored as string, converted to/from ItemStatus
+	IncompleteAt      *time.Time            `gorm:"type:timestamp"`
+	IncompleteDetails JSONIncompleteDetails `gorm:"type:jsonb"`
+	CompletedAt       *time.Time            `gorm:"type:timestamp"`
+	ResponseID        *uint                 `gorm:"index"`
+
+	// User feedback/rating
+	Rating        *string    `gorm:"type:varchar(10)"` // 'like' or 'unlike'
+	RatedAt       *time.Time `gorm:"type:timestamp"`
+	RatingComment *string    `gorm:"type:text"`
+}
+
+// JSONMap is a custom type for map[string]string stored as JSON
+type JSONMap map[string]string
+
+// Value implements driver.Valuer. A nil map is written as SQL NULL; any other
+// map is written as its JSON encoding.
+func (j JSONMap) Value() (driver.Value, error) {
+	if j == nil {
+		return nil, nil
+	}
+	return json.Marshal(j)
+}
+
+// Scan implements sql.Scanner. SQL NULL resets the map to nil; []byte and
+// string values are decoded as JSON. Any other source type is reported as an
+// error instead of being silently ignored, so a driver handing back an
+// unexpected representation can no longer drop the column's data unnoticed.
+func (j *JSONMap) Scan(value any) error {
+	switch raw := value.(type) {
+	case nil:
+		*j = nil
+		return nil
+	case []byte:
+		return json.Unmarshal(raw, j)
+	case string:
+		return json.Unmarshal([]byte(raw), j)
+	default:
+		return fmt.Errorf("expected []byte or string, got %T", value)
+	}
+}
+
+// JSONContent is a custom type for []Content stored as JSON
+type JSONContent []conversation.Content
+
+// Value implements driver.Valuer. A nil slice is written as SQL NULL; any
+// other slice is written as its JSON encoding.
+func (j JSONContent) Value() (driver.Value, error) {
+	if j == nil {
+		return nil, nil
+	}
+	return json.Marshal(j)
+}
+
+// Scan implements sql.Scanner. SQL NULL resets the slice to nil; []byte and
+// string values are decoded as JSON. Any other source type is reported as an
+// error instead of being silently ignored, so a driver handing back an
+// unexpected representation can no longer drop the column's data unnoticed.
+func (j *JSONContent) Scan(value any) error {
+	switch raw := value.(type) {
+	case nil:
+		*j = nil
+		return nil
+	case []byte:
+		return json.Unmarshal(raw, j)
+	case string:
+		return json.Unmarshal([]byte(raw), j)
+	default:
+		return fmt.Errorf("expected []byte or string, got %T", value)
+	}
+}
+
+// JSONIncompleteDetails is a custom type for IncompleteDetails stored as JSON
+type JSONIncompleteDetails conversation.IncompleteDetails
+
+// Value implements driver.Valuer and always writes the JSON encoding of the
+// details, including for the zero value.
+func (j JSONIncompleteDetails) Value() (driver.Value, error) {
+	return json.Marshal(j)
+}
+
+// Scan implements sql.Scanner. SQL NULL leaves the receiver untouched; []byte
+// and string values are decoded as JSON (string is accepted so the type keeps
+// working when the driver returns JSON columns as text). Any other source
+// type is an error.
+func (j *JSONIncompleteDetails) Scan(value any) error {
+	switch raw := value.(type) {
+	case nil:
+		return nil
+	case []byte:
+		return json.Unmarshal(raw, j)
+	case string:
+		return json.Unmarshal([]byte(raw), j)
+	default:
+		return fmt.Errorf("expected []byte or string, got %T", value)
+	}
+}
+
+// NewSchemaConversation builds the database row for a domain conversation.
+// Items and branches are not copied. IsPrivate is stored through a pointer to
+// a local copy, so the row does not alias the domain object's field.
+func NewSchemaConversation(c *conversation.Conversation) *Conversation {
+	private := c.IsPrivate
+
+	row := new(Conversation)
+	row.ID = c.ID
+	row.CreatedAt = c.CreatedAt
+	row.UpdatedAt = c.UpdatedAt
+
+	row.PublicID = c.PublicID
+	row.Object = c.Object
+	row.Title = c.Title
+	row.UserID = c.UserID
+	row.ProjectID = c.ProjectID
+	row.ProjectPublicID = c.ProjectPublicID
+	row.Status = c.Status
+	row.ActiveBranch = c.ActiveBranch
+	row.Referrer = c.Referrer
+	row.Metadata = JSONMap(c.Metadata)
+	row.IsPrivate = &private
+	row.InstructionVersion = c.InstructionVersion
+	row.EffectiveInstructionSnapshot = c.EffectiveInstructionSnapshot
+	return row
+}
+
+// NewSchemaConversationBranch builds the database row for a branch of the
+// conversation identified by conversationID from its domain metadata.
+func NewSchemaConversationBranch(conversationID uint, meta conversation.BranchMetadata) *ConversationBranch {
+	row := new(ConversationBranch)
+	row.CreatedAt = meta.CreatedAt
+	row.UpdatedAt = meta.UpdatedAt
+
+	row.ConversationID = conversationID
+	row.Name = meta.Name
+	row.Description = meta.Description
+	row.ParentBranch = meta.ParentBranch
+	row.ForkedAt = meta.ForkedAt
+	row.ForkedFromItemID = meta.ForkedFromItemID
+	row.ItemCount = meta.ItemCount
+	return row
+}
+
+// EtoD converts database branch to domain branch metadata
+func (b *ConversationBranch) EtoD() conversation.BranchMetadata {
+	return conversation.BranchMetadata{
+		Name:             b.Name,
+		Description:      b.Description,
+		ParentBranch:     b.ParentBranch,
+		ForkedAt:         b.ForkedAt,
+		ForkedFromItemID: b.ForkedFromItemID,
+		ItemCount:        b.ItemCount,
+		CreatedAt:        b.CreatedAt,
+		UpdatedAt:        b.UpdatedAt,
+	}
+}
+
+// EtoD maps the conversation row onto the domain conversation (Entity to
+// Domain). Loaded branches become BranchMetadata entries keyed by branch
+// name, loaded items are grouped by branch (an empty branch name counts as
+// "MAIN"), and the legacy Items field receives the MAIN branch when it has
+// items. A nil IsPrivate is treated as false.
+func (c *Conversation) EtoD() *conversation.Conversation {
+	private := c.IsPrivate != nil && *c.IsPrivate
+
+	conv := &conversation.Conversation{
+		ID:                           c.ID,
+		PublicID:                     c.PublicID,
+		Object:                       c.Object,
+		Title:                        c.Title,
+		UserID:                       c.UserID,
+		ProjectID:                    c.ProjectID,
+		ProjectPublicID:              c.ProjectPublicID,
+		Status:                       c.Status,
+		ActiveBranch:                 c.ActiveBranch,
+		Referrer:                     c.Referrer,
+		Branches:                     make(map[string][]conversation.Item),
+		BranchMetadata:               make(map[string]conversation.BranchMetadata),
+		Metadata:                     map[string]string(c.Metadata),
+		IsPrivate:                    private,
+		InstructionVersion:           c.InstructionVersion,
+		EffectiveInstructionSnapshot: c.EffectiveInstructionSnapshot,
+		CreatedAt:                    c.CreatedAt,
+		UpdatedAt:                    c.UpdatedAt,
+	}
+
+	// Branch metadata, keyed by branch name.
+	for idx := range c.Branches {
+		branch := &c.Branches[idx]
+		conv.BranchMetadata[branch.Name] = branch.EtoD()
+	}
+
+	// Items grouped by branch, defaulting to MAIN when no branch is set.
+	for idx := range c.Items {
+		converted := c.Items[idx].EtoD()
+		key := converted.Branch
+		if key == "" {
+			key = "MAIN"
+		}
+		conv.Branches[key] = append(conv.Branches[key], *converted)
+	}
+
+	// Legacy Items field mirrors the MAIN branch for backward compatibility.
+	if mainItems, ok := conv.Branches["MAIN"]; ok {
+		conv.Items = mainItems
+	}
+
+	return conv
+}
+
+// NewSchemaConversationItem builds the database row for a domain item.
+// An empty branch is stored as "MAIN". Role, Status and Rating are stored as
+// string copies; IncompleteDetails is copied only when the domain value is set.
+func NewSchemaConversationItem(item *conversation.Item) *ConversationItem {
+	row := new(ConversationItem)
+	row.ID = item.ID
+	row.CreatedAt = item.CreatedAt
+
+	row.ConversationID = item.ConversationID
+	row.PublicID = item.PublicID
+	row.Object = item.Object
+	row.SequenceNumber = item.SequenceNumber
+	row.Type = item.Type
+	row.Content = JSONContent(item.Content)
+	row.IncompleteAt = item.IncompleteAt
+	row.CompletedAt = item.CompletedAt
+	row.ResponseID = item.ResponseID
+	row.RatedAt = item.RatedAt
+	row.RatingComment = item.RatingComment
+
+	// Default to MAIN if no branch is set.
+	row.Branch = item.Branch
+	if row.Branch == "" {
+		row.Branch = "MAIN"
+	}
+
+	// Typed pointers are flattened to string pointers for storage.
+	if item.Role != nil {
+		role := string(*item.Role)
+		row.Role = &role
+	}
+	if item.Status != nil {
+		status := string(*item.Status)
+		row.Status = &status
+	}
+	if item.Rating != nil {
+		rating := string(*item.Rating)
+		row.Rating = &rating
+	}
+
+	if item.IncompleteDetails != nil {
+		row.IncompleteDetails = JSONIncompleteDetails(*item.IncompleteDetails)
+	}
+
+	return row
+}
+
+// EtoD converts database schema to domain item (Entity to Domain)
+func (i *ConversationItem) EtoD() *conversation.Item {
+	item := &conversation.Item{
+		ID:             i.ID,
+		ConversationID: i.ConversationID,
+		PublicID:       i.PublicID,
+		Object:         i.Object,
+		Branch:         i.Branch,
+		SequenceNumber: i.SequenceNumber,
+		Type:           i.Type,
+		Content:        []conversation.Content(i.Content),
+		IncompleteAt:   i.IncompleteAt,
+		CompletedAt:    i.CompletedAt,
+		ResponseID:     i.ResponseID,
+		CreatedAt:      i.CreatedAt,
+	}
+
+	// Convert Role string pointer to ItemRole pointer
+	if i.Role != nil {
+		role := conversation.ItemRole(*i.Role)
+		item.Role = &role
+	}
+
+	// Convert Status string pointer to ItemStatus pointer
+	if i.Status != nil {
+		status := conversation.ItemStatus(*i.Status)
+		item.Status = &status
+	}
+
+	// Convert IncompleteDetails
+	if i.IncompleteDetails != (JSONIncompleteDetails{}) {
+		details := conversation.IncompleteDetails(i.IncompleteDetails)
+		item.IncompleteDetails = &details
+	}
+
+	// Convert Rating
+	if i.Rating != nil {
+		rating := conversation.ItemRating(*i.Rating)
+		item.Rating = &rating
+	}
+	item.RatedAt = i.RatedAt
+	item.RatingComment = i.RatingComment
+
+	return item
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/dbschema/embedded.go b/services/llm-api/internal/infrastructure/database/dbschema/embedded.go
similarity index 100%
rename from apps/jan-api-gateway/application/app/infrastructure/database/dbschema/embedded.go
rename to services/llm-api/internal/infrastructure/database/dbschema/embedded.go
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/model.go b/services/llm-api/internal/infrastructure/database/dbschema/model.go
new file mode 100644
index 00000000..46ef1ae4
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/model.go
@@ -0,0 +1,87 @@
+package dbschema
+
+import (
+	"encoding/json"
+	"time"
+
+	"gorm.io/datatypes"
+
+	"jan-server/services/llm-api/internal/domain"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+// init registers the Model schema for auto-migration at package load time.
+func init() {
+	database.RegisterSchemaForAutoMigrate(Model{})
+}
+
+// Model is the database schema for a model record. Capabilities holds a JSON
+// array of strings (see NewSchemaModel and EtoD); Active is a pointer and
+// EtoD treats nil as false.
+type Model struct {
+	ID           string         `gorm:"column:id;size:255;primaryKey"`
+	Provider     string         `gorm:"column:provider;size:255;not null"`
+	DisplayName  string         `gorm:"column:display_name;size:255;not null"`
+	Family       string         `gorm:"column:family;size:255"`
+	Capabilities datatypes.JSON `gorm:"column:capabilities;type:jsonb"`
+	Active       *bool          `gorm:"column:active;not null;default:true"`
+	CreatedAt    time.Time      `gorm:"column:created_at;not null"`
+	UpdatedAt    time.Time      `gorm:"column:updated_at;not null"`
+}
+
+// TableName returns the fixed table name "models" for this schema.
+func (Model) TableName() string {
+	return "models"
+}
+
+func NewSchemaModel(model *domain.Model) (*Model, error) {
+	if model == nil {
+		return nil, nil
+	}
+
+	var capabilities datatypes.JSON
+	if len(model.Capabilities) > 0 {
+		data, err := json.Marshal(model.Capabilities)
+		if err != nil {
+			return nil, err
+		}
+		capabilities = datatypes.JSON(data)
+	}
+
+	active := model.Active
+
+	return &Model{
+		ID:           model.ID,
+		Provider:     model.Provider,
+		DisplayName:  model.DisplayName,
+		Family:       model.Family,
+		Capabilities: capabilities,
+		Active:       &active,
+		CreatedAt:    model.CreatedAt,
+		UpdatedAt:    model.UpdatedAt,
+	}, nil
+}
+
+// EtoD maps the model row onto the domain model. A nil receiver yields
+// (nil, nil). Non-empty Capabilities are decoded as a JSON array of strings,
+// with decode failures returned as the error; a nil Active is treated as false.
+func (m *Model) EtoD() (*domain.Model, error) {
+	if m == nil {
+		return nil, nil
+	}
+
+	out := new(domain.Model)
+	if len(m.Capabilities) > 0 {
+		var decoded []string
+		if err := json.Unmarshal(m.Capabilities, &decoded); err != nil {
+			return nil, err
+		}
+		out.Capabilities = decoded
+	}
+
+	out.Active = m.Active != nil && *m.Active
+	out.ID = m.ID
+	out.Provider = m.Provider
+	out.DisplayName = m.DisplayName
+	out.Family = m.Family
+	out.CreatedAt = m.CreatedAt
+	out.UpdatedAt = m.UpdatedAt
+	return out, nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/model_catalog.go b/services/llm-api/internal/infrastructure/database/dbschema/model_catalog.go
new file mode 100644
index 00000000..7f18d028
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/model_catalog.go
@@ -0,0 +1,131 @@
+package dbschema
+
+import (
+	"encoding/json"
+
+	"gorm.io/datatypes"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+func init() {
+	database.RegisterSchemaForAutoMigrate(ModelCatalog{})
+}
+
+// ModelCatalog is the database schema for a model catalog entry.
+//
+// SupportedParameters, Architecture, Tags and Extras are stored as jsonb
+// columns; the first two are declared not null. The composite index
+// idx_model_catalog_status_active covers Status (priority 1) followed by
+// Active (priority 2).
+type ModelCatalog struct {
+	BaseModel
+	PublicID            string         `gorm:"size:64;not null;uniqueIndex"`
+	SupportedParameters datatypes.JSON `gorm:"type:jsonb;not null"`
+	Architecture        datatypes.JSON `gorm:"type:jsonb;not null"`
+	Tags                datatypes.JSON `gorm:"type:jsonb"`
+	Notes               *string        `gorm:"type:text"`
+	IsModerated         *bool          `gorm:"index"`
+	Active              *bool          `gorm:"default:true;index;index:idx_model_catalog_status_active,priority:2"`
+	// Status has a column-level default of 'init'.
+	Status              string         `gorm:"size:32;not null;default:'init';index;index:idx_model_catalog_status_active,priority:1"`
+	Extras              datatypes.JSON `gorm:"type:jsonb"`
+}
+
+// NewSchemaModelCatalog converts a domain catalog entry into its database
+// schema form.
+//
+// A nil input yields (nil, nil). SupportedParameters and Architecture are
+// always JSON-encoded; Tags and Extras are encoded only when non-empty and are
+// otherwise left nil. An empty Status is stored as ModelCatalogStatusInit. Any
+// encoding failure is returned to the caller.
+func NewSchemaModelCatalog(m *domainmodel.ModelCatalog) (*ModelCatalog, error) {
+	// Guard the nil case up front rather than panicking on the first field read.
+	if m == nil {
+		return nil, nil
+	}
+
+	status := string(m.Status)
+	if status == "" {
+		status = string(domainmodel.ModelCatalogStatusInit)
+	}
+
+	supportedParametersJSON, err := json.Marshal(m.SupportedParameters)
+	if err != nil {
+		return nil, err
+	}
+
+	architectureJSON, err := json.Marshal(m.Architecture)
+	if err != nil {
+		return nil, err
+	}
+
+	var tagsJSON datatypes.JSON
+	if len(m.Tags) > 0 {
+		data, err := json.Marshal(m.Tags)
+		if err != nil {
+			return nil, err
+		}
+		tagsJSON = datatypes.JSON(data)
+	}
+
+	var extrasJSON datatypes.JSON
+	if len(m.Extras) > 0 {
+		data, err := json.Marshal(m.Extras)
+		if err != nil {
+			return nil, err
+		}
+		extrasJSON = datatypes.JSON(data)
+	}
+
+	return &ModelCatalog{
+		BaseModel: BaseModel{
+			ID:        m.ID,
+			CreatedAt: m.CreatedAt,
+			UpdatedAt: m.UpdatedAt,
+		},
+		PublicID:            m.PublicID,
+		SupportedParameters: datatypes.JSON(supportedParametersJSON),
+		Architecture:        datatypes.JSON(architectureJSON),
+		Tags:                tagsJSON,
+		Notes:               m.Notes,
+		IsModerated:         m.IsModerated,
+		Active:              m.Active,
+		Status:              status,
+		Extras:              extrasJSON,
+	}, nil
+}
+
+// EtoD converts the schema row into a domain catalog entry.
+//
+// A nil receiver yields (nil, nil). Each non-empty jsonb column is decoded and
+// a decoding failure is returned; an empty column leaves the corresponding
+// domain field at its zero value. An empty Status is reported as
+// ModelCatalogStatusInit.
+func (m *ModelCatalog) EtoD() (*domainmodel.ModelCatalog, error) {
+	// Guard the nil case up front rather than panicking on the first field read.
+	if m == nil {
+		return nil, nil
+	}
+
+	var supportedParameters domainmodel.SupportedParameters
+	if len(m.SupportedParameters) > 0 {
+		if err := json.Unmarshal(m.SupportedParameters, &supportedParameters); err != nil {
+			return nil, err
+		}
+	}
+
+	var architecture domainmodel.Architecture
+	if len(m.Architecture) > 0 {
+		if err := json.Unmarshal(m.Architecture, &architecture); err != nil {
+			return nil, err
+		}
+	}
+
+	var tags []string
+	if len(m.Tags) > 0 {
+		if err := json.Unmarshal(m.Tags, &tags); err != nil {
+			return nil, err
+		}
+	}
+
+	var extras map[string]any
+	if len(m.Extras) > 0 {
+		if err := json.Unmarshal(m.Extras, &extras); err != nil {
+			return nil, err
+		}
+	}
+
+	// A row without a status is treated as still being in the initial state.
+	status := domainmodel.ModelCatalogStatus(m.Status)
+	if status == "" {
+		status = domainmodel.ModelCatalogStatusInit
+	}
+
+	return &domainmodel.ModelCatalog{
+		ID:                  m.ID,
+		PublicID:            m.PublicID,
+		SupportedParameters: supportedParameters,
+		Architecture:        architecture,
+		Tags:                tags,
+		Notes:               m.Notes,
+		IsModerated:         m.IsModerated,
+		Active:              m.Active,
+		Extras:              extras,
+		Status:              status,
+		CreatedAt:           m.CreatedAt,
+		UpdatedAt:           m.UpdatedAt,
+	}, nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/project.go b/services/llm-api/internal/infrastructure/database/dbschema/project.go
new file mode 100644
index 00000000..29ec25cc
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/project.go
@@ -0,0 +1,94 @@
+package dbschema
+
+import (
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/project"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+func init() {
+	database.RegisterSchemaForAutoMigrate(Project{})
+}
+
+// ===============================================
+// Project Schema
+// ===============================================
+
+// Project represents the database schema for projects.
+//
+// PublicID carries a unique index and UserID is indexed as idx_projects_user.
+// Instruction, ArchivedAt, DeletedAt and LastUsedAt are pointers with no
+// not-null constraint, so they may be NULL.
+//
+// NOTE(review): DeletedAt is a plain *time.Time rather than gorm.DeletedAt, so
+// gorm's automatic soft-delete filtering presumably does not apply to it —
+// confirm that queries filter on this column explicitly.
+type Project struct {
+	BaseModel
+	PublicID    string     `gorm:"uniqueIndex;size:64;not null"`
+	UserID      uint       `gorm:"index:idx_projects_user;not null"`
+	Name        string     `gorm:"size:255;not null"`
+	Instruction *string    `gorm:"type:text"`
+	Favorite    bool       `gorm:"not null;default:false"`
+	ArchivedAt  *time.Time `gorm:"index"`
+	DeletedAt   *time.Time `gorm:"index"`
+	LastUsedAt  *time.Time
+}
+
+// TableName specifies the table name for Project
+func (Project) TableName() string {
+	return "llm_api.projects"
+}
+
+// ===============================================
+// Conversion Methods
+// ===============================================
+
+// EtoD converts the database schema row to a domain project (Entity to
+// Domain).
+//
+// A nil receiver yields nil. Object is always set to the literal "project";
+// every other field is copied across unchanged.
+func (p *Project) EtoD() *project.Project {
+	// Guard the nil case up front rather than panicking on the first field read.
+	if p == nil {
+		return nil
+	}
+
+	return &project.Project{
+		ID:          p.ID,
+		PublicID:    p.PublicID,
+		Object:      "project",
+		UserID:      p.UserID,
+		Name:        p.Name,
+		Instruction: p.Instruction,
+		Favorite:    p.Favorite,
+		ArchivedAt:  p.ArchivedAt,
+		DeletedAt:   p.DeletedAt,
+		LastUsedAt:  p.LastUsedAt,
+		CreatedAt:   p.CreatedAt,
+		UpdatedAt:   p.UpdatedAt,
+	}
+}
+
+// ProjectDtoE converts a domain project to its database schema form (Domain
+// to Entity).
+//
+// It performs exactly the same conversion as NewSchemaProject and delegates to
+// it, so the two entry points cannot drift apart.
+func ProjectDtoE(p *project.Project) *Project {
+	return NewSchemaProject(p)
+}
+
+// NewSchemaProject creates a database schema row from a domain project.
+//
+// A nil input yields nil. ID, CreatedAt and UpdatedAt are placed in the
+// embedded BaseModel; the remaining fields are copied across unchanged. The
+// domain Object field is not persisted.
+func NewSchemaProject(p *project.Project) *Project {
+	// Guard the nil case up front rather than panicking on the first field read.
+	if p == nil {
+		return nil
+	}
+
+	return &Project{
+		BaseModel: BaseModel{
+			ID:        p.ID,
+			CreatedAt: p.CreatedAt,
+			UpdatedAt: p.UpdatedAt,
+		},
+		PublicID:    p.PublicID,
+		UserID:      p.UserID,
+		Name:        p.Name,
+		Instruction: p.Instruction,
+		Favorite:    p.Favorite,
+		ArchivedAt:  p.ArchivedAt,
+		DeletedAt:   p.DeletedAt,
+		LastUsedAt:  p.LastUsedAt,
+	}
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/provider.go b/services/llm-api/internal/infrastructure/database/dbschema/provider.go
new file mode 100644
index 00000000..e379d383
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/provider.go
@@ -0,0 +1,96 @@
+package dbschema
+
+import (
+	"encoding/json"
+	"time"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+
+	"gorm.io/datatypes"
+)
+
+func init() {
+	database.RegisterSchemaForAutoMigrate(Provider{})
+}
+
+// Provider is the database schema for a model provider.
+//
+// PublicID carries a unique index. The composite index
+// idx_provider_active_kind covers Active (priority 1) followed by Kind
+// (priority 2). Metadata is stored as a jsonb column.
+//
+// NOTE(review): IsModerated and Active are *bool even though their columns are
+// not null with defaults — presumably so an explicit false is written instead
+// of being replaced by the column default; confirm against gorm's handling of
+// zero values.
+type Provider struct {
+	BaseModel
+	PublicID        string         `gorm:"size:64;not null;uniqueIndex"`
+	DisplayName     string         `gorm:"size:255;not null"`
+	Kind            string         `gorm:"size:64;not null;index;index:idx_provider_active_kind,priority:2"`
+	BaseURL         string         `gorm:"size:512"`
+	EncryptedAPIKey string         `gorm:"type:text"`
+	APIKeyHint      *string        `gorm:"size:128"`
+	IsModerated     *bool          `gorm:"not null;default:false;index"`
+	Active          *bool          `gorm:"not null;default:true;index;index:idx_provider_active_kind,priority:1"`
+	Metadata        datatypes.JSON `gorm:"type:jsonb"`
+	LastSyncedAt    *time.Time     `gorm:"index"`
+}
+
+// NewSchemaProvider converts a domain provider into its database schema form.
+//
+// A nil input yields nil. Metadata is JSON-encoded only when non-empty. The
+// signature cannot report an error, so an encoding failure is logged and the
+// row is returned with Metadata left nil.
+func NewSchemaProvider(p *domainmodel.Provider) *Provider {
+	// Guard the nil case up front rather than panicking on the first field read.
+	if p == nil {
+		return nil
+	}
+
+	var metadataJSON datatypes.JSON
+	if len(p.Metadata) > 0 {
+		data, err := json.Marshal(p.Metadata)
+		if err != nil {
+			// Stay best-effort, but make the dropped metadata visible in the
+			// logs, the same way EtoD reports a failed unmarshal.
+			log := logger.GetLogger()
+			log.Error().Msgf("failed to marshal provider metadata for provider ID %d: %v", p.ID, err)
+		} else {
+			metadataJSON = datatypes.JSON(data)
+		}
+	}
+
+	// Copy the flags so the row points at its own values, not at the domain
+	// provider's fields.
+	isModerated := p.IsModerated
+	active := p.Active
+	return &Provider{
+		BaseModel: BaseModel{
+			ID:        p.ID,
+			CreatedAt: p.CreatedAt,
+			UpdatedAt: p.UpdatedAt,
+		},
+		PublicID:        p.PublicID,
+		DisplayName:     p.DisplayName,
+		Kind:            string(p.Kind),
+		BaseURL:         p.BaseURL,
+		EncryptedAPIKey: p.EncryptedAPIKey,
+		APIKeyHint:      p.APIKeyHint,
+		IsModerated:     &isModerated,
+		Active:          &active,
+		Metadata:        metadataJSON,
+		LastSyncedAt:    p.LastSyncedAt,
+	}
+}
+
+// EtoD maps the stored provider row onto a domain Provider.
+//
+// Metadata is decoded on a best-effort basis: when the jsonb payload cannot be
+// unmarshalled, the failure is logged and the domain value is still returned
+// with whatever was decoded. Nil IsModerated and Active pointers are reported
+// as false.
+func (p *Provider) EtoD() *domainmodel.Provider {
+	result := &domainmodel.Provider{
+		ID:              p.ID,
+		PublicID:        p.PublicID,
+		DisplayName:     p.DisplayName,
+		Kind:            domainmodel.ProviderKind(p.Kind),
+		BaseURL:         p.BaseURL,
+		EncryptedAPIKey: p.EncryptedAPIKey,
+		APIKeyHint:      p.APIKeyHint,
+		IsModerated:     p.IsModerated != nil && *p.IsModerated,
+		Active:          p.Active != nil && *p.Active,
+		LastSyncedAt:    p.LastSyncedAt,
+		CreatedAt:       p.CreatedAt,
+		UpdatedAt:       p.UpdatedAt,
+	}
+
+	if len(p.Metadata) == 0 {
+		return result
+	}
+
+	var decoded map[string]string
+	if err := json.Unmarshal(p.Metadata, &decoded); err != nil {
+		log := logger.GetLogger()
+		log.Error().Msgf("failed to unmarshal provider metadata for provider ID %d: %v", p.ID, err)
+	}
+	result.Metadata = decoded
+
+	return result
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/provider_model.go b/services/llm-api/internal/infrastructure/database/dbschema/provider_model.go
new file mode 100644
index 00000000..70a03657
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/provider_model.go
@@ -0,0 +1,148 @@
+package dbschema
+
+import (
+	"encoding/json"
+
+	"gorm.io/datatypes"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+func init() {
+	database.RegisterSchemaForAutoMigrate(ProviderModel{})
+}
+
+// ProviderModel is the database schema for a model offered by a provider.
+//
+// Indexes declared through the tags below:
+//   - PublicID has its own unique index.
+//   - ux_provider_model_public_id is unique over (ProviderID, ModelPublicID).
+//   - idx_provider_model_active covers (ProviderID, Active).
+//   - idx_provider_model_catalog_active covers
+//     (ProviderID, ModelCatalogID, Active).
+//
+// Pricing and TokenLimits are stored as jsonb; only Pricing is not null.
+// ModelCatalogID is a nullable reference column.
+type ProviderModel struct {
+	BaseModel
+	ProviderID              uint           `gorm:"not null;index;index:idx_provider_model_active,priority:1;index:idx_provider_model_catalog_active,priority:1;uniqueIndex:ux_provider_model_public_id,priority:1"`
+	PublicID                string         `gorm:"size:64;not null;uniqueIndex"`
+	Kind                    string         `gorm:"size:64;not null;index"`
+	ModelCatalogID          *uint          `gorm:"index;index:idx_provider_model_catalog_active,priority:2"`
+	ModelPublicID           string         `gorm:"size:128;not null;index;uniqueIndex:ux_provider_model_public_id,priority:2"`
+	ProviderOriginalModelID string         `gorm:"size:255;not null"`
+	DisplayName             string         `gorm:"size:255;not null"`
+	Pricing                 datatypes.JSON `gorm:"type:jsonb;not null"`
+	TokenLimits             datatypes.JSON `gorm:"type:jsonb"`
+	Family                  *string        `gorm:"size:128"`
+	// Capability flags; each column is not null and defaults to false.
+	SupportsImages          *bool          `gorm:"not null;default:false"`
+	SupportsEmbeddings      *bool          `gorm:"not null;default:false"`
+	SupportsReasoning       *bool          `gorm:"not null;default:false"`
+	SupportsAudio           *bool          `gorm:"not null;default:false"`
+	SupportsVideo           *bool          `gorm:"not null;default:false"`
+	Active                  *bool          `gorm:"not null;default:true;index;index:idx_provider_model_active,priority:2;index:idx_provider_model_catalog_active,priority:3"`
+}
+
+// NewSchemaProviderModel converts a domain provider model into its database
+// schema form.
+//
+// A nil input yields (nil, nil). Pricing is always JSON-encoded; TokenLimits
+// is encoded only when the domain pointer is non-nil and is otherwise left
+// nil. Any encoding failure is returned to the caller.
+func NewSchemaProviderModel(m *domainmodel.ProviderModel) (*ProviderModel, error) {
+	// Guard the nil case up front rather than panicking on the first field read.
+	if m == nil {
+		return nil, nil
+	}
+
+	pricingJSON, err := json.Marshal(m.Pricing)
+	if err != nil {
+		return nil, err
+	}
+
+	var tokenLimitsJSON datatypes.JSON
+	if m.TokenLimits != nil {
+		data, err := json.Marshal(m.TokenLimits)
+		if err != nil {
+			return nil, err
+		}
+		tokenLimitsJSON = datatypes.JSON(data)
+	}
+
+	// Copy the flags so the row points at its own values, not at the domain
+	// model's fields.
+	supportsImages := m.SupportsImages
+	supportsEmbeddings := m.SupportsEmbeddings
+	supportsReasoning := m.SupportsReasoning
+	supportsAudio := m.SupportsAudio
+	supportsVideo := m.SupportsVideo
+	active := m.Active
+
+	return &ProviderModel{
+		BaseModel: BaseModel{
+			ID:        m.ID,
+			CreatedAt: m.CreatedAt,
+			UpdatedAt: m.UpdatedAt,
+		},
+		ProviderID:              m.ProviderID,
+		PublicID:                m.PublicID,
+		Kind:                    string(m.Kind),
+		ModelCatalogID:          m.ModelCatalogID,
+		ModelPublicID:           m.ModelPublicID,
+		ProviderOriginalModelID: m.ProviderOriginalModelID,
+		DisplayName:             m.DisplayName,
+		Pricing:                 datatypes.JSON(pricingJSON),
+		TokenLimits:             tokenLimitsJSON,
+		Family:                  m.Family,
+		SupportsImages:          &supportsImages,
+		SupportsEmbeddings:      &supportsEmbeddings,
+		SupportsReasoning:       &supportsReasoning,
+		SupportsAudio:           &supportsAudio,
+		SupportsVideo:           &supportsVideo,
+		Active:                  &active,
+	}, nil
+}
+
+// EtoD converts a database provider model into its domain representation.
+//
+// A nil receiver yields (nil, nil). A non-empty Pricing or TokenLimits column
+// is decoded and a decoding failure is returned; an empty TokenLimits column
+// yields a nil domain pointer. Nil capability and Active pointers are
+// reported as false.
+func (m *ProviderModel) EtoD() (*domainmodel.ProviderModel, error) {
+	// Guard the nil case up front rather than panicking on the first field read.
+	if m == nil {
+		return nil, nil
+	}
+
+	var pricing domainmodel.Pricing
+	if len(m.Pricing) > 0 {
+		if err := json.Unmarshal(m.Pricing, &pricing); err != nil {
+			return nil, err
+		}
+	}
+
+	var tokenLimits *domainmodel.TokenLimits
+	if len(m.TokenLimits) > 0 {
+		var limits domainmodel.TokenLimits
+		if err := json.Unmarshal(m.TokenLimits, &limits); err != nil {
+			return nil, err
+		}
+		tokenLimits = &limits
+	}
+
+	// boolOrFalse dereferences an optional flag, treating nil as false.
+	boolOrFalse := func(flag *bool) bool {
+		return flag != nil && *flag
+	}
+
+	return &domainmodel.ProviderModel{
+		ID:                      m.ID,
+		ProviderID:              m.ProviderID,
+		PublicID:                m.PublicID,
+		Kind:                    domainmodel.ProviderKind(m.Kind),
+		ModelCatalogID:          m.ModelCatalogID,
+		ModelPublicID:           m.ModelPublicID,
+		ProviderOriginalModelID: m.ProviderOriginalModelID,
+		DisplayName:             m.DisplayName,
+		Pricing:                 pricing,
+		TokenLimits:             tokenLimits,
+		Family:                  m.Family,
+		SupportsImages:          boolOrFalse(m.SupportsImages),
+		SupportsEmbeddings:      boolOrFalse(m.SupportsEmbeddings),
+		SupportsReasoning:       boolOrFalse(m.SupportsReasoning),
+		SupportsAudio:           boolOrFalse(m.SupportsAudio),
+		SupportsVideo:           boolOrFalse(m.SupportsVideo),
+		Active:                  boolOrFalse(m.Active),
+		CreatedAt:               m.CreatedAt,
+		UpdatedAt:               m.UpdatedAt,
+	}, nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/user.go b/services/llm-api/internal/infrastructure/database/dbschema/user.go
new file mode 100644
index 00000000..f5eb64b7
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/user.go
@@ -0,0 +1,64 @@
+package dbschema
+
+import (
+	"jan-server/services/llm-api/internal/domain/user"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+func init() {
+	database.RegisterSchemaForAutoMigrate(User{})
+}
+
+// User represents the persisted user schema tied to an external identity provider.
+//
+// The pair (Issuer, Subject) is unique through the composite index
+// ux_users_issuer_subject. AuthProvider has a column default of 'keycloak'.
+// Username, Email, Name and Picture are pointers with no not-null constraint,
+// so they may be NULL.
+type User struct {
+	BaseModel
+	AuthProvider string  `gorm:"type:varchar(50);not null;default:'keycloak'"`
+	Issuer       string  `gorm:"type:varchar(255);not null;uniqueIndex:ux_users_issuer_subject"`
+	Subject      string  `gorm:"type:varchar(255);not null;uniqueIndex:ux_users_issuer_subject"`
+	Username     *string `gorm:"type:varchar(150)"`
+	Email        *string `gorm:"type:varchar(320)"`
+	Name         *string `gorm:"type:varchar(255)"`
+	Picture      *string `gorm:"type:varchar(512)"`
+}
+
+// NewSchemaUser builds the schema row for a domain user.
+//
+// A nil input yields nil. ID, CreatedAt and UpdatedAt are placed in the
+// embedded BaseModel; the identity and profile fields are copied unchanged.
+func NewSchemaUser(u *user.User) *User {
+	if u == nil {
+		return nil
+	}
+
+	base := BaseModel{
+		ID:        u.ID,
+		CreatedAt: u.CreatedAt,
+		UpdatedAt: u.UpdatedAt,
+	}
+
+	row := User{
+		BaseModel:    base,
+		AuthProvider: u.AuthProvider,
+		Issuer:       u.Issuer,
+		Subject:      u.Subject,
+		Username:     u.Username,
+		Email:        u.Email,
+		Name:         u.Name,
+		Picture:      u.Picture,
+	}
+	return &row
+}
+
+// EtoD maps the schema row back onto a domain user.
+//
+// A nil receiver yields nil; otherwise every field is copied unchanged.
+func (u *User) EtoD() *user.User {
+	if u == nil {
+		return nil
+	}
+
+	domainUser := user.User{
+		ID:           u.ID,
+		CreatedAt:    u.CreatedAt,
+		UpdatedAt:    u.UpdatedAt,
+		AuthProvider: u.AuthProvider,
+		Issuer:       u.Issuer,
+		Subject:      u.Subject,
+		Username:     u.Username,
+		Email:        u.Email,
+		Name:         u.Name,
+		Picture:      u.Picture,
+	}
+	return &domainUser
+}
diff --git a/services/llm-api/internal/infrastructure/database/dbschema/user_settings.go b/services/llm-api/internal/infrastructure/database/dbschema/user_settings.go
new file mode 100644
index 00000000..5fd22545
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/dbschema/user_settings.go
@@ -0,0 +1,114 @@
+package dbschema
+
+import (
+	"database/sql/driver"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/usersettings"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+)
+
+func init() {
+	database.RegisterSchemaForAutoMigrate(UserSettings{})
+}
+
+// UserSettings is the database schema for user_settings table.
+//
+// UserID carries the unique index ux_user_settings_user_id, so there is at
+// most one row per user. Unlike the other schemas in this package it declares
+// its own ID and timestamp columns instead of embedding BaseModel.
+type UserSettings struct {
+	ID     uint `gorm:"primaryKey"`
+	UserID uint `gorm:"not null;uniqueIndex:ux_user_settings_user_id"`
+
+	// Other Feature Toggles
+	EnableTrace bool `gorm:"not null;default:false"`
+	EnableTools bool `gorm:"not null;default:true"`
+
+	// JSONB Settings Groups, persisted through gorm's json serializer.
+	MemoryConfig     MemoryConfigJSON     `gorm:"type:jsonb;serializer:json;not null"`
+	ProfileSettings  ProfileSettingsJSON  `gorm:"type:jsonb;serializer:json;not null"`
+	AdvancedSettings AdvancedSettingsJSON `gorm:"type:jsonb;serializer:json;not null"`
+
+	// Legacy Preferences - flexible JSON (deprecated). Persisted through the
+	// JSONB type's own Value/Scan methods; the column defaults to '{}'.
+	Preferences JSONB `gorm:"type:jsonb;not null;default:'{}'"`
+
+	// Both timestamps default to now() at the column level.
+	CreatedAt time.Time `gorm:"not null;default:now()"`
+	UpdatedAt time.Time `gorm:"not null;default:now()"`
+}
+
+// TableName specifies the table name for UserSettings.
+func (UserSettings) TableName() string {
+	return "llm_api.user_settings"
+}
+
+// JSONB is a free-form JSON object (string keys, arbitrary values) used for
+// JSONB columns. Its Value and Scan methods implement driver.Valuer and
+// sql.Scanner respectively.
+type JSONB map[string]interface{}
+
+// Value implements driver.Valuer for JSONB.
+//
+// A nil map is written as the literal string "{}"; any other map is written
+// as its JSON encoding, and an encoding failure is returned.
+func (j JSONB) Value() (driver.Value, error) {
+	if j != nil {
+		return json.Marshal(j)
+	}
+	return "{}", nil
+}
+
+// Scan implements sql.Scanner interface for JSONB.
+func (j *JSONB) Scan(value interface{}) error {
+	if value == nil {
+		*j = make(map[string]interface{})
+		return nil
+	}
+
+	bytes, ok := value.([]byte)
+	if !ok {
+		return nil
+	}
+
+	result := make(map[string]interface{})
+	if err := json.Unmarshal(bytes, &result); err != nil {
+		return err
+	}
+
+	*j = result
+	return nil
+}
+
+// Schema-side types for the JSONB settings groups. These are distinct defined
+// types (not aliases) built on the corresponding usersettings types, so values
+// move between the two with an explicit type conversion.
+type MemoryConfigJSON usersettings.MemoryConfig
+type ProfileSettingsJSON usersettings.ProfileSettings
+type AdvancedSettingsJSON usersettings.AdvancedSettings
+
+// EtoD maps the schema row onto the domain UserSettings value.
+//
+// Scalar fields are copied unchanged; the three settings groups and
+// Preferences are converted back to their domain types with plain type
+// conversions.
+func (e *UserSettings) EtoD() *usersettings.UserSettings {
+	domainSettings := usersettings.UserSettings{
+		ID:               e.ID,
+		UserID:           e.UserID,
+		CreatedAt:        e.CreatedAt,
+		UpdatedAt:        e.UpdatedAt,
+		EnableTrace:      e.EnableTrace,
+		EnableTools:      e.EnableTools,
+		MemoryConfig:     usersettings.MemoryConfig(e.MemoryConfig),
+		ProfileSettings:  usersettings.ProfileSettings(e.ProfileSettings),
+		AdvancedSettings: usersettings.AdvancedSettings(e.AdvancedSettings),
+		Preferences:      map[string]interface{}(e.Preferences),
+	}
+	return &domainSettings
+}
+
+// NewSchemaUserSettings builds the schema row for a domain UserSettings value.
+//
+// Scalar fields are copied unchanged and the settings groups are converted to
+// their schema-side types. Nil Preferences are replaced by an empty map so the
+// row never carries a nil JSONB.
+func NewSchemaUserSettings(d *usersettings.UserSettings) *UserSettings {
+	row := &UserSettings{
+		ID:               d.ID,
+		UserID:           d.UserID,
+		EnableTrace:      d.EnableTrace,
+		EnableTools:      d.EnableTools,
+		MemoryConfig:     MemoryConfigJSON(d.MemoryConfig),
+		ProfileSettings:  ProfileSettingsJSON(d.ProfileSettings),
+		AdvancedSettings: AdvancedSettingsJSON(d.AdvancedSettings),
+		Preferences:      JSONB(d.Preferences),
+		CreatedAt:        d.CreatedAt,
+		UpdatedAt:        d.UpdatedAt,
+	}
+
+	if row.Preferences == nil {
+		row.Preferences = JSONB{}
+	}
+
+	return row
+}
diff --git a/services/llm-api/internal/infrastructure/database/gormgen/conversation_branches.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/conversation_branches.gen.go
new file mode 100644
index 00000000..ade8f98d
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/gormgen/conversation_branches.gen.go
@@ -0,0 +1,561 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+	"gorm.io/gorm/schema"
+
+	"gorm.io/gen"
+	"gorm.io/gen/field"
+
+	"gorm.io/plugin/dbresolver"
+
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+)
+
+func newConversationBranch(db *gorm.DB, opts ...gen.DOOption) conversationBranch {
+	_conversationBranch := conversationBranch{}
+
+	_conversationBranch.conversationBranchDo.UseDB(db, opts...)
+	_conversationBranch.conversationBranchDo.UseModel(&dbschema.ConversationBranch{})
+
+	tableName := _conversationBranch.conversationBranchDo.TableName()
+	_conversationBranch.ALL = field.NewAsterisk(tableName)
+	_conversationBranch.ID = field.NewUint(tableName, "id")
+	_conversationBranch.CreatedAt = field.NewTime(tableName, "created_at")
+	_conversationBranch.UpdatedAt = field.NewTime(tableName, "updated_at")
+	_conversationBranch.DeletedAt = field.NewField(tableName, "deleted_at")
+	_conversationBranch.ConversationID = field.NewUint(tableName, "conversation_id")
+	_conversationBranch.Name = field.NewString(tableName, "name")
+	_conversationBranch.Description = field.NewString(tableName, "description")
+	_conversationBranch.ParentBranch = field.NewString(tableName, "parent_branch")
+	_conversationBranch.ForkedAt = field.NewTime(tableName, "forked_at")
+	_conversationBranch.ForkedFromItemID = field.NewString(tableName, "forked_from_item_id")
+	_conversationBranch.ItemCount = field.NewInt(tableName, "item_count")
+	_conversationBranch.Conversation = conversationBranchBelongsToConversation{
+		db: db.Session(&gorm.Session{}),
+
+		RelationField: field.NewRelation("Conversation", "dbschema.Conversation"),
+		User: struct {
+			field.RelationField
+		}{
+			RelationField: field.NewRelation("Conversation.User", "dbschema.User"),
+		},
+		Items: struct {
+			field.RelationField
+			Conversation struct {
+				field.RelationField
+			}
+		}{
+			RelationField: field.NewRelation("Conversation.Items", "dbschema.ConversationItem"),
+			Conversation: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Conversation.Items.Conversation", "dbschema.Conversation"),
+			},
+		},
+		Branches: struct {
+			field.RelationField
+			Conversation struct {
+				field.RelationField
+			}
+		}{
+			RelationField: field.NewRelation("Conversation.Branches", "dbschema.ConversationBranch"),
+			Conversation: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Conversation.Branches.Conversation", "dbschema.Conversation"),
+			},
+		},
+	}
+
+	_conversationBranch.fillFieldMap()
+
+	return _conversationBranch
+}
+
+type conversationBranch struct {
+	conversationBranchDo
+
+	ALL              field.Asterisk
+	ID               field.Uint
+	CreatedAt        field.Time
+	UpdatedAt        field.Time
+	DeletedAt        field.Field
+	ConversationID   field.Uint
+	Name             field.String
+	Description      field.String
+	ParentBranch     field.String
+	ForkedAt         field.Time
+	ForkedFromItemID field.String
+	ItemCount        field.Int
+	Conversation     conversationBranchBelongsToConversation
+
+	fieldMap map[string]field.Expr
+}
+
+func (c conversationBranch) Table(newTableName string) *conversationBranch {
+	c.conversationBranchDo.UseTable(newTableName)
+	return c.updateTableName(newTableName)
+}
+
+func (c conversationBranch) As(alias string) *conversationBranch {
+	c.conversationBranchDo.DO = *(c.conversationBranchDo.As(alias).(*gen.DO))
+	return c.updateTableName(alias)
+}
+
+func (c *conversationBranch) updateTableName(table string) *conversationBranch {
+	c.ALL = field.NewAsterisk(table)
+	c.ID = field.NewUint(table, "id")
+	c.CreatedAt = field.NewTime(table, "created_at")
+	c.UpdatedAt = field.NewTime(table, "updated_at")
+	c.DeletedAt = field.NewField(table, "deleted_at")
+	c.ConversationID = field.NewUint(table, "conversation_id")
+	c.Name = field.NewString(table, "name")
+	c.Description = field.NewString(table, "description")
+	c.ParentBranch = field.NewString(table, "parent_branch")
+	c.ForkedAt = field.NewTime(table, "forked_at")
+	c.ForkedFromItemID = field.NewString(table, "forked_from_item_id")
+	c.ItemCount = field.NewInt(table, "item_count")
+
+	c.fillFieldMap()
+
+	return c
+}
+
+func (c *conversationBranch) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
+	_f, ok := c.fieldMap[fieldName]
+	if !ok || _f == nil {
+		return nil, false
+	}
+	_oe, ok := _f.(field.OrderExpr)
+	return _oe, ok
+}
+
+func (c *conversationBranch) fillFieldMap() {
+	c.fieldMap = make(map[string]field.Expr, 12)
+	c.fieldMap["id"] = c.ID
+	c.fieldMap["created_at"] = c.CreatedAt
+	c.fieldMap["updated_at"] = c.UpdatedAt
+	c.fieldMap["deleted_at"] = c.DeletedAt
+	c.fieldMap["conversation_id"] = c.ConversationID
+	c.fieldMap["name"] = c.Name
+	c.fieldMap["description"] = c.Description
+	c.fieldMap["parent_branch"] = c.ParentBranch
+	c.fieldMap["forked_at"] = c.ForkedAt
+	c.fieldMap["forked_from_item_id"] = c.ForkedFromItemID
+	c.fieldMap["item_count"] = c.ItemCount
+
+}
+
+func (c conversationBranch) clone(db *gorm.DB) conversationBranch {
+	c.conversationBranchDo.ReplaceConnPool(db.Statement.ConnPool)
+	c.Conversation.db = db.Session(&gorm.Session{Initialized: true})
+	c.Conversation.db.Statement.ConnPool = db.Statement.ConnPool
+	return c
+}
+
+func (c conversationBranch) replaceDB(db *gorm.DB) conversationBranch {
+	c.conversationBranchDo.ReplaceDB(db)
+	c.Conversation.db = db.Session(&gorm.Session{})
+	return c
+}
+
+type conversationBranchBelongsToConversation struct {
+	db *gorm.DB
+
+	field.RelationField
+
+	User struct {
+		field.RelationField
+	}
+	Items struct {
+		field.RelationField
+		Conversation struct {
+			field.RelationField
+		}
+	}
+	Branches struct {
+		field.RelationField
+		Conversation struct {
+			field.RelationField
+		}
+	}
+}
+
+func (a conversationBranchBelongsToConversation) Where(conds ...field.Expr) *conversationBranchBelongsToConversation {
+	if len(conds) == 0 {
+		return &a
+	}
+
+	exprs := make([]clause.Expression, 0, len(conds))
+	for _, cond := range conds {
+		exprs = append(exprs, cond.BeCond().(clause.Expression))
+	}
+	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
+	return &a
+}
+
+func (a conversationBranchBelongsToConversation) WithContext(ctx context.Context) *conversationBranchBelongsToConversation {
+	a.db = a.db.WithContext(ctx)
+	return &a
+}
+
+func (a conversationBranchBelongsToConversation) Session(session *gorm.Session) *conversationBranchBelongsToConversation {
+	a.db = a.db.Session(session)
+	return &a
+}
+
+func (a conversationBranchBelongsToConversation) Model(m *dbschema.ConversationBranch) *conversationBranchBelongsToConversationTx {
+	return &conversationBranchBelongsToConversationTx{a.db.Model(m).Association(a.Name())}
+}
+
+func (a conversationBranchBelongsToConversation) Unscoped() *conversationBranchBelongsToConversation {
+	a.db = a.db.Unscoped()
+	return &a
+}
+
+type conversationBranchBelongsToConversationTx struct{ tx *gorm.Association }
+
+func (a conversationBranchBelongsToConversationTx) Find() (result *dbschema.Conversation, err error) {
+	return result, a.tx.Find(&result)
+}
+
+func (a conversationBranchBelongsToConversationTx) Append(values ...*dbschema.Conversation) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Append(targetValues...)
+}
+
+func (a conversationBranchBelongsToConversationTx) Replace(values ...*dbschema.Conversation) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Replace(targetValues...)
+}
+
+func (a conversationBranchBelongsToConversationTx) Delete(values ...*dbschema.Conversation) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Delete(targetValues...)
+}
+
+func (a conversationBranchBelongsToConversationTx) Clear() error {
+	return a.tx.Clear()
+}
+
+func (a conversationBranchBelongsToConversationTx) Count() int64 {
+	return a.tx.Count()
+}
+
+func (a conversationBranchBelongsToConversationTx) Unscoped() *conversationBranchBelongsToConversationTx {
+	a.tx = a.tx.Unscoped()
+	return &a
+}
+
+type conversationBranchDo struct{ gen.DO }
+
+type IConversationBranchDo interface {
+	gen.SubQuery
+	Debug() IConversationBranchDo
+	WithContext(ctx context.Context) IConversationBranchDo
+	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
+	ReplaceDB(db *gorm.DB)
+	ReadDB() IConversationBranchDo
+	WriteDB() IConversationBranchDo
+	As(alias string) gen.Dao
+	Session(config *gorm.Session) IConversationBranchDo
+	Columns(cols ...field.Expr) gen.Columns
+	Clauses(conds ...clause.Expression) IConversationBranchDo
+	Not(conds ...gen.Condition) IConversationBranchDo
+	Or(conds ...gen.Condition) IConversationBranchDo
+	Select(conds ...field.Expr) IConversationBranchDo
+	Where(conds ...gen.Condition) IConversationBranchDo
+	Order(conds ...field.Expr) IConversationBranchDo
+	Distinct(cols ...field.Expr) IConversationBranchDo
+	Omit(cols ...field.Expr) IConversationBranchDo
+	Join(table schema.Tabler, on ...field.Expr) IConversationBranchDo
+	LeftJoin(table schema.Tabler, on ...field.Expr) IConversationBranchDo
+	RightJoin(table schema.Tabler, on ...field.Expr) IConversationBranchDo
+	Group(cols ...field.Expr) IConversationBranchDo
+	Having(conds ...gen.Condition) IConversationBranchDo
+	Limit(limit int) IConversationBranchDo
+	Offset(offset int) IConversationBranchDo
+	Count() (count int64, err error)
+	Scopes(funcs ...func(gen.Dao) gen.Dao) IConversationBranchDo
+	Unscoped() IConversationBranchDo
+	Create(values ...*dbschema.ConversationBranch) error
+	CreateInBatches(values []*dbschema.ConversationBranch, batchSize int) error
+	Save(values ...*dbschema.ConversationBranch) error
+	First() (*dbschema.ConversationBranch, error)
+	Take() (*dbschema.ConversationBranch, error)
+	Last() (*dbschema.ConversationBranch, error)
+	Find() ([]*dbschema.ConversationBranch, error)
+	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ConversationBranch, err error)
+	FindInBatches(result *[]*dbschema.ConversationBranch, batchSize int, fc func(tx gen.Dao, batch int) error) error
+	Pluck(column field.Expr, dest interface{}) error
+	Delete(...*dbschema.ConversationBranch) (info gen.ResultInfo, err error)
+	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	Updates(value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
+	UpdateFrom(q gen.SubQuery) gen.Dao
+	Attrs(attrs ...field.AssignExpr) IConversationBranchDo
+	Assign(attrs ...field.AssignExpr) IConversationBranchDo
+	Joins(fields ...field.RelationField) IConversationBranchDo
+	Preload(fields ...field.RelationField) IConversationBranchDo
+	FirstOrInit() (*dbschema.ConversationBranch, error)
+	FirstOrCreate() (*dbschema.ConversationBranch, error)
+	FindByPage(offset int, limit int) (result []*dbschema.ConversationBranch, count int64, err error)
+	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
+	Rows() (*sql.Rows, error)
+	Row() *sql.Row
+	Scan(result interface{}) (err error)
+	Returning(value interface{}, columns ...string) IConversationBranchDo
+	UnderlyingDB() *gorm.DB
+	schema.Tabler
+}
+
+func (c conversationBranchDo) Debug() IConversationBranchDo { // forwards to gen.DO.Debug; withDO rewraps the result in this receiver copy
+	return c.withDO(c.DO.Debug())
+}
+
+func (c conversationBranchDo) WithContext(ctx context.Context) IConversationBranchDo { // forwards ctx to gen.DO.WithContext and rewraps the result
+	return c.withDO(c.DO.WithContext(ctx))
+}
+
+func (c conversationBranchDo) ReadDB() IConversationBranchDo { // shorthand for Clauses(dbresolver.Read)
+	return c.Clauses(dbresolver.Read)
+}
+
+func (c conversationBranchDo) WriteDB() IConversationBranchDo { // shorthand for Clauses(dbresolver.Write)
+	return c.Clauses(dbresolver.Write)
+}
+
+func (c conversationBranchDo) Session(config *gorm.Session) IConversationBranchDo { // forwards config to gen.DO.Session and rewraps the result
+	return c.withDO(c.DO.Session(config))
+}
+
+func (c conversationBranchDo) Clauses(conds ...clause.Expression) IConversationBranchDo { // forwards conds to gen.DO.Clauses and rewraps the result
+	return c.withDO(c.DO.Clauses(conds...))
+}
+
+func (c conversationBranchDo) Returning(value interface{}, columns ...string) IConversationBranchDo { // forwards value and columns to gen.DO.Returning and rewraps the result
+	return c.withDO(c.DO.Returning(value, columns...))
+}
+
+func (c conversationBranchDo) Not(conds ...gen.Condition) IConversationBranchDo { // forwards conds to gen.DO.Not and rewraps the result
+	return c.withDO(c.DO.Not(conds...))
+}
+
+func (c conversationBranchDo) Or(conds ...gen.Condition) IConversationBranchDo { // forwards conds to gen.DO.Or and rewraps the result
+	return c.withDO(c.DO.Or(conds...))
+}
+
+func (c conversationBranchDo) Select(conds ...field.Expr) IConversationBranchDo { // forwards the column expressions to gen.DO.Select and rewraps the result
+	return c.withDO(c.DO.Select(conds...))
+}
+
+func (c conversationBranchDo) Where(conds ...gen.Condition) IConversationBranchDo { // forwards conds to gen.DO.Where and rewraps the result
+	return c.withDO(c.DO.Where(conds...))
+}
+
+func (c conversationBranchDo) Order(conds ...field.Expr) IConversationBranchDo { // forwards the ordering expressions to gen.DO.Order and rewraps the result
+	return c.withDO(c.DO.Order(conds...))
+}
+
+func (c conversationBranchDo) Distinct(cols ...field.Expr) IConversationBranchDo { // forwards cols to gen.DO.Distinct and rewraps the result
+	return c.withDO(c.DO.Distinct(cols...))
+}
+
+func (c conversationBranchDo) Omit(cols ...field.Expr) IConversationBranchDo { // forwards cols to gen.DO.Omit and rewraps the result
+	return c.withDO(c.DO.Omit(cols...))
+}
+
+func (c conversationBranchDo) Join(table schema.Tabler, on ...field.Expr) IConversationBranchDo { // forwards table and the on expressions to gen.DO.Join and rewraps the result
+	return c.withDO(c.DO.Join(table, on...))
+}
+
+func (c conversationBranchDo) LeftJoin(table schema.Tabler, on ...field.Expr) IConversationBranchDo { // forwards table and the on expressions to gen.DO.LeftJoin and rewraps the result
+	return c.withDO(c.DO.LeftJoin(table, on...))
+}
+
+func (c conversationBranchDo) RightJoin(table schema.Tabler, on ...field.Expr) IConversationBranchDo { // forwards table and the on expressions to gen.DO.RightJoin and rewraps the result
+	return c.withDO(c.DO.RightJoin(table, on...))
+}
+
+func (c conversationBranchDo) Group(cols ...field.Expr) IConversationBranchDo { // forwards cols to gen.DO.Group and rewraps the result
+	return c.withDO(c.DO.Group(cols...))
+}
+
+func (c conversationBranchDo) Having(conds ...gen.Condition) IConversationBranchDo { // forwards conds to gen.DO.Having and rewraps the result
+	return c.withDO(c.DO.Having(conds...))
+}
+
+func (c conversationBranchDo) Limit(limit int) IConversationBranchDo { // forwards limit to gen.DO.Limit and rewraps the result
+	return c.withDO(c.DO.Limit(limit))
+}
+
+func (c conversationBranchDo) Offset(offset int) IConversationBranchDo { // forwards offset to gen.DO.Offset and rewraps the result
+	return c.withDO(c.DO.Offset(offset))
+}
+
+func (c conversationBranchDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IConversationBranchDo { // forwards the scope functions to gen.DO.Scopes and rewraps the result
+	return c.withDO(c.DO.Scopes(funcs...))
+}
+
+func (c conversationBranchDo) Unscoped() IConversationBranchDo { // forwards to gen.DO.Unscoped and rewraps the result
+	return c.withDO(c.DO.Unscoped())
+}
+
+func (c conversationBranchDo) Create(values ...*dbschema.ConversationBranch) error {
+	if len(values) > 0 { // gen.DO.Create is only called when there is at least one value
+		return c.DO.Create(values)
+	}
+	return nil
+}
+
+func (c conversationBranchDo) CreateInBatches(values []*dbschema.ConversationBranch, batchSize int) error { // passes values and batchSize straight through to gen.DO.CreateInBatches
+	return c.DO.CreateInBatches(values, batchSize)
+}
+
+// Save differs from GORM's own Save: per the generator's note it is equivalent to
+// db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values). A call with no values returns nil.
+func (c conversationBranchDo) Save(values ...*dbschema.ConversationBranch) error {
+	if len(values) > 0 {
+		return c.DO.Save(values)
+	}
+	return nil
+}
+
+func (c conversationBranchDo) First() (*dbschema.ConversationBranch, error) {
+	row, err := c.DO.First()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationBranch), nil
+}
+
+func (c conversationBranchDo) Take() (*dbschema.ConversationBranch, error) {
+	row, err := c.DO.Take()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationBranch), nil
+}
+
+func (c conversationBranchDo) Last() (*dbschema.ConversationBranch, error) {
+	row, err := c.DO.Last()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationBranch), nil
+}
+
+func (c conversationBranchDo) Find() ([]*dbschema.ConversationBranch, error) { // typed wrapper around gen.DO.Find
+	result, err := c.DO.Find()
+	return result.([]*dbschema.ConversationBranch), err // unchecked assertion, evaluated even when err is non-nil
+}
+
+func (c conversationBranchDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ConversationBranch, err error) {
+	buf := make([]*dbschema.ConversationBranch, 0, batchSize) // scratch slice handed to gen.DO.FindInBatches
+	err = c.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
+		defer func() { results = append(results, buf...) }() // runs after fc returns, so buf is appended even when fc fails
+		return fc(tx, batch)
+	})
+	return results, err
+}
+
+func (c conversationBranchDo) FindInBatches(result *[]*dbschema.ConversationBranch, batchSize int, fc func(tx gen.Dao, batch int) error) error { // passes all three arguments straight through to gen.DO.FindInBatches
+	return c.DO.FindInBatches(result, batchSize, fc)
+}
+
+func (c conversationBranchDo) Attrs(attrs ...field.AssignExpr) IConversationBranchDo { // forwards attrs to gen.DO.Attrs and rewraps the result
+	return c.withDO(c.DO.Attrs(attrs...))
+}
+
+func (c conversationBranchDo) Assign(attrs ...field.AssignExpr) IConversationBranchDo { // forwards attrs to gen.DO.Assign and rewraps the result
+	return c.withDO(c.DO.Assign(attrs...))
+}
+
+func (c conversationBranchDo) Joins(fields ...field.RelationField) IConversationBranchDo {
+	for i := range fields { // relations are applied one at a time, in argument order
+		c = *c.withDO(c.DO.Joins(fields[i]))
+	}
+	return &c
+}
+
+func (c conversationBranchDo) Preload(fields ...field.RelationField) IConversationBranchDo {
+	for i := range fields { // relations are applied one at a time, in argument order
+		c = *c.withDO(c.DO.Preload(fields[i]))
+	}
+	return &c
+}
+
+func (c conversationBranchDo) FirstOrInit() (*dbschema.ConversationBranch, error) {
+	row, err := c.DO.FirstOrInit()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationBranch), nil
+}
+
+func (c conversationBranchDo) FirstOrCreate() (*dbschema.ConversationBranch, error) {
+	row, err := c.DO.FirstOrCreate()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationBranch), nil
+}
+
+func (c conversationBranchDo) FindByPage(offset int, limit int) (result []*dbschema.ConversationBranch, count int64, err error) {
+	if result, err = c.Offset(offset).Limit(limit).Find(); err != nil {
+		return result, count, err
+	}
+	// A non-empty page shorter than limit is treated as the final page, so the
+	// total is derived as offset plus the page size and Count is not called.
+	if pageLen := len(result); limit > 0 && pageLen > 0 && pageLen < limit {
+		return result, int64(pageLen + offset), nil
+	}
+
+	// Otherwise count with offset and limit both reset to -1.
+	count, err = c.Offset(-1).Limit(-1).Count()
+	return result, count, err
+}
+
+func (c conversationBranchDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
+	if count, err = c.Count(); err != nil {
+		return count, err
+	}
+	// The count above is taken before offset and limit are applied; the page
+	// itself is then scanned into result.
+	err = c.Offset(offset).Limit(limit).Scan(result)
+	return count, err
+}
+
+func (c conversationBranchDo) Scan(result interface{}) (err error) { // forwards result to gen.DO.Scan unchanged
+	return c.DO.Scan(result)
+}
+
+func (c conversationBranchDo) Delete(models ...*dbschema.ConversationBranch) (result gen.ResultInfo, err error) { // hands the models slice to gen.DO.Delete as a single argument
+	return c.DO.Delete(models)
+}
+
+func (c *conversationBranchDo) withDO(do gen.Dao) *conversationBranchDo { // overwrites c.DO in place and returns the same pointer
+	c.DO = *do.(*gen.DO) // unchecked assertion: panics if do is not a *gen.DO
+	return c
+}
diff --git a/services/llm-api/internal/infrastructure/database/gormgen/conversation_items.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/conversation_items.gen.go
new file mode 100644
index 00000000..1d9ccd3a
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/gormgen/conversation_items.gen.go
@@ -0,0 +1,597 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+	"gorm.io/gorm/schema"
+
+	"gorm.io/gen"
+	"gorm.io/gen/field"
+
+	"gorm.io/plugin/dbresolver"
+
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+)
+
+func newConversationItem(db *gorm.DB, opts ...gen.DOOption) conversationItem { // builds the query helper for dbschema.ConversationItem on top of db
+	_conversationItem := conversationItem{}
+
+	_conversationItem.conversationItemDo.UseDB(db, opts...)
+	_conversationItem.conversationItemDo.UseModel(&dbschema.ConversationItem{})
+
+	tableName := _conversationItem.conversationItemDo.TableName() // every column expression below is created against this table name
+	_conversationItem.ALL = field.NewAsterisk(tableName)
+	_conversationItem.ID = field.NewUint(tableName, "id")
+	_conversationItem.CreatedAt = field.NewTime(tableName, "created_at")
+	_conversationItem.UpdatedAt = field.NewTime(tableName, "updated_at")
+	_conversationItem.DeletedAt = field.NewField(tableName, "deleted_at")
+	_conversationItem.ConversationID = field.NewUint(tableName, "conversation_id")
+	_conversationItem.PublicID = field.NewString(tableName, "public_id")
+	_conversationItem.Object = field.NewString(tableName, "object")
+	_conversationItem.Branch = field.NewString(tableName, "branch")
+	_conversationItem.SequenceNumber = field.NewInt(tableName, "sequence_number")
+	_conversationItem.Type = field.NewString(tableName, "type")
+	_conversationItem.Role = field.NewString(tableName, "role")
+	_conversationItem.Content = field.NewField(tableName, "content")
+	_conversationItem.Status = field.NewString(tableName, "status")
+	_conversationItem.IncompleteAt = field.NewTime(tableName, "incomplete_at")
+	_conversationItem.IncompleteDetails = field.NewField(tableName, "incomplete_details")
+	_conversationItem.CompletedAt = field.NewTime(tableName, "completed_at")
+	_conversationItem.ResponseID = field.NewUint(tableName, "response_id")
+	_conversationItem.Rating = field.NewString(tableName, "rating")
+	_conversationItem.RatedAt = field.NewTime(tableName, "rated_at")
+	_conversationItem.RatingComment = field.NewString(tableName, "rating_comment")
+	_conversationItem.Conversation = conversationItemBelongsToConversation{ // relation metadata, including the nested relation paths reachable through Conversation
+		db: db.Session(&gorm.Session{}),
+
+		RelationField: field.NewRelation("Conversation", "dbschema.Conversation"),
+		User: struct {
+			field.RelationField
+		}{
+			RelationField: field.NewRelation("Conversation.User", "dbschema.User"),
+		},
+		Items: struct {
+			field.RelationField
+			Conversation struct {
+				field.RelationField
+			}
+		}{
+			RelationField: field.NewRelation("Conversation.Items", "dbschema.ConversationItem"),
+			Conversation: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Conversation.Items.Conversation", "dbschema.Conversation"),
+			},
+		},
+		Branches: struct {
+			field.RelationField
+			Conversation struct {
+				field.RelationField
+			}
+		}{
+			RelationField: field.NewRelation("Conversation.Branches", "dbschema.ConversationBranch"),
+			Conversation: struct {
+				field.RelationField
+			}{
+				RelationField: field.NewRelation("Conversation.Branches.Conversation", "dbschema.Conversation"),
+			},
+		},
+	}
+
+	_conversationItem.fillFieldMap() // index the column expressions by column name for GetFieldByName
+
+	return _conversationItem
+}
+
+type conversationItem struct { // query helper: the embedded DO plus one typed expression per column
+	conversationItemDo
+
+	ALL               field.Asterisk // asterisk expression for the table
+	ID                field.Uint
+	CreatedAt         field.Time
+	UpdatedAt         field.Time
+	DeletedAt         field.Field
+	ConversationID    field.Uint
+	PublicID          field.String
+	Object            field.String
+	Branch            field.String
+	SequenceNumber    field.Int
+	Type              field.String
+	Role              field.String
+	Content           field.Field
+	Status            field.String
+	IncompleteAt      field.Time
+	IncompleteDetails field.Field
+	CompletedAt       field.Time
+	ResponseID        field.Uint
+	Rating            field.String
+	RatedAt           field.Time
+	RatingComment     field.String
+	Conversation      conversationItemBelongsToConversation // relation helper; not part of fieldMap
+
+	fieldMap map[string]field.Expr // column name -> expression, rebuilt by fillFieldMap
+}
+
+func (c conversationItem) Table(newTableName string) *conversationItem { // value receiver: changes are applied to a copy of c, which is returned by pointer
+	c.conversationItemDo.UseTable(newTableName)
+	return c.updateTableName(newTableName)
+}
+
+func (c conversationItem) As(alias string) *conversationItem { // value receiver: changes are applied to a copy of c, which is returned by pointer
+	c.conversationItemDo.DO = *(c.conversationItemDo.As(alias).(*gen.DO)) // replaces the copy's DO with the aliased one (unchecked *gen.DO assertion)
+	return c.updateTableName(alias)
+}
+
+func (c *conversationItem) updateTableName(table string) *conversationItem { // recreates every column expression against table; the Conversation relation is not touched
+	c.ALL = field.NewAsterisk(table)
+	c.ID = field.NewUint(table, "id")
+	c.CreatedAt = field.NewTime(table, "created_at")
+	c.UpdatedAt = field.NewTime(table, "updated_at")
+	c.DeletedAt = field.NewField(table, "deleted_at")
+	c.ConversationID = field.NewUint(table, "conversation_id")
+	c.PublicID = field.NewString(table, "public_id")
+	c.Object = field.NewString(table, "object")
+	c.Branch = field.NewString(table, "branch")
+	c.SequenceNumber = field.NewInt(table, "sequence_number")
+	c.Type = field.NewString(table, "type")
+	c.Role = field.NewString(table, "role")
+	c.Content = field.NewField(table, "content")
+	c.Status = field.NewString(table, "status")
+	c.IncompleteAt = field.NewTime(table, "incomplete_at")
+	c.IncompleteDetails = field.NewField(table, "incomplete_details")
+	c.CompletedAt = field.NewTime(table, "completed_at")
+	c.ResponseID = field.NewUint(table, "response_id")
+	c.Rating = field.NewString(table, "rating")
+	c.RatedAt = field.NewTime(table, "rated_at")
+	c.RatingComment = field.NewString(table, "rating_comment")
+
+	c.fillFieldMap() // the map holds the old expressions, so it is rebuilt from the new ones
+
+	return c
+}
+
+func (c *conversationItem) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
+	expr := c.fieldMap[fieldName] // a missing key yields a nil interface, same as a stored nil
+	if expr == nil {
+		return nil, false
+	}
+	orderable, isOrderExpr := expr.(field.OrderExpr)
+	return orderable, isOrderExpr
+}
+
+func (c *conversationItem) fillFieldMap() { // replaces fieldMap with a fresh map keyed by column name
+	c.fieldMap = make(map[string]field.Expr, 21) // 21 is only a capacity hint; 20 column entries are stored below
+	c.fieldMap["id"] = c.ID
+	c.fieldMap["created_at"] = c.CreatedAt
+	c.fieldMap["updated_at"] = c.UpdatedAt
+	c.fieldMap["deleted_at"] = c.DeletedAt
+	c.fieldMap["conversation_id"] = c.ConversationID
+	c.fieldMap["public_id"] = c.PublicID
+	c.fieldMap["object"] = c.Object
+	c.fieldMap["branch"] = c.Branch
+	c.fieldMap["sequence_number"] = c.SequenceNumber
+	c.fieldMap["type"] = c.Type
+	c.fieldMap["role"] = c.Role
+	c.fieldMap["content"] = c.Content
+	c.fieldMap["status"] = c.Status
+	c.fieldMap["incomplete_at"] = c.IncompleteAt
+	c.fieldMap["incomplete_details"] = c.IncompleteDetails
+	c.fieldMap["completed_at"] = c.CompletedAt
+	c.fieldMap["response_id"] = c.ResponseID
+	c.fieldMap["rating"] = c.Rating
+	c.fieldMap["rated_at"] = c.RatedAt
+	c.fieldMap["rating_comment"] = c.RatingComment
+
+}
+
+func (c conversationItem) clone(db *gorm.DB) conversationItem { // returns a copy of c whose DO and relation use db's connection pool
+	c.conversationItemDo.ReplaceConnPool(db.Statement.ConnPool)
+	c.Conversation.db = db.Session(&gorm.Session{Initialized: true}) // the relation gets its own session derived from db
+	c.Conversation.db.Statement.ConnPool = db.Statement.ConnPool
+	return c
+}
+
+func (c conversationItem) replaceDB(db *gorm.DB) conversationItem { // returns a copy of c whose DO and relation are rebound to db
+	c.conversationItemDo.ReplaceDB(db)
+	c.Conversation.db = db.Session(&gorm.Session{}) // the relation gets its own session derived from db
+	return c
+}
+
+type conversationItemBelongsToConversation struct { // relation helper for ConversationItem.Conversation
+	db *gorm.DB // session used by Model to open the association
+
+	field.RelationField
+
+	User struct { // nested path "Conversation.User"
+		field.RelationField
+	}
+	Items struct { // nested path "Conversation.Items"
+		field.RelationField
+		Conversation struct {
+			field.RelationField
+		}
+	}
+	Branches struct { // nested path "Conversation.Branches"
+		field.RelationField
+		Conversation struct {
+			field.RelationField
+		}
+	}
+}
+
+func (a conversationItemBelongsToConversation) Where(conds ...field.Expr) *conversationItemBelongsToConversation {
+	if len(conds) == 0 {
+		return &a
+	}
+	// Convert every condition to a clause.Expression (unchecked assertion).
+	exprs := make([]clause.Expression, len(conds))
+	for i := range conds {
+		exprs[i] = conds[i].BeCond().(clause.Expression)
+	}
+	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
+	return &a
+}
+
+func (a conversationItemBelongsToConversation) WithContext(ctx context.Context) *conversationItemBelongsToConversation { // returns a copy of a whose db carries ctx
+	a.db = a.db.WithContext(ctx)
+	return &a
+}
+
+func (a conversationItemBelongsToConversation) Session(session *gorm.Session) *conversationItemBelongsToConversation { // returns a copy of a whose db is a.db.Session(session)
+	a.db = a.db.Session(session)
+	return &a
+}
+
+func (a conversationItemBelongsToConversation) Model(m *dbschema.ConversationItem) *conversationItemBelongsToConversationTx { // opens a gorm Association on m under this relation's name and wraps it in the Tx helper
+	return &conversationItemBelongsToConversationTx{a.db.Model(m).Association(a.Name())}
+}
+
+func (a conversationItemBelongsToConversation) Unscoped() *conversationItemBelongsToConversation { // returns a copy of a whose db is a.db.Unscoped()
+	a.db = a.db.Unscoped()
+	return &a
+}
+
+type conversationItemBelongsToConversationTx struct{ tx *gorm.Association } // typed wrapper around the gorm Association created by Model
+
+func (a conversationItemBelongsToConversationTx) Find() (result *dbschema.Conversation, err error) { // loads the associated Conversation into result via the association's Find
+	return result, a.tx.Find(&result) // NOTE(review): correct only if Find runs before result is read; the Go spec leaves that order unspecified
+}
+
+func (a conversationItemBelongsToConversationTx) Append(values ...*dbschema.Conversation) (err error) {
+	boxed := make([]interface{}, 0, len(values)) // the association call takes untyped values
+	for _, conv := range values {
+		boxed = append(boxed, conv)
+	}
+	return a.tx.Append(boxed...)
+}
+
+func (a conversationItemBelongsToConversationTx) Replace(values ...*dbschema.Conversation) (err error) {
+	boxed := make([]interface{}, 0, len(values)) // the association call takes untyped values
+	for _, conv := range values {
+		boxed = append(boxed, conv)
+	}
+	return a.tx.Replace(boxed...)
+}
+
+func (a conversationItemBelongsToConversationTx) Delete(values ...*dbschema.Conversation) (err error) {
+	boxed := make([]interface{}, 0, len(values)) // the association call takes untyped values
+	for _, conv := range values {
+		boxed = append(boxed, conv)
+	}
+	return a.tx.Delete(boxed...)
+}
+
+func (a conversationItemBelongsToConversationTx) Clear() error { // forwards to the wrapped association's Clear
+	return a.tx.Clear()
+}
+
+func (a conversationItemBelongsToConversationTx) Count() int64 { // forwards to the wrapped association's Count
+	return a.tx.Count()
+}
+
+func (a conversationItemBelongsToConversationTx) Unscoped() *conversationItemBelongsToConversationTx { // returns a copy of a wrapping a.tx.Unscoped()
+	a.tx = a.tx.Unscoped()
+	return &a
+}
+
+type conversationItemDo struct{ gen.DO } // typed DAO for dbschema.ConversationItem; embeds gen.DO and wraps its methods
+
+type IConversationItemDo interface { // typed query API for dbschema.ConversationItem
+	gen.SubQuery
+	Debug() IConversationItemDo
+	WithContext(ctx context.Context) IConversationItemDo
+	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
+	ReplaceDB(db *gorm.DB)
+	ReadDB() IConversationItemDo
+	WriteDB() IConversationItemDo
+	As(alias string) gen.Dao
+	Session(config *gorm.Session) IConversationItemDo
+	Columns(cols ...field.Expr) gen.Columns
+	Clauses(conds ...clause.Expression) IConversationItemDo
+	Not(conds ...gen.Condition) IConversationItemDo
+	Or(conds ...gen.Condition) IConversationItemDo
+	Select(conds ...field.Expr) IConversationItemDo
+	Where(conds ...gen.Condition) IConversationItemDo
+	Order(conds ...field.Expr) IConversationItemDo
+	Distinct(cols ...field.Expr) IConversationItemDo
+	Omit(cols ...field.Expr) IConversationItemDo
+	Join(table schema.Tabler, on ...field.Expr) IConversationItemDo
+	LeftJoin(table schema.Tabler, on ...field.Expr) IConversationItemDo
+	RightJoin(table schema.Tabler, on ...field.Expr) IConversationItemDo
+	Group(cols ...field.Expr) IConversationItemDo
+	Having(conds ...gen.Condition) IConversationItemDo
+	Limit(limit int) IConversationItemDo
+	Offset(offset int) IConversationItemDo
+	Count() (count int64, err error)
+	Scopes(funcs ...func(gen.Dao) gen.Dao) IConversationItemDo
+	Unscoped() IConversationItemDo
+	Create(values ...*dbschema.ConversationItem) error
+	CreateInBatches(values []*dbschema.ConversationItem, batchSize int) error
+	Save(values ...*dbschema.ConversationItem) error
+	First() (*dbschema.ConversationItem, error)
+	Take() (*dbschema.ConversationItem, error)
+	Last() (*dbschema.ConversationItem, error)
+	Find() ([]*dbschema.ConversationItem, error)
+	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ConversationItem, err error)
+	FindInBatches(result *[]*dbschema.ConversationItem, batchSize int, fc func(tx gen.Dao, batch int) error) error
+	Pluck(column field.Expr, dest interface{}) error
+	Delete(...*dbschema.ConversationItem) (info gen.ResultInfo, err error)
+	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	Updates(value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
+	UpdateFrom(q gen.SubQuery) gen.Dao
+	Attrs(attrs ...field.AssignExpr) IConversationItemDo
+	Assign(attrs ...field.AssignExpr) IConversationItemDo
+	Joins(fields ...field.RelationField) IConversationItemDo
+	Preload(fields ...field.RelationField) IConversationItemDo
+	FirstOrInit() (*dbschema.ConversationItem, error)
+	FirstOrCreate() (*dbschema.ConversationItem, error)
+	FindByPage(offset int, limit int) (result []*dbschema.ConversationItem, count int64, err error) // returns one page plus a total count
+	ScanByPage(result interface{}, offset int, limit int) (count int64, err error) // scans one page into result and returns a total count
+	Rows() (*sql.Rows, error)
+	Row() *sql.Row
+	Scan(result interface{}) (err error)
+	Returning(value interface{}, columns ...string) IConversationItemDo
+	UnderlyingDB() *gorm.DB
+	schema.Tabler
+}
+
+func (c conversationItemDo) Debug() IConversationItemDo { // forwards to gen.DO.Debug; withDO rewraps the result in this receiver copy
+	return c.withDO(c.DO.Debug())
+}
+
+func (c conversationItemDo) WithContext(ctx context.Context) IConversationItemDo { // forwards ctx to gen.DO.WithContext and rewraps the result
+	return c.withDO(c.DO.WithContext(ctx))
+}
+
+func (c conversationItemDo) ReadDB() IConversationItemDo { // shorthand for Clauses(dbresolver.Read)
+	return c.Clauses(dbresolver.Read)
+}
+
+func (c conversationItemDo) WriteDB() IConversationItemDo { // shorthand for Clauses(dbresolver.Write)
+	return c.Clauses(dbresolver.Write)
+}
+
+func (c conversationItemDo) Session(config *gorm.Session) IConversationItemDo { // forwards config to gen.DO.Session and rewraps the result
+	return c.withDO(c.DO.Session(config))
+}
+
+func (c conversationItemDo) Clauses(conds ...clause.Expression) IConversationItemDo { // forwards conds to gen.DO.Clauses and rewraps the result
+	return c.withDO(c.DO.Clauses(conds...))
+}
+
+func (c conversationItemDo) Returning(value interface{}, columns ...string) IConversationItemDo { // forwards value and columns to gen.DO.Returning and rewraps the result
+	return c.withDO(c.DO.Returning(value, columns...))
+}
+
+func (c conversationItemDo) Not(conds ...gen.Condition) IConversationItemDo { // forwards conds to gen.DO.Not and rewraps the result
+	return c.withDO(c.DO.Not(conds...))
+}
+
+func (c conversationItemDo) Or(conds ...gen.Condition) IConversationItemDo { // forwards conds to gen.DO.Or and rewraps the result
+	return c.withDO(c.DO.Or(conds...))
+}
+
+func (c conversationItemDo) Select(conds ...field.Expr) IConversationItemDo { // forwards the column expressions to gen.DO.Select and rewraps the result
+	return c.withDO(c.DO.Select(conds...))
+}
+
+func (c conversationItemDo) Where(conds ...gen.Condition) IConversationItemDo { // forwards conds to gen.DO.Where and rewraps the result
+	return c.withDO(c.DO.Where(conds...))
+}
+
+func (c conversationItemDo) Order(conds ...field.Expr) IConversationItemDo { // forwards the ordering expressions to gen.DO.Order and rewraps the result
+	return c.withDO(c.DO.Order(conds...))
+}
+
+func (c conversationItemDo) Distinct(cols ...field.Expr) IConversationItemDo { // forwards cols to gen.DO.Distinct and rewraps the result
+	return c.withDO(c.DO.Distinct(cols...))
+}
+
+func (c conversationItemDo) Omit(cols ...field.Expr) IConversationItemDo { // forwards cols to gen.DO.Omit and rewraps the result
+	return c.withDO(c.DO.Omit(cols...))
+}
+
+func (c conversationItemDo) Join(table schema.Tabler, on ...field.Expr) IConversationItemDo { // forwards table and the on expressions to gen.DO.Join and rewraps the result
+	return c.withDO(c.DO.Join(table, on...))
+}
+
+func (c conversationItemDo) LeftJoin(table schema.Tabler, on ...field.Expr) IConversationItemDo { // forwards table and the on expressions to gen.DO.LeftJoin and rewraps the result
+	return c.withDO(c.DO.LeftJoin(table, on...))
+}
+
+func (c conversationItemDo) RightJoin(table schema.Tabler, on ...field.Expr) IConversationItemDo { // forwards table and the on expressions to gen.DO.RightJoin and rewraps the result
+	return c.withDO(c.DO.RightJoin(table, on...))
+}
+
+func (c conversationItemDo) Group(cols ...field.Expr) IConversationItemDo { // forwards cols to gen.DO.Group and rewraps the result
+	return c.withDO(c.DO.Group(cols...))
+}
+
+func (c conversationItemDo) Having(conds ...gen.Condition) IConversationItemDo { // forwards conds to gen.DO.Having and rewraps the result
+	return c.withDO(c.DO.Having(conds...))
+}
+
+func (c conversationItemDo) Limit(limit int) IConversationItemDo { // forwards limit to gen.DO.Limit and rewraps the result
+	return c.withDO(c.DO.Limit(limit))
+}
+
+func (c conversationItemDo) Offset(offset int) IConversationItemDo { // forwards offset to gen.DO.Offset and rewraps the result
+	return c.withDO(c.DO.Offset(offset))
+}
+
+func (c conversationItemDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IConversationItemDo { // forwards the scope functions to gen.DO.Scopes and rewraps the result
+	return c.withDO(c.DO.Scopes(funcs...))
+}
+
+func (c conversationItemDo) Unscoped() IConversationItemDo { // forwards to gen.DO.Unscoped and rewraps the result
+	return c.withDO(c.DO.Unscoped())
+}
+
+func (c conversationItemDo) Create(values ...*dbschema.ConversationItem) error {
+	if len(values) > 0 { // gen.DO.Create is only called when there is at least one value
+		return c.DO.Create(values)
+	}
+	return nil
+}
+
+func (c conversationItemDo) CreateInBatches(values []*dbschema.ConversationItem, batchSize int) error { // passes values and batchSize straight through to gen.DO.CreateInBatches
+	return c.DO.CreateInBatches(values, batchSize)
+}
+
+// Save differs from GORM's own Save: per the generator's note it is equivalent to
+// db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values). A call with no values returns nil.
+func (c conversationItemDo) Save(values ...*dbschema.ConversationItem) error {
+	if len(values) > 0 {
+		return c.DO.Save(values)
+	}
+	return nil
+}
+
+func (c conversationItemDo) First() (*dbschema.ConversationItem, error) {
+	row, err := c.DO.First()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationItem), nil
+}
+
+func (c conversationItemDo) Take() (*dbschema.ConversationItem, error) {
+	row, err := c.DO.Take()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationItem), nil
+}
+
+func (c conversationItemDo) Last() (*dbschema.ConversationItem, error) {
+	row, err := c.DO.Last()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationItem), nil
+}
+
+func (c conversationItemDo) Find() ([]*dbschema.ConversationItem, error) { // typed wrapper around gen.DO.Find
+	result, err := c.DO.Find()
+	return result.([]*dbschema.ConversationItem), err // unchecked assertion, evaluated even when err is non-nil
+}
+
+func (c conversationItemDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ConversationItem, err error) {
+	buf := make([]*dbschema.ConversationItem, 0, batchSize) // scratch slice handed to gen.DO.FindInBatches
+	err = c.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
+		defer func() { results = append(results, buf...) }() // runs after fc returns, so buf is appended even when fc fails
+		return fc(tx, batch)
+	})
+	return results, err
+}
+
+func (c conversationItemDo) FindInBatches(result *[]*dbschema.ConversationItem, batchSize int, fc func(tx gen.Dao, batch int) error) error { // passes all three arguments straight through to gen.DO.FindInBatches
+	return c.DO.FindInBatches(result, batchSize, fc)
+}
+
+func (c conversationItemDo) Attrs(attrs ...field.AssignExpr) IConversationItemDo { // forwards attrs to gen.DO.Attrs and rewraps the result
+	return c.withDO(c.DO.Attrs(attrs...))
+}
+
+func (c conversationItemDo) Assign(attrs ...field.AssignExpr) IConversationItemDo { // forwards attrs to gen.DO.Assign and rewraps the result
+	return c.withDO(c.DO.Assign(attrs...))
+}
+
+func (c conversationItemDo) Joins(fields ...field.RelationField) IConversationItemDo {
+	for i := range fields { // relations are applied one at a time, in argument order
+		c = *c.withDO(c.DO.Joins(fields[i]))
+	}
+	return &c
+}
+
+func (c conversationItemDo) Preload(fields ...field.RelationField) IConversationItemDo {
+	for i := range fields { // relations are applied one at a time, in argument order
+		c = *c.withDO(c.DO.Preload(fields[i]))
+	}
+	return &c
+}
+
+func (c conversationItemDo) FirstOrInit() (*dbschema.ConversationItem, error) {
+	row, err := c.DO.FirstOrInit()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationItem), nil
+}
+
+func (c conversationItemDo) FirstOrCreate() (*dbschema.ConversationItem, error) {
+	row, err := c.DO.FirstOrCreate()
+	if err != nil { // on failure the untyped result is discarded
+		return nil, err
+	}
+	return row.(*dbschema.ConversationItem), nil
+}
+
+func (c conversationItemDo) FindByPage(offset int, limit int) (result []*dbschema.ConversationItem, count int64, err error) {
+	if result, err = c.Offset(offset).Limit(limit).Find(); err != nil {
+		return result, count, err
+	}
+	// A non-empty page shorter than limit is treated as the final page, so the
+	// total is derived as offset plus the page size and Count is not called.
+	if pageLen := len(result); limit > 0 && pageLen > 0 && pageLen < limit {
+		return result, int64(pageLen + offset), nil
+	}
+
+	// Otherwise count with offset and limit both reset to -1.
+	count, err = c.Offset(-1).Limit(-1).Count()
+	return result, count, err
+}
+
+func (c conversationItemDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
+	if count, err = c.Count(); err != nil {
+		return count, err
+	}
+	// The count above is taken before offset and limit are applied; the page
+	// itself is then scanned into result.
+	err = c.Offset(offset).Limit(limit).Scan(result)
+	return count, err
+}
+
+func (c conversationItemDo) Scan(result interface{}) (err error) { // forwards result to gen.DO.Scan unchanged
+	return c.DO.Scan(result)
+}
+
+func (c conversationItemDo) Delete(models ...*dbschema.ConversationItem) (result gen.ResultInfo, err error) { // hands the models slice to gen.DO.Delete as a single argument
+	return c.DO.Delete(models)
+}
+
+func (c *conversationItemDo) withDO(do gen.Dao) *conversationItemDo { // overwrites c.DO in place and returns the same pointer
+	c.DO = *do.(*gen.DO) // unchecked assertion: panics if do is not a *gen.DO
+	return c
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/conversations.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/conversations.gen.go
similarity index 79%
rename from apps/jan-api-gateway/application/app/infrastructure/database/gormgen/conversations.gen.go
rename to services/llm-api/internal/infrastructure/database/gormgen/conversations.gen.go
index fdf8fb89..480557f2 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/conversations.gen.go
+++ b/services/llm-api/internal/infrastructure/database/gormgen/conversations.gen.go
@@ -17,7 +17,7 @@ import (
 
 	"gorm.io/plugin/dbresolver"
 
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
 )
 
 func newConversation(db *gorm.DB, opts ...gen.DOOption) conversation {
@@ -33,89 +33,66 @@ func newConversation(db *gorm.DB, opts ...gen.DOOption) conversation {
 	_conversation.UpdatedAt = field.NewTime(tableName, "updated_at")
 	_conversation.DeletedAt = field.NewField(tableName, "deleted_at")
 	_conversation.PublicID = field.NewString(tableName, "public_id")
+	_conversation.Object = field.NewString(tableName, "object")
 	_conversation.Title = field.NewString(tableName, "title")
 	_conversation.UserID = field.NewUint(tableName, "user_id")
 	_conversation.Status = field.NewString(tableName, "status")
-	_conversation.Metadata = field.NewString(tableName, "metadata")
+	_conversation.ActiveBranch = field.NewString(tableName, "active_branch")
+	_conversation.Referrer = field.NewString(tableName, "referrer")
+	_conversation.Metadata = field.NewField(tableName, "metadata")
 	_conversation.IsPrivate = field.NewBool(tableName, "is_private")
 	_conversation.Items = conversationHasManyItems{
 		db: db.Session(&gorm.Session{}),
 
-		RelationField: field.NewRelation("Items", "dbschema.Item"),
+		RelationField: field.NewRelation("Items", "dbschema.ConversationItem"),
 		Conversation: struct {
 			field.RelationField
 			User struct {
 				field.RelationField
-				Organizations struct {
-					field.RelationField
-				}
-				Projects struct {
-					field.RelationField
-				}
 			}
 			Items struct {
 				field.RelationField
 			}
-		}{
-			RelationField: field.NewRelation("Items.Conversation", "dbschema.Conversation"),
-			User: struct {
+			Branches struct {
 				field.RelationField
-				Organizations struct {
-					field.RelationField
-				}
-				Projects struct {
+				Conversation struct {
 					field.RelationField
 				}
-			}{
-				RelationField: field.NewRelation("Items.Conversation.User", "dbschema.User"),
-				Organizations: struct {
-					field.RelationField
-				}{
-					RelationField: field.NewRelation("Items.Conversation.User.Organizations", "dbschema.OrganizationMember"),
-				},
-				Projects: struct {
-					field.RelationField
-				}{
-					RelationField: field.NewRelation("Items.Conversation.User.Projects", "dbschema.ProjectMember"),
-				},
-			},
-			Items: struct {
-				field.RelationField
-			}{
-				RelationField: field.NewRelation("Items.Conversation.Items", "dbschema.Item"),
-			},
-		},
-		Response: struct {
-			field.RelationField
-			UserEntity struct {
-				field.RelationField
-			}
-			Conversation struct {
-				field.RelationField
-			}
-			Items struct {
-				field.RelationField
 			}
 		}{
-			RelationField: field.NewRelation("Items.Response", "dbschema.Response"),
-			UserEntity: struct {
+			RelationField: field.NewRelation("Items.Conversation", "dbschema.Conversation"),
+			User: struct {
 				field.RelationField
 			}{
-				RelationField: field.NewRelation("Items.Response.UserEntity", "dbschema.User"),
+				RelationField: field.NewRelation("Items.Conversation.User", "dbschema.User"),
 			},
-			Conversation: struct {
+			Items: struct {
 				field.RelationField
 			}{
-				RelationField: field.NewRelation("Items.Response.Conversation", "dbschema.Conversation"),
+				RelationField: field.NewRelation("Items.Conversation.Items", "dbschema.ConversationItem"),
 			},
-			Items: struct {
+			Branches: struct {
 				field.RelationField
+				Conversation struct {
+					field.RelationField
+				}
 			}{
-				RelationField: field.NewRelation("Items.Response.Items", "dbschema.Item"),
+				RelationField: field.NewRelation("Items.Conversation.Branches", "dbschema.ConversationBranch"),
+				Conversation: struct {
+					field.RelationField
+				}{
+					RelationField: field.NewRelation("Items.Conversation.Branches.Conversation", "dbschema.Conversation"),
+				},
 			},
 		},
 	}
 
+	_conversation.Branches = conversationHasManyBranches{
+		db: db.Session(&gorm.Session{}),
+
+		RelationField: field.NewRelation("Branches", "dbschema.ConversationBranch"),
+	}
+
 	_conversation.User = conversationBelongsToUser{
 		db: db.Session(&gorm.Session{}),
 
@@ -130,18 +107,23 @@ func newConversation(db *gorm.DB, opts ...gen.DOOption) conversation {
 type conversation struct {
 	conversationDo
 
-	ALL       field.Asterisk
-	ID        field.Uint
-	CreatedAt field.Time
-	UpdatedAt field.Time
-	DeletedAt field.Field
-	PublicID  field.String
-	Title     field.String
-	UserID    field.Uint
-	Status    field.String
-	Metadata  field.String
-	IsPrivate field.Bool
-	Items     conversationHasManyItems
+	ALL          field.Asterisk
+	ID           field.Uint
+	CreatedAt    field.Time
+	UpdatedAt    field.Time
+	DeletedAt    field.Field
+	PublicID     field.String
+	Object       field.String
+	Title        field.String
+	UserID       field.Uint
+	Status       field.String
+	ActiveBranch field.String
+	Referrer     field.String
+	Metadata     field.Field
+	IsPrivate    field.Bool
+	Items        conversationHasManyItems
+
+	Branches conversationHasManyBranches
 
 	User conversationBelongsToUser
 
@@ -165,10 +147,13 @@ func (c *conversation) updateTableName(table string) *conversation {
 	c.UpdatedAt = field.NewTime(table, "updated_at")
 	c.DeletedAt = field.NewField(table, "deleted_at")
 	c.PublicID = field.NewString(table, "public_id")
+	c.Object = field.NewString(table, "object")
 	c.Title = field.NewString(table, "title")
 	c.UserID = field.NewUint(table, "user_id")
 	c.Status = field.NewString(table, "status")
-	c.Metadata = field.NewString(table, "metadata")
+	c.ActiveBranch = field.NewString(table, "active_branch")
+	c.Referrer = field.NewString(table, "referrer")
+	c.Metadata = field.NewField(table, "metadata")
 	c.IsPrivate = field.NewBool(table, "is_private")
 
 	c.fillFieldMap()
@@ -186,15 +171,18 @@ func (c *conversation) GetFieldByName(fieldName string) (field.OrderExpr, bool)
 }
 
 func (c *conversation) fillFieldMap() {
-	c.fieldMap = make(map[string]field.Expr, 12)
+	c.fieldMap = make(map[string]field.Expr, 16)
 	c.fieldMap["id"] = c.ID
 	c.fieldMap["created_at"] = c.CreatedAt
 	c.fieldMap["updated_at"] = c.UpdatedAt
 	c.fieldMap["deleted_at"] = c.DeletedAt
 	c.fieldMap["public_id"] = c.PublicID
+	c.fieldMap["object"] = c.Object
 	c.fieldMap["title"] = c.Title
 	c.fieldMap["user_id"] = c.UserID
 	c.fieldMap["status"] = c.Status
+	c.fieldMap["active_branch"] = c.ActiveBranch
+	c.fieldMap["referrer"] = c.Referrer
 	c.fieldMap["metadata"] = c.Metadata
 	c.fieldMap["is_private"] = c.IsPrivate
 
@@ -204,6 +192,8 @@ func (c conversation) clone(db *gorm.DB) conversation {
 	c.conversationDo.ReplaceConnPool(db.Statement.ConnPool)
 	c.Items.db = db.Session(&gorm.Session{Initialized: true})
 	c.Items.db.Statement.ConnPool = db.Statement.ConnPool
+	c.Branches.db = db.Session(&gorm.Session{Initialized: true})
+	c.Branches.db.Statement.ConnPool = db.Statement.ConnPool
 	c.User.db = db.Session(&gorm.Session{Initialized: true})
 	c.User.db.Statement.ConnPool = db.Statement.ConnPool
 	return c
@@ -212,6 +202,7 @@ func (c conversation) clone(db *gorm.DB) conversation {
 func (c conversation) replaceDB(db *gorm.DB) conversation {
 	c.conversationDo.ReplaceDB(db)
 	c.Items.db = db.Session(&gorm.Session{})
+	c.Branches.db = db.Session(&gorm.Session{})
 	c.User.db = db.Session(&gorm.Session{})
 	return c
 }
@@ -225,27 +216,15 @@ type conversationHasManyItems struct {
 		field.RelationField
 		User struct {
 			field.RelationField
-			Organizations struct {
-				field.RelationField
-			}
-			Projects struct {
-				field.RelationField
-			}
 		}
 		Items struct {
 			field.RelationField
 		}
-	}
-	Response struct {
-		field.RelationField
-		UserEntity struct {
-			field.RelationField
-		}
-		Conversation struct {
-			field.RelationField
-		}
-		Items struct {
+		Branches struct {
 			field.RelationField
+			Conversation struct {
+				field.RelationField
+			}
 		}
 	}
 }
@@ -284,11 +263,11 @@ func (a conversationHasManyItems) Unscoped() *conversationHasManyItems {
 
 type conversationHasManyItemsTx struct{ tx *gorm.Association }
 
-func (a conversationHasManyItemsTx) Find() (result []*dbschema.Item, err error) {
+func (a conversationHasManyItemsTx) Find() (result []*dbschema.ConversationItem, err error) {
 	return result, a.tx.Find(&result)
 }
 
-func (a conversationHasManyItemsTx) Append(values ...*dbschema.Item) (err error) {
+func (a conversationHasManyItemsTx) Append(values ...*dbschema.ConversationItem) (err error) {
 	targetValues := make([]interface{}, len(values))
 	for i, v := range values {
 		targetValues[i] = v
@@ -296,7 +275,7 @@ func (a conversationHasManyItemsTx) Append(values ...*dbschema.Item) (err error)
 	return a.tx.Append(targetValues...)
 }
 
-func (a conversationHasManyItemsTx) Replace(values ...*dbschema.Item) (err error) {
+func (a conversationHasManyItemsTx) Replace(values ...*dbschema.ConversationItem) (err error) {
 	targetValues := make([]interface{}, len(values))
 	for i, v := range values {
 		targetValues[i] = v
@@ -304,7 +283,7 @@ func (a conversationHasManyItemsTx) Replace(values ...*dbschema.Item) (err error
 	return a.tx.Replace(targetValues...)
 }
 
-func (a conversationHasManyItemsTx) Delete(values ...*dbschema.Item) (err error) {
+func (a conversationHasManyItemsTx) Delete(values ...*dbschema.ConversationItem) (err error) {
 	targetValues := make([]interface{}, len(values))
 	for i, v := range values {
 		targetValues[i] = v
@@ -325,6 +304,87 @@ func (a conversationHasManyItemsTx) Unscoped() *conversationHasManyItemsTx {
 	return &a
 }
 
+type conversationHasManyBranches struct { // association handle: Conversation -> its ConversationBranch rows
+	db *gorm.DB // handle the association queries are built from (see Model)
+
+	field.RelationField
+}
+
+func (a conversationHasManyBranches) Where(conds ...field.Expr) *conversationHasManyBranches { // value receiver: the result points at a modified copy
+	if len(conds) == 0 {
+		return &a
+	}
+
+	exprs := make([]clause.Expression, 0, len(conds))
+	for _, cond := range conds {
+		exprs = append(exprs, cond.BeCond().(clause.Expression)) // unchecked assertion: panics if BeCond() is not a clause.Expression
+	}
+	a.db = a.db.Clauses(clause.Where{Exprs: exprs}) // every cond is placed in one Where clause
+	return &a
+}
+
+func (a conversationHasManyBranches) WithContext(ctx context.Context) *conversationHasManyBranches {
+	a.db = a.db.WithContext(ctx)
+	return &a
+}
+
+func (a conversationHasManyBranches) Session(session *gorm.Session) *conversationHasManyBranches {
+	a.db = a.db.Session(session)
+	return &a
+}
+
+func (a conversationHasManyBranches) Model(m *dbschema.Conversation) *conversationHasManyBranchesTx { // binds the association to one concrete conversation row
+	return &conversationHasManyBranchesTx{a.db.Model(m).Association(a.Name())} // association is looked up by the relation name carried in RelationField
+}
+
+func (a conversationHasManyBranches) Unscoped() *conversationHasManyBranches {
+	a.db = a.db.Unscoped()
+	return &a
+}
+
+type conversationHasManyBranchesTx struct{ tx *gorm.Association }
+
+func (a conversationHasManyBranchesTx) Find() (result []*dbschema.ConversationBranch, err error) {
+	return result, a.tx.Find(&result)
+}
+
+func (a conversationHasManyBranchesTx) Append(values ...*dbschema.ConversationBranch) (err error) { // forwards values to the underlying association's Append
+	targetValues := make([]interface{}, len(values)) // typed pointers are re-boxed because the call below is spread from a []interface{}
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Append(targetValues...)
+}
+
+func (a conversationHasManyBranchesTx) Replace(values ...*dbschema.ConversationBranch) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Replace(targetValues...)
+}
+
+func (a conversationHasManyBranchesTx) Delete(values ...*dbschema.ConversationBranch) (err error) {
+	targetValues := make([]interface{}, len(values))
+	for i, v := range values {
+		targetValues[i] = v
+	}
+	return a.tx.Delete(targetValues...)
+}
+
+func (a conversationHasManyBranchesTx) Clear() error {
+	return a.tx.Clear()
+}
+
+func (a conversationHasManyBranchesTx) Count() int64 {
+	return a.tx.Count()
+}
+
+func (a conversationHasManyBranchesTx) Unscoped() *conversationHasManyBranchesTx {
+	a.tx = a.tx.Unscoped()
+	return &a
+}
+
 type conversationBelongsToUser struct {
 	db *gorm.DB
 
diff --git a/services/llm-api/internal/infrastructure/database/gormgen/gen.go b/services/llm-api/internal/infrastructure/database/gormgen/gen.go
new file mode 100644
index 00000000..1a5ef524
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/gormgen/gen.go
@@ -0,0 +1,159 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+
+	"gorm.io/gen"
+
+	"gorm.io/plugin/dbresolver"
+)
+
+var (
+	Q                  = new(Query) // zero-valued Query until SetDefault copies a real one into it
+	Conversation       *conversation // this and the handles below are nil until SetDefault assigns them
+	ConversationBranch *conversationBranch
+	ConversationItem   *conversationItem
+	Model              *model
+	ModelCatalog       *modelCatalog
+	Provider           *provider
+	ProviderModel      *providerModel
+	User               *user
+)
+
+func SetDefault(db *gorm.DB, opts ...gen.DOOption) { // points the package-level Q and the per-table handles at db
+	*Q = *Use(db, opts...) // copies into the existing Q value; the Q pointer itself is not rebound
+	Conversation = &Q.Conversation // each handle aliases a field of Q rather than holding its own copy
+	ConversationBranch = &Q.ConversationBranch
+	ConversationItem = &Q.ConversationItem
+	Model = &Q.Model
+	ModelCatalog = &Q.ModelCatalog
+	Provider = &Q.Provider
+	ProviderModel = &Q.ProviderModel
+	User = &Q.User
+}
+
+func Use(db *gorm.DB, opts ...gen.DOOption) *Query { // builds a fresh Query; every table handle is constructed with the same db and opts
+	return &Query{
+		db:                 db,
+		Conversation:       newConversation(db, opts...),
+		ConversationBranch: newConversationBranch(db, opts...),
+		ConversationItem:   newConversationItem(db, opts...),
+		Model:              newModel(db, opts...),
+		ModelCatalog:       newModelCatalog(db, opts...),
+		Provider:           newProvider(db, opts...),
+		ProviderModel:      newProviderModel(db, opts...),
+		User:               newUser(db, opts...),
+	}
+}
+
+type Query struct { // bundle of one query handle per generated table
+	db *gorm.DB // handle used by Transaction/Begin/ReadDB/WriteDB; nil means not Available
+
+	Conversation       conversation
+	ConversationBranch conversationBranch
+	ConversationItem   conversationItem
+	Model              model
+	ModelCatalog       modelCatalog
+	Provider           provider
+	ProviderModel      providerModel
+	User               user
+}
+
+func (q *Query) Available() bool { return q.db != nil } // reports whether a db handle has been set on q
+
+func (q *Query) clone(db *gorm.DB) *Query { // new Query on db; each handle is produced by its own clone(db)
+	return &Query{
+		db:                 db,
+		Conversation:       q.Conversation.clone(db),
+		ConversationBranch: q.ConversationBranch.clone(db),
+		ConversationItem:   q.ConversationItem.clone(db),
+		Model:              q.Model.clone(db),
+		ModelCatalog:       q.ModelCatalog.clone(db),
+		Provider:           q.Provider.clone(db),
+		ProviderModel:      q.ProviderModel.clone(db),
+		User:               q.User.clone(db),
+	}
+}
+
+func (q *Query) ReadDB() *Query { // returns a new Query built from q.db with the dbresolver.Read clause attached
+	return q.ReplaceDB(q.db.Clauses(dbresolver.Read))
+}
+
+func (q *Query) WriteDB() *Query { // returns a new Query built from q.db with the dbresolver.Write clause attached
+	return q.ReplaceDB(q.db.Clauses(dbresolver.Write))
+}
+
+func (q *Query) ReplaceDB(db *gorm.DB) *Query { // new Query on db; each handle is produced by its own replaceDB(db)
+	return &Query{
+		db:                 db,
+		Conversation:       q.Conversation.replaceDB(db),
+		ConversationBranch: q.ConversationBranch.replaceDB(db),
+		ConversationItem:   q.ConversationItem.replaceDB(db),
+		Model:              q.Model.replaceDB(db),
+		ModelCatalog:       q.ModelCatalog.replaceDB(db),
+		Provider:           q.Provider.replaceDB(db),
+		ProviderModel:      q.ProviderModel.replaceDB(db),
+		User:               q.User.replaceDB(db),
+	}
+}
+
+type queryCtx struct { // what Query.WithContext returns: the same tables, exposed through their I*Do interfaces
+	Conversation       IConversationDo
+	ConversationBranch IConversationBranchDo
+	ConversationItem   IConversationItemDo
+	Model              IModelDo
+	ModelCatalog       IModelCatalogDo
+	Provider           IProviderDo
+	ProviderModel      IProviderModelDo
+	User               IUserDo
+}
+
+func (q *Query) WithContext(ctx context.Context) *queryCtx { // calls WithContext(ctx) on every table handle and collects the results
+	return &queryCtx{
+		Conversation:       q.Conversation.WithContext(ctx),
+		ConversationBranch: q.ConversationBranch.WithContext(ctx),
+		ConversationItem:   q.ConversationItem.WithContext(ctx),
+		Model:              q.Model.WithContext(ctx),
+		ModelCatalog:       q.ModelCatalog.WithContext(ctx),
+		Provider:           q.Provider.WithContext(ctx),
+		ProviderModel:      q.ProviderModel.WithContext(ctx),
+		User:               q.User.WithContext(ctx),
+	}
+}
+
+func (q *Query) Transaction(fc func(tx *Query) error, opts ...*sql.TxOptions) error { // runs fc inside q.db.Transaction
+	return q.db.Transaction(func(tx *gorm.DB) error { return fc(q.clone(tx)) }, opts...) // fc gets a Query cloned onto tx; its error is handed back to gorm's Transaction
+}
+
+func (q *Query) Begin(opts ...*sql.TxOptions) *QueryTx { // starts a manual transaction; there is no error return, so check QueryTx.Error
+	tx := q.db.Begin(opts...)
+	return &QueryTx{Query: q.clone(tx), Error: tx.Error} // tx.Error is captured as-is, whether or not Begin succeeded
+}
+
+type QueryTx struct { // Query bound to a transaction handle, as returned by Query.Begin
+	*Query
+	Error error // value of tx.Error right after db.Begin (set in Query.Begin)
+}
+
+func (q *QueryTx) Commit() error {
+	return q.db.Commit().Error
+}
+
+func (q *QueryTx) Rollback() error {
+	return q.db.Rollback().Error
+}
+
+func (q *QueryTx) SavePoint(name string) error {
+	return q.db.SavePoint(name).Error
+}
+
+func (q *QueryTx) RollbackTo(name string) error {
+	return q.db.RollbackTo(name).Error
+}
diff --git a/services/llm-api/internal/infrastructure/database/gormgen/model_catalogs.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/model_catalogs.gen.go
new file mode 100644
index 00000000..af86fdbb
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/gormgen/model_catalogs.gen.go
@@ -0,0 +1,431 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+	"gorm.io/gorm/schema"
+
+	"gorm.io/gen"
+	"gorm.io/gen/field"
+
+	"gorm.io/plugin/dbresolver"
+
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+)
+
+func newModelCatalog(db *gorm.DB, opts ...gen.DOOption) modelCatalog { // builds the query handle for dbschema.ModelCatalog on db
+	_modelCatalog := modelCatalog{}
+
+	_modelCatalog.modelCatalogDo.UseDB(db, opts...)
+	_modelCatalog.modelCatalogDo.UseModel(&dbschema.ModelCatalog{})
+
+	tableName := _modelCatalog.modelCatalogDo.TableName() // read after UseModel; used as the table qualifier for every field below
+	_modelCatalog.ALL = field.NewAsterisk(tableName)
+	_modelCatalog.ID = field.NewUint(tableName, "id")
+	_modelCatalog.CreatedAt = field.NewTime(tableName, "created_at")
+	_modelCatalog.UpdatedAt = field.NewTime(tableName, "updated_at")
+	_modelCatalog.DeletedAt = field.NewField(tableName, "deleted_at")
+	_modelCatalog.PublicID = field.NewString(tableName, "public_id")
+	_modelCatalog.SupportedParameters = field.NewField(tableName, "supported_parameters")
+	_modelCatalog.Architecture = field.NewField(tableName, "architecture")
+	_modelCatalog.Tags = field.NewField(tableName, "tags")
+	_modelCatalog.Notes = field.NewString(tableName, "notes")
+	_modelCatalog.IsModerated = field.NewBool(tableName, "is_moderated")
+	_modelCatalog.Active = field.NewBool(tableName, "active")
+	_modelCatalog.Status = field.NewString(tableName, "status")
+	_modelCatalog.Extras = field.NewField(tableName, "extras")
+
+	_modelCatalog.fillFieldMap() // must run after the fields above are set; it stores their current values
+
+	return _modelCatalog
+}
+
+type modelCatalog struct { // typed column handles for the ModelCatalog table plus its DAO
+	modelCatalogDo
+
+	ALL                 field.Asterisk
+	ID                  field.Uint
+	CreatedAt           field.Time
+	UpdatedAt           field.Time
+	DeletedAt           field.Field // generic Field handle, unlike the Time handles above
+	PublicID            field.String
+	SupportedParameters field.Field
+	Architecture        field.Field
+	Tags                field.Field
+	Notes               field.String
+	IsModerated         field.Bool
+	Active              field.Bool
+	Status              field.String
+	Extras              field.Field
+
+	fieldMap map[string]field.Expr // column name -> handle; rebuilt by fillFieldMap, read by GetFieldByName
+}
+
+func (m modelCatalog) Table(newTableName string) *modelCatalog { // value receiver: the column handles are rebound on a copy
+	m.modelCatalogDo.UseTable(newTableName)
+	return m.updateTableName(newTableName)
+}
+
+func (m modelCatalog) As(alias string) *modelCatalog { // value receiver: returns a copy whose column handles are qualified by alias
+	m.modelCatalogDo.DO = *(m.modelCatalogDo.As(alias).(*gen.DO)) // unchecked assertion: relies on As returning a *gen.DO
+	return m.updateTableName(alias)
+}
+
+func (m *modelCatalog) updateTableName(table string) *modelCatalog {
+	m.ALL = field.NewAsterisk(table)
+	m.ID = field.NewUint(table, "id")
+	m.CreatedAt = field.NewTime(table, "created_at")
+	m.UpdatedAt = field.NewTime(table, "updated_at")
+	m.DeletedAt = field.NewField(table, "deleted_at")
+	m.PublicID = field.NewString(table, "public_id")
+	m.SupportedParameters = field.NewField(table, "supported_parameters")
+	m.Architecture = field.NewField(table, "architecture")
+	m.Tags = field.NewField(table, "tags")
+	m.Notes = field.NewString(table, "notes")
+	m.IsModerated = field.NewBool(table, "is_moderated")
+	m.Active = field.NewBool(table, "active")
+	m.Status = field.NewString(table, "status")
+	m.Extras = field.NewField(table, "extras")
+
+	m.fillFieldMap()
+
+	return m
+}
+
+func (m *modelCatalog) GetFieldByName(fieldName string) (field.OrderExpr, bool) { // looks a column handle up by its database column name
+	_f, ok := m.fieldMap[fieldName]
+	if !ok || _f == nil {
+		return nil, false
+	}
+	_oe, ok := _f.(field.OrderExpr) // ok is false when the handle does not implement field.OrderExpr
+	return _oe, ok
+}
+
+func (m *modelCatalog) fillFieldMap() { // rebuilds the column-name lookup from the current field handles
+	m.fieldMap = make(map[string]field.Expr, 13) // always a fresh map; 13 matches the number of entries below
+	m.fieldMap["id"] = m.ID
+	m.fieldMap["created_at"] = m.CreatedAt
+	m.fieldMap["updated_at"] = m.UpdatedAt
+	m.fieldMap["deleted_at"] = m.DeletedAt
+	m.fieldMap["public_id"] = m.PublicID
+	m.fieldMap["supported_parameters"] = m.SupportedParameters
+	m.fieldMap["architecture"] = m.Architecture
+	m.fieldMap["tags"] = m.Tags
+	m.fieldMap["notes"] = m.Notes
+	m.fieldMap["is_moderated"] = m.IsModerated
+	m.fieldMap["active"] = m.Active
+	m.fieldMap["status"] = m.Status
+	m.fieldMap["extras"] = m.Extras
+}
+
+func (m modelCatalog) clone(db *gorm.DB) modelCatalog {
+	m.modelCatalogDo.ReplaceConnPool(db.Statement.ConnPool)
+	return m
+}
+
+func (m modelCatalog) replaceDB(db *gorm.DB) modelCatalog {
+	m.modelCatalogDo.ReplaceDB(db)
+	return m
+}
+
+type modelCatalogDo struct{ gen.DO }
+
+type IModelCatalogDo interface {
+	gen.SubQuery
+	Debug() IModelCatalogDo
+	WithContext(ctx context.Context) IModelCatalogDo
+	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
+	ReplaceDB(db *gorm.DB)
+	ReadDB() IModelCatalogDo
+	WriteDB() IModelCatalogDo
+	As(alias string) gen.Dao
+	Session(config *gorm.Session) IModelCatalogDo
+	Columns(cols ...field.Expr) gen.Columns
+	Clauses(conds ...clause.Expression) IModelCatalogDo
+	Not(conds ...gen.Condition) IModelCatalogDo
+	Or(conds ...gen.Condition) IModelCatalogDo
+	Select(conds ...field.Expr) IModelCatalogDo
+	Where(conds ...gen.Condition) IModelCatalogDo
+	Order(conds ...field.Expr) IModelCatalogDo
+	Distinct(cols ...field.Expr) IModelCatalogDo
+	Omit(cols ...field.Expr) IModelCatalogDo
+	Join(table schema.Tabler, on ...field.Expr) IModelCatalogDo
+	LeftJoin(table schema.Tabler, on ...field.Expr) IModelCatalogDo
+	RightJoin(table schema.Tabler, on ...field.Expr) IModelCatalogDo
+	Group(cols ...field.Expr) IModelCatalogDo
+	Having(conds ...gen.Condition) IModelCatalogDo
+	Limit(limit int) IModelCatalogDo
+	Offset(offset int) IModelCatalogDo
+	Count() (count int64, err error)
+	Scopes(funcs ...func(gen.Dao) gen.Dao) IModelCatalogDo
+	Unscoped() IModelCatalogDo
+	Create(values ...*dbschema.ModelCatalog) error
+	CreateInBatches(values []*dbschema.ModelCatalog, batchSize int) error
+	Save(values ...*dbschema.ModelCatalog) error
+	First() (*dbschema.ModelCatalog, error)
+	Take() (*dbschema.ModelCatalog, error)
+	Last() (*dbschema.ModelCatalog, error)
+	Find() ([]*dbschema.ModelCatalog, error)
+	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ModelCatalog, err error)
+	FindInBatches(result *[]*dbschema.ModelCatalog, batchSize int, fc func(tx gen.Dao, batch int) error) error
+	Pluck(column field.Expr, dest interface{}) error
+	Delete(...*dbschema.ModelCatalog) (info gen.ResultInfo, err error)
+	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	Updates(value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
+	UpdateFrom(q gen.SubQuery) gen.Dao
+	Attrs(attrs ...field.AssignExpr) IModelCatalogDo
+	Assign(attrs ...field.AssignExpr) IModelCatalogDo
+	Joins(fields ...field.RelationField) IModelCatalogDo
+	Preload(fields ...field.RelationField) IModelCatalogDo
+	FirstOrInit() (*dbschema.ModelCatalog, error)
+	FirstOrCreate() (*dbschema.ModelCatalog, error)
+	FindByPage(offset int, limit int) (result []*dbschema.ModelCatalog, count int64, err error)
+	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
+	Rows() (*sql.Rows, error)
+	Row() *sql.Row
+	Scan(result interface{}) (err error)
+	Returning(value interface{}, columns ...string) IModelCatalogDo
+	UnderlyingDB() *gorm.DB
+	schema.Tabler
+}
+
+func (m modelCatalogDo) Debug() IModelCatalogDo {
+	return m.withDO(m.DO.Debug())
+}
+
+func (m modelCatalogDo) WithContext(ctx context.Context) IModelCatalogDo {
+	return m.withDO(m.DO.WithContext(ctx))
+}
+
+func (m modelCatalogDo) ReadDB() IModelCatalogDo {
+	return m.Clauses(dbresolver.Read)
+}
+
+func (m modelCatalogDo) WriteDB() IModelCatalogDo {
+	return m.Clauses(dbresolver.Write)
+}
+
+func (m modelCatalogDo) Session(config *gorm.Session) IModelCatalogDo {
+	return m.withDO(m.DO.Session(config))
+}
+
+func (m modelCatalogDo) Clauses(conds ...clause.Expression) IModelCatalogDo {
+	return m.withDO(m.DO.Clauses(conds...))
+}
+
+func (m modelCatalogDo) Returning(value interface{}, columns ...string) IModelCatalogDo {
+	return m.withDO(m.DO.Returning(value, columns...))
+}
+
+func (m modelCatalogDo) Not(conds ...gen.Condition) IModelCatalogDo {
+	return m.withDO(m.DO.Not(conds...))
+}
+
+func (m modelCatalogDo) Or(conds ...gen.Condition) IModelCatalogDo {
+	return m.withDO(m.DO.Or(conds...))
+}
+
+func (m modelCatalogDo) Select(conds ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.Select(conds...))
+}
+
+func (m modelCatalogDo) Where(conds ...gen.Condition) IModelCatalogDo {
+	return m.withDO(m.DO.Where(conds...))
+}
+
+func (m modelCatalogDo) Order(conds ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.Order(conds...))
+}
+
+func (m modelCatalogDo) Distinct(cols ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.Distinct(cols...))
+}
+
+func (m modelCatalogDo) Omit(cols ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.Omit(cols...))
+}
+
+func (m modelCatalogDo) Join(table schema.Tabler, on ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.Join(table, on...))
+}
+
+func (m modelCatalogDo) LeftJoin(table schema.Tabler, on ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.LeftJoin(table, on...))
+}
+
+func (m modelCatalogDo) RightJoin(table schema.Tabler, on ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.RightJoin(table, on...))
+}
+
+func (m modelCatalogDo) Group(cols ...field.Expr) IModelCatalogDo {
+	return m.withDO(m.DO.Group(cols...))
+}
+
+func (m modelCatalogDo) Having(conds ...gen.Condition) IModelCatalogDo {
+	return m.withDO(m.DO.Having(conds...))
+}
+
+func (m modelCatalogDo) Limit(limit int) IModelCatalogDo {
+	return m.withDO(m.DO.Limit(limit))
+}
+
+func (m modelCatalogDo) Offset(offset int) IModelCatalogDo {
+	return m.withDO(m.DO.Offset(offset))
+}
+
+func (m modelCatalogDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IModelCatalogDo {
+	return m.withDO(m.DO.Scopes(funcs...))
+}
+
+func (m modelCatalogDo) Unscoped() IModelCatalogDo {
+	return m.withDO(m.DO.Unscoped())
+}
+
+func (m modelCatalogDo) Create(values ...*dbschema.ModelCatalog) error { // inserts values; calling with none is a no-op that returns nil
+	if len(values) == 0 {
+		return nil
+	}
+	return m.DO.Create(values) // the slice is passed as one argument, not spread
+}
+
+func (m modelCatalogDo) CreateInBatches(values []*dbschema.ModelCatalog, batchSize int) error {
+	return m.DO.CreateInBatches(values, batchSize)
+}
+
+// Save : !!! underlying implementation is different with GORM
+// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
+func (m modelCatalogDo) Save(values ...*dbschema.ModelCatalog) error {
+	if len(values) == 0 {
+		return nil
+	}
+	return m.DO.Save(values)
+}
+
+func (m modelCatalogDo) First() (*dbschema.ModelCatalog, error) {
+	if result, err := m.DO.First(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.ModelCatalog), nil
+	}
+}
+
+func (m modelCatalogDo) Take() (*dbschema.ModelCatalog, error) {
+	if result, err := m.DO.Take(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.ModelCatalog), nil
+	}
+}
+
+func (m modelCatalogDo) Last() (*dbschema.ModelCatalog, error) {
+	if result, err := m.DO.Last(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.ModelCatalog), nil
+	}
+}
+
+func (m modelCatalogDo) Find() ([]*dbschema.ModelCatalog, error) { // typed wrapper around DO.Find
+	result, err := m.DO.Find()
+	return result.([]*dbschema.ModelCatalog), err // NOTE(review): assertion runs even when err != nil; assumes DO.Find always returns this slice type — confirm
+}
+
+func (m modelCatalogDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ModelCatalog, err error) { // runs fc per batch and accumulates the rows into results
+	buf := make([]*dbschema.ModelCatalog, 0, batchSize) // presumably refilled by DO.FindInBatches for each batch — confirm
+	err = m.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error {
+		defer func() { results = append(results, buf...) }() // deferred: buf is appended after fc returns, including when fc returns an error
+		return fc(tx, batch)
+	})
+	return results, err
+}
+
+func (m modelCatalogDo) FindInBatches(result *[]*dbschema.ModelCatalog, batchSize int, fc func(tx gen.Dao, batch int) error) error {
+	return m.DO.FindInBatches(result, batchSize, fc)
+}
+
+func (m modelCatalogDo) Attrs(attrs ...field.AssignExpr) IModelCatalogDo {
+	return m.withDO(m.DO.Attrs(attrs...))
+}
+
+func (m modelCatalogDo) Assign(attrs ...field.AssignExpr) IModelCatalogDo {
+	return m.withDO(m.DO.Assign(attrs...))
+}
+
+func (m modelCatalogDo) Joins(fields ...field.RelationField) IModelCatalogDo { // applies DO.Joins once per relation field
+	for _, _f := range fields {
+		m = *m.withDO(m.DO.Joins(_f)) // m is the local copy (value receiver); each pass replaces its DO with the joined one
+	}
+	return &m
+}
+
+func (m modelCatalogDo) Preload(fields ...field.RelationField) IModelCatalogDo { // applies DO.Preload once per relation field
+	for _, _f := range fields {
+		m = *m.withDO(m.DO.Preload(_f)) // m is the local copy (value receiver); each pass replaces its DO with the preloading one
+	}
+	return &m
+}
+
+func (m modelCatalogDo) FirstOrInit() (*dbschema.ModelCatalog, error) {
+	if result, err := m.DO.FirstOrInit(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.ModelCatalog), nil
+	}
+}
+
+func (m modelCatalogDo) FirstOrCreate() (*dbschema.ModelCatalog, error) {
+	if result, err := m.DO.FirstOrCreate(); err != nil {
+		return nil, err
+	} else {
+		return result.(*dbschema.ModelCatalog), nil
+	}
+}
+
+func (m modelCatalogDo) FindByPage(offset int, limit int) (result []*dbschema.ModelCatalog, count int64, err error) { // returns one page of rows plus a total row count
+	result, err = m.Offset(offset).Limit(limit).Find()
+	if err != nil {
+		return
+	}
+
+	if size := len(result); 0 < limit && 0 < size && size < limit { // non-empty page shorter than limit: treated as the last page
+		count = int64(size + offset) // total derived from offset + page size, skipping the Count query
+		return
+	}
+
+	count, err = m.Offset(-1).Limit(-1).Count() // NOTE(review): -1 presumably clears offset/limit before counting — confirm against gorm
+	return
+}
+
+func (m modelCatalogDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) { // counts first, then scans the requested page into result
+	count, err = m.Count() // always issues Count; there is no short-page shortcut as in FindByPage
+	if err != nil {
+		return
+	}
+
+	err = m.Offset(offset).Limit(limit).Scan(result)
+	return
+}
+
+func (m modelCatalogDo) Scan(result interface{}) (err error) {
+	return m.DO.Scan(result)
+}
+
+func (m modelCatalogDo) Delete(models ...*dbschema.ModelCatalog) (result gen.ResultInfo, err error) {
+	return m.DO.Delete(models)
+}
+
+func (m *modelCatalogDo) withDO(do gen.Dao) *modelCatalogDo { // pointer receiver: overwrites m.DO in place and returns m
+	m.DO = *do.(*gen.DO) // unchecked assertion: panics if do is not a *gen.DO
+	return m
+}
diff --git a/services/llm-api/internal/infrastructure/database/gormgen/models.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/models.gen.go
new file mode 100644
index 00000000..c70a5cb9
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/gormgen/models.gen.go
@@ -0,0 +1,411 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+	"gorm.io/gorm/schema"
+
+	"gorm.io/gen"
+	"gorm.io/gen/field"
+
+	"gorm.io/plugin/dbresolver"
+
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+)
+
+func newModel(db *gorm.DB, opts ...gen.DOOption) model {
+	_model := model{}
+
+	_model.modelDo.UseDB(db, opts...)
+	_model.modelDo.UseModel(&dbschema.Model{})
+
+	tableName := _model.modelDo.TableName()
+	_model.ALL = field.NewAsterisk(tableName)
+	_model.ID = field.NewString(tableName, "id")
+	_model.Provider = field.NewString(tableName, "provider")
+	_model.DisplayName = field.NewString(tableName, "display_name")
+	_model.Family = field.NewString(tableName, "family")
+	_model.Capabilities = field.NewField(tableName, "capabilities")
+	_model.Active = field.NewBool(tableName, "active")
+	_model.CreatedAt = field.NewTime(tableName, "created_at")
+	_model.UpdatedAt = field.NewTime(tableName, "updated_at")
+
+	_model.fillFieldMap()
+
+	return _model
+}
+
+// model is the typed query helper for dbschema.Model. It embeds modelDo for
+// query execution and exposes one field expression per column, plus ALL for
+// the table-wide asterisk.
+type model struct {
+	modelDo
+
+	ALL          field.Asterisk
+	ID           field.String
+	Provider     field.String
+	DisplayName  field.String
+	Family       field.String
+	Capabilities field.Field
+	Active       field.Bool
+	CreatedAt    field.Time
+	UpdatedAt    field.Time
+
+	// fieldMap indexes the column expressions by column name; see fillFieldMap.
+	fieldMap map[string]field.Expr
+}
+
+// Table returns a copy of the helper that targets newTableName: the embedded
+// DO is switched with UseTable and all column expressions are rebuilt.
+func (m model) Table(newTableName string) *model {
+	do := &m.modelDo
+	do.UseTable(newTableName)
+	return m.updateTableName(newTableName)
+}
+
+// As returns a copy of the helper whose DO is the aliased one produced by
+// the embedded As, with all column expressions rebuilt against alias.
+func (m model) As(alias string) *model {
+	aliased := m.modelDo.As(alias).(*gen.DO)
+	m.modelDo.DO = *aliased
+	return m.updateTableName(alias)
+}
+
+// updateTableName rebuilds every column expression (and ALL) so that it is
+// qualified by table, refreshes fieldMap, and returns the receiver.
+func (m *model) updateTableName(table string) *model {
+	str := func(column string) field.String { return field.NewString(table, column) }
+	ts := func(column string) field.Time { return field.NewTime(table, column) }
+
+	m.ALL = field.NewAsterisk(table)
+	m.ID = str("id")
+	m.Provider = str("provider")
+	m.DisplayName = str("display_name")
+	m.Family = str("family")
+	m.Capabilities = field.NewField(table, "capabilities")
+	m.Active = field.NewBool(table, "active")
+	m.CreatedAt = ts("created_at")
+	m.UpdatedAt = ts("updated_at")
+
+	m.fillFieldMap()
+
+	return m
+}
+
+// GetFieldByName looks fieldName up in fieldMap. It returns (nil, false) when
+// the name is absent or maps to nil; otherwise it returns the entry asserted
+// to field.OrderExpr together with whether that assertion succeeded.
+func (m *model) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
+	expr, found := m.fieldMap[fieldName]
+	if found && expr != nil {
+		orderable, isOrderable := expr.(field.OrderExpr)
+		return orderable, isOrderable
+	}
+	return nil, false
+}
+
+// fillFieldMap replaces fieldMap with a fresh map from column name to the
+// helper's current expression for that column.
+func (m *model) fillFieldMap() {
+	m.fieldMap = map[string]field.Expr{
+		"id":           m.ID,
+		"provider":     m.Provider,
+		"display_name": m.DisplayName,
+		"family":       m.Family,
+		"capabilities": m.Capabilities,
+		"active":       m.Active,
+		"created_at":   m.CreatedAt,
+		"updated_at":   m.UpdatedAt,
+	}
+}
+
+// clone returns a copy of the helper whose DO uses the connection pool taken
+// from db.Statement.
+func (m model) clone(db *gorm.DB) model {
+	pool := db.Statement.ConnPool
+	m.modelDo.ReplaceConnPool(pool)
+	return m
+}
+
+// replaceDB returns a copy of the helper after calling ReplaceDB(db) on its
+// embedded DO.
+func (m model) replaceDB(db *gorm.DB) model {
+	do := &m.modelDo
+	do.ReplaceDB(db)
+	return m
+}
+
+// modelDo wraps gen.DO so that the query methods defined on it can return
+// results typed for dbschema.Model.
+type modelDo struct{ gen.DO }
+
+// IModelDo is the typed query interface for dbschema.Model. Builder methods
+// return IModelDo so calls can be chained; finisher methods return
+// *dbschema.Model values, counts, or gen.ResultInfo.
+type IModelDo interface {
+	gen.SubQuery
+	Debug() IModelDo
+	WithContext(ctx context.Context) IModelDo
+	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
+	ReplaceDB(db *gorm.DB)
+	ReadDB() IModelDo
+	WriteDB() IModelDo
+	As(alias string) gen.Dao
+	Session(config *gorm.Session) IModelDo
+	Columns(cols ...field.Expr) gen.Columns
+	Clauses(conds ...clause.Expression) IModelDo
+	Not(conds ...gen.Condition) IModelDo
+	Or(conds ...gen.Condition) IModelDo
+	Select(conds ...field.Expr) IModelDo
+	Where(conds ...gen.Condition) IModelDo
+	Order(conds ...field.Expr) IModelDo
+	Distinct(cols ...field.Expr) IModelDo
+	Omit(cols ...field.Expr) IModelDo
+	Join(table schema.Tabler, on ...field.Expr) IModelDo
+	LeftJoin(table schema.Tabler, on ...field.Expr) IModelDo
+	RightJoin(table schema.Tabler, on ...field.Expr) IModelDo
+	Group(cols ...field.Expr) IModelDo
+	Having(conds ...gen.Condition) IModelDo
+	Limit(limit int) IModelDo
+	Offset(offset int) IModelDo
+	Count() (count int64, err error)
+	Scopes(funcs ...func(gen.Dao) gen.Dao) IModelDo
+	Unscoped() IModelDo
+	Create(values ...*dbschema.Model) error
+	CreateInBatches(values []*dbschema.Model, batchSize int) error
+	Save(values ...*dbschema.Model) error
+	First() (*dbschema.Model, error)
+	Take() (*dbschema.Model, error)
+	Last() (*dbschema.Model, error)
+	Find() ([]*dbschema.Model, error)
+	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Model, err error)
+	FindInBatches(result *[]*dbschema.Model, batchSize int, fc func(tx gen.Dao, batch int) error) error
+	Pluck(column field.Expr, dest interface{}) error
+	Delete(...*dbschema.Model) (info gen.ResultInfo, err error)
+	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	Updates(value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
+	UpdateFrom(q gen.SubQuery) gen.Dao
+	Attrs(attrs ...field.AssignExpr) IModelDo
+	Assign(attrs ...field.AssignExpr) IModelDo
+	Joins(fields ...field.RelationField) IModelDo
+	Preload(fields ...field.RelationField) IModelDo
+	FirstOrInit() (*dbschema.Model, error)
+	FirstOrCreate() (*dbschema.Model, error)
+	FindByPage(offset int, limit int) (result []*dbschema.Model, count int64, err error)
+	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
+	Rows() (*sql.Rows, error)
+	Row() *sql.Row
+	Scan(result interface{}) (err error)
+	Returning(value interface{}, columns ...string) IModelDo
+	UnderlyingDB() *gorm.DB
+	schema.Tabler
+}
+
+// Debug forwards to gen.DO.Debug and re-wraps the resulting Dao as IModelDo.
+func (m modelDo) Debug() IModelDo {
+	next := m.DO.Debug()
+	return m.withDO(next)
+}
+
+// WithContext forwards ctx to gen.DO.WithContext and re-wraps the resulting
+// Dao as IModelDo.
+func (m modelDo) WithContext(ctx context.Context) IModelDo {
+	next := m.DO.WithContext(ctx)
+	return m.withDO(next)
+}
+
+// ReadDB returns the query with the dbresolver.Read clause attached.
+func (m modelDo) ReadDB() IModelDo {
+	route := dbresolver.Read
+	return m.Clauses(route)
+}
+
+// WriteDB returns the query with the dbresolver.Write clause attached.
+func (m modelDo) WriteDB() IModelDo {
+	route := dbresolver.Write
+	return m.Clauses(route)
+}
+
+// Session forwards config to gen.DO.Session and re-wraps the resulting Dao
+// as IModelDo.
+func (m modelDo) Session(config *gorm.Session) IModelDo {
+	next := m.DO.Session(config)
+	return m.withDO(next)
+}
+
+// Clauses forwards conds to gen.DO.Clauses and re-wraps the resulting Dao
+// as IModelDo.
+func (m modelDo) Clauses(conds ...clause.Expression) IModelDo {
+	next := m.DO.Clauses(conds...)
+	return m.withDO(next)
+}
+
+// Returning forwards value and columns to gen.DO.Returning and re-wraps the
+// resulting Dao as IModelDo.
+func (m modelDo) Returning(value interface{}, columns ...string) IModelDo {
+	next := m.DO.Returning(value, columns...)
+	return m.withDO(next)
+}
+
+// Not forwards conds to gen.DO.Not and re-wraps the resulting Dao as IModelDo.
+func (m modelDo) Not(conds ...gen.Condition) IModelDo {
+	next := m.DO.Not(conds...)
+	return m.withDO(next)
+}
+
+// Or forwards conds to gen.DO.Or and re-wraps the resulting Dao as IModelDo.
+func (m modelDo) Or(conds ...gen.Condition) IModelDo {
+	next := m.DO.Or(conds...)
+	return m.withDO(next)
+}
+
+// Select forwards conds to gen.DO.Select and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Select(conds ...field.Expr) IModelDo {
+	next := m.DO.Select(conds...)
+	return m.withDO(next)
+}
+
+// Where forwards conds to gen.DO.Where and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Where(conds ...gen.Condition) IModelDo {
+	next := m.DO.Where(conds...)
+	return m.withDO(next)
+}
+
+// Order forwards conds to gen.DO.Order and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Order(conds ...field.Expr) IModelDo {
+	next := m.DO.Order(conds...)
+	return m.withDO(next)
+}
+
+// Distinct forwards cols to gen.DO.Distinct and re-wraps the resulting Dao
+// as IModelDo.
+func (m modelDo) Distinct(cols ...field.Expr) IModelDo {
+	next := m.DO.Distinct(cols...)
+	return m.withDO(next)
+}
+
+// Omit forwards cols to gen.DO.Omit and re-wraps the resulting Dao as IModelDo.
+func (m modelDo) Omit(cols ...field.Expr) IModelDo {
+	next := m.DO.Omit(cols...)
+	return m.withDO(next)
+}
+
+// Join forwards table and the on-expressions to gen.DO.Join and re-wraps the
+// resulting Dao as IModelDo.
+func (m modelDo) Join(table schema.Tabler, on ...field.Expr) IModelDo {
+	next := m.DO.Join(table, on...)
+	return m.withDO(next)
+}
+
+// LeftJoin forwards table and the on-expressions to gen.DO.LeftJoin and
+// re-wraps the resulting Dao as IModelDo.
+func (m modelDo) LeftJoin(table schema.Tabler, on ...field.Expr) IModelDo {
+	next := m.DO.LeftJoin(table, on...)
+	return m.withDO(next)
+}
+
+// RightJoin forwards table and the on-expressions to gen.DO.RightJoin and
+// re-wraps the resulting Dao as IModelDo.
+func (m modelDo) RightJoin(table schema.Tabler, on ...field.Expr) IModelDo {
+	next := m.DO.RightJoin(table, on...)
+	return m.withDO(next)
+}
+
+// Group forwards cols to gen.DO.Group and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Group(cols ...field.Expr) IModelDo {
+	next := m.DO.Group(cols...)
+	return m.withDO(next)
+}
+
+// Having forwards conds to gen.DO.Having and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Having(conds ...gen.Condition) IModelDo {
+	next := m.DO.Having(conds...)
+	return m.withDO(next)
+}
+
+// Limit forwards limit to gen.DO.Limit and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Limit(limit int) IModelDo {
+	next := m.DO.Limit(limit)
+	return m.withDO(next)
+}
+
+// Offset forwards offset to gen.DO.Offset and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Offset(offset int) IModelDo {
+	next := m.DO.Offset(offset)
+	return m.withDO(next)
+}
+
+// Scopes forwards funcs to gen.DO.Scopes and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IModelDo {
+	next := m.DO.Scopes(funcs...)
+	return m.withDO(next)
+}
+
+// Unscoped forwards to gen.DO.Unscoped and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Unscoped() IModelDo {
+	next := m.DO.Unscoped()
+	return m.withDO(next)
+}
+
+// Create passes values, as a single slice, to gen.DO.Create. With no values
+// it returns nil without calling the DO.
+func (m modelDo) Create(values ...*dbschema.Model) error {
+	if len(values) > 0 {
+		return m.DO.Create(values)
+	}
+	return nil
+}
+
+// CreateInBatches forwards values and batchSize to gen.DO.CreateInBatches.
+func (m modelDo) CreateInBatches(values []*dbschema.Model, batchSize int) error {
+	err := m.DO.CreateInBatches(values, batchSize)
+	return err
+}
+
+// Save passes values, as a single slice, to gen.DO.Save; with no values it
+// returns nil without calling the DO.
+//
+// NOTE: the underlying implementation differs from GORM's Save. It is
+// equivalent to executing:
+// db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
+func (m modelDo) Save(values ...*dbschema.Model) error {
+	if len(values) > 0 {
+		return m.DO.Save(values)
+	}
+	return nil
+}
+
+// First calls gen.DO.First and returns its result typed as *dbschema.Model,
+// or nil together with the error on failure.
+func (m modelDo) First() (*dbschema.Model, error) {
+	row, err := m.DO.First()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.Model), nil
+}
+
+// Take calls gen.DO.Take and returns its result typed as *dbschema.Model,
+// or nil together with the error on failure.
+func (m modelDo) Take() (*dbschema.Model, error) {
+	row, err := m.DO.Take()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.Model), nil
+}
+
+// Last calls gen.DO.Last and returns its result typed as *dbschema.Model,
+// or nil together with the error on failure.
+func (m modelDo) Last() (*dbschema.Model, error) {
+	row, err := m.DO.Last()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.Model), nil
+}
+
+// Find calls gen.DO.Find and returns its result asserted to
+// []*dbschema.Model together with the error. The assertion runs even when
+// err is non-nil.
+func (m modelDo) Find() ([]*dbschema.Model, error) {
+	raw, err := m.DO.Find()
+	rows := raw.([]*dbschema.Model)
+	return rows, err
+}
+
+// FindInBatch runs gen.DO.FindInBatches with an internal page buffer, calling
+// fc for each batch and appending the buffer's contents to results after
+// every fc call. It returns the accumulated rows and the batching error.
+func (m modelDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Model, err error) {
+	page := make([]*dbschema.Model, 0, batchSize)
+	collect := func(tx gen.Dao, batch int) error {
+		// Deferred so the page is recorded whatever fc returns.
+		defer func() { results = append(results, page...) }()
+		return fc(tx, batch)
+	}
+	err = m.DO.FindInBatches(&page, batchSize, collect)
+	return results, err
+}
+
+// FindInBatches forwards result, batchSize and fc to gen.DO.FindInBatches.
+func (m modelDo) FindInBatches(result *[]*dbschema.Model, batchSize int, fc func(tx gen.Dao, batch int) error) error {
+	err := m.DO.FindInBatches(result, batchSize, fc)
+	return err
+}
+
+// Attrs forwards attrs to gen.DO.Attrs and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Attrs(attrs ...field.AssignExpr) IModelDo {
+	next := m.DO.Attrs(attrs...)
+	return m.withDO(next)
+}
+
+// Assign forwards attrs to gen.DO.Assign and re-wraps the resulting Dao as
+// IModelDo.
+func (m modelDo) Assign(attrs ...field.AssignExpr) IModelDo {
+	next := m.DO.Assign(attrs...)
+	return m.withDO(next)
+}
+
+// Joins applies gen.DO.Joins once per relation field, in argument order, and
+// returns the accumulated query as IModelDo.
+func (m modelDo) Joins(fields ...field.RelationField) IModelDo {
+	for i := range fields {
+		// withDO stores the new DO in the local copy m.
+		m.withDO(m.DO.Joins(fields[i]))
+	}
+	return &m
+}
+
+// Preload applies gen.DO.Preload once per relation field, in argument order,
+// and returns the accumulated query as IModelDo.
+func (m modelDo) Preload(fields ...field.RelationField) IModelDo {
+	for i := range fields {
+		// withDO stores the new DO in the local copy m.
+		m.withDO(m.DO.Preload(fields[i]))
+	}
+	return &m
+}
+
+// FirstOrInit calls gen.DO.FirstOrInit and returns its result typed as
+// *dbschema.Model, or nil together with the error on failure.
+func (m modelDo) FirstOrInit() (*dbschema.Model, error) {
+	row, err := m.DO.FirstOrInit()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.Model), nil
+}
+
+// FirstOrCreate calls gen.DO.FirstOrCreate and returns its result typed as
+// *dbschema.Model, or nil together with the error on failure.
+func (m modelDo) FirstOrCreate() (*dbschema.Model, error) {
+	row, err := m.DO.FirstOrCreate()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.Model), nil
+}
+
+// FindByPage fetches one page of rows with Offset/Limit and reports a total
+// count. When the page is non-empty but shorter than a positive limit, the
+// count is derived as offset+len(result) without a second query; otherwise
+// Count is run with offset and limit reset to -1.
+func (m modelDo) FindByPage(offset int, limit int) (result []*dbschema.Model, count int64, err error) {
+	if result, err = m.Offset(offset).Limit(limit).Find(); err != nil {
+		return result, count, err
+	}
+
+	size := len(result)
+	if limit > 0 && size > 0 && size < limit {
+		return result, int64(size + offset), err
+	}
+
+	count, err = m.Offset(-1).Limit(-1).Count()
+	return result, count, err
+}
+
+// ScanByPage first runs Count on the query, then scans the page selected by
+// offset/limit into result. A Count error is returned before any scan happens.
+func (m modelDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
+	if count, err = m.Count(); err != nil {
+		return count, err
+	}
+
+	err = m.Offset(offset).Limit(limit).Scan(result)
+	return count, err
+}
+
+// Scan forwards to gen.DO.Scan, writing the query output into result.
+func (m modelDo) Scan(result interface{}) (err error) {
+	err = m.DO.Scan(result)
+	return err
+}
+
+// Delete forwards the given models, as a single slice, to gen.DO.Delete.
+func (m modelDo) Delete(models ...*dbschema.Model) (result gen.ResultInfo, err error) {
+	result, err = m.DO.Delete(models)
+	return result, err
+}
+
+// withDO overwrites the embedded gen.DO with the value behind do and returns
+// the receiver. do must hold a *gen.DO; any other dynamic type panics on the
+// type assertion.
+func (m *modelDo) withDO(do gen.Dao) *modelDo {
+	concrete := do.(*gen.DO)
+	m.DO = *concrete
+	return m
+}
diff --git a/services/llm-api/internal/infrastructure/database/gormgen/provider_models.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/provider_models.gen.go
new file mode 100644
index 00000000..12aa5d3c
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/gormgen/provider_models.gen.go
@@ -0,0 +1,459 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+	"gorm.io/gorm/schema"
+
+	"gorm.io/gen"
+	"gorm.io/gen/field"
+
+	"gorm.io/plugin/dbresolver"
+
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+)
+
+func newProviderModel(db *gorm.DB, opts ...gen.DOOption) providerModel {
+	_providerModel := providerModel{}
+
+	_providerModel.providerModelDo.UseDB(db, opts...)
+	_providerModel.providerModelDo.UseModel(&dbschema.ProviderModel{})
+
+	tableName := _providerModel.providerModelDo.TableName()
+	_providerModel.ALL = field.NewAsterisk(tableName)
+	_providerModel.ID = field.NewUint(tableName, "id")
+	_providerModel.CreatedAt = field.NewTime(tableName, "created_at")
+	_providerModel.UpdatedAt = field.NewTime(tableName, "updated_at")
+	_providerModel.DeletedAt = field.NewField(tableName, "deleted_at")
+	_providerModel.ProviderID = field.NewUint(tableName, "provider_id")
+	_providerModel.PublicID = field.NewString(tableName, "public_id")
+	_providerModel.Kind = field.NewString(tableName, "kind")
+	_providerModel.ModelCatalogID = field.NewUint(tableName, "model_catalog_id")
+	_providerModel.ModelPublicID = field.NewString(tableName, "model_public_id")
+	_providerModel.ProviderOriginalModelID = field.NewString(tableName, "provider_original_model_id")
+	_providerModel.DisplayName = field.NewString(tableName, "display_name")
+	_providerModel.Pricing = field.NewField(tableName, "pricing")
+	_providerModel.TokenLimits = field.NewField(tableName, "token_limits")
+	_providerModel.Family = field.NewString(tableName, "family")
+	_providerModel.SupportsImages = field.NewBool(tableName, "supports_images")
+	_providerModel.SupportsEmbeddings = field.NewBool(tableName, "supports_embeddings")
+	_providerModel.SupportsReasoning = field.NewBool(tableName, "supports_reasoning")
+	_providerModel.SupportsAudio = field.NewBool(tableName, "supports_audio")
+	_providerModel.SupportsVideo = field.NewBool(tableName, "supports_video")
+	_providerModel.Active = field.NewBool(tableName, "active")
+
+	_providerModel.fillFieldMap()
+
+	return _providerModel
+}
+
+// providerModel is the typed query helper for dbschema.ProviderModel. It
+// embeds providerModelDo for query execution and exposes one field expression
+// per column, plus ALL for the table-wide asterisk.
+type providerModel struct {
+	providerModelDo
+
+	ALL                     field.Asterisk
+	ID                      field.Uint
+	CreatedAt               field.Time
+	UpdatedAt               field.Time
+	DeletedAt               field.Field
+	ProviderID              field.Uint
+	PublicID                field.String
+	Kind                    field.String
+	ModelCatalogID          field.Uint
+	ModelPublicID           field.String
+	ProviderOriginalModelID field.String
+	DisplayName             field.String
+	Pricing                 field.Field
+	TokenLimits             field.Field
+	Family                  field.String
+	SupportsImages          field.Bool
+	SupportsEmbeddings      field.Bool
+	SupportsReasoning       field.Bool
+	SupportsAudio           field.Bool
+	SupportsVideo           field.Bool
+	Active                  field.Bool
+
+	// fieldMap indexes the column expressions by column name; see fillFieldMap.
+	fieldMap map[string]field.Expr
+}
+
+// Table returns a copy of the helper that targets newTableName: the embedded
+// DO is switched with UseTable and all column expressions are rebuilt.
+func (p providerModel) Table(newTableName string) *providerModel {
+	do := &p.providerModelDo
+	do.UseTable(newTableName)
+	return p.updateTableName(newTableName)
+}
+
+// As returns a copy of the helper whose DO is the aliased one produced by
+// the embedded As, with all column expressions rebuilt against alias.
+func (p providerModel) As(alias string) *providerModel {
+	aliased := p.providerModelDo.As(alias).(*gen.DO)
+	p.providerModelDo.DO = *aliased
+	return p.updateTableName(alias)
+}
+
+// updateTableName rebuilds every column expression (and ALL) so that it is
+// qualified by table, refreshes fieldMap, and returns the receiver.
+func (p *providerModel) updateTableName(table string) *providerModel {
+	num := func(column string) field.Uint { return field.NewUint(table, column) }
+	str := func(column string) field.String { return field.NewString(table, column) }
+	flag := func(column string) field.Bool { return field.NewBool(table, column) }
+	ts := func(column string) field.Time { return field.NewTime(table, column) }
+	raw := func(column string) field.Field { return field.NewField(table, column) }
+
+	p.ALL = field.NewAsterisk(table)
+	p.ID = num("id")
+	p.CreatedAt = ts("created_at")
+	p.UpdatedAt = ts("updated_at")
+	p.DeletedAt = raw("deleted_at")
+	p.ProviderID = num("provider_id")
+	p.PublicID = str("public_id")
+	p.Kind = str("kind")
+	p.ModelCatalogID = num("model_catalog_id")
+	p.ModelPublicID = str("model_public_id")
+	p.ProviderOriginalModelID = str("provider_original_model_id")
+	p.DisplayName = str("display_name")
+	p.Pricing = raw("pricing")
+	p.TokenLimits = raw("token_limits")
+	p.Family = str("family")
+	p.SupportsImages = flag("supports_images")
+	p.SupportsEmbeddings = flag("supports_embeddings")
+	p.SupportsReasoning = flag("supports_reasoning")
+	p.SupportsAudio = flag("supports_audio")
+	p.SupportsVideo = flag("supports_video")
+	p.Active = flag("active")
+
+	p.fillFieldMap()
+
+	return p
+}
+
+// GetFieldByName looks fieldName up in fieldMap. It returns (nil, false) when
+// the name is absent or maps to nil; otherwise it returns the entry asserted
+// to field.OrderExpr together with whether that assertion succeeded.
+func (p *providerModel) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
+	expr, found := p.fieldMap[fieldName]
+	if found && expr != nil {
+		orderable, isOrderable := expr.(field.OrderExpr)
+		return orderable, isOrderable
+	}
+	return nil, false
+}
+
+// fillFieldMap replaces fieldMap with a fresh map from column name to the
+// helper's current expression for that column.
+func (p *providerModel) fillFieldMap() {
+	p.fieldMap = map[string]field.Expr{
+		"id":                         p.ID,
+		"created_at":                 p.CreatedAt,
+		"updated_at":                 p.UpdatedAt,
+		"deleted_at":                 p.DeletedAt,
+		"provider_id":                p.ProviderID,
+		"public_id":                  p.PublicID,
+		"kind":                       p.Kind,
+		"model_catalog_id":           p.ModelCatalogID,
+		"model_public_id":            p.ModelPublicID,
+		"provider_original_model_id": p.ProviderOriginalModelID,
+		"display_name":               p.DisplayName,
+		"pricing":                    p.Pricing,
+		"token_limits":               p.TokenLimits,
+		"family":                     p.Family,
+		"supports_images":            p.SupportsImages,
+		"supports_embeddings":        p.SupportsEmbeddings,
+		"supports_reasoning":         p.SupportsReasoning,
+		"supports_audio":             p.SupportsAudio,
+		"supports_video":             p.SupportsVideo,
+		"active":                     p.Active,
+	}
+}
+
+// clone returns a copy of the helper whose DO uses the connection pool taken
+// from db.Statement.
+func (p providerModel) clone(db *gorm.DB) providerModel {
+	pool := db.Statement.ConnPool
+	p.providerModelDo.ReplaceConnPool(pool)
+	return p
+}
+
+// replaceDB returns a copy of the helper after calling ReplaceDB(db) on its
+// embedded DO.
+func (p providerModel) replaceDB(db *gorm.DB) providerModel {
+	do := &p.providerModelDo
+	do.ReplaceDB(db)
+	return p
+}
+
+// providerModelDo wraps gen.DO so that the query methods defined on it can
+// return results typed for dbschema.ProviderModel.
+type providerModelDo struct{ gen.DO }
+
+// IProviderModelDo is the typed query interface for dbschema.ProviderModel.
+// Builder methods return IProviderModelDo so calls can be chained; finisher
+// methods return *dbschema.ProviderModel values, counts, or gen.ResultInfo.
+type IProviderModelDo interface {
+	gen.SubQuery
+	Debug() IProviderModelDo
+	WithContext(ctx context.Context) IProviderModelDo
+	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
+	ReplaceDB(db *gorm.DB)
+	ReadDB() IProviderModelDo
+	WriteDB() IProviderModelDo
+	As(alias string) gen.Dao
+	Session(config *gorm.Session) IProviderModelDo
+	Columns(cols ...field.Expr) gen.Columns
+	Clauses(conds ...clause.Expression) IProviderModelDo
+	Not(conds ...gen.Condition) IProviderModelDo
+	Or(conds ...gen.Condition) IProviderModelDo
+	Select(conds ...field.Expr) IProviderModelDo
+	Where(conds ...gen.Condition) IProviderModelDo
+	Order(conds ...field.Expr) IProviderModelDo
+	Distinct(cols ...field.Expr) IProviderModelDo
+	Omit(cols ...field.Expr) IProviderModelDo
+	Join(table schema.Tabler, on ...field.Expr) IProviderModelDo
+	LeftJoin(table schema.Tabler, on ...field.Expr) IProviderModelDo
+	RightJoin(table schema.Tabler, on ...field.Expr) IProviderModelDo
+	Group(cols ...field.Expr) IProviderModelDo
+	Having(conds ...gen.Condition) IProviderModelDo
+	Limit(limit int) IProviderModelDo
+	Offset(offset int) IProviderModelDo
+	Count() (count int64, err error)
+	Scopes(funcs ...func(gen.Dao) gen.Dao) IProviderModelDo
+	Unscoped() IProviderModelDo
+	Create(values ...*dbschema.ProviderModel) error
+	CreateInBatches(values []*dbschema.ProviderModel, batchSize int) error
+	Save(values ...*dbschema.ProviderModel) error
+	First() (*dbschema.ProviderModel, error)
+	Take() (*dbschema.ProviderModel, error)
+	Last() (*dbschema.ProviderModel, error)
+	Find() ([]*dbschema.ProviderModel, error)
+	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ProviderModel, err error)
+	FindInBatches(result *[]*dbschema.ProviderModel, batchSize int, fc func(tx gen.Dao, batch int) error) error
+	Pluck(column field.Expr, dest interface{}) error
+	Delete(...*dbschema.ProviderModel) (info gen.ResultInfo, err error)
+	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	Updates(value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
+	UpdateFrom(q gen.SubQuery) gen.Dao
+	Attrs(attrs ...field.AssignExpr) IProviderModelDo
+	Assign(attrs ...field.AssignExpr) IProviderModelDo
+	Joins(fields ...field.RelationField) IProviderModelDo
+	Preload(fields ...field.RelationField) IProviderModelDo
+	FirstOrInit() (*dbschema.ProviderModel, error)
+	FirstOrCreate() (*dbschema.ProviderModel, error)
+	FindByPage(offset int, limit int) (result []*dbschema.ProviderModel, count int64, err error)
+	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
+	Rows() (*sql.Rows, error)
+	Row() *sql.Row
+	Scan(result interface{}) (err error)
+	Returning(value interface{}, columns ...string) IProviderModelDo
+	UnderlyingDB() *gorm.DB
+	schema.Tabler
+}
+
+// Debug forwards to gen.DO.Debug and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Debug() IProviderModelDo {
+	next := p.DO.Debug()
+	return p.withDO(next)
+}
+
+// WithContext forwards ctx to gen.DO.WithContext and re-wraps the resulting
+// Dao as IProviderModelDo.
+func (p providerModelDo) WithContext(ctx context.Context) IProviderModelDo {
+	next := p.DO.WithContext(ctx)
+	return p.withDO(next)
+}
+
+// ReadDB returns the query with the dbresolver.Read clause attached.
+func (p providerModelDo) ReadDB() IProviderModelDo {
+	route := dbresolver.Read
+	return p.Clauses(route)
+}
+
+// WriteDB returns the query with the dbresolver.Write clause attached.
+func (p providerModelDo) WriteDB() IProviderModelDo {
+	route := dbresolver.Write
+	return p.Clauses(route)
+}
+
+// Session forwards config to gen.DO.Session and re-wraps the resulting Dao
+// as IProviderModelDo.
+func (p providerModelDo) Session(config *gorm.Session) IProviderModelDo {
+	next := p.DO.Session(config)
+	return p.withDO(next)
+}
+
+// Clauses forwards conds to gen.DO.Clauses and re-wraps the resulting Dao
+// as IProviderModelDo.
+func (p providerModelDo) Clauses(conds ...clause.Expression) IProviderModelDo {
+	next := p.DO.Clauses(conds...)
+	return p.withDO(next)
+}
+
+// Returning forwards value and columns to gen.DO.Returning and re-wraps the
+// resulting Dao as IProviderModelDo.
+func (p providerModelDo) Returning(value interface{}, columns ...string) IProviderModelDo {
+	next := p.DO.Returning(value, columns...)
+	return p.withDO(next)
+}
+
+// Not forwards conds to gen.DO.Not and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Not(conds ...gen.Condition) IProviderModelDo {
+	next := p.DO.Not(conds...)
+	return p.withDO(next)
+}
+
+// Or forwards conds to gen.DO.Or and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Or(conds ...gen.Condition) IProviderModelDo {
+	next := p.DO.Or(conds...)
+	return p.withDO(next)
+}
+
+// Select forwards conds to gen.DO.Select and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Select(conds ...field.Expr) IProviderModelDo {
+	next := p.DO.Select(conds...)
+	return p.withDO(next)
+}
+
+// Where forwards conds to gen.DO.Where and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Where(conds ...gen.Condition) IProviderModelDo {
+	next := p.DO.Where(conds...)
+	return p.withDO(next)
+}
+
+// Order forwards conds to gen.DO.Order and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Order(conds ...field.Expr) IProviderModelDo {
+	next := p.DO.Order(conds...)
+	return p.withDO(next)
+}
+
+// Distinct forwards cols to gen.DO.Distinct and re-wraps the resulting Dao
+// as IProviderModelDo.
+func (p providerModelDo) Distinct(cols ...field.Expr) IProviderModelDo {
+	next := p.DO.Distinct(cols...)
+	return p.withDO(next)
+}
+
+// Omit forwards cols to gen.DO.Omit and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Omit(cols ...field.Expr) IProviderModelDo {
+	next := p.DO.Omit(cols...)
+	return p.withDO(next)
+}
+
+// Join forwards table and the on-expressions to gen.DO.Join and re-wraps the
+// resulting Dao as IProviderModelDo.
+func (p providerModelDo) Join(table schema.Tabler, on ...field.Expr) IProviderModelDo {
+	next := p.DO.Join(table, on...)
+	return p.withDO(next)
+}
+
+// LeftJoin forwards table and the on-expressions to gen.DO.LeftJoin and
+// re-wraps the resulting Dao as IProviderModelDo.
+func (p providerModelDo) LeftJoin(table schema.Tabler, on ...field.Expr) IProviderModelDo {
+	next := p.DO.LeftJoin(table, on...)
+	return p.withDO(next)
+}
+
+// RightJoin forwards table and the on-expressions to gen.DO.RightJoin and
+// re-wraps the resulting Dao as IProviderModelDo.
+func (p providerModelDo) RightJoin(table schema.Tabler, on ...field.Expr) IProviderModelDo {
+	next := p.DO.RightJoin(table, on...)
+	return p.withDO(next)
+}
+
+// Group forwards cols to gen.DO.Group and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Group(cols ...field.Expr) IProviderModelDo {
+	next := p.DO.Group(cols...)
+	return p.withDO(next)
+}
+
+// Having forwards conds to gen.DO.Having and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Having(conds ...gen.Condition) IProviderModelDo {
+	next := p.DO.Having(conds...)
+	return p.withDO(next)
+}
+
+// Limit forwards limit to gen.DO.Limit and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Limit(limit int) IProviderModelDo {
+	next := p.DO.Limit(limit)
+	return p.withDO(next)
+}
+
+// Offset forwards offset to gen.DO.Offset and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Offset(offset int) IProviderModelDo {
+	next := p.DO.Offset(offset)
+	return p.withDO(next)
+}
+
+// Scopes forwards funcs to gen.DO.Scopes and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IProviderModelDo {
+	next := p.DO.Scopes(funcs...)
+	return p.withDO(next)
+}
+
+// Unscoped forwards to gen.DO.Unscoped and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Unscoped() IProviderModelDo {
+	next := p.DO.Unscoped()
+	return p.withDO(next)
+}
+
+// Create passes values, as a single slice, to gen.DO.Create. With no values
+// it returns nil without calling the DO.
+func (p providerModelDo) Create(values ...*dbschema.ProviderModel) error {
+	if len(values) > 0 {
+		return p.DO.Create(values)
+	}
+	return nil
+}
+
+// CreateInBatches forwards values and batchSize to gen.DO.CreateInBatches.
+func (p providerModelDo) CreateInBatches(values []*dbschema.ProviderModel, batchSize int) error {
+	err := p.DO.CreateInBatches(values, batchSize)
+	return err
+}
+
+// Save passes values, as a single slice, to gen.DO.Save; with no values it
+// returns nil without calling the DO.
+//
+// NOTE: the underlying implementation differs from GORM's Save. It is
+// equivalent to executing:
+// db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
+func (p providerModelDo) Save(values ...*dbschema.ProviderModel) error {
+	if len(values) > 0 {
+		return p.DO.Save(values)
+	}
+	return nil
+}
+
+// First calls gen.DO.First and returns its result typed as
+// *dbschema.ProviderModel, or nil together with the error on failure.
+func (p providerModelDo) First() (*dbschema.ProviderModel, error) {
+	row, err := p.DO.First()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.ProviderModel), nil
+}
+
+// Take calls gen.DO.Take and returns its result typed as
+// *dbschema.ProviderModel, or nil together with the error on failure.
+func (p providerModelDo) Take() (*dbschema.ProviderModel, error) {
+	row, err := p.DO.Take()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.ProviderModel), nil
+}
+
+// Last calls gen.DO.Last and returns its result typed as
+// *dbschema.ProviderModel, or nil together with the error on failure.
+func (p providerModelDo) Last() (*dbschema.ProviderModel, error) {
+	row, err := p.DO.Last()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.ProviderModel), nil
+}
+
+// Find calls gen.DO.Find and returns its result asserted to
+// []*dbschema.ProviderModel together with the error. The assertion runs even
+// when err is non-nil.
+func (p providerModelDo) Find() ([]*dbschema.ProviderModel, error) {
+	raw, err := p.DO.Find()
+	rows := raw.([]*dbschema.ProviderModel)
+	return rows, err
+}
+
+// FindInBatch runs gen.DO.FindInBatches with an internal page buffer, calling
+// fc for each batch and appending the buffer's contents to results after
+// every fc call. It returns the accumulated rows and the batching error.
+func (p providerModelDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.ProviderModel, err error) {
+	page := make([]*dbschema.ProviderModel, 0, batchSize)
+	collect := func(tx gen.Dao, batch int) error {
+		// Deferred so the page is recorded whatever fc returns.
+		defer func() { results = append(results, page...) }()
+		return fc(tx, batch)
+	}
+	err = p.DO.FindInBatches(&page, batchSize, collect)
+	return results, err
+}
+
+// FindInBatches forwards result, batchSize and fc to gen.DO.FindInBatches.
+func (p providerModelDo) FindInBatches(result *[]*dbschema.ProviderModel, batchSize int, fc func(tx gen.Dao, batch int) error) error {
+	err := p.DO.FindInBatches(result, batchSize, fc)
+	return err
+}
+
+// Attrs forwards attrs to gen.DO.Attrs and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Attrs(attrs ...field.AssignExpr) IProviderModelDo {
+	next := p.DO.Attrs(attrs...)
+	return p.withDO(next)
+}
+
+// Assign forwards attrs to gen.DO.Assign and re-wraps the resulting Dao as
+// IProviderModelDo.
+func (p providerModelDo) Assign(attrs ...field.AssignExpr) IProviderModelDo {
+	next := p.DO.Assign(attrs...)
+	return p.withDO(next)
+}
+
+// Joins applies gen.DO.Joins once per relation field, in argument order, and
+// returns the accumulated query as IProviderModelDo.
+func (p providerModelDo) Joins(fields ...field.RelationField) IProviderModelDo {
+	for i := range fields {
+		// withDO stores the new DO in the local copy p.
+		p.withDO(p.DO.Joins(fields[i]))
+	}
+	return &p
+}
+
+// Preload applies gen.DO.Preload once per relation field, in argument order,
+// and returns the accumulated query as IProviderModelDo.
+func (p providerModelDo) Preload(fields ...field.RelationField) IProviderModelDo {
+	for i := range fields {
+		// withDO stores the new DO in the local copy p.
+		p.withDO(p.DO.Preload(fields[i]))
+	}
+	return &p
+}
+
+// FirstOrInit calls gen.DO.FirstOrInit and returns its result typed as
+// *dbschema.ProviderModel, or nil together with the error on failure.
+func (p providerModelDo) FirstOrInit() (*dbschema.ProviderModel, error) {
+	row, err := p.DO.FirstOrInit()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.ProviderModel), nil
+}
+
+// FirstOrCreate calls gen.DO.FirstOrCreate and returns its result typed as
+// *dbschema.ProviderModel, or nil together with the error on failure.
+func (p providerModelDo) FirstOrCreate() (*dbschema.ProviderModel, error) {
+	row, err := p.DO.FirstOrCreate()
+	if err != nil {
+		return nil, err
+	}
+	return row.(*dbschema.ProviderModel), nil
+}
+
+// FindByPage fetches one page of rows with Offset/Limit and reports a total
+// count. When the page is non-empty but shorter than a positive limit, the
+// count is derived as offset+len(result) without a second query; otherwise
+// Count is run with offset and limit reset to -1.
+func (p providerModelDo) FindByPage(offset int, limit int) (result []*dbschema.ProviderModel, count int64, err error) {
+	if result, err = p.Offset(offset).Limit(limit).Find(); err != nil {
+		return result, count, err
+	}
+
+	size := len(result)
+	if limit > 0 && size > 0 && size < limit {
+		return result, int64(size + offset), err
+	}
+
+	count, err = p.Offset(-1).Limit(-1).Count()
+	return result, count, err
+}
+
+// ScanByPage first runs Count on the query, then scans the page selected by
+// offset/limit into result. A Count error is returned before any scan happens.
+func (p providerModelDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
+	if count, err = p.Count(); err != nil {
+		return count, err
+	}
+
+	err = p.Offset(offset).Limit(limit).Scan(result)
+	return count, err
+}
+
+// Scan forwards to gen.DO.Scan, writing the query output into result.
+func (p providerModelDo) Scan(result interface{}) (err error) {
+	err = p.DO.Scan(result)
+	return err
+}
+
+// Delete forwards the given models, as a single slice, to gen.DO.Delete.
+func (p providerModelDo) Delete(models ...*dbschema.ProviderModel) (result gen.ResultInfo, err error) {
+	result, err = p.DO.Delete(models)
+	return result, err
+}
+
+// withDO overwrites the embedded gen.DO with the value behind do and returns
+// the receiver. do must hold a *gen.DO; any other dynamic type panics on the
+// type assertion.
+func (p *providerModelDo) withDO(do gen.Dao) *providerModelDo {
+	concrete := do.(*gen.DO)
+	p.DO = *concrete
+	return p
+}
diff --git a/services/llm-api/internal/infrastructure/database/gormgen/providers.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/providers.gen.go
new file mode 100644
index 00000000..e7a7077e
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/gormgen/providers.gen.go
@@ -0,0 +1,435 @@
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+// Code generated by gorm.io/gen. DO NOT EDIT.
+
+package gormgen
+
+import (
+	"context"
+	"database/sql"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+	"gorm.io/gorm/schema"
+
+	"gorm.io/gen"
+	"gorm.io/gen/field"
+
+	"gorm.io/plugin/dbresolver"
+
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+)
+
+func newProvider(db *gorm.DB, opts ...gen.DOOption) provider {
+	_provider := provider{}
+
+	_provider.providerDo.UseDB(db, opts...)
+	_provider.providerDo.UseModel(&dbschema.Provider{})
+
+	tableName := _provider.providerDo.TableName()
+	_provider.ALL = field.NewAsterisk(tableName)
+	_provider.ID = field.NewUint(tableName, "id")
+	_provider.CreatedAt = field.NewTime(tableName, "created_at")
+	_provider.UpdatedAt = field.NewTime(tableName, "updated_at")
+	_provider.DeletedAt = field.NewField(tableName, "deleted_at")
+	_provider.PublicID = field.NewString(tableName, "public_id")
+	_provider.DisplayName = field.NewString(tableName, "display_name")
+	_provider.Kind = field.NewString(tableName, "kind")
+	_provider.BaseURL = field.NewString(tableName, "base_url")
+	_provider.EncryptedAPIKey = field.NewString(tableName, "encrypted_api_key")
+	_provider.APIKeyHint = field.NewString(tableName, "api_key_hint")
+	_provider.IsModerated = field.NewBool(tableName, "is_moderated")
+	_provider.Active = field.NewBool(tableName, "active")
+	_provider.Metadata = field.NewField(tableName, "metadata")
+	_provider.LastSyncedAt = field.NewTime(tableName, "last_synced_at")
+
+	_provider.fillFieldMap()
+
+	return _provider
+}
+
+// provider is the generated query helper for dbschema.Provider. It embeds the
+// DAO (providerDo) and exposes one typed field expression per table column.
+type provider struct {
+	providerDo
+
+	// ALL is the asterisk expression for the table; the remaining fields are
+	// built from the column names passed to field.New* in updateTableName.
+	ALL             field.Asterisk
+	ID              field.Uint
+	CreatedAt       field.Time
+	UpdatedAt       field.Time
+	DeletedAt       field.Field
+	PublicID        field.String
+	DisplayName     field.String
+	Kind            field.String
+	BaseURL         field.String
+	EncryptedAPIKey field.String
+	APIKeyHint      field.String
+	IsModerated     field.Bool
+	Active          field.Bool
+	Metadata        field.Field
+	LastSyncedAt    field.Time
+
+	// fieldMap maps column name to its field expression; it is rebuilt by
+	// fillFieldMap and read by GetFieldByName.
+	fieldMap map[string]field.Expr
+}
+
+// Table returns a copy of p whose DAO uses newTableName and whose column
+// fields are rebuilt against that name. The receiver is not modified.
+func (p provider) Table(newTableName string) *provider {
+	renamed := &p
+	renamed.providerDo.UseTable(newTableName)
+	return renamed.updateTableName(newTableName)
+}
+
+// As returns a copy of p whose DAO is the result of As(alias) and whose
+// column fields are rebuilt against alias. The receiver is not modified.
+func (p provider) As(alias string) *provider {
+	aliased := p.providerDo.As(alias).(*gen.DO)
+	p.providerDo.DO = *aliased
+	return p.updateTableName(alias)
+}
+
+// updateTableName rebuilds every column field expression of p against table,
+// refreshes the field map, and returns p.
+func (p *provider) updateTableName(table string) *provider {
+	p.ALL = field.NewAsterisk(table)
+	p.ID = field.NewUint(table, "id")
+
+	// Time columns.
+	p.CreatedAt = field.NewTime(table, "created_at")
+	p.UpdatedAt = field.NewTime(table, "updated_at")
+	p.LastSyncedAt = field.NewTime(table, "last_synced_at")
+
+	// String columns.
+	p.PublicID = field.NewString(table, "public_id")
+	p.DisplayName = field.NewString(table, "display_name")
+	p.Kind = field.NewString(table, "kind")
+	p.BaseURL = field.NewString(table, "base_url")
+	p.EncryptedAPIKey = field.NewString(table, "encrypted_api_key")
+	p.APIKeyHint = field.NewString(table, "api_key_hint")
+
+	// Boolean columns.
+	p.IsModerated = field.NewBool(table, "is_moderated")
+	p.Active = field.NewBool(table, "active")
+
+	// Columns represented by the generic field type.
+	p.DeletedAt = field.NewField(table, "deleted_at")
+	p.Metadata = field.NewField(table, "metadata")
+
+	p.fillFieldMap()
+
+	return p
+}
+
+// GetFieldByName looks fieldName up in the field map and returns it as a
+// field.OrderExpr. The boolean is false when the name is unknown, the stored
+// expression is nil, or the expression does not implement field.OrderExpr.
+func (p *provider) GetFieldByName(fieldName string) (field.OrderExpr, bool) {
+	expr, found := p.fieldMap[fieldName]
+	if found && expr != nil {
+		orderExpr, isOrder := expr.(field.OrderExpr)
+		return orderExpr, isOrder
+	}
+	return nil, false
+}
+
+// fillFieldMap replaces p.fieldMap with a fresh map from column name to the
+// current field expression of each of the 14 columns.
+func (p *provider) fillFieldMap() {
+	p.fieldMap = map[string]field.Expr{
+		"id":                p.ID,
+		"created_at":        p.CreatedAt,
+		"updated_at":        p.UpdatedAt,
+		"deleted_at":        p.DeletedAt,
+		"public_id":         p.PublicID,
+		"display_name":      p.DisplayName,
+		"kind":              p.Kind,
+		"base_url":          p.BaseURL,
+		"encrypted_api_key": p.EncryptedAPIKey,
+		"api_key_hint":      p.APIKeyHint,
+		"is_moderated":      p.IsModerated,
+		"active":            p.Active,
+		"metadata":          p.Metadata,
+		"last_synced_at":    p.LastSyncedAt,
+	}
+}
+
+// clone returns a copy of p whose DAO connection pool is replaced with the
+// one held by db.Statement.
+func (p provider) clone(db *gorm.DB) provider {
+	pool := db.Statement.ConnPool
+	p.providerDo.ReplaceConnPool(pool)
+	return p
+}
+
+// replaceDB returns a copy of p after calling ReplaceDB(db) on its DAO.
+func (p provider) replaceDB(db *gorm.DB) provider {
+	replaced := &p
+	replaced.providerDo.ReplaceDB(db)
+	return *replaced
+}
+
+// providerDo is the DAO for dbschema.Provider; it embeds gen.DO and wraps its
+// methods with Provider-typed signatures (see IProviderDo).
+type providerDo struct{ gen.DO }
+
+// IProviderDo is the typed DAO interface for dbschema.Provider. Chainable
+// methods return IProviderDo; terminal methods return Provider values,
+// result info, or errors.
+type IProviderDo interface {
+	gen.SubQuery
+
+	// Session, connection and context control.
+	Debug() IProviderDo
+	WithContext(ctx context.Context) IProviderDo
+	WithResult(fc func(tx gen.Dao)) gen.ResultInfo
+	ReplaceDB(db *gorm.DB)
+	ReadDB() IProviderDo
+	WriteDB() IProviderDo
+	As(alias string) gen.Dao
+	Session(config *gorm.Session) IProviderDo
+	Columns(cols ...field.Expr) gen.Columns
+
+	// Query building.
+	Clauses(conds ...clause.Expression) IProviderDo
+	Not(conds ...gen.Condition) IProviderDo
+	Or(conds ...gen.Condition) IProviderDo
+	Select(conds ...field.Expr) IProviderDo
+	Where(conds ...gen.Condition) IProviderDo
+	Order(conds ...field.Expr) IProviderDo
+	Distinct(cols ...field.Expr) IProviderDo
+	Omit(cols ...field.Expr) IProviderDo
+	Join(table schema.Tabler, on ...field.Expr) IProviderDo
+	LeftJoin(table schema.Tabler, on ...field.Expr) IProviderDo
+	RightJoin(table schema.Tabler, on ...field.Expr) IProviderDo
+	Group(cols ...field.Expr) IProviderDo
+	Having(conds ...gen.Condition) IProviderDo
+	Limit(limit int) IProviderDo
+	Offset(offset int) IProviderDo
+	Count() (count int64, err error)
+	Scopes(funcs ...func(gen.Dao) gen.Dao) IProviderDo
+	Unscoped() IProviderDo
+
+	// Creating and reading rows.
+	Create(values ...*dbschema.Provider) error
+	CreateInBatches(values []*dbschema.Provider, batchSize int) error
+	Save(values ...*dbschema.Provider) error
+	First() (*dbschema.Provider, error)
+	Take() (*dbschema.Provider, error)
+	Last() (*dbschema.Provider, error)
+	Find() ([]*dbschema.Provider, error)
+	FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Provider, err error)
+	FindInBatches(result *[]*dbschema.Provider, batchSize int, fc func(tx gen.Dao, batch int) error) error
+	Pluck(column field.Expr, dest interface{}) error
+
+	// Deleting and updating rows.
+	Delete(...*dbschema.Provider) (info gen.ResultInfo, err error)
+	Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	Updates(value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error)
+	UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error)
+	UpdateColumns(value interface{}) (info gen.ResultInfo, err error)
+	UpdateFrom(q gen.SubQuery) gen.Dao
+
+	// Attribute assignment, relations, paging and raw access.
+	Attrs(attrs ...field.AssignExpr) IProviderDo
+	Assign(attrs ...field.AssignExpr) IProviderDo
+	Joins(fields ...field.RelationField) IProviderDo
+	Preload(fields ...field.RelationField) IProviderDo
+	FirstOrInit() (*dbschema.Provider, error)
+	FirstOrCreate() (*dbschema.Provider, error)
+	FindByPage(offset int, limit int) (result []*dbschema.Provider, count int64, err error)
+	ScanByPage(result interface{}, offset int, limit int) (count int64, err error)
+	Rows() (*sql.Rows, error)
+	Row() *sql.Row
+	Scan(result interface{}) (err error)
+	Returning(value interface{}, columns ...string) IProviderDo
+	UnderlyingDB() *gorm.DB
+	schema.Tabler
+}
+
+// Debug returns a copy of the DAO wrapping the result of p.DO.Debug().
+func (p providerDo) Debug() IProviderDo {
+	next := p.DO.Debug()
+	return p.withDO(next)
+}
+
+// WithContext returns a copy of the DAO wrapping the result of
+// p.DO.WithContext(ctx).
+func (p providerDo) WithContext(ctx context.Context) IProviderDo {
+	next := p.DO.WithContext(ctx)
+	return p.withDO(next)
+}
+
+// ReadDB returns the DAO with the dbresolver.Read clause applied.
+func (p providerDo) ReadDB() IProviderDo {
+	readScoped := p.Clauses(dbresolver.Read)
+	return readScoped
+}
+
+// WriteDB returns the DAO with the dbresolver.Write clause applied.
+func (p providerDo) WriteDB() IProviderDo {
+	writeScoped := p.Clauses(dbresolver.Write)
+	return writeScoped
+}
+
+// Session returns a copy of the DAO wrapping the result of
+// p.DO.Session(config).
+func (p providerDo) Session(config *gorm.Session) IProviderDo {
+	next := p.DO.Session(config)
+	return p.withDO(next)
+}
+
+// Clauses returns a copy of the DAO wrapping the result of
+// p.DO.Clauses(conds...).
+func (p providerDo) Clauses(conds ...clause.Expression) IProviderDo {
+	next := p.DO.Clauses(conds...)
+	return p.withDO(next)
+}
+
+// Returning returns a copy of the DAO wrapping the result of
+// p.DO.Returning(value, columns...).
+func (p providerDo) Returning(value interface{}, columns ...string) IProviderDo {
+	next := p.DO.Returning(value, columns...)
+	return p.withDO(next)
+}
+
+// Not returns a copy of the DAO wrapping the result of p.DO.Not(conds...).
+func (p providerDo) Not(conds ...gen.Condition) IProviderDo {
+	next := p.DO.Not(conds...)
+	return p.withDO(next)
+}
+
+// Or returns a copy of the DAO wrapping the result of p.DO.Or(conds...).
+func (p providerDo) Or(conds ...gen.Condition) IProviderDo {
+	next := p.DO.Or(conds...)
+	return p.withDO(next)
+}
+
+// Select returns a copy of the DAO wrapping the result of
+// p.DO.Select(conds...).
+func (p providerDo) Select(conds ...field.Expr) IProviderDo {
+	next := p.DO.Select(conds...)
+	return p.withDO(next)
+}
+
+// Where returns a copy of the DAO wrapping the result of
+// p.DO.Where(conds...).
+func (p providerDo) Where(conds ...gen.Condition) IProviderDo {
+	next := p.DO.Where(conds...)
+	return p.withDO(next)
+}
+
+// Order returns a copy of the DAO wrapping the result of
+// p.DO.Order(conds...).
+func (p providerDo) Order(conds ...field.Expr) IProviderDo {
+	next := p.DO.Order(conds...)
+	return p.withDO(next)
+}
+
+// Distinct returns a copy of the DAO wrapping the result of
+// p.DO.Distinct(cols...).
+func (p providerDo) Distinct(cols ...field.Expr) IProviderDo {
+	next := p.DO.Distinct(cols...)
+	return p.withDO(next)
+}
+
+// Omit returns a copy of the DAO wrapping the result of p.DO.Omit(cols...).
+func (p providerDo) Omit(cols ...field.Expr) IProviderDo {
+	next := p.DO.Omit(cols...)
+	return p.withDO(next)
+}
+
+// Join returns a copy of the DAO wrapping the result of
+// p.DO.Join(table, on...).
+func (p providerDo) Join(table schema.Tabler, on ...field.Expr) IProviderDo {
+	next := p.DO.Join(table, on...)
+	return p.withDO(next)
+}
+
+// LeftJoin returns a copy of the DAO wrapping the result of
+// p.DO.LeftJoin(table, on...).
+func (p providerDo) LeftJoin(table schema.Tabler, on ...field.Expr) IProviderDo {
+	next := p.DO.LeftJoin(table, on...)
+	return p.withDO(next)
+}
+
+// RightJoin returns a copy of the DAO wrapping the result of
+// p.DO.RightJoin(table, on...).
+func (p providerDo) RightJoin(table schema.Tabler, on ...field.Expr) IProviderDo {
+	next := p.DO.RightJoin(table, on...)
+	return p.withDO(next)
+}
+
+// Group returns a copy of the DAO wrapping the result of
+// p.DO.Group(cols...).
+func (p providerDo) Group(cols ...field.Expr) IProviderDo {
+	next := p.DO.Group(cols...)
+	return p.withDO(next)
+}
+
+// Having returns a copy of the DAO wrapping the result of
+// p.DO.Having(conds...).
+func (p providerDo) Having(conds ...gen.Condition) IProviderDo {
+	next := p.DO.Having(conds...)
+	return p.withDO(next)
+}
+
+// Limit returns a copy of the DAO wrapping the result of p.DO.Limit(limit).
+func (p providerDo) Limit(limit int) IProviderDo {
+	next := p.DO.Limit(limit)
+	return p.withDO(next)
+}
+
+// Offset returns a copy of the DAO wrapping the result of
+// p.DO.Offset(offset).
+func (p providerDo) Offset(offset int) IProviderDo {
+	next := p.DO.Offset(offset)
+	return p.withDO(next)
+}
+
+// Scopes returns a copy of the DAO wrapping the result of
+// p.DO.Scopes(funcs...).
+func (p providerDo) Scopes(funcs ...func(gen.Dao) gen.Dao) IProviderDo {
+	next := p.DO.Scopes(funcs...)
+	return p.withDO(next)
+}
+
+// Unscoped returns a copy of the DAO wrapping the result of p.DO.Unscoped().
+func (p providerDo) Unscoped() IProviderDo {
+	next := p.DO.Unscoped()
+	return p.withDO(next)
+}
+
+// Create passes values to p.DO.Create. With no values it returns nil without
+// calling the underlying DAO.
+func (p providerDo) Create(values ...*dbschema.Provider) error {
+	if len(values) > 0 {
+		return p.DO.Create(values)
+	}
+	return nil
+}
+
+// CreateInBatches forwards values and batchSize to p.DO.CreateInBatches and
+// returns its error unchanged.
+func (p providerDo) CreateInBatches(values []*dbschema.Provider, batchSize int) error {
+	err := p.DO.CreateInBatches(values, batchSize)
+	return err
+}
+
+// Save : !!! the underlying implementation differs from GORM's Save.
+// It is equivalent to executing: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values)
+// With no values it returns nil without calling the underlying DAO.
+func (p providerDo) Save(values ...*dbschema.Provider) error {
+	if len(values) > 0 {
+		return p.DO.Save(values)
+	}
+	return nil
+}
+
+// First delegates to p.DO.First and returns its result asserted to
+// *dbschema.Provider. On error it returns (nil, err).
+func (p providerDo) First() (*dbschema.Provider, error) {
+	result, err := p.DO.First()
+	if err != nil {
+		return nil, err
+	}
+	return result.(*dbschema.Provider), nil
+}
+
+// Take delegates to p.DO.Take and returns its result asserted to
+// *dbschema.Provider. On error it returns (nil, err).
+func (p providerDo) Take() (*dbschema.Provider, error) {
+	result, err := p.DO.Take()
+	if err != nil {
+		return nil, err
+	}
+	return result.(*dbschema.Provider), nil
+}
+
+// Last delegates to p.DO.Last and returns its result asserted to
+// *dbschema.Provider. On error it returns (nil, err).
+func (p providerDo) Last() (*dbschema.Provider, error) {
+	result, err := p.DO.Last()
+	if err != nil {
+		return nil, err
+	}
+	return result.(*dbschema.Provider), nil
+}
+
+// Find delegates to p.DO.Find and returns its result asserted to
+// []*dbschema.Provider together with the error. The assertion is performed
+// even when err is non-nil.
+func (p providerDo) Find() ([]*dbschema.Provider, error) {
+	found, err := p.DO.Find()
+	providers := found.([]*dbschema.Provider)
+	return providers, err
+}
+
+// FindInBatch runs p.DO.FindInBatches with an internal buffer, invoking fc
+// for each batch and accumulating every batch's rows into results.
+func (p providerDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*dbschema.Provider, err error) {
+	batchBuf := make([]*dbschema.Provider, 0, batchSize)
+	collect := func(tx gen.Dao, batch int) error {
+		// Deferred so the current batch is appended after fc has run,
+		// including when fc returns an error.
+		defer func() { results = append(results, batchBuf...) }()
+		return fc(tx, batch)
+	}
+	err = p.DO.FindInBatches(&batchBuf, batchSize, collect)
+	return results, err
+}
+
+// FindInBatches forwards result, batchSize and fc to p.DO.FindInBatches and
+// returns its error unchanged.
+func (p providerDo) FindInBatches(result *[]*dbschema.Provider, batchSize int, fc func(tx gen.Dao, batch int) error) error {
+	err := p.DO.FindInBatches(result, batchSize, fc)
+	return err
+}
+
+// Attrs returns a copy of the DAO wrapping the result of
+// p.DO.Attrs(attrs...).
+func (p providerDo) Attrs(attrs ...field.AssignExpr) IProviderDo {
+	next := p.DO.Attrs(attrs...)
+	return p.withDO(next)
+}
+
+// Assign returns a copy of the DAO wrapping the result of
+// p.DO.Assign(attrs...).
+func (p providerDo) Assign(attrs ...field.AssignExpr) IProviderDo {
+	next := p.DO.Assign(attrs...)
+	return p.withDO(next)
+}
+
+// Joins applies p.DO.Joins once per relation field, in argument order, and
+// returns a pointer to the resulting copy of the DAO.
+func (p providerDo) Joins(fields ...field.RelationField) IProviderDo {
+	for i := range fields {
+		joined := p.DO.Joins(fields[i])
+		p = *p.withDO(joined)
+	}
+	return &p
+}
+
+// Preload applies p.DO.Preload once per relation field, in argument order,
+// and returns a pointer to the resulting copy of the DAO.
+func (p providerDo) Preload(fields ...field.RelationField) IProviderDo {
+	for i := range fields {
+		preloaded := p.DO.Preload(fields[i])
+		p = *p.withDO(preloaded)
+	}
+	return &p
+}
+
+// FirstOrInit delegates to p.DO.FirstOrInit and returns its result asserted
+// to *dbschema.Provider. On error it returns (nil, err).
+func (p providerDo) FirstOrInit() (*dbschema.Provider, error) {
+	result, err := p.DO.FirstOrInit()
+	if err != nil {
+		return nil, err
+	}
+	return result.(*dbschema.Provider), nil
+}
+
+// FirstOrCreate delegates to p.DO.FirstOrCreate and returns its result
+// asserted to *dbschema.Provider. On error it returns (nil, err).
+func (p providerDo) FirstOrCreate() (*dbschema.Provider, error) {
+	result, err := p.DO.FirstOrCreate()
+	if err != nil {
+		return nil, err
+	}
+	return result.(*dbschema.Provider), nil
+}
+
+// FindByPage fetches one page of rows (Offset(offset).Limit(limit).Find())
+// together with a total row count.
+//
+// When limit is positive and the page is non-empty but shorter than limit,
+// the count is derived as offset + len(result) and no Count query is issued;
+// otherwise the count comes from Count() with offset and limit reset to -1.
+func (p providerDo) FindByPage(offset int, limit int) (result []*dbschema.Provider, count int64, err error) {
+	if result, err = p.Offset(offset).Limit(limit).Find(); err != nil {
+		return result, count, err
+	}
+
+	pageLen := len(result)
+	if limit > 0 && pageLen > 0 && pageLen < limit {
+		return result, int64(pageLen + offset), nil
+	}
+
+	count, err = p.Offset(-1).Limit(-1).Count()
+	return result, count, err
+}
+
+// ScanByPage first counts the rows matched by p, then scans the page selected
+// by offset and limit into result. If Count fails, no scan is attempted.
+func (p providerDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) {
+	if count, err = p.Count(); err != nil {
+		return count, err
+	}
+
+	err = p.Offset(offset).Limit(limit).Scan(result)
+	return count, err
+}
+
+// Scan forwards result to p.DO.Scan and returns its error unchanged.
+func (p providerDo) Scan(result interface{}) (err error) {
+	err = p.DO.Scan(result)
+	return err
+}
+
+// Delete forwards the models slice to p.DO.Delete and returns its result
+// info and error unchanged.
+func (p providerDo) Delete(models ...*dbschema.Provider) (result gen.ResultInfo, err error) {
+	result, err = p.DO.Delete(models)
+	return result, err
+}
+
+// withDO overwrites p.DO with the value behind do, which must hold a *gen.DO
+// (the type assertion panics otherwise), and returns p.
+func (p *providerDo) withDO(do gen.Dao) *providerDo {
+	inner := do.(*gen.DO)
+	p.DO = *inner
+	return p
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/users.gen.go b/services/llm-api/internal/infrastructure/database/gormgen/users.gen.go
similarity index 65%
rename from apps/jan-api-gateway/application/app/infrastructure/database/gormgen/users.gen.go
rename to services/llm-api/internal/infrastructure/database/gormgen/users.gen.go
index 254be21f..928f7b34 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/gormgen/users.gen.go
+++ b/services/llm-api/internal/infrastructure/database/gormgen/users.gen.go
@@ -17,7 +17,7 @@ import (
 
 	"gorm.io/plugin/dbresolver"
 
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
 )
 
 func newUser(db *gorm.DB, opts ...gen.DOOption) user {
@@ -32,22 +32,13 @@ func newUser(db *gorm.DB, opts ...gen.DOOption) user {
 	_user.CreatedAt = field.NewTime(tableName, "created_at")
 	_user.UpdatedAt = field.NewTime(tableName, "updated_at")
 	_user.DeletedAt = field.NewField(tableName, "deleted_at")
-	_user.Name = field.NewString(tableName, "name")
+	_user.AuthProvider = field.NewString(tableName, "auth_provider")
+	_user.Issuer = field.NewString(tableName, "issuer")
+	_user.Subject = field.NewString(tableName, "subject")
+	_user.Username = field.NewString(tableName, "username")
 	_user.Email = field.NewString(tableName, "email")
-	_user.PublicID = field.NewString(tableName, "public_id")
-	_user.Enabled = field.NewBool(tableName, "enabled")
-	_user.IsGuest = field.NewBool(tableName, "is_guest")
-	_user.Organizations = userHasManyOrganizations{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Organizations", "dbschema.OrganizationMember"),
-	}
-
-	_user.Projects = userHasManyProjects{
-		db: db.Session(&gorm.Session{}),
-
-		RelationField: field.NewRelation("Projects", "dbschema.ProjectMember"),
-	}
+	_user.Name = field.NewString(tableName, "name")
+	_user.Picture = field.NewString(tableName, "picture")
 
 	_user.fillFieldMap()
 
@@ -57,19 +48,18 @@ func newUser(db *gorm.DB, opts ...gen.DOOption) user {
 type user struct {
 	userDo
 
-	ALL           field.Asterisk
-	ID            field.Uint
-	CreatedAt     field.Time
-	UpdatedAt     field.Time
-	DeletedAt     field.Field
-	Name          field.String
-	Email         field.String
-	PublicID      field.String
-	Enabled       field.Bool
-	IsGuest       field.Bool
-	Organizations userHasManyOrganizations
-
-	Projects userHasManyProjects
+	ALL          field.Asterisk
+	ID           field.Uint
+	CreatedAt    field.Time
+	UpdatedAt    field.Time
+	DeletedAt    field.Field
+	AuthProvider field.String
+	Issuer       field.String
+	Subject      field.String
+	Username     field.String
+	Email        field.String
+	Name         field.String
+	Picture      field.String
 
 	fieldMap map[string]field.Expr
 }
@@ -90,11 +80,13 @@ func (u *user) updateTableName(table string) *user {
 	u.CreatedAt = field.NewTime(table, "created_at")
 	u.UpdatedAt = field.NewTime(table, "updated_at")
 	u.DeletedAt = field.NewField(table, "deleted_at")
-	u.Name = field.NewString(table, "name")
+	u.AuthProvider = field.NewString(table, "auth_provider")
+	u.Issuer = field.NewString(table, "issuer")
+	u.Subject = field.NewString(table, "subject")
+	u.Username = field.NewString(table, "username")
 	u.Email = field.NewString(table, "email")
-	u.PublicID = field.NewString(table, "public_id")
-	u.Enabled = field.NewBool(table, "enabled")
-	u.IsGuest = field.NewBool(table, "is_guest")
+	u.Name = field.NewString(table, "name")
+	u.Picture = field.NewString(table, "picture")
 
 	u.fillFieldMap()
 
@@ -116,192 +108,25 @@ func (u *user) fillFieldMap() {
 	u.fieldMap["created_at"] = u.CreatedAt
 	u.fieldMap["updated_at"] = u.UpdatedAt
 	u.fieldMap["deleted_at"] = u.DeletedAt
-	u.fieldMap["name"] = u.Name
+	u.fieldMap["auth_provider"] = u.AuthProvider
+	u.fieldMap["issuer"] = u.Issuer
+	u.fieldMap["subject"] = u.Subject
+	u.fieldMap["username"] = u.Username
 	u.fieldMap["email"] = u.Email
-	u.fieldMap["public_id"] = u.PublicID
-	u.fieldMap["enabled"] = u.Enabled
-	u.fieldMap["is_guest"] = u.IsGuest
-
+	u.fieldMap["name"] = u.Name
+	u.fieldMap["picture"] = u.Picture
 }
 
 func (u user) clone(db *gorm.DB) user {
 	u.userDo.ReplaceConnPool(db.Statement.ConnPool)
-	u.Organizations.db = db.Session(&gorm.Session{Initialized: true})
-	u.Organizations.db.Statement.ConnPool = db.Statement.ConnPool
-	u.Projects.db = db.Session(&gorm.Session{Initialized: true})
-	u.Projects.db.Statement.ConnPool = db.Statement.ConnPool
 	return u
 }
 
 func (u user) replaceDB(db *gorm.DB) user {
 	u.userDo.ReplaceDB(db)
-	u.Organizations.db = db.Session(&gorm.Session{})
-	u.Projects.db = db.Session(&gorm.Session{})
 	return u
 }
 
-type userHasManyOrganizations struct {
-	db *gorm.DB
-
-	field.RelationField
-}
-
-func (a userHasManyOrganizations) Where(conds ...field.Expr) *userHasManyOrganizations {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a userHasManyOrganizations) WithContext(ctx context.Context) *userHasManyOrganizations {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a userHasManyOrganizations) Session(session *gorm.Session) *userHasManyOrganizations {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a userHasManyOrganizations) Model(m *dbschema.User) *userHasManyOrganizationsTx {
-	return &userHasManyOrganizationsTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a userHasManyOrganizations) Unscoped() *userHasManyOrganizations {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type userHasManyOrganizationsTx struct{ tx *gorm.Association }
-
-func (a userHasManyOrganizationsTx) Find() (result []*dbschema.OrganizationMember, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a userHasManyOrganizationsTx) Append(values ...*dbschema.OrganizationMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a userHasManyOrganizationsTx) Replace(values ...*dbschema.OrganizationMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a userHasManyOrganizationsTx) Delete(values ...*dbschema.OrganizationMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a userHasManyOrganizationsTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a userHasManyOrganizationsTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a userHasManyOrganizationsTx) Unscoped() *userHasManyOrganizationsTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
-type userHasManyProjects struct {
-	db *gorm.DB
-
-	field.RelationField
-}
-
-func (a userHasManyProjects) Where(conds ...field.Expr) *userHasManyProjects {
-	if len(conds) == 0 {
-		return &a
-	}
-
-	exprs := make([]clause.Expression, 0, len(conds))
-	for _, cond := range conds {
-		exprs = append(exprs, cond.BeCond().(clause.Expression))
-	}
-	a.db = a.db.Clauses(clause.Where{Exprs: exprs})
-	return &a
-}
-
-func (a userHasManyProjects) WithContext(ctx context.Context) *userHasManyProjects {
-	a.db = a.db.WithContext(ctx)
-	return &a
-}
-
-func (a userHasManyProjects) Session(session *gorm.Session) *userHasManyProjects {
-	a.db = a.db.Session(session)
-	return &a
-}
-
-func (a userHasManyProjects) Model(m *dbschema.User) *userHasManyProjectsTx {
-	return &userHasManyProjectsTx{a.db.Model(m).Association(a.Name())}
-}
-
-func (a userHasManyProjects) Unscoped() *userHasManyProjects {
-	a.db = a.db.Unscoped()
-	return &a
-}
-
-type userHasManyProjectsTx struct{ tx *gorm.Association }
-
-func (a userHasManyProjectsTx) Find() (result []*dbschema.ProjectMember, err error) {
-	return result, a.tx.Find(&result)
-}
-
-func (a userHasManyProjectsTx) Append(values ...*dbschema.ProjectMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Append(targetValues...)
-}
-
-func (a userHasManyProjectsTx) Replace(values ...*dbschema.ProjectMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Replace(targetValues...)
-}
-
-func (a userHasManyProjectsTx) Delete(values ...*dbschema.ProjectMember) (err error) {
-	targetValues := make([]interface{}, len(values))
-	for i, v := range values {
-		targetValues[i] = v
-	}
-	return a.tx.Delete(targetValues...)
-}
-
-func (a userHasManyProjectsTx) Clear() error {
-	return a.tx.Clear()
-}
-
-func (a userHasManyProjectsTx) Count() int64 {
-	return a.tx.Count()
-}
-
-func (a userHasManyProjectsTx) Unscoped() *userHasManyProjectsTx {
-	a.tx = a.tx.Unscoped()
-	return &a
-}
-
 type userDo struct{ gen.DO }
 
 type IUserDo interface {
diff --git a/services/llm-api/internal/infrastructure/database/migrate.go b/services/llm-api/internal/infrastructure/database/migrate.go
new file mode 100644
index 00000000..65b96306
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/migrate.go
@@ -0,0 +1,121 @@
+package database
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io/fs"
+
+	"github.com/golang-migrate/migrate/v4"
+	"github.com/golang-migrate/migrate/v4/database/postgres"
+	iofs "github.com/golang-migrate/migrate/v4/source/iofs"
+	"gorm.io/gorm"
+
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/migrations"
+)
+
+// AutoMigrate applies all pending SQL migrations bundled with the service.
+//
+// Steps, in order:
+//  1. log every file found in the embedded migrations.FS;
+//  2. run CREATE SCHEMA IF NOT EXISTS llm_api (a failure is only logged);
+//  3. open a dedicated connection and a golang-migrate postgres driver
+//     configured with MigrationsTable "schema_migrations" and SchemaName
+//     "llm_api";
+//  4. if the recorded state is dirty, force the recorded version to clear it;
+//  5. run Up; migrate.ErrNoChange is treated as success.
+//
+// The returned error wraps the first failing step. Errors from closing the
+// driver or the migration source are reported only when no earlier error
+// occurred.
+func AutoMigrate(gormDB *gorm.DB) (err error) {
+	log := logger.GetLogger()
+
+	// List migration files
+	log.Info().Msg("Scanning migration files...")
+	entries, err := fs.ReadDir(migrations.FS, ".")
+	if err != nil {
+		return fmt.Errorf("read migration directory: %w", err)
+	}
+
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			log.Info().Str("file", entry.Name()).Msg("Found migration file")
+		}
+	}
+
+	sqlDB, err := gormDB.DB()
+	if err != nil {
+		return fmt.Errorf("retrieve sql db: %w", err)
+	}
+
+	// Ensure llm_api schema exists before running migrations. IF NOT EXISTS
+	// makes the statement a no-op for an existing schema, so the success log
+	// must not claim the schema was created, and an error here is not caused
+	// by the schema already existing. The error is still only logged
+	// (best effort); a real problem will resurface in the steps below.
+	if err := gormDB.Exec("CREATE SCHEMA IF NOT EXISTS llm_api").Error; err != nil {
+		log.Warn().Err(err).Msg("Failed to ensure llm_api schema exists, continuing")
+	} else {
+		log.Info().Msg("Ensured llm_api schema exists")
+	}
+
+	conn, err := sqlDB.Conn(context.Background())
+	if err != nil {
+		return fmt.Errorf("acquire dedicated connection: %w", err)
+	}
+
+	driver, err := postgres.WithConnection(context.Background(), conn, &postgres.Config{
+		MigrationsTable: "schema_migrations",
+		SchemaName:      "llm_api",
+	})
+	if err != nil {
+		// The driver never took ownership of conn, so release it here.
+		_ = conn.Close()
+		return fmt.Errorf("initialize postgres driver: %w", err)
+	}
+	defer func() {
+		if closeErr := driver.Close(); err == nil && closeErr != nil {
+			err = fmt.Errorf("close migration connection: %w", closeErr)
+		}
+	}()
+
+	source, err := iofs.New(migrations.FS, ".")
+	if err != nil {
+		return fmt.Errorf("load migrations: %w", err)
+	}
+	defer func() {
+		if closeErr := source.Close(); err == nil && closeErr != nil {
+			err = fmt.Errorf("close migration source: %w", closeErr)
+		}
+	}()
+
+	migrator, err := migrate.NewWithInstance("iofs", source, "postgres", driver)
+	if err != nil {
+		return fmt.Errorf("create migrator: %w", err)
+	}
+
+	// Check current version and dirty state. The error is kept in a local so
+	// an informational failure never leaks into the named return value.
+	version, dirty, versionErr := migrator.Version()
+	switch {
+	case versionErr == nil:
+		log.Info().Uint("version", version).Bool("dirty", dirty).Msg("Current migration state")
+	case errors.Is(versionErr, migrate.ErrNilVersion):
+		log.Info().Msg("No migrations have been applied yet")
+	default:
+		log.Warn().Err(versionErr).Msg("Error getting migration version")
+	}
+
+	// If database is dirty, force the version to clear the dirty flag.
+	// NOTE(review): Force records `version` as cleanly applied, so the Up call
+	// below presumably continues with later versions instead of re-running
+	// the migration that left the database dirty. Confirm that skipping a
+	// possibly half-applied migration is the intended recovery.
+	if dirty {
+		log.Warn().Uint("version", version).Msg("Database is in dirty state, forcing version...")
+		if forceErr := migrator.Force(int(version)); forceErr != nil {
+			return fmt.Errorf("force version %d to clear dirty state: %w", version, forceErr)
+		}
+		log.Info().Msg("Dirty state cleared")
+	}
+
+	log.Info().Msg("Applying migrations...")
+	err = migrator.Up()
+	if err != nil {
+		if errors.Is(err, migrate.ErrNoChange) {
+			log.Info().Msg("No new migrations to apply")
+		} else {
+			log.Error().Err(err).Msg("Failed to apply migrations")
+			return fmt.Errorf("apply migrations: %w", err)
+		}
+	} else {
+		log.Info().Msg("Migrations applied successfully")
+	}
+
+	// Report the final version. A failure here is informational only, but it
+	// is logged rather than dropped (ErrNilVersion just means nothing is
+	// recorded yet).
+	finalVersion, _, finalErr := migrator.Version()
+	if finalErr == nil {
+		log.Info().Uint("version", finalVersion).Msg("Current migration version")
+	} else if !errors.Is(finalErr, migrate.ErrNilVersion) {
+		log.Warn().Err(finalErr).Msg("Error getting migration version after applying migrations")
+	}
+
+	return nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/apikeyrepo/api_key_repository.go b/services/llm-api/internal/infrastructure/database/repository/apikeyrepo/api_key_repository.go
new file mode 100644
index 00000000..669281c2
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/apikeyrepo/api_key_repository.go
@@ -0,0 +1,95 @@
+package apikeyrepo
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	"gorm.io/gorm"
+
+	"jan-server/services/llm-api/internal/domain/apikey"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// Repository is the gorm-backed API key store; NewAPIKeyRepository returns it
+// as an apikey.Repository.
+type Repository struct {
+	// db is the gorm handle every query in this repository runs against.
+	db *gorm.DB
+}
+
+// NewAPIKeyRepository returns an apikey.Repository that runs its queries
+// against db.
+func NewAPIKeyRepository(db *gorm.DB) apikey.Repository {
+	repo := Repository{db: db}
+	return &repo
+}
+
+// Create inserts key (converted with dbschema.FromDomain) and returns the
+// stored row converted back to the domain type. Database errors are wrapped
+// as repository-layer platform errors.
+func (r *Repository) Create(ctx context.Context, key *apikey.APIKey) (*apikey.APIKey, error) {
+	row := dbschema.FromDomain(key)
+	insert := r.db.WithContext(ctx).Create(row)
+	if insert.Error != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, insert.Error, "failed to create api key")
+	}
+	return row.EtoD(), nil
+}
+
+// ListByUser returns the API keys whose user_id equals userID, ordered by
+// created_at descending. Rows whose EtoD conversion yields nil are skipped.
+func (r *Repository) ListByUser(ctx context.Context, userID uint) ([]apikey.APIKey, error) {
+	var rows []dbschema.APIKey
+	query := r.db.WithContext(ctx).
+		Where("user_id = ?", userID).
+		Order("created_at DESC")
+	if err := query.Find(&rows).Error; err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, err, "failed to list api keys")
+	}
+
+	keys := make([]apikey.APIKey, 0, len(rows))
+	for _, row := range rows {
+		domain := row.EtoD()
+		if domain == nil {
+			continue
+		}
+		keys = append(keys, *domain)
+	}
+	return keys, nil
+}
+
+// FindByID returns the API key whose id column equals id.
+//
+// It returns (nil, nil) when no row matches. Any other database error is
+// wrapped as a repository-layer platform error.
+func (r *Repository) FindByID(ctx context.Context, id string) (*apikey.APIKey, error) {
+	var model dbschema.APIKey
+	if err := r.db.WithContext(ctx).First(&model, "id = ?", id).Error; err != nil {
+		// errors.Is also matches a wrapped gorm.ErrRecordNotFound, which a
+		// plain == comparison would treat as a hard failure.
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, nil
+		}
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, err, "failed to fetch api key")
+	}
+	return model.EtoD(), nil
+}
+
+// CountActiveByUser counts the API keys of userID that have no revoked_at
+// and whose expires_at is later than the current time.
+//
+// NOTE(review): rows with a NULL expires_at do not satisfy "expires_at > ?"
+// and are therefore not counted; confirm whether non-expiring keys exist.
+func (r *Repository) CountActiveByUser(ctx context.Context, userID uint) (int64, error) {
+	var active int64
+	query := r.db.WithContext(ctx).
+		Model(&dbschema.APIKey{}).
+		Where("user_id = ? AND revoked_at IS NULL AND expires_at > ?", userID, time.Now())
+	if err := query.Count(&active).Error; err != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerRepository, err, "failed to count api keys")
+	}
+	return active, nil
+}
+
+// FindByHash returns the API key whose hash column equals hash.
+//
+// It returns (nil, nil) when no row matches. Any other database error is
+// wrapped as a repository-layer platform error.
+func (r *Repository) FindByHash(ctx context.Context, hash string) (*apikey.APIKey, error) {
+	var model dbschema.APIKey
+	if err := r.db.WithContext(ctx).Where("hash = ?", hash).First(&model).Error; err != nil {
+		// errors.Is also matches a wrapped gorm.ErrRecordNotFound, which a
+		// plain == comparison would treat as a hard failure.
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, nil
+		}
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, err, "failed to fetch api key by hash")
+	}
+	return model.EtoD(), nil
+}
+
+// MarkRevoked sets revoked_at to revokedAt on the API key row whose id column
+// equals id. Database errors are wrapped as repository-layer platform
+// errors; the number of affected rows is not inspected.
+func (r *Repository) MarkRevoked(ctx context.Context, id string, revokedAt time.Time) error {
+	update := r.db.WithContext(ctx).
+		Model(&dbschema.APIKey{}).
+		Where("id = ?", id).
+		Update("revoked_at", revokedAt)
+	if update.Error == nil {
+		return nil
+	}
+	return platformerrors.AsError(ctx, platformerrors.LayerRepository, update.Error, "failed to revoke api key")
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/conversationrepo/conversation_repository.go b/services/llm-api/internal/infrastructure/database/repository/conversationrepo/conversation_repository.go
new file mode 100644
index 00000000..3bdf051a
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/conversationrepo/conversation_repository.go
@@ -0,0 +1,467 @@
+package conversationrepo
+
+import (
+	"context"
+
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/infrastructure/database/gormgen"
+	"jan-server/services/llm-api/internal/infrastructure/database/transaction"
+	"jan-server/services/llm-api/internal/utils/functional"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ConversationGormRepository implements conversation.ConversationRepository on
+// top of the generated gormgen query API. All queries are obtained from the
+// transaction.Database handle stored in db.
+type ConversationGormRepository struct {
+	db *transaction.Database
+}
+
+// Compile-time check that the type satisfies the repository interface.
+var _ conversation.ConversationRepository = (*ConversationGormRepository)(nil)
+
+// NewConversationGormRepository wraps db in a ConversationGormRepository and
+// returns it as a conversation.ConversationRepository.
+func NewConversationGormRepository(db *transaction.Database) conversation.ConversationRepository {
+	return &ConversationGormRepository{db: db}
+}
+
+// Create implements conversation.ConversationRepository.
+// It inserts conv as a new conversation row and, on success, copies the ID,
+// CreatedAt and UpdatedAt held by the schema model back onto conv.
+func (repo *ConversationGormRepository) Create(ctx context.Context, conv *conversation.Conversation) error {
+	record := dbschema.NewSchemaConversation(conv)
+	conversations := repo.db.GetQuery(ctx).Conversation.WithContext(ctx)
+	if createErr := conversations.Create(record); createErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, createErr, "failed to create conversation")
+	}
+
+	// Reflect the values carried by the schema model after the insert.
+	conv.ID, conv.CreatedAt, conv.UpdatedAt = record.ID, record.CreatedAt, record.UpdatedAt
+	return nil
+}
+
+// FindByFilter implements conversation.ConversationRepository.
+// It returns the conversations that match filter, with the cursor, ordering and
+// limit from pagination applied (pagination may be nil), converted to domain
+// objects.
+func (repo *ConversationGormRepository) FindByFilter(ctx context.Context, filter conversation.ConversationFilter, pagination *query.Pagination) ([]*conversation.Conversation, error) {
+	q := repo.db.GetQuery(ctx)
+	filtered := repo.applyFilter(q, q.Conversation.WithContext(ctx), filter)
+	paged := repo.applyPagination(q, filtered, pagination)
+
+	records, findErr := paged.Find()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find conversations")
+	}
+
+	toDomain := func(record *dbschema.Conversation) *conversation.Conversation {
+		return record.EtoD()
+	}
+	return functional.Map(records, toDomain), nil
+}
+
+// Count implements conversation.ConversationRepository.
+// It returns the number of conversations matching filter. A query failure is
+// wrapped as a repository-layer platform error, like every other method of this
+// repository, instead of leaking the raw database error to the caller.
+func (repo *ConversationGormRepository) Count(ctx context.Context, filter conversation.ConversationFilter) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	sql := q.Conversation.WithContext(ctx)
+	sql = repo.applyFilter(q, sql, filter)
+	count, err := sql.Count()
+	if err != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerRepository, err, "failed to count conversations")
+	}
+	return count, nil
+}
+
+// FindByID implements conversation.ConversationRepository.
+// It returns the first conversation whose ID equals id. Any error from the
+// query, including the one produced when no row matches, is returned wrapped
+// as a repository-layer platform error.
+func (repo *ConversationGormRepository) FindByID(ctx context.Context, id uint) (*conversation.Conversation, error) {
+	q := repo.db.GetQuery(ctx)
+	byID := conversation.ConversationFilter{ID: &id}
+	record, findErr := repo.applyFilter(q, q.Conversation.WithContext(ctx), byID).First()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find conversation by ID")
+	}
+	return record.EtoD(), nil
+}
+
+// FindByPublicID implements conversation.ConversationRepository.
+// It returns the first conversation whose public ID equals publicID. Any query
+// error is returned wrapped as a repository-layer platform error.
+func (repo *ConversationGormRepository) FindByPublicID(ctx context.Context, publicID string) (*conversation.Conversation, error) {
+	q := repo.db.GetQuery(ctx)
+	byPublicID := conversation.ConversationFilter{PublicID: &publicID}
+	record, findErr := repo.applyFilter(q, q.Conversation.WithContext(ctx), byPublicID).First()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find conversation by public ID")
+	}
+	return record.EtoD(), nil
+}
+
+// Update implements conversation.ConversationRepository.
+// It converts conv to its schema model and writes all of its fields with Save
+// on a query scoped to conv.ID. On success conv.UpdatedAt is refreshed from the
+// schema model.
+func (repo *ConversationGormRepository) Update(ctx context.Context, conv *conversation.Conversation) error {
+	record := dbschema.NewSchemaConversation(conv)
+	q := repo.db.GetQuery(ctx)
+
+	// Save writes every column of the model, not just the changed ones.
+	target := q.Conversation.WithContext(ctx).Where(q.Conversation.ID.Eq(conv.ID))
+	if saveErr := target.Save(record); saveErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, saveErr, "failed to update conversation")
+	}
+
+	conv.UpdatedAt = record.UpdatedAt
+	return nil
+}
+
+// Delete implements conversation.ConversationRepository.
+// It issues a delete for the conversation whose ID equals id. The result info
+// returned by the query is discarded; only an error is reported.
+func (repo *ConversationGormRepository) Delete(ctx context.Context, id uint) error {
+	q := repo.db.GetQuery(ctx)
+	target := q.Conversation.WithContext(ctx).Where(q.Conversation.ID.Eq(id))
+	if _, deleteErr := target.Delete(); deleteErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, deleteErr, "failed to delete conversation")
+	}
+	return nil
+}
+
+// AddItem implements conversation.ConversationRepository.
+// It first checks that the conversation can be loaded via FindByID, then stamps
+// item with conversationID and inserts it. On success item.ID and
+// item.CreatedAt are updated from the stored schema model.
+func (repo *ConversationGormRepository) AddItem(ctx context.Context, conversationID uint, item *conversation.Item) error {
+	// Any FindByID failure is reported as "conversation not found".
+	if _, lookupErr := repo.FindByID(ctx, conversationID); lookupErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, lookupErr, "conversation not found")
+	}
+
+	item.ConversationID = conversationID
+	record := dbschema.NewSchemaConversationItem(item)
+
+	itemTable := repo.db.GetQuery(ctx).ConversationItem.WithContext(ctx)
+	if createErr := itemTable.Create(record); createErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, createErr, "failed to create conversation item")
+	}
+
+	// Hand the generated identifier and timestamp back to the caller.
+	item.ID, item.CreatedAt = record.ID, record.CreatedAt
+	return nil
+}
+
+// SearchItems implements conversation.ConversationRepository.
+// Placeholder implementation: searchQuery is not used yet, so the method
+// returns every item belonging to conversationID.
+func (repo *ConversationGormRepository) SearchItems(ctx context.Context, conversationID uint, searchQuery string) ([]*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	inConversation := conversation.ItemFilter{ConversationID: &conversationID}
+
+	// TODO: Implement proper full-text search filtering (e.g. a JSON/full-text
+	// query on the content field or an external search engine).
+	records, findErr := repo.applyItemFilter(q, q.ConversationItem.WithContext(ctx), inConversation).Find()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to search items")
+	}
+
+	return functional.Map(records, func(record *dbschema.ConversationItem) *conversation.Item {
+		return record.EtoD()
+	}), nil
+}
+
+// BulkAddItems implements conversation.ConversationRepository.
+// It attaches every item to conversationID and inserts them in batches of 100.
+// After each batch the generated ID and CreatedAt are copied back onto the
+// corresponding domain items. An empty items slice is a no-op. If a batch
+// fails, the method returns immediately; whether earlier batches persist
+// depends on the surrounding transaction, which is not visible here.
+func (repo *ConversationGormRepository) BulkAddItems(ctx context.Context, conversationID uint, items []*conversation.Item) error {
+	if len(items) == 0 {
+		return nil
+	}
+
+	// Any FindByID failure is reported as "conversation not found".
+	if _, lookupErr := repo.FindByID(ctx, conversationID); lookupErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, lookupErr, "conversation not found")
+	}
+
+	for idx := range items {
+		items[idx].ConversationID = conversationID
+	}
+
+	records := functional.Map(items, func(item *conversation.Item) *dbschema.ConversationItem {
+		return dbschema.NewSchemaConversationItem(item)
+	})
+
+	// Insert in fixed-size chunks so IDs can be copied back chunk by chunk.
+	const batchSize = 100
+	q := repo.db.GetQuery(ctx)
+	for start := 0; start < len(records); start += batchSize {
+		stop := start + batchSize
+		if stop > len(records) {
+			stop = len(records)
+		}
+		chunk := records[start:stop]
+
+		if createErr := q.ConversationItem.WithContext(ctx).Create(chunk...); createErr != nil {
+			return platformerrors.AsError(ctx, platformerrors.LayerRepository, createErr, "failed to bulk create items")
+		}
+
+		// records and items share indices, so start+offset addresses the
+		// domain item that produced this record.
+		for offset, record := range chunk {
+			domainItem := items[start+offset]
+			domainItem.ID = record.ID
+			domainItem.CreatedAt = record.CreatedAt
+		}
+	}
+
+	return nil
+}
+
+// GetItemByID implements conversation.ConversationRepository.
+// It returns the first item whose ID equals itemID and which belongs to
+// conversationID. Any query error is returned wrapped as a repository-layer
+// platform error.
+func (repo *ConversationGormRepository) GetItemByID(ctx context.Context, conversationID uint, itemID uint) (*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	lookup := conversation.ItemFilter{
+		ID:             &itemID,
+		ConversationID: &conversationID,
+	}
+	record, findErr := repo.applyItemFilter(q, q.ConversationItem.WithContext(ctx), lookup).First()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find item by ID")
+	}
+	return record.EtoD(), nil
+}
+
+// GetItemByPublicID implements conversation.ConversationRepository.
+// It returns the first item whose public ID equals publicID and which belongs
+// to conversationID. Any query error is returned wrapped as a repository-layer
+// platform error.
+func (repo *ConversationGormRepository) GetItemByPublicID(ctx context.Context, conversationID uint, publicID string) (*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	lookup := conversation.ItemFilter{
+		PublicID:       &publicID,
+		ConversationID: &conversationID,
+	}
+	record, findErr := repo.applyItemFilter(q, q.ConversationItem.WithContext(ctx), lookup).First()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find item by public ID")
+	}
+	return record.EtoD(), nil
+}
+
+// DeleteItem implements conversation.ConversationRepository.
+// It issues a delete restricted to the item with ID itemID inside
+// conversationID. The result info is discarded; only an error is reported.
+func (repo *ConversationGormRepository) DeleteItem(ctx context.Context, conversationID uint, itemID uint) error {
+	q := repo.db.GetQuery(ctx)
+	target := conversation.ItemFilter{
+		ID:             &itemID,
+		ConversationID: &conversationID,
+	}
+	if _, deleteErr := repo.applyItemFilter(q, q.ConversationItem.WithContext(ctx), target).Delete(); deleteErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, deleteErr, "failed to delete item")
+	}
+	return nil
+}
+
+// CountItems implements conversation.ConversationRepository.
+// It counts every item stored for conversationID. branchName is currently not
+// used: no branch condition is added to the query.
+func (repo *ConversationGormRepository) CountItems(ctx context.Context, conversationID uint, branchName string) (int, error) {
+	q := repo.db.GetQuery(ctx)
+	scope := conversation.ItemFilter{ConversationID: &conversationID}
+
+	total, countErr := repo.applyItemFilter(q, q.ConversationItem.WithContext(ctx), scope).Count()
+	if countErr != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerRepository, countErr, "failed to count items")
+	}
+
+	// The query returns an int64 while the interface exposes a plain int.
+	return int(total), nil
+}
+
+// Branch operations
+
+// CreateBranch implements conversation.ConversationRepository.
+// Branch storage is not implemented yet: after checking that the conversation
+// can be loaded, the method always returns an ErrorTypeNotImplemented error.
+// branchName and metadata are not used.
+func (repo *ConversationGormRepository) CreateBranch(ctx context.Context, conversationID uint, branchName string, metadata *conversation.BranchMetadata) error {
+	if _, lookupErr := repo.FindByID(ctx, conversationID); lookupErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, lookupErr, "conversation not found")
+	}
+
+	// TODO: Implement branch storage in database
+	return platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"branch operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// GetBranch implements conversation.ConversationRepository.
+// Not implemented: it performs no query and always returns a nil result
+// together with an ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) GetBranch(ctx context.Context, conversationID uint, branchName string) (*conversation.BranchMetadata, error) {
+	return nil, platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"branch operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// ListBranches implements conversation.ConversationRepository.
+// Not implemented: it performs no query and always returns a nil slice
+// together with an ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) ListBranches(ctx context.Context, conversationID uint) ([]*conversation.BranchMetadata, error) {
+	return nil, platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"branch operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// DeleteBranch implements conversation.ConversationRepository.
+// Not implemented: it deletes nothing and always returns an
+// ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) DeleteBranch(ctx context.Context, conversationID uint, branchName string) error {
+	return platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"branch operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// SetActiveBranch implements conversation.ConversationRepository.
+// Not implemented: it changes nothing and always returns an
+// ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) SetActiveBranch(ctx context.Context, conversationID uint, branchName string) error {
+	return platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"branch operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// Branch item operations
+
+// AddItemToBranch implements conversation.ConversationRepository.
+// Only the main branch is supported: a branchName of "MAIN" or "" delegates to
+// AddItem; every other name yields an ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) AddItemToBranch(ctx context.Context, conversationID uint, branchName string, item *conversation.Item) error {
+	switch branchName {
+	case "MAIN", "":
+		return repo.AddItem(ctx, conversationID, item)
+	default:
+		return platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeNotImplemented,
+			"branch operations not yet implemented in database layer",
+			nil,
+			"",
+		)
+	}
+}
+
+// GetBranchItems implements conversation.ConversationRepository.
+// Only the main branch is supported: for a branchName of "MAIN" or "" it
+// returns the items of conversationID with pagination applied via
+// applyItemPagination. Any other branch name yields an
+// ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) GetBranchItems(ctx context.Context, conversationID uint, branchName string, pagination *query.Pagination) ([]*conversation.Item, error) {
+	if branchName != "MAIN" && branchName != "" {
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeNotImplemented,
+			"branch operations not yet implemented in database layer",
+			nil,
+			"",
+		)
+	}
+
+	q := repo.db.GetQuery(ctx)
+	scope := conversation.ItemFilter{ConversationID: &conversationID}
+	filtered := repo.applyItemFilter(q, q.ConversationItem.WithContext(ctx), scope)
+
+	records, findErr := repo.applyItemPagination(q, filtered, pagination).Find()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to get branch items")
+	}
+
+	toDomain := func(record *dbschema.ConversationItem) *conversation.Item {
+		return record.EtoD()
+	}
+	return functional.Map(records, toDomain), nil
+}
+
+// applyItemPagination applies cursor, ordering and limit to an item query.
+// With a nil p the query is only ordered by ascending ID. Otherwise:
+//   - p.After, when set, keeps rows with ID < After for "desc" order and
+//     ID > After for any other order;
+//   - rows are ordered by ID, descending only when p.Order is "desc";
+//   - p.Limit is applied when it is set and greater than zero.
+func (repo *ConversationGormRepository) applyItemPagination(q *gormgen.Query, sql gormgen.IConversationItemDo, p *query.Pagination) gormgen.IConversationItemDo {
+	itemID := q.ConversationItem.ID
+	if p == nil {
+		// No pagination requested: fall back to the default ordering.
+		return sql.Order(itemID.Asc())
+	}
+
+	// The cursor comparison and the sort direction always go together.
+	if p.Order == "desc" {
+		if p.After != nil {
+			sql = sql.Where(itemID.Lt(*p.After))
+		}
+		sql = sql.Order(itemID.Desc())
+	} else {
+		if p.After != nil {
+			sql = sql.Where(itemID.Gt(*p.After))
+		}
+		sql = sql.Order(itemID.Asc())
+	}
+
+	if limit := p.Limit; limit != nil && *limit > 0 {
+		sql = sql.Limit(*limit)
+	}
+	return sql
+}
+
+// BulkAddItemsToBranch implements conversation.ConversationRepository.
+// Only the main branch is supported: a branchName of "MAIN" or "" delegates to
+// BulkAddItems; every other name yields an ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) BulkAddItemsToBranch(ctx context.Context, conversationID uint, branchName string, items []*conversation.Item) error {
+	if branchName != "MAIN" && branchName != "" {
+		return platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeNotImplemented,
+			"branch operations not yet implemented in database layer",
+			nil,
+			"",
+		)
+	}
+	return repo.BulkAddItems(ctx, conversationID, items)
+}
+
+// ForkBranch implements conversation.ConversationRepository.
+// Not implemented: no branch is created and the method always returns an
+// ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) ForkBranch(ctx context.Context, conversationID uint, sourceBranch, newBranch string, fromItemID string, description *string) error {
+	return platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"branch operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// Item rating operations
+
+// RateItem implements conversation.ConversationRepository.
+// Not implemented: nothing is stored and the method always returns an
+// ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) RateItem(ctx context.Context, conversationID uint, itemID string, rating conversation.ItemRating, comment *string) error {
+	// TODO: Implement rating storage in database
+	return platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"rating operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// GetItemRating implements conversation.ConversationRepository.
+// Not implemented: it performs no query and always returns a nil rating
+// together with an ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) GetItemRating(ctx context.Context, conversationID uint, itemID string) (*conversation.ItemRating, error) {
+	return nil, platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"rating operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// RemoveItemRating implements conversation.ConversationRepository.
+// Not implemented: nothing is removed and the method always returns an
+// ErrorTypeNotImplemented error.
+func (repo *ConversationGormRepository) RemoveItemRating(ctx context.Context, conversationID uint, itemID string) error {
+	return platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeNotImplemented,
+		"rating operations not yet implemented in database layer",
+		nil,
+		"",
+	)
+}
+
+// applyFilter adds one equality condition to sql for each populated field of
+// filter: ID, PublicID, UserID and Referrer. Nil fields add nothing, and a
+// Referrer pointing at an empty string is treated as "no referrer filter".
+func (repo *ConversationGormRepository) applyFilter(q *gormgen.Query, sql gormgen.IConversationDo, filter conversation.ConversationFilter) gormgen.IConversationDo {
+	if id := filter.ID; id != nil {
+		sql = sql.Where(q.Conversation.ID.Eq(*id))
+	}
+	if publicID := filter.PublicID; publicID != nil {
+		sql = sql.Where(q.Conversation.PublicID.Eq(*publicID))
+	}
+	if userID := filter.UserID; userID != nil {
+		sql = sql.Where(q.Conversation.UserID.Eq(*userID))
+	}
+	if referrer := filter.Referrer; referrer != nil && *referrer != "" {
+		sql = sql.Where(q.Conversation.Referrer.Eq(*referrer))
+	}
+	return sql
+}
+
+// applyItemFilter adds one equality condition to the item query for each
+// non-nil field of filter: ID, PublicID, ConversationID, Role and ResponseID.
+// Role is compared through its string form.
+func (repo *ConversationGormRepository) applyItemFilter(q *gormgen.Query, sql gormgen.IConversationItemDo, filter conversation.ItemFilter) gormgen.IConversationItemDo {
+	if id := filter.ID; id != nil {
+		sql = sql.Where(q.ConversationItem.ID.Eq(*id))
+	}
+	if publicID := filter.PublicID; publicID != nil {
+		sql = sql.Where(q.ConversationItem.PublicID.Eq(*publicID))
+	}
+	if conversationID := filter.ConversationID; conversationID != nil {
+		sql = sql.Where(q.ConversationItem.ConversationID.Eq(*conversationID))
+	}
+	if role := filter.Role; role != nil {
+		sql = sql.Where(q.ConversationItem.Role.Eq(string(*role)))
+	}
+	if responseID := filter.ResponseID; responseID != nil {
+		sql = sql.Where(q.ConversationItem.ResponseID.Eq(*responseID))
+	}
+	return sql
+}
+
+// applyPagination applies limit, cursor and ordering to a conversation query.
+// A nil p leaves sql untouched (no ordering is added). Otherwise:
+//   - p.Limit is applied when it is set and greater than zero;
+//   - p.After, when set, keeps rows with ID < After for "desc" order and
+//     ID > After for any other order;
+//   - rows are ordered by ID, descending only when p.Order is "desc".
+func (repo *ConversationGormRepository) applyPagination(q *gormgen.Query, sql gormgen.IConversationDo, p *query.Pagination) gormgen.IConversationDo {
+	if p == nil {
+		return sql
+	}
+
+	if limit := p.Limit; limit != nil && *limit > 0 {
+		sql = sql.Limit(*limit)
+	}
+
+	// The cursor comparison and the sort direction always go together.
+	if p.Order == "desc" {
+		if p.After != nil {
+			sql = sql.Where(q.Conversation.ID.Lt(*p.After))
+		}
+		return sql.Order(q.Conversation.ID.Desc())
+	}
+	if p.After != nil {
+		sql = sql.Where(q.Conversation.ID.Gt(*p.After))
+	}
+	return sql.Order(q.Conversation.ID.Asc())
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/conversationrepo/item_repository.go b/services/llm-api/internal/infrastructure/database/repository/conversationrepo/item_repository.go
new file mode 100644
index 00000000..d5a49ba2
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/conversationrepo/item_repository.go
@@ -0,0 +1,233 @@
+package conversationrepo
+
+import (
+	"context"
+
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/infrastructure/database/gormgen"
+	"jan-server/services/llm-api/internal/infrastructure/database/transaction"
+	"jan-server/services/llm-api/internal/utils/functional"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ItemGormRepository implements conversation.ItemRepository on top of the
+// generated gormgen query API. All queries are obtained from the
+// transaction.Database handle stored in db.
+type ItemGormRepository struct {
+	db *transaction.Database
+}
+
+// Compile-time check that the type satisfies the repository interface.
+var _ conversation.ItemRepository = (*ItemGormRepository)(nil)
+
+// NewItemGormRepository wraps db in an ItemGormRepository and returns it as a
+// conversation.ItemRepository.
+func NewItemGormRepository(db *transaction.Database) conversation.ItemRepository {
+	return &ItemGormRepository{db: db}
+}
+
+// Create implements conversation.ItemRepository.
+// It inserts item as a new conversation item row and, on success, copies the ID
+// and CreatedAt held by the schema model back onto item.
+func (repo *ItemGormRepository) Create(ctx context.Context, item *conversation.Item) error {
+	record := dbschema.NewSchemaConversationItem(item)
+	itemTable := repo.db.GetQuery(ctx).ConversationItem.WithContext(ctx)
+	if createErr := itemTable.Create(record); createErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, createErr, "failed to create item")
+	}
+
+	// Hand the generated identifier and timestamp back to the caller.
+	item.ID, item.CreatedAt = record.ID, record.CreatedAt
+	return nil
+}
+
+// FindByID implements conversation.ItemRepository.
+// It builds an ItemFilter carrying only id, lets applyFilter turn it into query
+// conditions and returns the first matching row as a domain item. Any query
+// error is returned wrapped as a repository-layer platform error.
+func (repo *ItemGormRepository) FindByID(ctx context.Context, id uint) (*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	byID := conversation.ItemFilter{ID: &id}
+	record, findErr := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), byID).First()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find item by ID")
+	}
+	return record.EtoD(), nil
+}
+
+// FindByPublicID implements conversation.ItemRepository.
+// It returns the first item whose public ID equals publicID. Any query error is
+// returned wrapped as a repository-layer platform error.
+func (repo *ItemGormRepository) FindByPublicID(ctx context.Context, publicID string) (*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	byPublicID := conversation.ItemFilter{PublicID: &publicID}
+	record, findErr := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), byPublicID).First()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find item by public ID")
+	}
+	return record.EtoD(), nil
+}
+
+// FindByConversationID implements conversation.ItemRepository.
+// It returns all items of conversationID ordered by ascending CreatedAt,
+// converted to domain objects.
+func (repo *ItemGormRepository) FindByConversationID(ctx context.Context, conversationID uint) ([]*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	scope := conversation.ItemFilter{ConversationID: &conversationID}
+	oldestFirst := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), scope).
+		Order(q.ConversationItem.CreatedAt.Asc())
+
+	records, findErr := oldestFirst.Find()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find items by conversation ID")
+	}
+
+	return functional.Map(records, func(record *dbschema.ConversationItem) *conversation.Item {
+		return record.EtoD()
+	}), nil
+}
+
+// Search implements conversation.ItemRepository.
+// Placeholder implementation: searchQuery is not used yet, so the method
+// returns every item of conversationID ordered by ascending CreatedAt.
+func (repo *ItemGormRepository) Search(ctx context.Context, conversationID uint, searchQuery string) ([]*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	scope := conversation.ItemFilter{ConversationID: &conversationID}
+
+	// TODO: Implement proper full-text search filtering based on searchQuery
+	records, findErr := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), scope).
+		Order(q.ConversationItem.CreatedAt.Asc()).
+		Find()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to search items")
+	}
+
+	toDomain := func(record *dbschema.ConversationItem) *conversation.Item {
+		return record.EtoD()
+	}
+	return functional.Map(records, toDomain), nil
+}
+
+// Delete implements conversation.ItemRepository.
+// It builds an ItemFilter carrying only id, lets applyFilter turn it into query
+// conditions and issues a delete on the resulting query. The result info is
+// discarded; only an error is reported.
+func (repo *ItemGormRepository) Delete(ctx context.Context, id uint) error {
+	q := repo.db.GetQuery(ctx)
+	target := conversation.ItemFilter{ID: &id}
+	if _, deleteErr := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), target).Delete(); deleteErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, deleteErr, "failed to delete item")
+	}
+	return nil
+}
+
+// BulkCreate implements conversation.ItemRepository.
+// It inserts all items through CreateInBatches with a batch size of 100 and
+// then copies each generated ID and CreatedAt back onto the matching domain
+// item. An empty items slice is a no-op.
+func (repo *ItemGormRepository) BulkCreate(ctx context.Context, items []*conversation.Item) error {
+	if len(items) == 0 {
+		return nil
+	}
+
+	toSchema := func(item *conversation.Item) *dbschema.ConversationItem {
+		return dbschema.NewSchemaConversationItem(item)
+	}
+	records := functional.Map(items, toSchema)
+
+	itemTable := repo.db.GetQuery(ctx).ConversationItem.WithContext(ctx)
+	if createErr := itemTable.CreateInBatches(records, 100); createErr != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, createErr, "failed to bulk create items")
+	}
+
+	// records[i] was built from items[i], so the indices line up.
+	for idx := range records {
+		items[idx].ID = records[idx].ID
+		items[idx].CreatedAt = records[idx].CreatedAt
+	}
+
+	return nil
+}
+
+// CountByConversation implements conversation.ItemRepository.
+// It returns the number of items stored for conversationID.
+func (repo *ItemGormRepository) CountByConversation(ctx context.Context, conversationID uint) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	scope := conversation.ItemFilter{ConversationID: &conversationID}
+	total, countErr := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), scope).Count()
+	if countErr != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerRepository, countErr, "failed to count items by conversation")
+	}
+	return total, nil
+}
+
+// ExistsByIDAndConversation implements conversation.ItemRepository.
+// It builds an ItemFilter from itemID and conversationID, lets applyFilter turn
+// it into query conditions and reports whether the resulting count is greater
+// than zero.
+func (repo *ItemGormRepository) ExistsByIDAndConversation(ctx context.Context, itemID uint, conversationID uint) (bool, error) {
+	q := repo.db.GetQuery(ctx)
+	lookup := conversation.ItemFilter{
+		ID:             &itemID,
+		ConversationID: &conversationID,
+	}
+	matches, countErr := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), lookup).Count()
+	if countErr != nil {
+		return false, platformerrors.AsError(ctx, platformerrors.LayerRepository, countErr, "failed to check item existence")
+	}
+	return matches > 0, nil
+}
+
+// FindByFilter implements conversation.ItemRepository.
+// It returns the items selected by applyFilter for filter, with the limit,
+// cursor and ordering from pagination applied (pagination may be nil),
+// converted to domain objects.
+func (repo *ItemGormRepository) FindByFilter(ctx context.Context, filter conversation.ItemFilter, pagination *query.Pagination) ([]*conversation.Item, error) {
+	q := repo.db.GetQuery(ctx)
+	filtered := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), filter)
+	paged := repo.applyPagination(q, filtered, pagination)
+
+	records, findErr := paged.Find()
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, findErr, "failed to find items by filter")
+	}
+
+	toDomain := func(record *dbschema.ConversationItem) *conversation.Item {
+		return record.EtoD()
+	}
+	return functional.Map(records, toDomain), nil
+}
+
+// Count implements conversation.ItemRepository.
+// It returns the number of items selected by applyFilter for filter.
+func (repo *ItemGormRepository) Count(ctx context.Context, filter conversation.ItemFilter) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	total, countErr := repo.applyFilter(q, q.ConversationItem.WithContext(ctx), filter).Count()
+	if countErr != nil {
+		return 0, platformerrors.AsError(ctx, platformerrors.LayerRepository, countErr, "failed to count items")
+	}
+	return total, nil
+}
+
+// applyFilter adds one equality condition to the item query for each non-nil
+// field of filter: ID, PublicID, ConversationID, Role and ResponseID. Role is
+// compared through its string form.
+//
+// The ID condition is essential: FindByID, Delete and
+// ExistsByIDAndConversation describe their target through ItemFilter.ID, so
+// without it they would run with no ID restriction at all.
+func (repo *ItemGormRepository) applyFilter(q *gormgen.Query, sql gormgen.IConversationItemDo, filter conversation.ItemFilter) gormgen.IConversationItemDo {
+	if filter.ID != nil {
+		sql = sql.Where(q.ConversationItem.ID.Eq(*filter.ID))
+	}
+	if filter.PublicID != nil {
+		sql = sql.Where(q.ConversationItem.PublicID.Eq(*filter.PublicID))
+	}
+	if filter.ConversationID != nil {
+		sql = sql.Where(q.ConversationItem.ConversationID.Eq(*filter.ConversationID))
+	}
+	if filter.Role != nil {
+		roleStr := string(*filter.Role)
+		sql = sql.Where(q.ConversationItem.Role.Eq(roleStr))
+	}
+	if filter.ResponseID != nil {
+		sql = sql.Where(q.ConversationItem.ResponseID.Eq(*filter.ResponseID))
+	}
+	return sql
+}
+
+// applyPagination applies limit, cursor and ordering to an item query.
+// A nil p leaves sql untouched (no ordering is added). Otherwise:
+//   - p.Limit is applied when it is set and greater than zero;
+//   - p.After, when set, keeps rows with ID < After for "desc" order and
+//     ID > After for any other order;
+//   - rows are ordered by ID, descending only when p.Order is "desc".
+func (repo *ItemGormRepository) applyPagination(q *gormgen.Query, sql gormgen.IConversationItemDo, p *query.Pagination) gormgen.IConversationItemDo {
+	if p == nil {
+		return sql
+	}
+
+	if limit := p.Limit; limit != nil && *limit > 0 {
+		sql = sql.Limit(*limit)
+	}
+
+	// The cursor comparison and the sort direction always go together.
+	if p.Order == "desc" {
+		if p.After != nil {
+			sql = sql.Where(q.ConversationItem.ID.Lt(*p.After))
+		}
+		return sql.Order(q.ConversationItem.ID.Desc())
+	}
+	if p.After != nil {
+		sql = sql.Where(q.ConversationItem.ID.Gt(*p.After))
+	}
+	return sql.Order(q.ConversationItem.ID.Asc())
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/modelrepo/model_catalog_repository.go b/services/llm-api/internal/infrastructure/database/repository/modelrepo/model_catalog_repository.go
new file mode 100644
index 00000000..19f202ff
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/modelrepo/model_catalog_repository.go
@@ -0,0 +1,210 @@
+package modelrepo
+
+import (
+	"context"
+	"errors"
+
+	"gorm.io/gorm"
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/infrastructure/database/gormgen"
+	"jan-server/services/llm-api/internal/infrastructure/database/transaction"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ModelCatalogGormRepository is the GORM-backed implementation of
+// domainmodel.ModelCatalogRepository.
+type ModelCatalogGormRepository struct {
+	db *transaction.Database // handle from which each method obtains its query via GetQuery
+}
+
+// Compile-time check that the repository satisfies the domain interface.
+var _ domainmodel.ModelCatalogRepository = (*ModelCatalogGormRepository)(nil)
+
+// NewModelCatalogGormRepository wraps db in a ModelCatalogGormRepository and
+// returns it as the domain repository interface.
+func NewModelCatalogGormRepository(db *transaction.Database) domainmodel.ModelCatalogRepository {
+	return &ModelCatalogGormRepository{db: db}
+}
+
+// applyFilter narrows sql according to the fields set on filter and returns
+// the resulting query. IDs is applied only when it is non-nil and non-empty;
+// PublicID, IsModerated, Active and Status are applied when non-nil.
+func (repo *ModelCatalogGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IModelCatalogDo, filter domainmodel.ModelCatalogFilter) gormgen.IModelCatalogDo {
+	if ids := filter.IDs; ids != nil && len(*ids) > 0 {
+		sql = sql.Where(query.ModelCatalog.ID.In((*ids)...))
+	}
+	if publicID := filter.PublicID; publicID != nil {
+		sql = sql.Where(query.ModelCatalog.PublicID.Eq(*publicID))
+	}
+	if moderated := filter.IsModerated; moderated != nil {
+		sql = sql.Where(query.ModelCatalog.IsModerated.Is(*moderated))
+	}
+	if active := filter.Active; active != nil {
+		sql = sql.Where(query.ModelCatalog.Active.Is(*active))
+	}
+	if status := filter.Status; status != nil {
+		sql = sql.Where(query.ModelCatalog.Status.Eq(string(*status)))
+	}
+	return sql
+}
+
+// Create converts catalog to its schema form, inserts it, and copies the ID,
+// CreatedAt, UpdatedAt and Status of the inserted row back onto catalog.
+//
+// A conversion failure is returned as a validation error; an insert failure is
+// returned as a database error.
+func (repo *ModelCatalogGormRepository) Create(ctx context.Context, catalog *domainmodel.ModelCatalog) error {
+	row, convErr := dbschema.NewSchemaModelCatalog(catalog)
+	if convErr != nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeValidation, "failed to convert model catalog to schema", convErr, "4850d796-eba9-4027-822a-8c1db9633fe0")
+	}
+
+	q := repo.db.GetQuery(ctx)
+	insertErr := q.ModelCatalog.WithContext(ctx).Create(row)
+	if insertErr != nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to create model catalog", insertErr, "576a4099-91ff-4af7-b53a-898336b6ac94")
+	}
+
+	// Propagate the values held by the schema row after the insert.
+	catalog.ID, catalog.CreatedAt, catalog.UpdatedAt = row.ID, row.CreatedAt, row.UpdatedAt
+	catalog.Status = domainmodel.ModelCatalogStatus(row.Status)
+	return nil
+}
+
+// Update converts catalog to its schema form and writes it to the row whose ID
+// equals the converted model's ID.
+//
+// A conversion failure is returned as a validation error and a failed write as
+// a database error, matching the error wrapping used by the other methods of
+// this repository.
+//
+// NOTE(review): Updates is given a struct; per GORM's documented struct-update
+// semantics this presumably skips zero-value fields, so clearing a field to
+// its zero value may not be persisted. Confirm against the schema type.
+func (repo *ModelCatalogGormRepository) Update(ctx context.Context, catalog *domainmodel.ModelCatalog) error {
+	model, err := dbschema.NewSchemaModelCatalog(catalog)
+	if err != nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeValidation, "failed to convert model catalog to schema", err, "01276cbf-469c-4f3c-ae07-d572baf74f87")
+	}
+	q := repo.db.GetQuery(ctx)
+	if _, err := q.ModelCatalog.WithContext(ctx).Where(q.ModelCatalog.ID.Eq(model.ID)).Updates(model); err != nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to update model catalog", err, "b6f0c3a1-7d42-4e9b-a5c8-2f1e9d3047ab")
+	}
+	return nil
+}
+
+// DeleteByID issues a delete for the model catalog row with the given id.
+// A failing statement is returned as a repository-layer database error; the
+// number of affected rows is not inspected.
+func (repo *ModelCatalogGormRepository) DeleteByID(ctx context.Context, id uint) error {
+	q := repo.db.GetQuery(ctx)
+	target := q.ModelCatalog.WithContext(ctx).Where(q.ModelCatalog.ID.Eq(id))
+	if _, err := target.Delete(&dbschema.ModelCatalog{}); err != nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to delete model catalog", err, "27d71df3-0b21-4793-8042-19d3df51ac01")
+	}
+	return nil
+}
+
+// FindByID loads the model catalog with the given id and converts it to the
+// domain type.
+//
+// gorm.ErrRecordNotFound is mapped to a not-found error, any other query
+// failure to a database error, and a conversion failure to an internal error.
+func (repo *ModelCatalogGormRepository) FindByID(ctx context.Context, id uint) (*domainmodel.ModelCatalog, error) {
+	q := repo.db.GetQuery(ctx)
+	row, err := q.ModelCatalog.WithContext(ctx).Where(q.ModelCatalog.ID.Eq(id)).First()
+	switch {
+	case err == nil:
+		// Row loaded; continue with the conversion below.
+	case errors.Is(err, gorm.ErrRecordNotFound):
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeNotFound, "model catalog not found", err, "d97cf4f2-b638-443b-9c4b-afe1de66fe25")
+	default:
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to find model catalog by ID", err, "dc31efe7-13e0-41df-9f20-ac366aa4d437")
+	}
+
+	catalog, err := row.EtoD()
+	if err != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeInternal, "failed to convert model catalog from schema", err, "6fe551cc-e2c9-40ff-915b-5e88b84126b6")
+	}
+	return catalog, nil
+}
+
+// FindByPublicID returns the single model catalog whose public ID equals
+// publicID, using FindByFilter without pagination.
+//
+// Zero matches yield a not-found error and more than one match yields a
+// too-many-records error; errors from FindByFilter are passed through as-is.
+func (repo *ModelCatalogGormRepository) FindByPublicID(ctx context.Context, publicID string) (*domainmodel.ModelCatalog, error) {
+	matches, err := repo.FindByFilter(ctx, domainmodel.ModelCatalogFilter{PublicID: &publicID}, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	switch n := len(matches); {
+	case n == 0:
+		return nil, platformerrors.NewErrorWithContext(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeNotFound, "model catalog not found", nil, "772954bd-28fa-46f6-9237-057a1e61f8fd", map[string]any{
+			"public_id": publicID,
+		})
+	case n > 1:
+		return nil, platformerrors.NewErrorWithContext(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeTooManyRecords,
+			"multiple model catalogs found with same public ID", nil, "", map[string]any{
+				"public_id": publicID,
+				"count":     n,
+			})
+	}
+	return matches[0], nil
+}
+
+// FindByFilter returns the model catalogs matching filter, converted to the
+// domain type.
+//
+// When p is non-nil, a positive Limit and a non-negative Offset are applied and
+// rows are ordered by creation time (descending when p.Order is "desc",
+// ascending otherwise). With a nil p no limit, offset or ordering is added.
+func (repo *ModelCatalogGormRepository) FindByFilter(ctx context.Context, filter domainmodel.ModelCatalogFilter, p *query.Pagination) ([]*domainmodel.ModelCatalog, error) {
+	q := repo.db.GetQuery(ctx)
+	stmt := repo.applyFilter(q, q.ModelCatalog.WithContext(ctx), filter)
+
+	if p != nil {
+		if limit := p.Limit; limit != nil && *limit > 0 {
+			stmt = stmt.Limit(*limit)
+		}
+		if offset := p.Offset; offset != nil && *offset >= 0 {
+			stmt = stmt.Offset(*offset)
+		}
+		if descending := p.Order == "desc"; descending {
+			stmt = stmt.Order(q.ModelCatalog.CreatedAt.Desc())
+		} else {
+			stmt = stmt.Order(q.ModelCatalog.CreatedAt.Asc())
+		}
+	}
+
+	rows, err := stmt.Find()
+	if err != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to find model catalogs by filter", err, "982b14bf-11b8-4e96-9e74-b0fb689d81c1")
+	}
+
+	catalogs := make([]*domainmodel.ModelCatalog, 0, len(rows))
+	for i := range rows {
+		converted, convErr := rows[i].EtoD()
+		if convErr != nil {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeInternal, "failed to convert model catalog from schema", convErr, "4e448960-8811-4401-a385-fd658ce54816")
+		}
+		catalogs = append(catalogs, converted)
+	}
+	return catalogs, nil
+}
+
+// Count reports how many model catalogs match filter. A failing query is
+// returned as a repository-layer database error.
+func (repo *ModelCatalogGormRepository) Count(ctx context.Context, filter domainmodel.ModelCatalogFilter) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	total, err := repo.applyFilter(q, q.ModelCatalog.WithContext(ctx), filter).Count()
+	if err != nil {
+		return 0, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to count model catalogs", err, "7830ad79-8377-4ae8-bf7e-3c9e1382644c")
+	}
+	return total, nil
+}
+
+// BatchUpdateActive sets the active column to active on every model catalog
+// matching filter and returns the number of rows affected. A failing update is
+// returned as a repository-layer database error.
+func (repo *ModelCatalogGormRepository) BatchUpdateActive(ctx context.Context, filter domainmodel.ModelCatalogFilter, active bool) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	scoped := repo.applyFilter(q, q.ModelCatalog.WithContext(ctx), filter)
+	info, err := scoped.Update(q.ModelCatalog.Active, active)
+	if err != nil {
+		return 0, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to batch update model catalog active status", err, "e1878f1b-e6ca-4753-926a-21c41c02c201")
+	}
+	return info.RowsAffected, nil
+}
+
+// FindByIDs returns the model catalogs whose IDs appear in ids by delegating to
+// FindByFilter without pagination. An empty ids yields an empty, non-nil slice
+// without querying.
+func (repo *ModelCatalogGormRepository) FindByIDs(ctx context.Context, ids []uint) ([]*domainmodel.ModelCatalog, error) {
+	if len(ids) > 0 {
+		return repo.FindByFilter(ctx, domainmodel.ModelCatalogFilter{IDs: &ids}, nil)
+	}
+	return []*domainmodel.ModelCatalog{}, nil
+}
+
+// FindByPublicIDs returns the model catalogs whose public ID is one of
+// publicIDs, converted to the domain type. An empty publicIDs yields an empty,
+// non-nil slice without querying.
+//
+// Query failures are returned as database errors and conversion failures as
+// internal errors.
+func (repo *ModelCatalogGormRepository) FindByPublicIDs(ctx context.Context, publicIDs []string) ([]*domainmodel.ModelCatalog, error) {
+	if len(publicIDs) == 0 {
+		return []*domainmodel.ModelCatalog{}, nil
+	}
+
+	q := repo.db.GetQuery(ctx)
+	byPublicID := q.ModelCatalog.PublicID.In(publicIDs...)
+	rows, err := q.ModelCatalog.WithContext(ctx).Where(byPublicID).Find()
+	if err != nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError, "failed to find model catalogs by public IDs", err, "fcd6a1a6-b9bd-43f4-8f57-dbfa21a6d489")
+	}
+
+	out := make([]*domainmodel.ModelCatalog, 0, len(rows))
+	for i := range rows {
+		converted, convErr := rows[i].EtoD()
+		if convErr != nil {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeInternal, "failed to convert model catalog from schema", convErr, "3f55f3ea-b139-407d-aa43-3952881de22f")
+		}
+		out = append(out, converted)
+	}
+	return out, nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/modelrepo/provider_model_repository.go b/services/llm-api/internal/infrastructure/database/repository/modelrepo/provider_model_repository.go
new file mode 100644
index 00000000..6dd31630
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/modelrepo/provider_model_repository.go
@@ -0,0 +1,169 @@
+package modelrepo
+
+import (
+	"context"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/infrastructure/database/gormgen"
+	"jan-server/services/llm-api/internal/infrastructure/database/transaction"
+)
+
+// ProviderModelGormRepository is the GORM-backed implementation of
+// domainmodel.ProviderModelRepository.
+type ProviderModelGormRepository struct {
+	db *transaction.Database // handle from which each method obtains its query via GetQuery
+}
+
+// Compile-time check that the repository satisfies the domain interface.
+var _ domainmodel.ProviderModelRepository = (*ProviderModelGormRepository)(nil)
+
+// NewProviderModelGormRepository wraps db in a ProviderModelGormRepository and
+// returns it as the domain repository interface.
+func NewProviderModelGormRepository(db *transaction.Database) domainmodel.ProviderModelRepository {
+	return &ProviderModelGormRepository{db: db}
+}
+
+// applyFilter narrows sql according to the fields set on filter and returns
+// the resulting query.
+//
+// Slice filters (IDs, ProviderIDs, ModelPublicIDs) are applied only when the
+// pointer is non-nil and the slice is non-empty; every other field is applied
+// when its pointer is non-nil. All conditions are combined by chaining Where.
+func (repo *ProviderModelGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IProviderModelDo, filter domainmodel.ProviderModelFilter) gormgen.IProviderModelDo {
+	cols := query.ProviderModel
+
+	if ids := filter.IDs; ids != nil && len(*ids) > 0 {
+		sql = sql.Where(cols.ID.In((*ids)...))
+	}
+	if publicID := filter.PublicID; publicID != nil {
+		sql = sql.Where(cols.PublicID.Eq(*publicID))
+	}
+	if providerID := filter.ProviderID; providerID != nil {
+		sql = sql.Where(cols.ProviderID.Eq(*providerID))
+	}
+	if providerIDs := filter.ProviderIDs; providerIDs != nil && len(*providerIDs) > 0 {
+		sql = sql.Where(cols.ProviderID.In((*providerIDs)...))
+	}
+	if catalogID := filter.ModelCatalogID; catalogID != nil {
+		sql = sql.Where(cols.ModelCatalogID.Eq(*catalogID))
+	}
+	if modelPublicID := filter.ModelPublicID; modelPublicID != nil {
+		sql = sql.Where(cols.ModelPublicID.Eq(*modelPublicID))
+	}
+	if modelPublicIDs := filter.ModelPublicIDs; modelPublicIDs != nil && len(*modelPublicIDs) > 0 {
+		sql = sql.Where(cols.ModelPublicID.In((*modelPublicIDs)...))
+	}
+	if active := filter.Active; active != nil {
+		sql = sql.Where(cols.Active.Is(*active))
+	}
+	if images := filter.SupportsImages; images != nil {
+		sql = sql.Where(cols.SupportsImages.Is(*images))
+	}
+	if embeddings := filter.SupportsEmbeddings; embeddings != nil {
+		sql = sql.Where(cols.SupportsEmbeddings.Is(*embeddings))
+	}
+	if reasoning := filter.SupportsReasoning; reasoning != nil {
+		sql = sql.Where(cols.SupportsReasoning.Is(*reasoning))
+	}
+	return sql
+}
+
+// Create converts model to its schema form, inserts it, and copies the ID,
+// CreatedAt and UpdatedAt of the inserted row back onto model. Conversion and
+// insert errors are returned unchanged.
+func (repo *ProviderModelGormRepository) Create(ctx context.Context, model *domainmodel.ProviderModel) error {
+	row, err := dbschema.NewSchemaProviderModel(model)
+	if err != nil {
+		return err
+	}
+
+	q := repo.db.GetQuery(ctx)
+	err = q.ProviderModel.WithContext(ctx).Create(row)
+	if err != nil {
+		return err
+	}
+
+	model.ID, model.CreatedAt, model.UpdatedAt = row.ID, row.CreatedAt, row.UpdatedAt
+	return nil
+}
+
+// Update converts model to its schema form and writes it to the row whose ID
+// equals model.ID. Conversion and update errors are returned unchanged; the
+// number of affected rows is not inspected.
+func (repo *ProviderModelGormRepository) Update(ctx context.Context, model *domainmodel.ProviderModel) error {
+	row, convErr := dbschema.NewSchemaProviderModel(model)
+	if convErr != nil {
+		return convErr
+	}
+
+	q := repo.db.GetQuery(ctx)
+	target := q.ProviderModel.WithContext(ctx).Where(q.ProviderModel.ID.Eq(model.ID))
+	_, updateErr := target.Updates(row)
+	return updateErr
+}
+
+// DeleteByID issues a delete for the provider model row with the given id and
+// returns the statement's error unchanged.
+func (repo *ProviderModelGormRepository) DeleteByID(ctx context.Context, id uint) error {
+	q := repo.db.GetQuery(ctx)
+	target := q.ProviderModel.WithContext(ctx).Where(q.ProviderModel.ID.Eq(id))
+	_, err := target.Delete(&dbschema.ProviderModel{})
+	return err
+}
+
+// FindByID loads the provider model with the given id and converts it to the
+// domain type. Any error reported by the query (including whatever First
+// reports for a missing row) is returned unchanged.
+func (repo *ProviderModelGormRepository) FindByID(ctx context.Context, id uint) (*domainmodel.ProviderModel, error) {
+	q := repo.db.GetQuery(ctx)
+	byID := q.ProviderModel.ID.Eq(id)
+	row, err := q.ProviderModel.WithContext(ctx).Where(byID).First()
+	if err != nil {
+		return nil, err
+	}
+	return row.EtoD()
+}
+
+// FindByPublicID returns the first provider model whose public ID equals
+// publicID, using FindByFilter without pagination. It returns (nil, nil) when
+// nothing matches; errors from FindByFilter are passed through.
+func (repo *ProviderModelGormRepository) FindByPublicID(ctx context.Context, publicID string) (*domainmodel.ProviderModel, error) {
+	matches, err := repo.FindByFilter(ctx, domainmodel.ProviderModelFilter{PublicID: &publicID}, nil)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(matches) == 0:
+		return nil, nil
+	}
+	return matches[0], nil
+}
+
+// FindByFilter returns the provider models matching filter, converted to the
+// domain type.
+//
+// When p is non-nil, a positive Limit and a non-negative Offset are applied,
+// the After cursor restricts rows by ID (below the cursor for "desc" order,
+// above it otherwise) and rows are ordered by ID in the requested direction.
+// Query and conversion errors are returned unchanged.
+func (repo *ProviderModelGormRepository) FindByFilter(ctx context.Context, filter domainmodel.ProviderModelFilter, p *query.Pagination) ([]*domainmodel.ProviderModel, error) {
+	q := repo.db.GetQuery(ctx)
+	stmt := repo.applyFilter(q, q.ProviderModel.WithContext(ctx), filter)
+
+	if p != nil {
+		descending := p.Order == "desc"
+
+		if limit := p.Limit; limit != nil && *limit > 0 {
+			stmt = stmt.Limit(*limit)
+		}
+		if offset := p.Offset; offset != nil && *offset >= 0 {
+			stmt = stmt.Offset(*offset)
+		}
+		if cursor := p.After; cursor != nil {
+			if descending {
+				stmt = stmt.Where(q.ProviderModel.ID.Lt(*cursor))
+			} else {
+				stmt = stmt.Where(q.ProviderModel.ID.Gt(*cursor))
+			}
+		}
+		if descending {
+			stmt = stmt.Order(q.ProviderModel.ID.Desc())
+		} else {
+			stmt = stmt.Order(q.ProviderModel.ID.Asc())
+		}
+	}
+
+	rows, err := stmt.Find()
+	if err != nil {
+		return nil, err
+	}
+
+	models := make([]*domainmodel.ProviderModel, 0, len(rows))
+	for i := range rows {
+		converted, convErr := rows[i].EtoD()
+		if convErr != nil {
+			return nil, convErr
+		}
+		models = append(models, converted)
+	}
+	return models, nil
+}
+
+// Count reports how many provider models match filter, returning the query's
+// result and error unchanged.
+func (repo *ProviderModelGormRepository) Count(ctx context.Context, filter domainmodel.ProviderModelFilter) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	return repo.applyFilter(q, q.ProviderModel.WithContext(ctx), filter).Count()
+}
+
+// BatchUpdateActive sets the active column to active on every provider model
+// matching filter and returns the number of rows affected. An update error is
+// returned unchanged with a zero count.
+func (repo *ProviderModelGormRepository) BatchUpdateActive(ctx context.Context, filter domainmodel.ProviderModelFilter, active bool) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	scoped := repo.applyFilter(q, q.ProviderModel.WithContext(ctx), filter)
+	info, err := scoped.Update(q.ProviderModel.Active, active)
+	if err != nil {
+		return 0, err
+	}
+	return info.RowsAffected, nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/modelrepo/provider_repository.go b/services/llm-api/internal/infrastructure/database/repository/modelrepo/provider_repository.go
new file mode 100644
index 00000000..f5e2cce8
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/modelrepo/provider_repository.go
@@ -0,0 +1,148 @@
+package modelrepo
+
+import (
+	"context"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/infrastructure/database/gormgen"
+	"jan-server/services/llm-api/internal/infrastructure/database/transaction"
+	"jan-server/services/llm-api/internal/utils/functional"
+)
+
+// ProviderGormRepository is the GORM-backed implementation of
+// domainmodel.ProviderRepository.
+type ProviderGormRepository struct {
+	db *transaction.Database // handle from which each method obtains its query via GetQuery
+}
+
+// Compile-time check that the repository satisfies the domain interface.
+var _ domainmodel.ProviderRepository = (*ProviderGormRepository)(nil)
+
+// NewProviderGormRepository wraps db in a ProviderGormRepository and returns
+// it as the domain repository interface.
+func NewProviderGormRepository(db *transaction.Database) domainmodel.ProviderRepository {
+	return &ProviderGormRepository{db: db}
+}
+
+// applyFilter narrows sql according to the fields set on filter and returns
+// the resulting query.
+//
+// IDs is applied only when non-nil and non-empty. PublicID, Kind, Active and
+// IsModerated are equality checks. LastSyncedAfter and LastSyncedBefore are
+// inclusive lower and upper bounds on the last-synced timestamp.
+func (repo *ProviderGormRepository) applyFilter(query *gormgen.Query, sql gormgen.IProviderDo, filter domainmodel.ProviderFilter) gormgen.IProviderDo {
+	cols := query.Provider
+
+	if ids := filter.IDs; ids != nil && len(*ids) > 0 {
+		sql = sql.Where(cols.ID.In((*ids)...))
+	}
+	if publicID := filter.PublicID; publicID != nil {
+		sql = sql.Where(cols.PublicID.Eq(*publicID))
+	}
+	if kind := filter.Kind; kind != nil {
+		sql = sql.Where(cols.Kind.Eq(string(*kind)))
+	}
+	if active := filter.Active; active != nil {
+		sql = sql.Where(cols.Active.Is(*active))
+	}
+	if moderated := filter.IsModerated; moderated != nil {
+		sql = sql.Where(cols.IsModerated.Is(*moderated))
+	}
+	if from := filter.LastSyncedAfter; from != nil {
+		sql = sql.Where(cols.LastSyncedAt.Gte(*from))
+	}
+	if until := filter.LastSyncedBefore; until != nil {
+		sql = sql.Where(cols.LastSyncedAt.Lte(*until))
+	}
+	return sql
+}
+
+// Create inserts the schema form of provider and copies the ID, CreatedAt and
+// UpdatedAt of the inserted row back onto provider. An insert error is
+// returned unchanged.
+func (repo *ProviderGormRepository) Create(ctx context.Context, provider *domainmodel.Provider) error {
+	row := dbschema.NewSchemaProvider(provider)
+
+	q := repo.db.GetQuery(ctx)
+	err := q.Provider.WithContext(ctx).Create(row)
+	if err != nil {
+		return err
+	}
+
+	provider.ID, provider.CreatedAt, provider.UpdatedAt = row.ID, row.CreatedAt, row.UpdatedAt
+	return nil
+}
+
+// Update writes the schema form of provider to the row with the same ID and
+// returns the statement's error unchanged; the number of affected rows is not
+// inspected.
+func (repo *ProviderGormRepository) Update(ctx context.Context, provider *domainmodel.Provider) error {
+	row := dbschema.NewSchemaProvider(provider)
+	q := repo.db.GetQuery(ctx)
+	target := q.Provider.WithContext(ctx).Where(q.Provider.ID.Eq(row.ID))
+	_, err := target.Updates(row)
+	return err
+}
+
+// DeleteByID issues a delete for the provider row with the given id and
+// returns the statement's error unchanged.
+func (repo *ProviderGormRepository) DeleteByID(ctx context.Context, id uint) error {
+	q := repo.db.GetQuery(ctx)
+	target := q.Provider.WithContext(ctx).Where(q.Provider.ID.Eq(id))
+	_, err := target.Delete(&dbschema.Provider{})
+	return err
+}
+
+// FindByID returns the provider with the given id, looked up through
+// FindByFilter with a single-element ID filter. It returns (nil, nil) when no
+// provider matches; errors from FindByFilter are passed through.
+func (repo *ProviderGormRepository) FindByID(ctx context.Context, id uint) (*domainmodel.Provider, error) {
+	single := []uint{id}
+	matches, err := repo.FindByFilter(ctx, domainmodel.ProviderFilter{IDs: &single}, nil)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(matches) == 0:
+		return nil, nil
+	}
+	return matches[0], nil
+}
+
+// FindByPublicID returns the first provider whose public ID equals publicID,
+// using FindByFilter without pagination. It returns (nil, nil) when nothing
+// matches; errors from FindByFilter are passed through.
+func (repo *ProviderGormRepository) FindByPublicID(ctx context.Context, publicID string) (*domainmodel.Provider, error) {
+	matches, err := repo.FindByFilter(ctx, domainmodel.ProviderFilter{PublicID: &publicID}, nil)
+	switch {
+	case err != nil:
+		return nil, err
+	case len(matches) == 0:
+		return nil, nil
+	}
+	return matches[0], nil
+}
+
+// FindByFilter returns the providers matching filter, converted to the domain
+// type via functional.Map.
+//
+// When p is non-nil, a positive Limit and a non-negative Offset are applied and
+// rows are ordered by creation time (descending when p.Order is "desc",
+// ascending otherwise). A query error is returned unchanged.
+func (repo *ProviderGormRepository) FindByFilter(ctx context.Context, filter domainmodel.ProviderFilter, p *query.Pagination) ([]*domainmodel.Provider, error) {
+	q := repo.db.GetQuery(ctx)
+	stmt := repo.applyFilter(q, q.Provider.WithContext(ctx), filter)
+
+	if p != nil {
+		if limit := p.Limit; limit != nil && *limit > 0 {
+			stmt = stmt.Limit(*limit)
+		}
+		if offset := p.Offset; offset != nil && *offset >= 0 {
+			stmt = stmt.Offset(*offset)
+		}
+		if descending := p.Order == "desc"; descending {
+			stmt = stmt.Order(q.Provider.CreatedAt.Desc())
+		} else {
+			stmt = stmt.Order(q.Provider.CreatedAt.Asc())
+		}
+	}
+
+	rows, err := stmt.Find()
+	if err != nil {
+		return nil, err
+	}
+
+	toDomain := func(row *dbschema.Provider) *domainmodel.Provider {
+		return row.EtoD()
+	}
+	return functional.Map(rows, toDomain), nil
+}
+
+// Count reports how many providers match filter, returning the query's result
+// and error unchanged.
+func (repo *ProviderGormRepository) Count(ctx context.Context, filter domainmodel.ProviderFilter) (int64, error) {
+	q := repo.db.GetQuery(ctx)
+	return repo.applyFilter(q, q.Provider.WithContext(ctx), filter).Count()
+}
+
+// FindByIDs returns the providers whose IDs appear in ids by delegating to
+// FindByFilter without pagination. An empty ids yields an empty, non-nil slice
+// without querying.
+func (repo *ProviderGormRepository) FindByIDs(ctx context.Context, ids []uint) ([]*domainmodel.Provider, error) {
+	if len(ids) > 0 {
+		return repo.FindByFilter(ctx, domainmodel.ProviderFilter{IDs: &ids}, nil)
+	}
+	return []*domainmodel.Provider{}, nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/projectrepo/project_repository.go b/services/llm-api/internal/infrastructure/database/repository/projectrepo/project_repository.go
new file mode 100644
index 00000000..d324d51c
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/projectrepo/project_repository.go
@@ -0,0 +1,159 @@
+package projectrepo
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"gorm.io/gorm"
+
+	"jan-server/services/llm-api/internal/domain/project"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ProjectGormRepository is the GORM-backed implementation of
+// project.ProjectRepository.
+type ProjectGormRepository struct {
+	db *gorm.DB // database handle used directly by every method
+}
+
+// Compile-time check that the repository satisfies the domain interface.
+var _ project.ProjectRepository = (*ProjectGormRepository)(nil)
+
+// NewProjectGormRepository wraps db in a ProjectGormRepository and returns it
+// as the domain repository interface.
+func NewProjectGormRepository(db *gorm.DB) project.ProjectRepository {
+	return &ProjectGormRepository{db: db}
+}
+
+// Create implements project.ProjectRepository.
+//
+// It inserts the schema form of proj and copies the ID, CreatedAt and
+// UpdatedAt of the inserted row back onto proj. An insert failure is returned
+// as a repository-layer platform error.
+func (repo *ProjectGormRepository) Create(ctx context.Context, proj *project.Project) error {
+	row := dbschema.NewSchemaProject(proj)
+
+	insert := repo.db.WithContext(ctx).Create(row)
+	if insert.Error != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, insert.Error, "failed to create project")
+	}
+
+	proj.ID, proj.CreatedAt, proj.UpdatedAt = row.ID, row.CreatedAt, row.UpdatedAt
+	return nil
+}
+
+// GetByPublicID implements project.ProjectRepository.
+//
+// It loads the first non-deleted project with the given public ID. Every
+// query error, whatever its cause, is passed through platformerrors.AsError.
+func (repo *ProjectGormRepository) GetByPublicID(ctx context.Context, publicID string) (*project.Project, error) {
+	var row dbschema.Project
+
+	lookup := repo.db.WithContext(ctx).
+		Where("public_id = ? AND deleted_at IS NULL", publicID).
+		First(&row)
+	if lookup.Error != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, lookup.Error, "failed to find project by public ID")
+	}
+	return row.EtoD(), nil
+}
+
+// GetByPublicIDAndUserID implements project.ProjectRepository.
+//
+// It loads the first non-deleted project that has the given public ID and
+// belongs to userID. Every query error, whatever its cause, is passed through
+// platformerrors.AsError.
+func (repo *ProjectGormRepository) GetByPublicIDAndUserID(ctx context.Context, publicID string, userID uint) (*project.Project, error) {
+	var row dbschema.Project
+
+	lookup := repo.db.WithContext(ctx).
+		Where("public_id = ? AND user_id = ? AND deleted_at IS NULL", publicID, userID).
+		First(&row)
+	if lookup.Error != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerRepository, lookup.Error, "failed to find project by public ID and user ID")
+	}
+	return row.EtoD(), nil
+}
+
+// ListByUserID implements project.ProjectRepository.
+//
+// It returns one page of the non-deleted projects owned by userID together
+// with the total number of such projects (counted before pagination).
+//
+// With a nil pagination the rows are ordered by updated_at descending and no
+// limit is applied. Otherwise the After cursor, the requested order on
+// updated_at and a positive Limit are applied.
+//
+// NOTE(review): the After cursor filters on id while the rows are ordered by
+// updated_at, so consecutive pages can skip or repeat rows whenever the two
+// orders disagree. Left unchanged because fixing it alters the page contract —
+// confirm the intended cursor semantics with the API owners.
+func (repo *ProjectGormRepository) ListByUserID(ctx context.Context, userID uint, pagination *query.Pagination) ([]*project.Project, int64, error) {
+	// Session marks the filtered statement as safe to share, so the count and
+	// the page query each work on their own copy of the conditions instead of
+	// mutating one chained *gorm.DB.
+	baseQuery := repo.db.WithContext(ctx).
+		Model(&dbschema.Project{}).
+		Where("user_id = ? AND deleted_at IS NULL", userID).
+		Session(&gorm.Session{})
+
+	// Count total
+	var total int64
+	if err := baseQuery.Count(&total).Error; err != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerRepository, err, "failed to count projects")
+	}
+
+	// Apply pagination
+	pageQuery := baseQuery
+	if pagination != nil {
+		descending := pagination.Order == "desc"
+
+		if pagination.After != nil {
+			if descending {
+				pageQuery = pageQuery.Where("id < ?", *pagination.After)
+			} else {
+				pageQuery = pageQuery.Where("id > ?", *pagination.After)
+			}
+		}
+
+		if descending {
+			pageQuery = pageQuery.Order("updated_at DESC")
+		} else {
+			pageQuery = pageQuery.Order("updated_at ASC")
+		}
+
+		if pagination.Limit != nil && *pagination.Limit > 0 {
+			pageQuery = pageQuery.Limit(*pagination.Limit)
+		}
+	} else {
+		// Default ordering
+		pageQuery = pageQuery.Order("updated_at DESC")
+	}
+
+	// Execute query
+	var rows []dbschema.Project
+	if err := pageQuery.Find(&rows).Error; err != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerRepository, err, "failed to list projects")
+	}
+
+	// Convert to domain; indexing avoids copying each row struct.
+	result := make([]*project.Project, len(rows))
+	for i := range rows {
+		result[i] = rows[i].EtoD()
+	}
+
+	return result, total, nil
+}
+
+// Update implements project.ProjectRepository.
+//
+// It writes name, instruction, favorite, archived_at, last_used_at and a fresh
+// updated_at to the non-deleted project identified by proj.PublicID, then
+// stores the new timestamp on proj.UpdatedAt.
+//
+// Soft-deleted rows are excluded, matching the other queries in this
+// repository. If no row matches, a not-found error is returned (as Delete
+// does) rather than reporting success for a write that changed nothing.
+// Statement failures are returned through platformerrors.AsError.
+func (repo *ProjectGormRepository) Update(ctx context.Context, proj *project.Project) error {
+	dbProject := dbschema.ProjectDtoE(proj)
+	dbProject.UpdatedAt = time.Now()
+
+	// A map is used so that every listed column is written even when its new
+	// value is the zero value.
+	result := repo.db.WithContext(ctx).Model(&dbschema.Project{}).
+		Where("public_id = ? AND deleted_at IS NULL", proj.PublicID).
+		Updates(map[string]interface{}{
+			"name":         dbProject.Name,
+			"instruction":  dbProject.Instruction,
+			"favorite":     dbProject.Favorite,
+			"archived_at":  dbProject.ArchivedAt,
+			"last_used_at": dbProject.LastUsedAt,
+			"updated_at":   dbProject.UpdatedAt,
+		})
+
+	if result.Error != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, result.Error, "failed to update project")
+	}
+
+	if result.RowsAffected == 0 {
+		return platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeNotFound, fmt.Sprintf("project %v not found", proj.PublicID), nil, "")
+	}
+
+	proj.UpdatedAt = dbProject.UpdatedAt
+	return nil
+}
+
+// Delete implements project.ProjectRepository.
+//
+// It soft-deletes the project by setting deleted_at to the current time on the
+// non-deleted row with the given public ID. A statement failure is returned
+// through platformerrors.AsError; if no row was affected a not-found error is
+// returned.
+func (repo *ProjectGormRepository) Delete(ctx context.Context, publicID string) error {
+	deletedAt := time.Now()
+
+	res := repo.db.WithContext(ctx).
+		Model(&dbschema.Project{}).
+		Where("public_id = ? AND deleted_at IS NULL", publicID).
+		Update("deleted_at", deletedAt)
+
+	switch {
+	case res.Error != nil:
+		return platformerrors.AsError(ctx, platformerrors.LayerRepository, res.Error, "failed to delete project")
+	case res.RowsAffected == 0:
+		return platformerrors.NewError(ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeNotFound, fmt.Sprintf("project %s not found", publicID), nil, "")
+	}
+	return nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/repository_provider.go b/services/llm-api/internal/infrastructure/database/repository/repository_provider.go
new file mode 100644
index 00000000..8aa072c2
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/repository_provider.go
@@ -0,0 +1,23 @@
+package repository
+
+import (
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/apikeyrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/conversationrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/modelrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/projectrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/userrepo"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository/usersettingsrepo"
+
+	"github.com/google/wire"
+)
+
+// RepositoryProvider is the Wire provider set listing the constructors of the
+// GORM-backed repositories from the sub-packages imported by this file
+// (conversation, project, model, user, API key and user settings).
+var RepositoryProvider = wire.NewSet(
+	conversationrepo.NewConversationGormRepository,
+	projectrepo.NewProjectGormRepository,
+	modelrepo.NewProviderGormRepository,
+	modelrepo.NewProviderModelGormRepository,
+	modelrepo.NewModelCatalogGormRepository,
+	userrepo.NewUserGormRepository,
+	apikeyrepo.NewAPIKeyRepository,
+	usersettingsrepo.NewUserSettingsGormRepository,
+)
diff --git a/services/llm-api/internal/infrastructure/database/repository/userrepo/user_repository.go b/services/llm-api/internal/infrastructure/database/repository/userrepo/user_repository.go
new file mode 100644
index 00000000..431e6873
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/userrepo/user_repository.go
@@ -0,0 +1,120 @@
+package userrepo
+
+import (
+	"context"
+	"time"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+
+	"jan-server/services/llm-api/internal/domain/user"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// UserGormRepository is the GORM-backed implementation of user.Repository.
+type UserGormRepository struct {
+	db *gorm.DB // database handle used directly by every method
+}
+
+// Compile-time check that the repository satisfies the domain interface.
+var _ user.Repository = (*UserGormRepository)(nil)
+
+// NewUserGormRepository wraps db in a UserGormRepository and returns it as the
+// domain repository interface.
+func NewUserGormRepository(db *gorm.DB) user.Repository {
+	return &UserGormRepository{db: db}
+}
+
+// FindByIssuerAndSubject loads the user identified by the (issuer, subject)
+// pair. It returns (nil, nil) when the query reports gorm.ErrRecordNotFound
+// and a repository-layer database error for any other failure.
+func (repo *UserGormRepository) FindByIssuerAndSubject(ctx context.Context, issuer, subject string) (*user.User, error) {
+	var row dbschema.User
+
+	lookup := repo.db.WithContext(ctx).
+		Where("issuer = ? AND subject = ?", issuer, subject).
+		First(&row)
+
+	switch err := lookup.Error; {
+	case err == gorm.ErrRecordNotFound:
+		return nil, nil
+	case err != nil:
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to find user by issuer and subject",
+			err,
+			"b2a7c2d5-53b2-44a3-8f8f-927f94e9a4db",
+		)
+	}
+	return row.EtoD(), nil
+}
+
+// FindByID loads the user with the given id. It returns (nil, nil) when the
+// query reports gorm.ErrRecordNotFound and a repository-layer database error
+// for any other failure.
+func (repo *UserGormRepository) FindByID(ctx context.Context, id uint) (*user.User, error) {
+	var row dbschema.User
+
+	lookup := repo.db.WithContext(ctx).
+		Where("id = ?", id).
+		First(&row)
+
+	switch err := lookup.Error; {
+	case err == gorm.ErrRecordNotFound:
+		return nil, nil
+	case err != nil:
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to find user by ID",
+			err,
+			"a9d3f8e4-21c7-4f5b-9a2e-6d8f9e1a2b3c",
+		)
+	}
+	return row.EtoD(), nil
+}
+
+// Upsert inserts usr, or, when a row with the same (issuer, subject) already
+// exists, overwrites its auth_provider, username, email, name and picture and
+// sets updated_at to NOW(). It then reloads the row by (issuer, subject) and
+// returns it as a domain user.
+//
+// Both a failed upsert and a failed reload are returned as repository-layer
+// database errors.
+func (repo *UserGormRepository) Upsert(ctx context.Context, usr *user.User) (*user.User, error) {
+	row := dbschema.NewSchemaUser(usr)
+
+	// Columns rewritten when the (issuer, subject) pair already exists.
+	onConflict := clause.OnConflict{
+		Columns: []clause.Column{{Name: "issuer"}, {Name: "subject"}},
+		DoUpdates: clause.Assignments(map[string]any{
+			"auth_provider": row.AuthProvider,
+			"username":      row.Username,
+			"email":         row.Email,
+			"name":          row.Name,
+			"picture":       row.Picture,
+			"updated_at":    gorm.Expr("NOW()"),
+		}),
+	}
+
+	upsert := repo.db.WithContext(ctx).Clauses(onConflict).Create(row)
+	if upsert.Error != nil {
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to upsert user",
+			upsert.Error,
+			"3b31d2bd-3260-4233-b0c8-09909fa0f154",
+		)
+	}
+
+	// The reload runs on a fresh background context with its own 2s timeout
+	// rather than on ctx, so cancellation of the caller's context after a
+	// successful upsert (e.g. a client disconnect) does not abort it.
+	reloadCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
+	defer cancel()
+
+	var stored dbschema.User
+	reload := repo.db.WithContext(reloadCtx).
+		Where("issuer = ? AND subject = ?", row.Issuer, row.Subject).
+		First(&stored)
+	if reload.Error != nil {
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to reload upserted user",
+			reload.Error,
+			"f71f98cb-3154-4ad2-9076-7e58628a4098",
+		)
+	}
+
+	return stored.EtoD(), nil
+}
diff --git a/services/llm-api/internal/infrastructure/database/repository/usersettingsrepo/user_settings_repository.go b/services/llm-api/internal/infrastructure/database/repository/usersettingsrepo/user_settings_repository.go
new file mode 100644
index 00000000..1b7ab6bf
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/database/repository/usersettingsrepo/user_settings_repository.go
@@ -0,0 +1,133 @@
+package usersettingsrepo
+
+import (
+	"context"
+
+	"gorm.io/gorm"
+	"gorm.io/gorm/clause"
+
+	"jan-server/services/llm-api/internal/domain/usersettings"
+	"jan-server/services/llm-api/internal/infrastructure/database/dbschema"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// UserSettingsGormRepository implements usersettings.Repository using GORM.
+type UserSettingsGormRepository struct {
+	db *gorm.DB // handle used by FindByUserID, Upsert and Update for all queries
+}
+
+var _ usersettings.Repository = (*UserSettingsGormRepository)(nil)
+
+// NewUserSettingsGormRepository wraps db in a UserSettingsGormRepository and returns it as a usersettings.Repository.
+func NewUserSettingsGormRepository(db *gorm.DB) usersettings.Repository {
+	return &UserSettingsGormRepository{db: db}
+}
+
+// FindByUserID looks up the settings row whose user_id equals userID.
+// It returns (nil, nil) when no such row exists.
+func (repo *UserSettingsGormRepository) FindByUserID(ctx context.Context, userID uint) (*usersettings.UserSettings, error) {
+	var row dbschema.UserSettings
+	lookup := repo.db.WithContext(ctx).Where("user_id = ?", userID)
+
+	switch err := lookup.First(&row).Error; {
+	case err == nil:
+		return row.EtoD(), nil
+	case err == gorm.ErrRecordNotFound:
+		// A missing row is reported as a nil result rather than as an error.
+		return nil, nil
+	default:
+		// Every other failure is wrapped as a repository-layer database error.
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to find user settings by user ID",
+			err,
+			"us-01",
+		)
+	}
+}
+
+// Upsert creates the settings row for settings.UserID or, when a row with that
+// user_id already exists, overwrites its mutable columns. The row is then read
+// back and returned in domain form.
+func (repo *UserSettingsGormRepository) Upsert(ctx context.Context, settings *usersettings.UserSettings) (*usersettings.UserSettings, error) {
+	row := dbschema.NewSchemaUserSettings(settings)
+
+	// Columns rewritten when the user_id conflict fires; updated_at takes the database's NOW().
+	updates := map[string]interface{}{
+		"enable_trace":      row.EnableTrace,
+		"enable_tools":      row.EnableTools,
+		"memory_config":     row.MemoryConfig,
+		"profile_settings":  row.ProfileSettings,
+		"advanced_settings": row.AdvancedSettings,
+		"preferences":       row.Preferences,
+		"updated_at":        gorm.Expr("NOW()"),
+	}
+	onConflict := clause.OnConflict{
+		Columns:   []clause.Column{{Name: "user_id"}},
+		DoUpdates: clause.Assignments(updates),
+	}
+
+	err := repo.db.WithContext(ctx).
+		Clauses(onConflict).
+		Create(row).Error
+	if err != nil {
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to upsert user settings",
+			err,
+			"us-02",
+		)
+	}
+
+	// Read the row back so the result carries the stored ID and timestamps.
+	var stored dbschema.UserSettings
+	lookup := repo.db.WithContext(ctx).Where("user_id = ?", settings.UserID)
+	if err := lookup.First(&stored).Error; err != nil {
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to reload upserted user settings",
+			err,
+			"us-03",
+		)
+	}
+
+	return stored.EtoD(), nil
+}
+
+// Update overwrites the mutable columns of the settings row matching settings.UserID
+// and wraps any database failure as a repository-layer error.
+func (repo *UserSettingsGormRepository) Update(ctx context.Context, settings *usersettings.UserSettings) error {
+	row := dbschema.NewSchemaUserSettings(settings)
+	// Columns written by this update; updated_at takes the database's NOW().
+	changes := map[string]interface{}{
+		"enable_trace":      row.EnableTrace,
+		"enable_tools":      row.EnableTools,
+		"memory_config":     row.MemoryConfig,
+		"profile_settings":  row.ProfileSettings,
+		"advanced_settings": row.AdvancedSettings,
+		"preferences":       row.Preferences,
+		"updated_at":        gorm.Expr("NOW()"),
+	}
+
+	target := repo.db.WithContext(ctx).
+		Model(&dbschema.UserSettings{}).
+		Where("user_id = ?", settings.UserID)
+	if err := target.Updates(changes).Error; err != nil {
+		return platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to update user settings",
+			err,
+			"us-04",
+		)
+	}
+
+	return nil
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/database/repository/transaction/database.go b/services/llm-api/internal/infrastructure/database/transaction/database.go
similarity index 62%
rename from apps/jan-api-gateway/application/app/infrastructure/database/repository/transaction/database.go
rename to services/llm-api/internal/infrastructure/database/transaction/database.go
index ac2790db..a35ee577 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/database/repository/transaction/database.go
+++ b/services/llm-api/internal/infrastructure/database/transaction/database.go
@@ -4,12 +4,13 @@ import (
 	"context"
 
 	"gorm.io/gorm"
-	"menlo.ai/jan-api-gateway/app/infrastructure/database/gormgen"
-	"menlo.ai/jan-api-gateway/app/utils/contextkeys"
+	"jan-server/services/llm-api/internal/infrastructure/database/gormgen"
 )
 
+type TransactionContextKey struct{} // context key under which WithTx stores the *gorm.DB that GetTx reads back
+
 func WithTx(ctx context.Context, tx *gorm.DB) context.Context {
-	return context.WithValue(ctx, contextkeys.TransactionContextKey{}, tx)
+	return context.WithValue(ctx, TransactionContextKey{}, tx)
 }
 
 type Database struct {
@@ -17,7 +18,7 @@ type Database struct {
 }
 
 func (t *Database) GetTx(ctx context.Context) *gorm.DB {
-	if tx, ok := ctx.Value(contextkeys.TransactionContextKey{}).(*gorm.DB); ok {
+	if tx, ok := ctx.Value(TransactionContextKey{}).(*gorm.DB); ok {
 		return tx
 	}
 	return t.db
diff --git a/services/llm-api/internal/infrastructure/inference/inference_provider.go b/services/llm-api/internal/infrastructure/inference/inference_provider.go
new file mode 100644
index 00000000..5180281a
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/inference/inference_provider.go
@@ -0,0 +1,103 @@
+package inference
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"jan-server/services/llm-api/internal/config"
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/utils/crypto"
+	httpclients "jan-server/services/llm-api/internal/utils/httpclients"
+	chatclient "jan-server/services/llm-api/internal/utils/httpclients/chat"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"resty.dev/v3"
+)
+
+type InferenceProvider struct{} // carries no fields; each method works from the provider argument it is given
+
+func NewInferenceProvider() *InferenceProvider { // returns a pointer to an empty InferenceProvider
+	return &InferenceProvider{}
+}
+
+func (ip *InferenceProvider) GetChatCompletionClient(ctx context.Context, provider *domainmodel.Provider) (*chatclient.ChatCompletionClient, error) {
+	// Build the provider's HTTP client first; any failure there is returned unchanged.
+	httpClient, err := ip.createRestyClient(ctx, provider)
+	if err != nil {
+		return nil, err
+	}
+	// The display name is passed as the client name, alongside the provider's base URL.
+	return chatclient.NewChatCompletionClient(httpClient, provider.DisplayName, provider.BaseURL), nil
+}
+
+func (ip *InferenceProvider) GetChatModelClient(ctx context.Context, provider *domainmodel.Provider) (*chatclient.ChatModelClient, error) {
+	// Build the provider's HTTP client first; any failure there is returned unchanged.
+	httpClient, err := ip.createRestyClient(ctx, provider)
+	if err != nil {
+		return nil, err
+	}
+	// The display name is passed as the client name, alongside the provider's base URL.
+	return chatclient.NewChatModelClient(httpClient, provider.DisplayName, provider.BaseURL), nil
+}
+
+func (ip *InferenceProvider) ListModels(ctx context.Context, provider *domainmodel.Provider) ([]chatclient.Model, error) {
+	// Resolve a model client for the provider, then return the Data field of its listing.
+	lister, err := ip.GetChatModelClient(ctx, provider)
+	if err != nil {
+		return nil, err
+	}
+
+	listing, err := lister.ListModels(ctx)
+	if err != nil {
+		return nil, err
+	}
+	return listing.Data, nil
+}
+
+// createRestyClient builds the HTTP client for provider: base URL plus, when a usable API key is stored, the auth header for its kind.
+func (ip *InferenceProvider) createRestyClient(ctx context.Context, provider *domainmodel.Provider) (*resty.Client, error) {
+	client := httpclients.NewClient(fmt.Sprintf("%sClient", provider.PublicID))
+	client.SetBaseURL(provider.BaseURL)
+
+	// Set authorization header if API key exists
+	if provider.EncryptedAPIKey != "" {
+		apiKey, err := ip.decryptAPIKey(ctx, provider.EncryptedAPIKey)
+		if err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to decrypt API key")
+		}
+		// Trim once so the empty check, the "none" sentinel check and the header value all see the same key.
+		apiKey = strings.TrimSpace(apiKey)
+		if apiKey != "" && !strings.EqualFold(apiKey, "none") {
+			switch provider.Kind {
+			case domainmodel.ProviderAzureOpenAI:
+				client.SetHeader("api-key", apiKey)
+			case domainmodel.ProviderAnthropic:
+				client.SetHeader("X-API-Key", apiKey)
+				client.SetHeader("Anthropic-Version", "2023-06-01")
+			default: // Cohere and every other kind authenticate with a bearer token
+				client.SetHeader("Authorization", fmt.Sprintf("Bearer %s", apiKey))
+			}
+		}
+	}
+
+	return client, nil
+}
+
+// decryptAPIKey decrypts encryptedAPIKey with the trimmed ModelProviderSecret from the global config.
+// An empty input yields an empty key; a blank secret is reported as an internal error.
+func (ip *InferenceProvider) decryptAPIKey(ctx context.Context, encryptedAPIKey string) (string, error) {
+	if len(encryptedAPIKey) == 0 {
+		return "", nil
+	}
+
+	passphrase := strings.TrimSpace(config.GetGlobal().ModelProviderSecret)
+	if len(passphrase) == 0 {
+		return "", platformerrors.NewError(ctx, platformerrors.LayerInfrastructure, platformerrors.ErrorTypeInternal, "MODEL_PROVIDER_SECRET not configured", nil, "8f07ea41-1096-405b-ae2e-cde06564e5bc")
+	}
+	decrypted, err := crypto.DecryptString(passphrase, encryptedAPIKey)
+	if err != nil {
+		return "", err
+	}
+	return decrypted, nil
+}
diff --git a/services/llm-api/internal/infrastructure/infrastructure_provider.go b/services/llm-api/internal/infrastructure/infrastructure_provider.go
new file mode 100644
index 00000000..86189343
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/infrastructure_provider.go
@@ -0,0 +1,172 @@
+package infrastructure
+
+import (
+	"context"
+	"net/http"
+	"time"
+
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/infrastructure/auth"
+	"jan-server/services/llm-api/internal/infrastructure/crontab"
+	"jan-server/services/llm-api/internal/infrastructure/database"
+	"jan-server/services/llm-api/internal/infrastructure/database/repository"
+	"jan-server/services/llm-api/internal/infrastructure/database/transaction"
+	"jan-server/services/llm-api/internal/infrastructure/inference"
+	"jan-server/services/llm-api/internal/infrastructure/keycloak"
+	"jan-server/services/llm-api/internal/infrastructure/kong"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/internal/infrastructure/mediaresolver"
+	memclient "jan-server/services/llm-api/internal/infrastructure/memory"
+)
+
+// ProvideConfig is the provider for *config.Config; it returns the result of config.Load unchanged.
+func ProvideConfig() (*config.Config, error) {
+	return config.Load()
+}
+
+// ProvideKeycloakClient provides a keycloak client whose HTTP requests are bounded by a 15-second timeout
+func ProvideKeycloakClient(cfg *config.Config, log zerolog.Logger) *keycloak.Client {
+	return keycloak.NewClient(
+		cfg.KeycloakBaseURL,
+		cfg.KeycloakRealm,
+		cfg.BackendClientID,
+		cfg.BackendClientSecret,
+		cfg.Client,
+		cfg.GuestRole,
+		&http.Client{Timeout: 15 * time.Second}, // a zero Timeout never expires; 15s matches keycloak.NewClient's nil-client default
+		log,
+		cfg.KeycloakAdminUser,
+		cfg.KeycloakAdminPass,
+		cfg.KeycloakAdminRealm,
+		cfg.KeycloakAdminClient,
+		cfg.KeycloakAdminSecret,
+	)
+}
+
+// ProvideKongClient returns a Kong Admin API client for cfg.KongAdminURL whose HTTP client times out after 10 seconds.
+func ProvideKongClient(cfg *config.Config, log zerolog.Logger) *kong.Client {
+	httpClient := &http.Client{Timeout: 10 * time.Second}
+	return kong.NewClient(cfg.KongAdminURL, httpClient, log)
+}
+
+// ProvideKeycloakValidator builds the JWT validator from the JWKS URL, issuer,
+// account, client, refresh interval and clock-skew settings held in cfg.
+func ProvideKeycloakValidator(cfg *config.Config, log zerolog.Logger) (*auth.KeycloakValidator, error) {
+	return auth.NewKeycloakValidator(
+		context.Background(),
+		cfg.JWKSURL,
+		cfg.Issuer,
+		cfg.Account,
+		cfg.Client,
+		cfg.RefreshJWKSInterval,
+		cfg.AuthClockSkew,
+		log,
+	)
+}
+
+// ProvideMemoryClient returns a memory-tools client, or nil when memory is disabled in cfg or the health check fails.
+func ProvideMemoryClient(cfg *config.Config, log zerolog.Logger) *memclient.Client {
+	if !cfg.MemoryEnabled {
+		return nil
+	}
+	memory := memclient.NewClient(cfg.MemoryBaseURL, cfg.MemoryTimeout)
+	probeCtx, stop := context.WithTimeout(context.Background(), cfg.MemoryTimeout)
+	defer stop()
+	if probeErr := memory.Health(probeCtx); probeErr != nil {
+		log.Warn().Err(probeErr).Msg("memory-tools health check failed, disabling memory integration")
+		return nil
+	}
+	return memory
+}
+
+// ProvideDatabase opens the write database and, when cfg.AutoMigrate is set, runs the migrations before returning it.
+func ProvideDatabase(cfg *config.Config, log zerolog.Logger) (*gorm.DB, error) {
+	conn, err := database.NewDB(cfg.GetDatabaseWriteDSN())
+	if err != nil {
+		return nil, err
+	}
+	if !cfg.AutoMigrate {
+		return conn, nil
+	}
+
+	// Migrations are opt-in; a failed migration is logged and returned to the caller.
+	log.Info().Msg("Running database migrations...")
+	if migrateErr := database.AutoMigrate(conn); migrateErr != nil {
+		log.Error().Err(migrateErr).Msg("Failed to run database migrations")
+		return nil, migrateErr
+	}
+	log.Info().Msg("Database migrations completed successfully")
+	return conn, nil
+}
+
+// ProvideTransactionDatabase wraps db in a transaction.Database by calling transaction.NewDatabase.
+func ProvideTransactionDatabase(db *gorm.DB) *transaction.Database {
+	return transaction.NewDatabase(db)
+}
+
+// ProvideMediaResolver wires the HTTP-based media placeholder resolver from cfg, log and the Keycloak client kc.
+func ProvideMediaResolver(cfg *config.Config, log zerolog.Logger, kc *keycloak.Client) mediaresolver.Resolver {
+	return mediaresolver.NewResolver(cfg, log, kc)
+}
+
+// Infrastructure holds all infrastructure dependencies
+type Infrastructure struct {
+	DB                *gorm.DB                // database connection
+	KeycloakValidator *auth.KeycloakValidator // JWT validator
+	Logger            zerolog.Logger          // zerolog logger instance
+}
+
+// NewInfrastructure stores the given database handle, Keycloak validator and logger in a new Infrastructure.
+func NewInfrastructure(
+	db *gorm.DB,
+	keycloakValidator *auth.KeycloakValidator,
+	logger zerolog.Logger,
+) *Infrastructure {
+	return &Infrastructure{
+		DB:                db,
+		KeycloakValidator: keycloakValidator,
+		Logger:            logger,
+	}
+}
+
+// InfrastructureProvider is the Wire set that exposes every infrastructure constructor listed below.
+var InfrastructureProvider = wire.NewSet(
+	// Config (loaded through config.Load)
+	ProvideConfig,
+
+	// Database connection and its transaction wrapper
+	ProvideDatabase,
+	ProvideTransactionDatabase,
+
+	// Repositories
+	repository.RepositoryProvider,
+
+	// Inference provider (builds chat and model clients per provider)
+	inference.NewInferenceProvider,
+
+	// Media resolver
+	ProvideMediaResolver,
+
+	// Logger
+	logger.GetLogger,
+
+	// Kong client removed - API keys now managed via Keycloak
+	// ProvideKongClient,
+
+	// Keycloak client and JWT validator
+	ProvideKeycloakClient,
+	ProvideKeycloakValidator,
+
+	// Memory-tools client (nil when disabled or when its health check fails)
+	ProvideMemoryClient,
+
+	// Crontab for model sync
+	crontab.NewCrontab,
+
+	// Aggregate Infrastructure struct
+	NewInfrastructure,
+)
diff --git a/services/llm-api/internal/infrastructure/keycloak/client.go b/services/llm-api/internal/infrastructure/keycloak/client.go
new file mode 100644
index 00000000..d655aa65
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/keycloak/client.go
@@ -0,0 +1,1019 @@
+package keycloak
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/rs/zerolog"
+)
+
+// Client wraps interactions with the Keycloak Admin and Token APIs.
+type Client struct {
+	baseURL             string         // server root, stored without trailing slashes
+	realm               string         // realm used for the admin, token and logout endpoints
+	backendClientID     string         // client used for the client_credentials grant and token exchange
+	backendClientSecret string         // sent together with backendClientID when non-empty
+	clientID            string         // client used for password-grant, refresh and logout requests
+	guestRole           string         // realm role assigned to newly created guests
+	httpClient          *http.Client   // HTTP client that performs every request
+	logger              zerolog.Logger // zerolog logger instance
+	adminUsername       string         // admin login for the password grant; empty disables that path
+	adminPassword       string         // admin password for the password grant; empty disables that path
+	adminRealm          string         // realm for the admin grant; "master" when empty
+	adminClientID       string         // client for the admin grant; "admin-cli" when empty
+	adminClientSecret   string         // optional secret sent with the admin grant
+}
+
+// NewClient constructs a Keycloak client; trailing slashes are trimmed from baseURL and a nil httpClient is replaced by one with a 15-second timeout.
+func NewClient(baseURL, realm, backendClientID, backendClientSecret, clientID, guestRole string, httpClient *http.Client, logger zerolog.Logger, adminUsername, adminPassword, adminRealm, adminClientID, adminClientSecret string) *Client {
+	if httpClient == nil {
+		httpClient = &http.Client{Timeout: 15 * time.Second}
+	}
+	return &Client{
+		baseURL:             strings.TrimRight(baseURL, "/"),
+		realm:               realm,
+		backendClientID:     backendClientID,
+		backendClientSecret: backendClientSecret,
+		clientID:            clientID,
+		guestRole:           guestRole,
+		httpClient:          httpClient,
+		logger:              logger,
+		adminUsername:       adminUsername,
+		adminPassword:       adminPassword,
+		adminRealm:          adminRealm,
+		adminClientID:       adminClientID,
+		adminClientSecret:   adminClientSecret,
+	}
+}
+
+// TokenSet is the JSON body decoded from Keycloak token-endpoint responses (client-credentials, password, refresh and token-exchange grants).
+type TokenSet struct {
+	AccessToken     string `json:"access_token"`
+	RefreshToken    string `json:"refresh_token,omitempty"`
+	TokenType       string `json:"token_type"`
+	ExpiresIn       int    `json:"expires_in"`
+	IssuedTokenType string `json:"issued_token_type,omitempty"`
+	Scope           string `json:"scope,omitempty"`
+}
+
+// GuestCredentials represents the result of creating a guest user.
+type GuestCredentials struct {
+	UserID      string   `json:"user_id"`         // Keycloak user ID parsed from the create-user Location header
+	Username    string   `json:"username"`        // generated as "guest-" followed by a UUID
+	Email       string   `json:"email,omitempty"` // temporary address: the username followed by "@temp.jan.ai"
+	PrincipalID string   `json:"pid"`             // set to the same value as UserID
+	Tokens      TokenSet `json:"tokens"`          // tokens obtained through the guest's password grant
+}
+
+// UpgradePayload describes the upgrade request body.
+type UpgradePayload struct {
+	Username string `json:"username"`                       // accepted, but UpgradeUser does not send it to Keycloak
+	Email    string `json:"email" binding:"required,email"` // Required to overwrite temporary email
+	FullName string `json:"full_name"`                      // written to the user's firstName by UpgradeUser
+}
+
+// CreateGuest provisions a new guest user and returns its credentials together with tokens from a password grant.
+func (c *Client) CreateGuest(ctx context.Context) (*GuestCredentials, error) {
+	hasAdminLogin := c.adminUsername != "" && c.adminPassword != ""
+	if hasAdminLogin {
+		adminToken, err := c.adminUserToken(ctx)
+		if err == nil {
+			return c.createGuestWithPasswordGrant(ctx, adminToken.AccessToken)
+		}
+		c.logger.Warn().
+			Err(err).
+			Msg("admin credentials present but password grant failed, falling back to service account")
+	}
+
+	// Fallback path: start from the backend service-account token.
+	serviceToken, err := c.serviceAccountToken(ctx)
+	if err != nil {
+		return nil, err
+	}
+	token := c.adminAccessToken(ctx, serviceToken.AccessToken)
+	return c.createGuestWithPasswordGrant(ctx, token)
+}
+
+func (c *Client) createGuestWithPasswordGrant(ctx context.Context, adminToken string) (*GuestCredentials, error) {
+	// Step 1: create the Keycloak user flagged as a guest.
+	guest, err := c.createGuestUser(ctx, adminToken)
+	if err != nil {
+		return nil, err
+	}
+	// Step 2: assign the configured guest realm role.
+	if err = c.assignGuestRole(ctx, adminToken, guest.UserID); err != nil {
+		return nil, err
+	}
+	// Step 3: set a random password (a UUID with the dashes removed).
+	secret := strings.ReplaceAll(uuid.NewString(), "-", "")
+	if err = c.setUserPassword(ctx, adminToken, guest.UserID, secret); err != nil {
+		return nil, err
+	}
+	// Step 4: log in with the guest's email and that password to obtain its tokens.
+	issued, err := c.passwordGrantTokens(ctx, guest.Email, secret)
+	if err != nil {
+		return nil, err
+	}
+	guest.Tokens = *issued
+	return guest, nil
+}
+
+// UpgradeUser sets the user's "guest" attribute to "false" and overwrites email, emailVerified, firstName and enabled; payload.Username is not sent.
+func (c *Client) UpgradeUser(ctx context.Context, userID string, payload UpgradePayload) error {
+	serviceToken, err := c.serviceAccountToken(ctx)
+	if err != nil {
+		return err
+	}
+
+	adminToken := c.adminAccessToken(ctx, serviceToken.AccessToken) // admin-user token when configured and obtainable, otherwise the service token
+
+	existing, err := c.getUser(ctx, adminToken, userID)
+	if err != nil {
+		return err
+	}
+
+	attributes := map[string][]string{} // existing attributes are copied into the update payload
+	if raw, ok := existing["attributes"].(map[string]any); ok {
+		for key, value := range raw {
+			switch v := value.(type) {
+			case []any: // only list-valued attributes are kept; non-string items are skipped
+				var out []string
+				for _, item := range v {
+					if s, ok := item.(string); ok {
+						out = append(out, s)
+					}
+				}
+				if len(out) > 0 {
+					attributes[key] = out
+				}
+			}
+		}
+	}
+	attributes["guest"] = []string{"false"} // replaces whatever guest value was copied above
+
+	// Note: username is read-only by default in Keycloak after user creation
+	// Only update email, firstName, and attributes to avoid "error-user-attribute-read-only"
+	// When upgrading, we overwrite the temporary email (e.g., guest-xxx@temp.jan.ai) with the real email
+	update := map[string]any{
+		"attributes":    attributes,
+		"email":         payload.Email,
+		"emailVerified": true, // Mark email as verified when upgrading from guest
+		"firstName":     payload.FullName,
+		"enabled":       true,
+	}
+
+	body, err := json.Marshal(update)
+	if err != nil {
+		return err
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPut, c.adminEndpoint("/users/"+url.PathEscape(userID)), bytes.NewReader(body))
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Authorization", "Bearer "+adminToken)
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 300 {
+		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 512)) // shadows the payload parameter; holds at most 512 bytes of the error body
+		return fmt.Errorf("update user failed: %s", strings.TrimSpace(string(payload)))
+	}
+
+	return nil
+}
+
+// serviceAccountToken requests a token for the backend client using the client_credentials grant.
+func (c *Client) serviceAccountToken(ctx context.Context) (*TokenSet, error) {
+	form := url.Values{}
+	form.Set("grant_type", "client_credentials")
+	form.Set("client_id", c.backendClientID)
+	if secret := c.backendClientSecret; secret != "" {
+		form.Set("client_secret", secret)
+	}
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodPost, c.tokenEndpoint(), strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, err
+	}
+	request.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 512))
+		return nil, fmt.Errorf("service account token request failed: %s", strings.TrimSpace(string(detail)))
+	}
+	issued := new(TokenSet)
+	if err := json.NewDecoder(response.Body).Decode(issued); err != nil {
+		return nil, err
+	}
+	return issued, nil
+}
+
+// adminUserToken logs in the configured admin user with the password grant, defaulting to realm "master" and client "admin-cli".
+func (c *Client) adminUserToken(ctx context.Context) (*TokenSet, error) {
+	if c.adminUsername == "" || c.adminPassword == "" {
+		return nil, errors.New("admin credentials required")
+	}
+
+	loginRealm := c.adminRealm
+	if loginRealm == "" {
+		loginRealm = "master"
+	}
+
+	loginClient := c.adminClientID
+	if loginClient == "" {
+		loginClient = "admin-cli"
+	}
+
+	form := url.Values{}
+	form.Set("grant_type", "password")
+	form.Set("client_id", loginClient)
+	if secret := c.adminClientSecret; secret != "" {
+		form.Set("client_secret", secret)
+	}
+	form.Set("username", c.adminUsername)
+	form.Set("password", c.adminPassword)
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodPost, c.realmTokenEndpoint(loginRealm), strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, err
+	}
+	request.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 512))
+		return nil, fmt.Errorf("admin token request failed: %s", strings.TrimSpace(string(detail)))
+	}
+	issued := new(TokenSet)
+	if err := json.NewDecoder(response.Body).Decode(issued); err != nil {
+		return nil, err
+	}
+	return issued, nil
+}
+
+// TokenForUser obtains an admin-user token and exchanges it for a token issued on behalf of userID.
+func (c *Client) TokenForUser(ctx context.Context, userID string) (*TokenSet, error) {
+	if len(strings.TrimSpace(userID)) == 0 {
+		return nil, errors.New("user id required")
+	}
+	admin, err := c.adminUserToken(ctx)
+	if err != nil {
+		return nil, err
+	}
+	return c.exchangeForUser(ctx, admin.AccessToken, userID)
+}
+
+// adminAccessToken prefers an admin-user token and returns serviceToken when admin login is unset or fails.
+func (c *Client) adminAccessToken(ctx context.Context, serviceToken string) string {
+	if c.adminUsername == "" || c.adminPassword == "" {
+		return serviceToken
+	}
+	admin, err := c.adminUserToken(ctx)
+	if err == nil {
+		return admin.AccessToken
+	}
+	c.logger.Warn().Err(err).Msg("fallback to service account token for admin operations")
+	return serviceToken
+}
+
+// createGuestUser creates an enabled Keycloak user flagged guest=true and returns its ID, username and temporary email.
+func (c *Client) createGuestUser(ctx context.Context, adminToken string) (*GuestCredentials, error) {
+	guestName := "guest-" + uuid.NewString()
+	// Generate temporary email for guest user (required by Keycloak when duplicateEmailsAllowed is false)
+	// Format: guest-{uuid}@temp.jan.ai to clearly identify as temporary
+	guestEmail := guestName + "@temp.jan.ai"
+
+	body, err := json.Marshal(map[string]any{
+		"username":   guestName,
+		"email":      guestEmail,
+		"enabled":    true,
+		"attributes": map[string][]string{"guest": {"true"}},
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodPost, c.adminEndpoint("/users"), bytes.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	request.Header.Set("Authorization", "Bearer "+adminToken)
+	request.Header.Set("Content-Type", "application/json")
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 512))
+		return nil, fmt.Errorf("create user failed: %s", strings.TrimSpace(string(detail)))
+	}
+
+	// The new user's ID is taken from the Location header of the response.
+	location := response.Header.Get("Location")
+	if location == "" {
+		return nil, errors.New("create user succeeded but location header missing")
+	}
+	newID := extractIDFromLocation(location)
+	if newID == "" {
+		return nil, errors.New("create user succeeded but failed to parse user id")
+	}
+
+	return &GuestCredentials{
+		UserID:      newID,
+		Username:    guestName,
+		Email:       guestEmail,
+		PrincipalID: newID,
+	}, nil
+}
+
+// assignGuestRole fetches the configured guest realm role and posts it to the user's realm role mappings.
+func (c *Client) assignGuestRole(ctx context.Context, adminToken, userID string) error {
+	guestRole, err := c.getRealmRole(ctx, adminToken, c.guestRole)
+	if err != nil {
+		return err
+	}
+
+	body, err := json.Marshal([]map[string]any{guestRole})
+	if err != nil {
+		return err
+	}
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodPost, c.adminEndpoint(fmt.Sprintf("/users/%s/role-mappings/realm", url.PathEscape(userID))), bytes.NewReader(body))
+	if err != nil {
+		return err
+	}
+	request.Header.Set("Authorization", "Bearer "+adminToken)
+	request.Header.Set("Content-Type", "application/json")
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 512))
+		return fmt.Errorf("assign role failed: %s", strings.TrimSpace(string(detail)))
+	}
+	return nil
+}
+
+// setUserPassword sets a non-temporary password for userID through the admin reset-password endpoint.
+func (c *Client) setUserPassword(ctx context.Context, adminToken, userID, password string) error {
+	// Credential body: a password-type credential that is not marked temporary.
+	body, err := json.Marshal(map[string]any{
+		"type":      "password",
+		"value":     password,
+		"temporary": false,
+	})
+	if err != nil {
+		return err
+	}
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodPut, c.adminEndpoint(fmt.Sprintf("/users/%s/reset-password", url.PathEscape(userID))), bytes.NewReader(body))
+	if err != nil {
+		return err
+	}
+	request.Header.Set("Authorization", "Bearer "+adminToken)
+	request.Header.Set("Content-Type", "application/json")
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return err
+	}
+	defer response.Body.Close()
+
+	// Statuses of 300 and above are failures; up to 512 bytes of the body go into the error.
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 512))
+		return fmt.Errorf("set password failed: %s", strings.TrimSpace(string(detail)))
+	}
+
+	return nil
+}
+
+// passwordGrantTokens logs in with email and password via the password grant; the backend secret is sent only when clientID is the backend client.
+func (c *Client) passwordGrantTokens(ctx context.Context, email, password string) (*TokenSet, error) {
+	form := url.Values{}
+	form.Set("grant_type", "password")
+	form.Set("client_id", c.clientID)
+	if c.clientID == c.backendClientID && c.backendClientSecret != "" {
+		form.Set("client_secret", c.backendClientSecret)
+	}
+	form.Set("username", email)
+	form.Set("password", password)
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodPost, c.tokenEndpoint(), strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, err
+	}
+	request.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 512))
+		return nil, fmt.Errorf("password grant failed: %s", strings.TrimSpace(string(detail)))
+	}
+	issued := new(TokenSet)
+	if err := json.NewDecoder(response.Body).Decode(issued); err != nil {
+		return nil, err
+	}
+	return issued, nil
+}
+
+// getRealmRole fetches the realm role named roleName and returns its JSON representation as a map.
+func (c *Client) getRealmRole(ctx context.Context, adminToken, roleName string) (map[string]any, error) {
+	request, err := http.NewRequestWithContext(ctx, http.MethodGet, c.adminEndpoint(fmt.Sprintf("/roles/%s", url.PathEscape(roleName))), nil)
+	if err != nil {
+		return nil, err
+	}
+	request.Header.Set("Authorization", "Bearer "+adminToken)
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 512))
+		return nil, fmt.Errorf("fetch role failed: %s", strings.TrimSpace(string(detail)))
+	}
+	var representation map[string]any
+	if err := json.NewDecoder(response.Body).Decode(&representation); err != nil {
+		return nil, err
+	}
+	return representation, nil
+}
+
+// exchangeForUser performs a token exchange as the backend client, trading adminToken for an access token for userID.
+func (c *Client) exchangeForUser(ctx context.Context, adminToken, userID string) (*TokenSet, error) {
+	form := url.Values{}
+	form.Set("grant_type", "urn:ietf:params:oauth:grant-type:token-exchange")
+	form.Set("client_id", c.backendClientID)
+	if secret := c.backendClientSecret; secret != "" {
+		form.Set("client_secret", secret)
+	}
+	form.Set("subject_token", adminToken)
+	form.Set("requested_subject", userID)
+	form.Set("requested_token_type", "urn:ietf:params:oauth:token-type:access_token")
+	if audience := c.clientID; audience != "" {
+		form.Set("audience", audience)
+	}
+	// Request tokens scoped for the frontend client so they pass audience/azp validation
+	form.Set("scope", "openid profile email")
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodPost, c.tokenEndpoint(), strings.NewReader(form.Encode()))
+	if err != nil {
+		return nil, err
+	}
+	request.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	response, err := c.httpClient.Do(request)
+	if err != nil {
+		return nil, err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode >= 300 {
+		detail, _ := io.ReadAll(io.LimitReader(response.Body, 1024))
+		return nil, fmt.Errorf("token exchange failed: %s", strings.TrimSpace(string(detail)))
+	}
+	issued := new(TokenSet)
+	if err := json.NewDecoder(response.Body).Decode(issued); err != nil {
+		return nil, err
+	}
+	return issued, nil
+}
+
+// getUser fetches the user representation for userID as a generic JSON map; userID is path-escaped like in the other /users/{id} calls.
+func (c *Client) getUser(ctx context.Context, adminToken, userID string) (map[string]any, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.adminEndpoint(fmt.Sprintf("/users/%s", url.PathEscape(userID))), nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Authorization", "Bearer "+adminToken)
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 300 {
+		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		return nil, fmt.Errorf("get user failed: %s", strings.TrimSpace(string(payload)))
+	}
+	var body map[string]any
+	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
+		return nil, err
+	}
+	return body, nil
+}
+
+func (c *Client) adminEndpoint(p string) string { // Admin API URL for path p under the configured realm
+	return fmt.Sprintf("%s/admin/realms/%s%s", c.baseURL, url.PathEscape(c.realm), p)
+}
+
+func (c *Client) tokenEndpoint() string { // token endpoint of the configured realm
+	return c.realmTokenEndpoint(c.realm)
+}
+
+func (c *Client) realmTokenEndpoint(realm string) string { // token endpoint of the given realm
+	return fmt.Sprintf("%s/realms/%s/protocol/openid-connect/token", c.baseURL, url.PathEscape(realm))
+}
+
+func (c *Client) adminTokenEndpoint() string { // token endpoint of the "master" realm
+	return c.realmTokenEndpoint("master")
+}
+
+func (c *Client) logoutEndpoint() string { // logout endpoint of the configured realm
+	return fmt.Sprintf("%s/realms/%s/protocol/openid-connect/logout", c.baseURL, url.PathEscape(c.realm))
+}
+
+// RefreshToken exchanges a refresh token for new tokens; like the password grant, it sends the backend secret when clientID is the backend client.
+func (c *Client) RefreshToken(ctx context.Context, refreshToken string) (*TokenSet, error) {
+	values := url.Values{}
+	values.Set("grant_type", "refresh_token")
+	values.Set("client_id", c.clientID)
+	if c.clientID == c.backendClientID && c.backendClientSecret != "" {
+		values.Set("client_secret", c.backendClientSecret)
+	}
+	values.Set("refresh_token", refreshToken)
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.tokenEndpoint(), strings.NewReader(values.Encode()))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode >= 300 {
+		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		return nil, fmt.Errorf("refresh token request failed: %s", strings.TrimSpace(string(payload)))
+	}
+	var token TokenSet
+	if err := json.NewDecoder(resp.Body).Decode(&token); err != nil {
+		return nil, err
+	}
+	return &token, nil
+}
+
+// LogoutUser logs out a user from Keycloak by POSTing the refresh token to the realm
+// logout endpoint. The token itself is never written to the logs, only its length.
+func (c *Client) LogoutUser(ctx context.Context, refreshToken string) error {
+	c.logger.Info().Msg("[KC-LOGOUT] Starting logout process")
+
+	if refreshToken == "" {
+		c.logger.Error().Msg("[KC-LOGOUT] Refresh token is empty")
+		return errors.New("refresh token is required for logout")
+	}
+
+	// Log only the length: a fixed [:30] preview panics on short tokens and leaks token material.
+	c.logger.Info().
+		Int("token_length", len(refreshToken)).
+		Msg("[KC-LOGOUT] Refresh token received")
+
+	values := url.Values{}
+	values.Set("client_id", c.clientID)
+
+	// Note: For public clients (like jan-client), we should NOT send client_secret
+	// Only send client_secret if this is a confidential client (backend)
+	isConfidentialClient := c.clientID == c.backendClientID && c.backendClientSecret != ""
+	if isConfidentialClient {
+		values.Set("client_secret", c.backendClientSecret)
+		c.logger.Info().
+			Str("client_id", c.clientID).
+			Bool("with_secret", true).
+			Msg("[KC-LOGOUT] Using confidential client (with secret)")
+	} else {
+		c.logger.Info().
+			Str("client_id", c.clientID).
+			Bool("with_secret", false).
+			Msg("[KC-LOGOUT] Using public client (no secret)")
+	}
+	values.Set("refresh_token", refreshToken)
+	encodedForm := values.Encode() // encoded once; reused for the body and its logged length
+	logoutURL := c.logoutEndpoint()
+	c.logger.Info().
+		Str("url", logoutURL).
+		Str("method", http.MethodPost).
+		Msg("[KC-LOGOUT] Preparing logout request")
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, logoutURL, strings.NewReader(encodedForm))
+	if err != nil {
+		c.logger.Error().Err(err).Msg("[KC-LOGOUT] Failed to create request")
+		return fmt.Errorf("create logout request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	c.logger.Info().
+		Str("content_type", "application/x-www-form-urlencoded").
+		Int("body_length", len(encodedForm)).
+		Msg("[KC-LOGOUT] Sending logout request to Keycloak")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		c.logger.Error().
+			Err(err).
+			Str("url", logoutURL).
+			Msg("[KC-LOGOUT] HTTP request failed")
+		return fmt.Errorf("execute logout request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	c.logger.Info().
+		Int("status_code", resp.StatusCode).
+		Str("status", resp.Status).
+		Msg("[KC-LOGOUT] Received response from Keycloak")
+
+	if resp.StatusCode >= 300 {
+		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		c.logger.Error().
+			Int("status_code", resp.StatusCode).
+			Str("response_body", strings.TrimSpace(string(payload))).
+			Str("client_id", c.clientID).
+			Bool("is_confidential", isConfidentialClient).
+			Msg("[KC-LOGOUT] Logout request failed")
+		return fmt.Errorf("logout request failed with status %d: %s", resp.StatusCode, strings.TrimSpace(string(payload)))
+	}
+
+	c.logger.Info().
+		Int("status_code", resp.StatusCode).
+		Msg("[KC-LOGOUT] Logout successful")
+	return nil
+}
+
+// extractIDFromLocation returns the text after the final "/" of location, or ""
+// when location is empty, contains no "/", or ends with one.
+func extractIDFromLocation(location string) string {
+	slash := strings.LastIndex(location, "/")
+	if slash < 0 {
+		return ""
+	}
+	// A trailing slash leaves an empty remainder, which is the "" result as well.
+	return location[slash+1:]
+}
+
+// buildUserAttributeUpdate assembles the user update payload: the new attributes plus the
+// email, non-empty first/last name and boolean enabled/emailVerified copied from existing.
+func buildUserAttributeUpdate(existing map[string]any, attributes map[string][]string) map[string]any {
+	payload := map[string]any{
+		"attributes": attributes,
+		"email":      getString(existing, "email"),
+	}
+	// Names are carried over only when non-empty.
+	for _, field := range []string{"firstName", "lastName"} {
+		if name := getString(existing, field); name != "" {
+			payload[field] = name
+		}
+	}
+	// Flags are carried over only when stored as real booleans.
+	for _, field := range []string{"enabled", "emailVerified"} {
+		if flag, isBool := existing[field].(bool); isBool {
+			payload[field] = flag
+		}
+	}
+
+	return payload
+}
+
+// StoreAPIKeyHash appends a "keyID:keyHash" entry to the user's api_keys attribute
+// in Keycloak, re-sending the user's other string-list attributes and profile fields.
+func (c *Client) StoreAPIKeyHash(ctx context.Context, userID, keyID, keyHash string) error {
+	serviceToken, err := c.serviceAccountToken(ctx)
+	if err != nil {
+		return fmt.Errorf("get service token: %w", err)
+	}
+
+	adminToken := c.adminAccessToken(ctx, serviceToken.AccessToken)
+
+	// Load the current representation so existing data can be sent back with the update.
+	existing, err := c.getUser(ctx, adminToken, userID)
+	if err != nil {
+		return fmt.Errorf("get user: %w", err)
+	}
+
+	// Keep only attributes whose value is a list holding at least one string.
+	attributes := map[string][]string{}
+	if raw, ok := existing["attributes"].(map[string]any); ok {
+		for name, value := range raw {
+			items, isList := value.([]any)
+			if !isList {
+				continue
+			}
+			var kept []string
+			for _, item := range items {
+				if text, isString := item.(string); isString {
+					kept = append(kept, text)
+				}
+			}
+			if len(kept) > 0 {
+				attributes[name] = kept
+			}
+		}
+	}
+
+	// Entries are stored in the format keyID:hash.
+	attributes["api_keys"] = append(attributes["api_keys"], fmt.Sprintf("%s:%s", keyID, keyHash))
+
+	// Send the merged profile back as a PUT on the admin users endpoint.
+	update := buildUserAttributeUpdate(existing, attributes)
+	body, err := json.Marshal(update)
+	if err != nil {
+		return fmt.Errorf("marshal update: %w", err)
+	}
+
+	endpoint := c.adminEndpoint("/users/" + url.PathEscape(userID))
+	req, err := http.NewRequestWithContext(ctx, http.MethodPut, endpoint, bytes.NewReader(body))
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+adminToken)
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("update user: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 300 {
+		// Surface at most 512 bytes of the error body.
+		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		return fmt.Errorf("update user failed: %s", strings.TrimSpace(string(payload)))
+	}
+
+	return nil
+}
+
+// RemoveAPIKeyHash drops every api_keys entry beginning with "keyID:" from the
+// user's Keycloak attributes and writes the remaining attributes back.
+func (c *Client) RemoveAPIKeyHash(ctx context.Context, userID, keyID string) error {
+	serviceToken, err := c.serviceAccountToken(ctx)
+	if err != nil {
+		return fmt.Errorf("get service token: %w", err)
+	}
+	adminToken := c.adminAccessToken(ctx, serviceToken.AccessToken)
+
+	// Load the current representation so existing data can be sent back with the update.
+	existing, err := c.getUser(ctx, adminToken, userID)
+	if err != nil {
+		return fmt.Errorf("get user: %w", err)
+	}
+
+	// Keep only attributes whose value is a list holding at least one string.
+	attributes := map[string][]string{}
+	if raw, ok := existing["attributes"].(map[string]any); ok {
+		for name, value := range raw {
+			items, isList := value.([]any)
+			if !isList {
+				continue
+			}
+			var kept []string
+			for _, item := range items {
+				if text, isString := item.(string); isString {
+					kept = append(kept, text)
+				}
+			}
+			if len(kept) > 0 {
+				attributes[name] = kept
+			}
+		}
+	}
+
+	// Rebuild api_keys without this key; the list stays non-nil so it marshals as [] when empty.
+	prefix := keyID + ":"
+	remaining := make([]string, 0, len(attributes["api_keys"]))
+	for _, entry := range attributes["api_keys"] {
+		if strings.HasPrefix(entry, prefix) {
+			continue
+		}
+		remaining = append(remaining, entry)
+	}
+	attributes["api_keys"] = remaining
+
+	update := buildUserAttributeUpdate(existing, attributes)
+	body, err := json.Marshal(update)
+	if err != nil {
+		return fmt.Errorf("marshal update: %w", err)
+	}
+
+	endpoint := c.adminEndpoint("/users/" + url.PathEscape(userID))
+	req, err := http.NewRequestWithContext(ctx, http.MethodPut, endpoint, bytes.NewReader(body))
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+adminToken)
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("update user: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 300 {
+		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		return fmt.Errorf("update user failed: %s", strings.TrimSpace(string(payload)))
+	}
+	return nil
+}
+
+// APIKeyUserInfo is the user identity ValidateAPIKeyHash returns for a matching API key hash.
+type APIKeyUserInfo struct {
+	UserID    string   `json:"user_id"`
+	Subject   string   `json:"subject"`
+	Username  string   `json:"username"`
+	Email     string   `json:"email"`
+	FirstName string   `json:"first_name"`
+	LastName  string   `json:"last_name"`
+	Roles     []string `json:"roles"` // left unset by ValidateAPIKeyHash
+}
+
+// KeycloakUser is the subset of a Keycloak user that GetUserBySubject returns.
+type KeycloakUser struct {
+	ID        string   `json:"id"`
+	Username  string   `json:"username"`
+	Email     string   `json:"email"`
+	FirstName string   `json:"firstName"`
+	LastName  string   `json:"lastName"`
+	Enabled   bool     `json:"enabled"`
+	Roles     []string `json:"roles,omitempty"` // left unset by GetUserBySubject
+}
+
+// GetUserBySubject looks up a Keycloak user through the admin API, treating subject
+// as the user ID. A blank subject and a 404 response each yield a dedicated error.
+func (c *Client) GetUserBySubject(ctx context.Context, subject string) (*KeycloakUser, error) {
+	if strings.TrimSpace(subject) == "" {
+		return nil, errors.New("subject required")
+	}
+
+	serviceToken, err := c.serviceAccountToken(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("get service token: %w", err)
+	}
+
+	adminToken := c.adminAccessToken(ctx, serviceToken.AccessToken)
+
+	// The subject doubles as the Keycloak user ID, so it is escaped into the path.
+	endpoint := c.adminEndpoint("/users/" + url.PathEscape(subject))
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+adminToken)
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("get user: %w", err)
+	}
+	defer resp.Body.Close()
+
+	switch {
+	case resp.StatusCode == http.StatusNotFound:
+		return nil, errors.New("user not found in keycloak")
+	case resp.StatusCode >= 300:
+		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		return nil, fmt.Errorf("get user failed: %s", strings.TrimSpace(string(payload)))
+	}
+
+	var rawUser map[string]any
+	if err := json.NewDecoder(resp.Body).Decode(&rawUser); err != nil {
+		return nil, fmt.Errorf("decode user: %w", err)
+	}
+
+	// Absent or wrongly typed fields fall back to zero values via getString/getBool.
+	return &KeycloakUser{
+		ID:        getString(rawUser, "id"),
+		Username:  getString(rawUser, "username"),
+		Email:     getString(rawUser, "email"),
+		FirstName: getString(rawUser, "firstName"),
+		LastName:  getString(rawUser, "lastName"),
+		Enabled:   getBool(rawUser, "enabled"),
+	}, nil
+}
+
+// ValidateAPIKeyHash returns the user whose api_keys attribute holds an entry
+// "keyID:hash" with hash equal to keyHash, or an "invalid api key" error if none does.
+// An empty keyHash is rejected up front so it can never match a malformed "keyID:" entry.
+func (c *Client) ValidateAPIKeyHash(ctx context.Context, keyHash string) (*APIKeyUserInfo, error) {
+	if keyHash == "" {
+		return nil, fmt.Errorf("invalid api key")
+	}
+
+	serviceToken, err := c.serviceAccountToken(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("get service token: %w", err)
+	}
+	adminToken := c.adminAccessToken(ctx, serviceToken.AccessToken)
+
+	// Note: Keycloak doesn't support searching in custom attributes directly, so the user
+	// list (max=10000) is fetched and filtered here; a separate index would be more efficient.
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.adminEndpoint("/users?max=10000"), nil)
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Authorization", "Bearer "+adminToken)
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("get users: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 300 {
+		return nil, fmt.Errorf("get users failed: status %d", resp.StatusCode)
+	}
+
+	var users []map[string]any
+	if err := json.NewDecoder(resp.Body).Decode(&users); err != nil {
+		return nil, fmt.Errorf("decode users: %w", err)
+	}
+
+	for _, user := range users {
+		attrs, ok := user["attributes"].(map[string]any)
+		if !ok {
+			continue
+		}
+		entries, ok := attrs["api_keys"].([]any)
+		if !ok {
+			continue
+		}
+		for _, raw := range entries {
+			entry, ok := raw.(string)
+			if !ok {
+				continue
+			}
+			// Compare only the part after the first ":"; the key ID prefix is ignored.
+			parts := strings.SplitN(entry, ":", 2)
+			if len(parts) != 2 || parts[1] != keyHash {
+				continue
+			}
+			return &APIKeyUserInfo{
+				UserID:    getString(user, "id"),
+				Subject:   getString(user, "id"),
+				Username:  getString(user, "username"),
+				Email:     getString(user, "email"),
+				FirstName: getString(user, "firstName"),
+				LastName:  getString(user, "lastName"),
+			}, nil
+		}
+	}
+
+	return nil, fmt.Errorf("invalid api key")
+}
+
+// getString returns m[key] when it holds a string, else "" (missing key or other type).
+func getString(m map[string]any, key string) string {
+	text, isString := m[key].(string)
+	if !isString {
+		return ""
+	}
+	return text
+}
+
+// getBool returns m[key] when it holds a bool, else false (missing key or other type).
+func getBool(m map[string]any, key string) bool {
+	flag, isBool := m[key].(bool)
+	if !isBool {
+		return false
+	}
+	return flag
+}
diff --git a/services/llm-api/internal/infrastructure/kong/client.go b/services/llm-api/internal/infrastructure/kong/client.go
new file mode 100644
index 00000000..c34e9895
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/kong/client.go
@@ -0,0 +1,186 @@
+package kong
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+)
+
+// Client implements a thin wrapper around the Kong Admin API.
+type Client struct {
+	baseURL    string         // Admin API root; NewClient strips trailing slashes
+	httpClient *http.Client   // used by do for every request
+	logger     zerolog.Logger // NewClient tags it with component=kong-admin-client
+}
+
+// Consumer represents a Kong consumer entity as decoded from Admin API responses.
+type Consumer struct {
+	ID       string `json:"id"`
+	Username string `json:"username"` // also used as the path segment in getConsumer
+	CustomID string `json:"custom_id"`
+}
+
+// KeyAuthCredential represents a key-auth credential in Kong, as returned by CreateKeyCredential.
+type KeyAuthCredential struct {
+	ID        string   `json:"id"` // the identifier DeleteKeyCredential expects
+	Key       string   `json:"key"`
+	CreatedAt int64    `json:"created_at"`
+	Tags      []string `json:"tags"`
+}
+
+// NewClient constructs a Kong Admin API client. Trailing slashes are trimmed from
+// baseURL and the logger is tagged with component=kong-admin-client.
+func NewClient(baseURL string, httpClient *http.Client, logger zerolog.Logger) *Client {
+	c := &Client{baseURL: strings.TrimRight(baseURL, "/"), httpClient: httpClient}
+	c.logger = logger.With().Str("component", "kong-admin-client").Logger()
+	if c.httpClient == nil {
+		// Fall back to a private client bounded by a 10 second timeout.
+		c.httpClient = &http.Client{Timeout: 10 * time.Second}
+	}
+	return c
+}
+
+// EnsureConsumer fetches or creates a consumer with the provided identifiers.
+func (c *Client) EnsureConsumer(ctx context.Context, username, customID string, tags []string) (*Consumer, error) {
+	if username == "" {
+		return nil, fmt.Errorf("username is required")
+	}
+	if consumer, err := c.getConsumer(ctx, username); err == nil {
+		return consumer, nil
+	} else if !isNotFound(err) {
+		return nil, err
+	}
+	payload := map[string]any{
+		"username":  username,
+		"custom_id": customID,
+	}
+	if len(tags) > 0 {
+		payload["tags"] = tags
+	}
+	var resp Consumer
+	if err := c.do(ctx, http.MethodPost, "/consumers", payload, &resp); err != nil {
+		if isConflict(err) {
+			return c.getConsumer(ctx, username)
+		}
+		return nil, err
+	}
+	return &resp, nil
+}
+
+// CreateKeyCredential registers a key-auth credential for the given consumer username.
+func (c *Client) CreateKeyCredential(ctx context.Context, username, key string, tags []string) (*KeyAuthCredential, error) {
+	if username == "" {
+		return nil, fmt.Errorf("consumer username is required")
+	}
+	if key == "" {
+		return nil, fmt.Errorf("key is required")
+	}
+
+	endpoint := fmt.Sprintf("/consumers/%s/key-auth", username)
+	payload := map[string]any{"key": key}
+	if len(tags) > 0 {
+		payload["tags"] = tags
+	}
+
+	var resp KeyAuthCredential
+	if err := c.do(ctx, http.MethodPost, endpoint, payload, &resp); err != nil {
+		return nil, err
+	}
+	return &resp, nil
+}
+
+// DeleteKeyCredential removes a key-auth credential by its ID.
+func (c *Client) DeleteKeyCredential(ctx context.Context, credentialID string) error {
+	if credentialID != "" {
+		// The ID is appended to the /key-auth/ path exactly as given.
+		return c.do(ctx, http.MethodDelete, "/key-auth/"+credentialID, nil, nil)
+	}
+	return fmt.Errorf("credential id is required")
+}
+
+func (c *Client) getConsumer(ctx context.Context, username string) (*Consumer, error) {
+	endpoint := fmt.Sprintf("/consumers/%s", username)
+	var resp Consumer
+	if err := c.do(ctx, http.MethodGet, endpoint, nil, &resp); err != nil {
+		return nil, err
+	}
+	return &resp, nil
+}
+
+// do sends one JSON request to the Admin API and optionally decodes the reply into out.
+// Statuses >= 400 become *Error carrying up to 2048 bytes of the response body.
+func (c *Client) do(ctx context.Context, method, path string, payload any, out any) error {
+	var reqBody io.Reader // stays a nil interface when there is no payload
+	if payload != nil {
+		encoded, err := json.Marshal(payload)
+		if err != nil {
+			return fmt.Errorf("marshal payload: %w", err)
+		}
+		reqBody = bytes.NewReader(encoded)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, method, c.baseURL+path, reqBody)
+	if err != nil {
+		return err
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	res, err := c.httpClient.Do(req)
+	if err != nil {
+		return err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode >= 400 {
+		raw, _ := io.ReadAll(io.LimitReader(res.Body, 2048))
+		return &Error{Code: res.StatusCode, Message: strings.TrimSpace(string(raw))}
+	}
+
+	if out == nil {
+		return nil
+	}
+	if err := json.NewDecoder(res.Body).Decode(out); err != nil {
+		return fmt.Errorf("decode response: %w", err)
+	}
+	return nil
+}
+
+// Error represents an HTTP error returned by Kong Admin API; do builds it for statuses >= 400.
+type Error struct {
+	Code    int    // HTTP status code of the response
+	Message string // trimmed response body, capped at 2048 bytes by do
+}
+
+func (e *Error) Error() string { // message is appended after the code only when non-empty
+	if e.Message != "" {
+		return fmt.Sprintf("kong admin api error: %d %s", e.Code, e.Message)
+	}
+	return fmt.Sprintf("kong admin api error: %d", e.Code)
+}
+
+// isNotFound reports whether err is a *Error carrying HTTP status 404.
+// Any other error, including nil, yields false.
+func isNotFound(err error) bool {
+	apiErr, ok := err.(*Error)
+	if !ok {
+		return false
+	}
+	return apiErr.Code == http.StatusNotFound
+}
+
+// isConflict reports whether err is a *Error carrying HTTP status 409.
+// Any other error, including nil, yields false.
+func isConflict(err error) bool {
+	apiErr, ok := err.(*Error)
+	if !ok {
+		return false
+	}
+	return apiErr.Code == http.StatusConflict
+}
diff --git a/services/llm-api/internal/infrastructure/logger/logger.go b/services/llm-api/internal/infrastructure/logger/logger.go
new file mode 100644
index 00000000..420a1090
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/logger/logger.go
@@ -0,0 +1,59 @@
+package logger
+
+import (
+	"errors"
+	"os"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/rs/zerolog"
+)
+
+var (
+	globalLogger zerolog.Logger
+	once         sync.Once
+)
+
+// GetLogger returns the global logger instance. On the first call that reaches once.Do it
+// installs a console logger at info level and sets the zerolog global level to info.
+func GetLogger() zerolog.Logger {
+	once.Do(func() {
+		out := zerolog.ConsoleWriter{
+			Out:        os.Stdout,
+			TimeFormat: time.RFC3339,
+		}
+		zerolog.SetGlobalLevel(zerolog.InfoLevel)
+		globalLogger = zerolog.New(out).With().Timestamp().Logger().Level(zerolog.InfoLevel)
+	})
+	return globalLogger
+}
+
+// New builds a zerolog logger for the given level and format ("json" or "console"),
+// applies the level globally, and installs the result as the global logger.
+// It returns an error for an unparsable level or an unsupported format.
+func New(level, format string) (zerolog.Logger, error) {
+	lvl, err := zerolog.ParseLevel(strings.ToLower(level))
+	if err != nil {
+		return zerolog.Logger{}, err
+	}
+
+	var base zerolog.Logger
+	switch strings.ToLower(format) {
+	case "json":
+		base = zerolog.New(os.Stdout).With().Timestamp().Logger()
+	case "console":
+		console := zerolog.ConsoleWriter{Out: os.Stdout, TimeFormat: time.RFC3339}
+		base = zerolog.New(console).With().Timestamp().Logger()
+	default:
+		return zerolog.Logger{}, errors.New("unsupported log format")
+	}
+	zerolog.SetGlobalLevel(lvl)
+
+	// Consume the Once so a later GetLogger call cannot replace this configured
+	// logger (and the global level) with the lazy info-level default.
+	once.Do(func() {})
+	globalLogger = base.Level(lvl)
+
+	return globalLogger, nil
+}
diff --git a/services/llm-api/internal/infrastructure/mediaresolver/resolver.go b/services/llm-api/internal/infrastructure/mediaresolver/resolver.go
new file mode 100644
index 00000000..80b2e099
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/mediaresolver/resolver.go
@@ -0,0 +1,238 @@
+package mediaresolver
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+	openai "github.com/sashabaranov/go-openai"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/domain"
+	"jan-server/services/llm-api/internal/infrastructure/keycloak"
+)
+
+var placeholderPattern = regexp.MustCompile(`data:(image/[a-z0-9.+-]+);(jan_[A-Za-z0-9]+)`)
+
+// Resolver resolves jan_* media placeholders embedded in chat messages.
+type Resolver interface {
+	ResolveMessages(ctx context.Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, bool, error) // httpResolver returns true only when messages were replaced
+}
+
+type ctxKey string // private key type for the context values below
+
+const (
+	ctxAuthorization ctxKey = "mediaresolver.authorization" // set by ContextWithAuthorization
+	ctxPrincipal     ctxKey = "mediaresolver.principal"     // set by ContextWithPrincipal
+)
+
+type httpResolver struct {
+	endpoint      string           // trimmed cfg.MediaResolveURL; target of the POST
+	client        *http.Client     // timeout comes from cfg.MediaResolveTimeout (5s fallback)
+	log           zerolog.Logger   // tagged with component=media-resolver
+	keycloak      *keycloak.Client // may be nil; used to mint a user token when ctx has none
+	defaultIssuer string           // copied from cfg.Issuer; not read anywhere in this file
+}
+
+// NewResolver constructs an HTTP-backed resolver. It returns nil when cfg is nil or
+// MediaResolveURL is blank; a non-positive MediaResolveTimeout falls back to 5 seconds.
+func NewResolver(cfg *config.Config, log zerolog.Logger, keycloakClient *keycloak.Client) Resolver {
+	if cfg == nil {
+		return nil
+	}
+	target := strings.TrimSpace(cfg.MediaResolveURL)
+	if target == "" {
+		return nil
+	}
+
+	resolver := &httpResolver{
+		endpoint:      target,
+		client:        &http.Client{Timeout: cfg.MediaResolveTimeout},
+		log:           log.With().Str("component", "media-resolver").Logger(),
+		keycloak:      keycloakClient,
+		defaultIssuer: cfg.Issuer,
+	}
+	if resolver.client.Timeout <= 0 {
+		resolver.client.Timeout = 5 * time.Second
+	}
+
+	return resolver
+}
+
+// ResolveMessages sends messages to the media resolve endpoint when any of them holds a
+// jan_* placeholder. It returns the resolved messages and true on success; otherwise the
+// original messages come back with false, plus an error when resolution was attempted and failed.
+func (r *httpResolver) ResolveMessages(ctx context.Context, messages []openai.ChatCompletionMessage) ([]openai.ChatCompletionMessage, bool, error) {
+	// Debug-only trace of where placeholders were found (previews capped at 100 bytes).
+	if r.log.Debug().Enabled() {
+		for i, msg := range messages {
+			if matchesPlaceholder(msg.Content) {
+				r.log.Debug().Int("message_index", i).Str("content_preview", msg.Content[:min(100, len(msg.Content))]).Msg("found placeholder in message content")
+			}
+			for j, part := range msg.MultiContent {
+				if part.Type != openai.ChatMessagePartTypeImageURL || part.ImageURL == nil || !matchesPlaceholder(part.ImageURL.URL) {
+					continue
+				}
+				link := part.ImageURL.URL
+				r.log.Debug().Int("message_index", i).Int("part_index", j).Str("url_preview", link[:min(100, len(link))]).Msg("found placeholder in image URL")
+			}
+		}
+	}
+
+	if !r.hasPlaceholder(messages) {
+		r.log.Debug().Msg("no media placeholders found in messages")
+		return messages, false, nil
+	}
+	r.log.Debug().Int("message_count", len(messages)).Msg("resolving media placeholders via media-api")
+
+	// Wire format: {"payload": {"messages": [...]}}.
+	buf := new(bytes.Buffer)
+	wrapped := map[string]interface{}{"payload": map[string]interface{}{"messages": messages}}
+	if err := json.NewEncoder(buf).Encode(wrapped); err != nil {
+		return messages, false, fmt.Errorf("encode media resolve request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, r.endpoint, buf)
+	if err != nil {
+		return messages, false, fmt.Errorf("build media resolve request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	if bearer := r.resolveAuthorization(ctx); bearer != "" {
+		req.Header.Set("Authorization", bearer)
+	}
+	if caller, found := principalFromContext(ctx); found && caller.ID != "" {
+		req.Header.Set("X-Principal-Id", caller.ID)
+	}
+
+	resp, err := r.client.Do(req)
+	if err != nil {
+		return messages, false, fmt.Errorf("call media resolve endpoint: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= http.StatusBadRequest {
+		var failure bytes.Buffer
+		_, _ = failure.ReadFrom(resp.Body)
+		return messages, false, fmt.Errorf("media resolve error: status=%d body=%s", resp.StatusCode, strings.TrimSpace(failure.String()))
+	}
+
+	// The reply mirrors the request envelope; decode it in two steps.
+	var envelope struct {
+		Payload json.RawMessage `json:"payload"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&envelope); err != nil {
+		return messages, false, fmt.Errorf("decode media resolve response: %w", err)
+	}
+	if len(envelope.Payload) == 0 {
+		return messages, false, errors.New("media resolve returned empty payload")
+	}
+	var resolved struct {
+		Messages []openai.ChatCompletionMessage `json:"messages"`
+	}
+	if err := json.Unmarshal(envelope.Payload, &resolved); err != nil {
+		return messages, false, fmt.Errorf("decode resolved messages: %w", err)
+	}
+	if len(resolved.Messages) == 0 {
+		return messages, false, errors.New("media resolve returned no messages")
+	}
+
+	r.log.Debug().
+		Int("input_message_count", len(messages)).
+		Int("output_message_count", len(resolved.Messages)).
+		Msg("resolved media placeholders in chat request")
+
+	// Per-message debug detail; content previews are cut to 200 bytes plus "...".
+	if r.log.Debug().Enabled() {
+		for i, msg := range resolved.Messages {
+			preview := msg.Content
+			if len(preview) > 200 {
+				preview = preview[:200] + "..."
+			}
+			r.log.Debug().
+				Int("message_index", i).
+				Str("role", string(msg.Role)).
+				Str("content_preview", preview).
+				Int("multi_content_count", len(msg.MultiContent)).
+				Msg("resolved message detail")
+		}
+	}
+
+	return resolved.Messages, true, nil
+}
+
+// hasPlaceholder reports whether any message content, image URL part or text part holds a placeholder.
+func (r *httpResolver) hasPlaceholder(messages []openai.ChatCompletionMessage) bool {
+	for i := range messages {
+		if matchesPlaceholder(messages[i].Content) {
+			return true
+		}
+		for _, part := range messages[i].MultiContent {
+			candidate := ""
+			if part.Type == openai.ChatMessagePartTypeText {
+				candidate = part.Text
+			} else if part.Type == openai.ChatMessagePartTypeImageURL && part.ImageURL != nil {
+				candidate = part.ImageURL.URL
+			}
+			if matchesPlaceholder(candidate) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+func matchesPlaceholder(value string) bool { // empty input short-circuits to false
+	if value != "" {
+		return placeholderPattern.MatchString(value)
+	}
+	return false
+}
+
+func (r *httpResolver) resolveAuthorization(ctx context.Context) string { // ctx token first, else a Keycloak-minted bearer, else ""
+	if stored, ok := ctx.Value(ctxAuthorization).(string); ok && strings.TrimSpace(stored) != "" {
+		return stored
+	}
+	caller, found := principalFromContext(ctx)
+	if r.keycloak == nil || !found || caller.Subject == "" {
+		return ""
+	}
+	minted, err := r.keycloak.TokenForUser(ctx, caller.Subject)
+	if err == nil {
+		return "Bearer " + minted.AccessToken
+	}
+	r.log.Warn().Err(err).Msg("failed to mint user token for media resolver")
+	return ""
+}
+
+func ContextWithAuthorization(ctx context.Context, token string) context.Context { // blank tokens leave ctx untouched
+	if strings.TrimSpace(token) != "" {
+		return context.WithValue(ctx, ctxAuthorization, token)
+	}
+	return ctx
+}
+
+func ContextWithPrincipal(ctx context.Context, principal domain.Principal) context.Context { // stored under ctxPrincipal for principalFromContext
+	return context.WithValue(ctx, ctxPrincipal, principal)
+}
+
+func principalFromContext(ctx context.Context) (domain.Principal, bool) { // nil ctx reports (zero value, false)
+	if ctx != nil {
+		stored, ok := ctx.Value(ctxPrincipal).(domain.Principal)
+		return stored, ok
+	}
+	return domain.Principal{}, false
+}
+
+func min(a, b int) int { // smaller of a and b
+	if b <= a {
+		return b
+	}
+	return a
+}
diff --git a/services/llm-api/internal/infrastructure/memory/client.go b/services/llm-api/internal/infrastructure/memory/client.go
new file mode 100644
index 00000000..094120e0
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/memory/client.go
@@ -0,0 +1,216 @@
+package memory
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+)
+
+// Client handles communication with the memory-tools service.
+type Client struct {
+	baseURL    string       // prefixed verbatim to /v1/memory/load, /v1/memory/observe and /healthz
+	httpClient *http.Client // built by NewClient with the configured timeout
+}
+
+// NewClient creates a new memory client with the provided base URL and timeout.
+// A non-positive timeout falls back to 5 seconds, so requests are always bounded.
+func NewClient(baseURL string, timeout time.Duration) *Client {
+	if timeout <= 0 {
+		// net/http only enforces a Timeout greater than zero.
+		timeout = 5 * time.Second
+	}
+
+	return &Client{
+		baseURL:    baseURL,
+		httpClient: &http.Client{Timeout: timeout},
+	}
+}
+
+// LoadRequest is the JSON body Client.Load POSTs to /v1/memory/load.
+type LoadRequest struct {
+	UserID         string      `json:"user_id"`
+	ProjectID      string      `json:"project_id,omitempty"`
+	ConversationID string      `json:"conversation_id,omitempty"`
+	Query          string      `json:"query"`
+	Options        LoadOptions `json:"options"`
+}
+
+// LoadOptions contains options for memory loading, sent as LoadRequest's "options" object.
+type LoadOptions struct {
+	MaxUserItems     int     `json:"max_user_items"`
+	MaxProjectItems  int     `json:"max_project_items"`
+	MaxEpisodicItems int     `json:"max_episodic_items"`
+	MinSimilarity    float32 `json:"min_similarity"`
+}
+
+// LoadResponse contains loaded memories, decoded by Client.Load from a 200 response body.
+type LoadResponse struct {
+	CoreMemory     []UserMemoryItem `json:"core_memory"`
+	EpisodicMemory []EpisodicEvent  `json:"episodic_memory"`
+	SemanticMemory []ProjectFact    `json:"semantic_memory"`
+}
+
+// UserMemoryItem represents a user memory item; it is the element type of LoadResponse.CoreMemory.
+type UserMemoryItem struct {
+	ID         string    `json:"id"`
+	UserID     string    `json:"user_id"`
+	Scope      string    `json:"scope"`
+	Text       string    `json:"text"`
+	Score      int       `json:"score"`
+	Similarity float32   `json:"similarity"`
+	CreatedAt  time.Time `json:"created_at"`
+}
+
+// ProjectFact represents a project fact; it is the element type of LoadResponse.SemanticMemory.
+type ProjectFact struct {
+	ID         string    `json:"id"`
+	ProjectID  string    `json:"project_id"`
+	Kind       string    `json:"kind"`
+	Title      string    `json:"title"`
+	Text       string    `json:"text"`
+	Confidence float32   `json:"confidence"`
+	Similarity float32   `json:"similarity"`
+	CreatedAt  time.Time `json:"created_at"`
+}
+
+// EpisodicEvent represents an episodic event; it is the element type of LoadResponse.EpisodicMemory.
+type EpisodicEvent struct {
+	ID         string    `json:"id"`
+	UserID     string    `json:"user_id"`
+	Time       time.Time `json:"time"`
+	Text       string    `json:"text"`
+	Kind       string    `json:"kind"`
+	Similarity float32   `json:"similarity"`
+}
+
+// Load retrieves relevant memories.
+func (c *Client) Load(ctx context.Context, req LoadRequest) (*LoadResponse, error) {
+	jsonData, err := json.Marshal(req)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+	log := logger.GetLogger()
+	log.Info().
+		Str("base_url", c.baseURL).
+		Str("user_id", req.UserID).
+		Str("project_id", req.ProjectID).
+		Str("conversation_id", req.ConversationID).
+		Msg("memory load request")
+
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/v1/memory/load", bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+
+	httpReq.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		log.Warn().Int("status", resp.StatusCode).Msg("memory load failed")
+		return nil, fmt.Errorf("memory load failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	var loadResp LoadResponse
+	if err := json.Unmarshal(body, &loadResp); err != nil {
+		return nil, fmt.Errorf("unmarshal response: %w", err)
+	}
+
+	log.Info().
+		Int("status", resp.StatusCode).
+		Int("core_memory", len(loadResp.CoreMemory)).
+		Int("semantic_memory", len(loadResp.SemanticMemory)).
+		Int("episodic_memory", len(loadResp.EpisodicMemory)).
+		Msg("memory load response")
+
+	return &loadResp, nil
+}
+
+// ObserveRequest is the JSON body Client.Observe POSTs to /v1/memory/observe.
+type ObserveRequest struct {
+	UserID         string             `json:"user_id"`
+	ProjectID      string             `json:"project_id,omitempty"`
+	ConversationID string             `json:"conversation_id"`
+	Messages       []ConversationItem `json:"messages"`
+}
+
+// ConversationItem represents a message; it is the element type of ObserveRequest.Messages.
+type ConversationItem struct {
+	Role      string    `json:"role"`
+	Content   string    `json:"content"`
+	CreatedAt time.Time `json:"created_at"`
+}
+
+// Observe POSTs req as JSON to /v1/memory/observe; any status other than 200 is
+// returned as an error that includes the response body.
+func (c *Client) Observe(ctx context.Context, req ObserveRequest) error {
+	payload, err := json.Marshal(req)
+	if err != nil {
+		return fmt.Errorf("marshal request: %w", err)
+	}
+	log := logger.GetLogger()
+	log.Info().
+		Str("base_url", c.baseURL).
+		Str("user_id", req.UserID).
+		Str("project_id", req.ProjectID).
+		Str("conversation_id", req.ConversationID).
+		Int("message_count", len(req.Messages)).
+		Msg("memory observe request")
+
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/v1/memory/observe", bytes.NewBuffer(payload))
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return fmt.Errorf("execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode == http.StatusOK {
+		log.Info().Int("status", resp.StatusCode).Msg("memory observe response")
+		return nil
+	}
+
+	// Failure path: report the status together with whatever body could be read.
+	log.Warn().Int("status", resp.StatusCode).Msg("memory observe failed")
+	detail, _ := io.ReadAll(resp.Body)
+	return fmt.Errorf("memory observe failed with status %d: %s", resp.StatusCode, string(detail))
+}
+
+// Health issues GET /healthz against the memory-tools service and treats any
+// status other than 200 as a failure.
+func (c *Client) Health(ctx context.Context) error {
+	probe, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+"/healthz", nil)
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+
+	resp, err := c.httpClient.Do(probe)
+	if err != nil {
+		return fmt.Errorf("execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode == http.StatusOK {
+		return nil
+	}
+	return fmt.Errorf("health check failed with status %d", resp.StatusCode)
+}
diff --git a/services/llm-api/internal/infrastructure/observability/otel.go b/services/llm-api/internal/infrastructure/observability/otel.go
new file mode 100644
index 00000000..34f0f511
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/observability/otel.go
@@ -0,0 +1,149 @@
+package observability
+
+import (
+	"context"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
+	"go.opentelemetry.io/otel/trace"
+
+	"jan-server/services/llm-api/internal/config"
+)
+
+// Setup initialises OpenTelemetry tracing and metrics and registers both providers globally; without an OTLP
+// endpoint the providers get no exporter. It returns a shutdown function that must be invoked on exit.
+func Setup(ctx context.Context, cfg *config.Config, logger zerolog.Logger) (func(context.Context) error, error) {
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceName(cfg.ServiceName),
+			semconv.ServiceNamespace(cfg.ServiceNamespace),
+			attribute.String("environment", cfg.Environment),
+		),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	var (
+		tracerProvider *sdktrace.TracerProvider
+		meterProvider  *sdkmetric.MeterProvider
+	)
+
+	if cfg.OTLPEndpoint != "" {
+		// Normalize endpoint: bare "otel-collector:4318" and "http://..." stay plaintext; only "https://..." turns TLS on.
+		endpoint := cfg.OTLPEndpoint
+		insecure := true
+		if strings.HasPrefix(endpoint, "https://") {
+			endpoint = strings.TrimPrefix(endpoint, "https://")
+			insecure = false
+		} else {
+			endpoint = strings.TrimPrefix(endpoint, "http://")
+		}
+
+		traceOpts := []otlptracehttp.Option{otlptracehttp.WithEndpoint(endpoint)}
+		metricOpts := []otlpmetrichttp.Option{otlpmetrichttp.WithEndpoint(endpoint)}
+		if insecure {
+			traceOpts = append(traceOpts, otlptracehttp.WithInsecure())
+			metricOpts = append(metricOpts, otlpmetrichttp.WithInsecure())
+		}
+		traceOpts = append(traceOpts, headerOptions(cfg.OTLPHeaders)...)
+		metricOpts = append(metricOpts, metricHeaderOptions(cfg.OTLPHeaders)...)
+		traceExporter, err := otlptracehttp.New(ctx, traceOpts...)
+		if err != nil {
+			return nil, err
+		}
+		meterExporter, err := otlpmetrichttp.New(ctx, metricOpts...)
+		if err != nil {
+			// Release the trace exporter created just above so it does not leak on this error path.
+			_ = traceExporter.Shutdown(ctx)
+			return nil, err
+		}
+
+		tracerProvider = sdktrace.NewTracerProvider(
+			sdktrace.WithResource(res),
+			sdktrace.WithBatcher(traceExporter),
+		)
+		reader := sdkmetric.NewPeriodicReader(meterExporter, sdkmetric.WithInterval(30*time.Second))
+		meterProvider = sdkmetric.NewMeterProvider(
+			sdkmetric.WithReader(reader),
+			sdkmetric.WithResource(res),
+		)
+	} else {
+		tracerProvider = sdktrace.NewTracerProvider(sdktrace.WithResource(res))
+		meterProvider = sdkmetric.NewMeterProvider(sdkmetric.WithResource(res))
+	}
+
+	otel.SetTracerProvider(tracerProvider)
+	// Register the meter provider too; otherwise instruments obtained via otel.Meter stay no-ops and nothing is exported.
+	otel.SetMeterProvider(meterProvider)
+
+	shutdown := func(ctx context.Context) error {
+		var shutdownErr error
+		if err := meterProvider.Shutdown(ctx); err != nil {
+			logger.Error().Err(err).Msg("shutdown meter provider")
+			shutdownErr = err
+		}
+		if err := tracerProvider.Shutdown(ctx); err != nil {
+			logger.Error().Err(err).Msg("shutdown tracer provider")
+			if shutdownErr == nil {
+				shutdownErr = err
+			}
+		}
+		return shutdownErr
+	}
+	return shutdown, nil
+}
+
+// headerOptions converts a raw "k1=v1,k2=v2" header string into trace exporter options.
+// It returns nil when raw is empty or contains no usable pair.
+func headerOptions(raw string) []otlptracehttp.Option {
+	if raw == "" {
+		return nil
+	}
+	parsed := parseHeaders(raw)
+	if len(parsed) > 0 {
+		return []otlptracehttp.Option{otlptracehttp.WithHeaders(parsed)}
+	}
+	return nil
+}
+
+func metricHeaderOptions(raw string) []otlpmetrichttp.Option {
+	if raw == "" {
+		return nil
+	}
+	headers := parseHeaders(raw)
+	if len(headers) == 0 {
+		return nil
+	}
+	return []otlpmetrichttp.Option{otlpmetrichttp.WithHeaders(headers)}
+}
+
+// parseHeaders splits "k1=v1,k2=v2" into a map, trimming whitespace around every key and value.
+// Pairs lacking "=", a key, or a value are dropped; a repeated key keeps its last value.
+func parseHeaders(raw string) map[string]string {
+	headers := map[string]string{}
+	for _, entry := range strings.Split(raw, ",") {
+		// Split on the first "=" only, so a value may itself contain "=".
+		kv := strings.SplitN(strings.TrimSpace(entry), "=", 2)
+		if len(kv) == 2 {
+			if name, val := strings.TrimSpace(kv[0]), strings.TrimSpace(kv[1]); name != "" && val != "" {
+				headers[name] = val
+			}
+		}
+	}
+	return headers
+}
+
+// NoopTracer returns the tracer named "noop" from the global TracerProvider. NOTE(review): this only behaves as a no-op while no SDK provider has been registered globally — confirm callers expect that.
+func NoopTracer() trace.Tracer {
+	return otel.Tracer("noop")
+}
diff --git a/services/llm-api/internal/infrastructure/observability/tracing.go b/services/llm-api/internal/infrastructure/observability/tracing.go
new file mode 100644
index 00000000..c1773771
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/observability/tracing.go
@@ -0,0 +1,67 @@
+package observability
+
+import (
+	"context"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
+)
+
+// StartSpan starts a span named spanName using the global tracer registered under serviceName.
+// It returns the context carrying the new span together with the span itself.
+func StartSpan(ctx context.Context, serviceName, spanName string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
+	return otel.Tracer(serviceName).Start(ctx, spanName, opts...)
+}
+
+// AddSpanAttributes sets the given attributes on the span found in ctx.
+// Nothing happens when that span is not recording.
+func AddSpanAttributes(ctx context.Context, attributes ...attribute.KeyValue) {
+	if active := trace.SpanFromContext(ctx); active.IsRecording() {
+		active.SetAttributes(attributes...)
+	}
+}
+
+// AddSpanEvent appends an event called name, carrying the given attributes, to the span found in ctx.
+// Nothing happens when that span is not recording.
+func AddSpanEvent(ctx context.Context, name string, attributes ...attribute.KeyValue) {
+	if active := trace.SpanFromContext(ctx); active.IsRecording() {
+		active.AddEvent(name, trace.WithAttributes(attributes...))
+	}
+}
+
+// RecordError records err on the span found in ctx and marks that span's status as Error with err's message.
+// A nil err or a non-recording span makes this a no-op.
+func RecordError(ctx context.Context, err error) {
+	if active := trace.SpanFromContext(ctx); err != nil && active.IsRecording() {
+		active.RecordError(err)
+		active.SetStatus(codes.Error, err.Error())
+	}
+}
+
+// SetSpanStatus sets the status code and description of the span found in ctx.
+// Nothing happens when that span is not recording.
+func SetSpanStatus(ctx context.Context, code codes.Code, description string) {
+	if active := trace.SpanFromContext(ctx); active.IsRecording() {
+		active.SetStatus(code, description)
+	}
+}
+
+// GetTraceID returns the trace ID of the span in ctx as a string, or "" when the span context is not valid.
+func GetTraceID(ctx context.Context) string {
+	sc := trace.SpanFromContext(ctx).SpanContext()
+	if !sc.IsValid() {
+		return ""
+	}
+	return sc.TraceID().String()
+}
+
+// GetSpanID returns the span ID of the span in ctx as a string, or "" when the span context is not valid.
+func GetSpanID(ctx context.Context) string {
+	sc := trace.SpanFromContext(ctx).SpanContext()
+	if !sc.IsValid() {
+		return ""
+	}
+	return sc.SpanID().String()
+}
diff --git a/services/llm-api/internal/infrastructure/provider/inference_provider.go b/services/llm-api/internal/infrastructure/provider/inference_provider.go
new file mode 100644
index 00000000..45cec641
--- /dev/null
+++ b/services/llm-api/internal/infrastructure/provider/inference_provider.go
@@ -0,0 +1,104 @@
+// Package provider implements the provider registry and related types for LLM providers
+package provider
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"jan-server/services/llm-api/internal/config"
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/utils/crypto"
+	httpclients "jan-server/services/llm-api/internal/utils/httpclients"
+	chatclient "jan-server/services/llm-api/internal/utils/httpclients/chat"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"resty.dev/v3"
+)
+
+// InferenceProvider builds chat and model HTTP clients for configured LLM providers; it holds no state of its own.
+type InferenceProvider struct{}
+
+// NewInferenceProvider returns a ready-to-use InferenceProvider.
+func NewInferenceProvider() *InferenceProvider { return &InferenceProvider{} }
+
+// GetChatCompletionClient builds a resty client for provider and wraps it in a chat completion client.
+func (ip *InferenceProvider) GetChatCompletionClient(ctx context.Context, provider *domainmodel.Provider) (*chatclient.ChatCompletionClient, error) {
+	restyClient, err := ip.createRestyClient(ctx, provider)
+	if err != nil {
+		return nil, err
+	}
+	// The provider's display name is passed as the client name; requests target provider.BaseURL.
+	return chatclient.NewChatCompletionClient(restyClient, provider.DisplayName, provider.BaseURL), nil
+}
+
+// GetChatModelClient builds a resty client for provider and wraps it in a chat model client.
+func (ip *InferenceProvider) GetChatModelClient(ctx context.Context, provider *domainmodel.Provider) (*chatclient.ChatModelClient, error) {
+	restyClient, err := ip.createRestyClient(ctx, provider)
+	if err != nil {
+		return nil, err
+	}
+	// The provider's display name is passed as the client name; requests target provider.BaseURL.
+	return chatclient.NewChatModelClient(restyClient, provider.DisplayName, provider.BaseURL), nil
+}
+
+// ListModels asks the provider for its models through a freshly built model client and returns the response's Data.
+func (ip *InferenceProvider) ListModels(ctx context.Context, provider *domainmodel.Provider) ([]chatclient.Model, error) {
+	modelClient, err := ip.GetChatModelClient(ctx, provider)
+	if err != nil {
+		return nil, err
+	}
+
+	listing, err := modelClient.ListModels(ctx)
+	if err != nil {
+		return nil, err
+	}
+	return listing.Data, nil
+}
+
+// createRestyClient builds an HTTP client bound to provider.BaseURL and, when a usable API key is stored,
+// attaches it with the header scheme of the provider kind. Keys that decrypt to blank or "none" are ignored.
+func (ip *InferenceProvider) createRestyClient(ctx context.Context, provider *domainmodel.Provider) (*resty.Client, error) {
+	client := httpclients.NewClient(fmt.Sprintf("%sClient", provider.PublicID))
+	client.SetBaseURL(provider.BaseURL)
+	if provider.EncryptedAPIKey == "" {
+		return client, nil
+	}
+	decrypted, err := ip.decryptAPIKey(ctx, provider.EncryptedAPIKey)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "failed to decrypt API key")
+	}
+	// Trim once and reuse: padding or a trailing newline must neither defeat the "none" check nor reach a header.
+	apiKey := strings.TrimSpace(decrypted)
+	if apiKey == "" || strings.EqualFold(apiKey, "none") {
+		return client, nil
+	}
+	switch provider.Kind {
+	case domainmodel.ProviderAzureOpenAI:
+		client.SetHeader("api-key", apiKey)
+	case domainmodel.ProviderAnthropic:
+		client.SetHeader("X-API-Key", apiKey)
+		client.SetHeader("Anthropic-Version", "2023-06-01")
+	default: // Cohere and all remaining kinds authenticate with a bearer token
+		client.SetHeader("Authorization", fmt.Sprintf("Bearer %s", apiKey))
+	}
+	return client, nil
+}
+
+// decryptAPIKey decrypts encryptedAPIKey with the globally configured ModelProviderSecret.
+// An empty input yields "" and no error; a blank secret is reported as an internal error.
+func (ip *InferenceProvider) decryptAPIKey(ctx context.Context, encryptedAPIKey string) (string, error) {
+	if encryptedAPIKey == "" {
+		return "", nil
+	}
+	secret := strings.TrimSpace(config.GetGlobal().ModelProviderSecret)
+	if len(secret) == 0 {
+		return "", platformerrors.NewError(ctx, platformerrors.LayerInfrastructure, platformerrors.ErrorTypeInternal, "MODEL_PROVIDER_SECRET not configured", nil, "8f07ea41-1096-405b-ae2e-cde06564e5bc")
+	}
+
+	decrypted, decryptErr := crypto.DecryptString(secret, encryptedAPIKey)
+	if decryptErr != nil {
+		return "", decryptErr
+	}
+	return decrypted, nil
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/apikeyhandler/api_key_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/apikeyhandler/api_key_handler.go
new file mode 100644
index 00000000..b4cc5d26
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/apikeyhandler/api_key_handler.go
@@ -0,0 +1,181 @@
+package apikeyhandler
+
+import (
+	"errors"
+	"net/http"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/domain/apikey"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+)
+
+// Handler manages API key HTTP endpoints (create, list, revoke, validate) on top of apikey.Service.
+type Handler struct {
+	service *apikey.Service
+	logger  zerolog.Logger
+}
+
+// NewHandler constructs a new API key handler backed by service.
+// Its logger is derived from logger with the field component=api-key-handler attached.
+func NewHandler(service *apikey.Service, logger zerolog.Logger) *Handler {
+	// Derive the component-scoped logger first, then assemble the handler.
+	scoped := logger.With().Str("component", "api-key-handler").Logger()
+	return &Handler{service: service, logger: scoped}
+}
+
+type createRequest struct { // JSON body accepted by Handler.Create
+	Name      string         `json:"name" binding:"required"`
+	ExpiresIn *time.Duration `json:"expires_in,omitempty"` // optional TTL; time.Duration has no custom JSON form, so it decodes from an integer nanosecond count
+}
+
+type apiKeyResponse struct { // JSON representation of an API key as returned by Create and List
+	ID         string     `json:"id"`
+	Name       string     `json:"name"`
+	Prefix     string     `json:"prefix"`
+	Suffix     string     `json:"suffix"`
+	CreatedAt  time.Time  `json:"created_at"`
+	ExpiresAt  time.Time  `json:"expires_at"`
+	RevokedAt  *time.Time `json:"revoked_at,omitempty"` // a non-nil value makes keyStatus report "revoked"
+	LastUsedAt *time.Time `json:"last_used_at,omitempty"`
+	Status     string     `json:"status"`        // "active", "expired" or "revoked", computed by keyStatus
+	Key        string     `json:"key,omitempty"` // secret value; only Create populates it
+}
+
+// Create issues a new API key for the authenticated user and answers 201 with the key metadata plus its secret.
+func (h *Handler) Create(c *gin.Context) {
+	user, ok := authhandler.GetUserFromContext(c)
+	if !ok {
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, nil, "user context missing")
+		return
+	}
+
+	var req createRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		responses.HandleErrorWithStatus(c, http.StatusBadRequest, err, "invalid request payload")
+		return
+	}
+
+	// expires_in is optional; when it is omitted a zero TTL is handed to the service.
+	var ttl time.Duration
+	if req.ExpiresIn != nil {
+		ttl = *req.ExpiresIn
+	}
+	key, secret, err := h.service.CreateKey(c.Request.Context(), user, req.Name, ttl)
+	if err != nil {
+		h.logger.Error().Err(err).Msg("failed to create api key")
+		// errors.Is rather than ==, so a wrapped ErrLimitExceeded still maps to 400 instead of a generic failure.
+		if errors.Is(err, apikey.ErrLimitExceeded) {
+			responses.HandleErrorWithStatus(c, http.StatusBadRequest, err, "api key limit reached")
+			return
+		}
+		responses.HandleError(c, err, "failed to create api key")
+		return
+	}
+	c.JSON(http.StatusCreated, apiKeyResponse{
+		ID:        key.ID,
+		Name:      key.Name,
+		Prefix:    key.Prefix,
+		Suffix:    key.Suffix,
+		CreatedAt: key.CreatedAt,
+		ExpiresAt: key.ExpiresAt,
+		Status:    keyStatus(key),
+		Key:       secret,
+	})
+}
+
+// List returns the authenticated user's API keys as {"items": [...]}; the secret Key field is never populated here.
+func (h *Handler) List(c *gin.Context) {
+	user, ok := authhandler.GetUserFromContext(c)
+	if !ok {
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, nil, "user context missing")
+		return
+	}
+
+	keys, err := h.service.ListKeys(c.Request.Context(), user.ID)
+	if err != nil {
+		h.logger.Error().Err(err).Msg("failed to list api keys")
+		responses.HandleError(c, err, "failed to list api keys")
+		return
+	}
+
+	payload := make([]apiKeyResponse, len(keys))
+	for i := range keys {
+		k := &keys[i]
+		payload[i] = apiKeyResponse{
+			ID:         k.ID,
+			Name:       k.Name,
+			Prefix:     k.Prefix,
+			Suffix:     k.Suffix,
+			CreatedAt:  k.CreatedAt,
+			ExpiresAt:  k.ExpiresAt,
+			RevokedAt:  k.RevokedAt,
+			LastUsedAt: k.LastUsedAt,
+			Status:     keyStatus(k),
+		}
+	}
+	c.JSON(http.StatusOK, gin.H{"items": payload})
+}
+
+// Delete revokes the API key named by the :id path parameter: 204 on success, 404 when the key is not found.
+func (h *Handler) Delete(c *gin.Context) {
+	user, ok := authhandler.GetUserFromContext(c)
+	if !ok {
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, nil, "user context missing")
+		return
+	}
+
+	keyID := c.Param("id")
+	switch keyID {
+	case "", "null":
+		responses.HandleErrorWithStatus(c, http.StatusBadRequest, nil, "api key id required and must be valid UUID")
+		return
+	}
+
+	err := h.service.RevokeKey(c.Request.Context(), user, keyID)
+	switch {
+	case err == nil:
+		c.Status(http.StatusNoContent)
+	case errors.Is(err, apikey.ErrNotFound):
+		responses.HandleErrorWithStatus(c, http.StatusNotFound, err, "api key not found")
+	default:
+		h.logger.Error().Err(err).Str("key_id", keyID).Msg("failed to revoke api key")
+		responses.HandleError(c, err, "failed to revoke api key")
+	}
+}
+
+// Validate validates an API key and returns user information (for Kong plugin)
+func (h *Handler) Validate(c *gin.Context) {
+	var req struct {
+		APIKey string `json:"api_key" binding:"required"`
+	}
+
+	if err := c.ShouldBindJSON(&req); err != nil {
+		responses.HandleErrorWithStatus(c, http.StatusBadRequest, err, "invalid request payload")
+		return
+	}
+
+	userInfo, err := h.service.ValidateAPIKey(c.Request.Context(), req.APIKey)
+	if err != nil {
+		h.logger.Debug().Err(err).Msg("api key validation failed")
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, err, "invalid api key")
+		return
+	}
+
+	c.JSON(http.StatusOK, userInfo)
+}
+
+// keyStatus classifies key as "revoked", "expired" or "active".
+// Revocation takes precedence over expiry; expiry is judged against the current time.
+func keyStatus(key *apikey.APIKey) string {
+	if key.RevokedAt != nil {
+		return "revoked"
+	}
+	if time.Now().After(key.ExpiresAt) {
+		return "expired"
+	}
+	return "active"
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/app_user.go b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/app_user.go
new file mode 100644
index 00000000..a3307364
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/app_user.go
@@ -0,0 +1,82 @@
+package authhandler
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/llm-api/internal/domain/user"
+	middleware "jan-server/services/llm-api/internal/interfaces/httpserver/middlewares"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+	"jan-server/services/llm-api/internal/utils/ptr"
+)
+
+// GetUserFromContext returns the application user stored in the request context; false when none is stored or the value is not a non-nil *user.User.
+func GetUserFromContext(c *gin.Context) (*user.User, bool) {
+	if stored, exists := c.Get(appUserContextKey); exists && stored != nil {
+		if usr, isUser := stored.(*user.User); isUser && usr != nil {
+			return usr, true
+		}
+	}
+	return nil, false
+}
+
+// ensureAppUser returns middleware that maps the authenticated principal to an application user through
+// userService.EnsureUser and stores it in the gin context. It passes straight through when no user service is set or a user is already stored.
+func (h *AuthHandler) ensureAppUser() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		if h.userService == nil {
+			c.Next()
+			return
+		}
+		if _, resolved := GetUserFromContext(c); resolved {
+			c.Next()
+			return
+		}
+
+		principal, ok := middleware.PrincipalFromContext(c)
+		if !ok {
+			responses.HandleNewError(c, platformerrors.ErrorTypeUnauthorized, "authentication required", "5e1d3524-929e-4c7a-9bb7-0a8b74fa6f10")
+			c.Abort()
+			return
+		}
+
+		// Prefer the principal's own issuer; fall back to the "issuer" credential entry.
+		identity := user.Identity{
+			Provider: string(principal.AuthMethod),
+			Issuer:   principal.Issuer,
+			Subject:  principal.Subject,
+		}
+		if identity.Issuer == "" {
+			identity.Issuer = principal.Credentials["issuer"]
+		}
+		if identity.Issuer == "" || identity.Subject == "" {
+			responses.HandleNewError(c, platformerrors.ErrorTypeUnauthorized, "invalid user identity", "a6c6d3d0-5ca3-4235-9d54-8c4af3b04d62")
+			c.Abort()
+			return
+		}
+
+		// Optional profile fields are copied only when they are non-empty.
+		if principal.Username != "" {
+			identity.Username = ptr.ToString(principal.Username)
+		}
+		if principal.Email != "" {
+			identity.Email = ptr.ToString(principal.Email)
+		}
+		if principal.Name != "" {
+			identity.Name = ptr.ToString(principal.Name)
+		}
+		if picture := principal.Credentials["picture"]; picture != "" {
+			identity.Picture = ptr.ToString(picture)
+		}
+
+		appUser, err := h.userService.EnsureUser(c.Request.Context(), identity)
+		if err != nil {
+			h.logger.Error().Err(err).Msg("failed to ensure user from principal")
+			responses.HandleNewError(c, platformerrors.ErrorTypeInternal, "unable to resolve user identity", "7f6b30e8-6dc0-4af9-b42f-6fd717fe5a0c")
+			c.Abort()
+			return
+		}
+		c.Set(appUserContextKey, appUser)
+		c.Next()
+	}
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/auth.go b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/auth.go
new file mode 100644
index 00000000..0d96b0d4
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/auth.go
@@ -0,0 +1,28 @@
+// Package authhandler provides authentication handlers
+package authhandler
+
+import "github.com/gin-gonic/gin"
+
+// AuthMiddleware is a placeholder for authentication middleware; it carries no state and its checks are not implemented yet.
+type AuthMiddleware struct{}
+
+// NewAuthMiddleware returns a pointer to a fresh, zero-value AuthMiddleware.
+func NewAuthMiddleware() *AuthMiddleware {
+	return new(AuthMiddleware)
+}
+
+// RequireAuth returns a middleware intended to require authentication. WARNING: it is currently a stub that lets every request through unchecked.
+func (a *AuthMiddleware) RequireAuth() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		// TODO: Implement actual auth check; until then the request is passed straight to the next handler
+		c.Next()
+	}
+}
+
+// OptionalAuth returns a middleware intended to check authentication when present. It is currently a stub that inspects nothing.
+func (a *AuthMiddleware) OptionalAuth() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		// TODO: Implement optional auth check; until then the request is passed straight to the next handler
+		c.Next()
+	}
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/auth_extras.go b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/auth_extras.go
new file mode 100644
index 00000000..10172691
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/auth_extras.go
@@ -0,0 +1,37 @@
+package authhandler
+
+import (
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/domain/user"
+)
+
+const appUserContextKey = "app_user" // gin context key under which ensureAppUser stores the resolved *user.User
+
+// AuthHandler coordinates per-request authentication helpers.
+type AuthHandler struct {
+	userService *user.Service // resolves principals to application users; ensureAppUser is a pass-through when nil
+	logger      zerolog.Logger
+}
+
+// NewAuthHandler creates a new auth handler from the given user service and logger.
+func NewAuthHandler(userService *user.Service, logger zerolog.Logger) *AuthHandler {
+	handler := new(AuthHandler)
+	handler.userService = userService
+	handler.logger = logger
+	return handler
+}
+
+// WithAppUserAuthChain returns handlers prefixed with the ensureAppUser middleware, so the
+// authenticated app user is resolved before any of them runs.
+func (h *AuthHandler) WithAppUserAuthChain(handlers ...gin.HandlerFunc) []gin.HandlerFunc {
+	return append([]gin.HandlerFunc{h.ensureAppUser()}, handlers...)
+}
+
+// RequireAuth returns the ensureAppUser middleware: the request proceeds only once an application user has been
+// resolved and stored in the context (or when no user service is configured). Kept for backwards compatibility.
+func (h *AuthHandler) RequireAuth() gin.HandlerFunc {
+	// Build the middleware once here rather than constructing a new closure on every request.
+	return h.ensureAppUser()
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/keycloak_oauth.go b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/keycloak_oauth.go
new file mode 100644
index 00000000..7d0e6909
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/keycloak_oauth.go
@@ -0,0 +1,505 @@
+package authhandler
+
+import (
+	"context"
+	"crypto/rand"
+	"crypto/sha256"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog/log"
+
+	"jan-server/services/llm-api/internal/config"
+)
+
+// AuthRequest stores PKCE parameters for an authorization request
+type AuthRequest struct {
+	State        string    // random CSRF token; also the key under which the request is stored
+	CodeVerifier string    // PKCE verifier kept server-side until the code exchange
+	RedirectURL  string    // frontend URL the callback redirects to
+	CreatedAt    time.Time // creation time, used by the cleanup loop to expire stale entries
+}
+
+// authRequests holds pending authorization requests keyed by state; authStoreOnce ensures the TTL cleanup goroutine starts only once.
+var (
+	authRequests  = &sync.Map{}
+	authStoreOnce sync.Once
+)
+
+// startAuthRequestCleanup starts a goroutine to clean up expired auth requests.
+// Guarded by authStoreOnce, so repeated calls launch the sweeper only once.
+func startAuthRequestCleanup() {
+	authStoreOnce.Do(func() {
+		go func() {
+			ticker := time.NewTicker(time.Minute)
+			defer ticker.Stop()
+			// Every minute, drop entries created more than ten minutes ago.
+			for range ticker.C {
+				now := time.Now()
+				authRequests.Range(func(key, value interface{}) bool {
+					if pending, ok := value.(*AuthRequest); ok && now.Sub(pending.CreatedAt) > 10*time.Minute {
+						authRequests.Delete(key)
+					}
+					return true
+				})
+			}
+		}()
+	})
+}
+
+// generateCodeVerifier generates a cryptographically secure code verifier for PKCE
+func generateCodeVerifier() (string, error) {
+	b := make([]byte, 32)
+	if _, err := rand.Read(b); err != nil {
+		return "", err
+	}
+	return base64.RawURLEncoding.EncodeToString(b), nil
+}
+
+// generateCodeChallenge derives the PKCE S256 challenge: the unpadded base64url encoding of SHA-256(verifier).
+func generateCodeChallenge(verifier string) string {
+	digest := sha256.Sum256([]byte(verifier))
+	return base64.RawURLEncoding.EncodeToString(digest[:])
+}
+
+// KeycloakOAuthHandler handles Keycloak OAuth2/OIDC flow (authorization code with PKCE)
+type KeycloakOAuthHandler struct {
+	keycloakBaseURL   string // Server-to-server URL (e.g., http://keycloak:8085)
+	keycloakPublicURL string // Browser-accessible URL (e.g., http://localhost:8085)
+	realm             string // realm name inserted into the /realms/{realm}/... paths
+	clientID          string // sent as client_id in the authorization request
+	clientSecret      string
+	redirectURI       string // sent as redirect_uri in the authorization request
+}
+
+// NewKeycloakOAuthHandler creates a new Keycloak OAuth handler and starts the shared auth-request cleanup goroutine.
+// A single trailing "/" is trimmed from both URLs; an empty keycloakPublicURL falls back to keycloakBaseURL.
+func NewKeycloakOAuthHandler(
+	keycloakBaseURL string,
+	keycloakPublicURL string,
+	realm string,
+	clientID string,
+	clientSecret string,
+	redirectURI string,
+) *KeycloakOAuthHandler {
+	// Start background cleanup of expired auth requests
+	startAuthRequestCleanup()
+	// Default publicURL to baseURL if not provided
+	if keycloakPublicURL == "" {
+		keycloakPublicURL = keycloakBaseURL
+	}
+
+	return &KeycloakOAuthHandler{
+		keycloakBaseURL:   strings.TrimSuffix(keycloakBaseURL, "/"),
+		keycloakPublicURL: strings.TrimSuffix(keycloakPublicURL, "/"),
+		realm:             realm,
+		clientID:          clientID,
+		clientSecret:      clientSecret,
+		redirectURI:       redirectURI,
+	}
+}
+
+// KeycloakLoginRequest represents the login request
+type KeycloakLoginRequest struct {
+	RedirectURL string `json:"redirect_url" form:"redirect_url"`
+}
+
+// KeycloakTokenResponse represents Keycloak token response; the JSON tags give the wire field names.
+type KeycloakTokenResponse struct {
+	AccessToken      string `json:"access_token"`
+	ExpiresIn        int    `json:"expires_in"`         // presumably seconds, as in OAuth 2.0 token responses — confirm
+	RefreshExpiresIn int    `json:"refresh_expires_in"` // presumably seconds as well — confirm
+	RefreshToken     string `json:"refresh_token"`
+	TokenType        string `json:"token_type"`
+	IDToken          string `json:"id_token,omitempty"`
+	NotBeforePolicy  int    `json:"not-before-policy,omitempty"` // note the hyphenated wire name
+	SessionState     string `json:"session_state,omitempty"`
+	Scope            string `json:"scope,omitempty"`
+}
+
+// LoginResponse represents the login response; its fields mirror a subset of KeycloakTokenResponse.
+type LoginResponse struct {
+	AccessToken  string `json:"access_token"`
+	RefreshToken string `json:"refresh_token"`
+	ExpiresIn    int    `json:"expires_in"`
+	TokenType    string `json:"token_type"`
+	IDToken      string `json:"id_token,omitempty"`
+}
+
+// generateState generates a random state parameter for CSRF protection
+func generateState() (string, error) {
+	b := make([]byte, 32)
+	if _, err := rand.Read(b); err != nil {
+		return "", err
+	}
+	return base64.URLEncoding.EncodeToString(b), nil
+}
+
+// InitiateLogin godoc
+// @Summary Initiate Keycloak OAuth2 login
+// @Description Redirects the user to Keycloak's authorization endpoint to authenticate. Returns the authorization URL for frontend redirection with PKCE.
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param redirect_url query string false "URL to redirect after successful login"
+// @Success 200 {object} object{authorization_url=string,state=string} "Authorization URL and state parameter"
+// @Failure 400 {object} object{error=string} "Malformed redirect_url"
+// @Failure 500 {object} object{error=string} "Failed to generate state or PKCE parameters"
+// @Router /auth/keycloak/login [get]
+func (h *KeycloakOAuthHandler) InitiateLogin(c *gin.Context) {
+	// Generate state for CSRF protection
+	state, err := generateState()
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to generate state"})
+		return
+	}
+
+	// Generate PKCE parameters
+	codeVerifier, err := generateCodeVerifier()
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to generate code verifier"})
+		return
+	}
+	codeChallenge := generateCodeChallenge(codeVerifier)
+
+	// Get redirect URL from query parameter, falling back to the default when absent
+	redirectURL := c.Query("redirect_url")
+	if redirectURL == "" {
+		redirectURL = "https://chat-dev.jan.ai/auth/keycloak/callback"
+	}
+	// Fail fast on a malformed URL: otherwise it is only detected in the callback, after the code was spent.
+	// NOTE(security): redirect_url is caller-controlled and later receives the tokens; consider an allowlist.
+	if _, parseErr := url.Parse(redirectURL); parseErr != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid redirect_url"})
+		return
+	}
+
+	// Store state, code_verifier, and redirect_url for later validation in callback
+	authRequests.Store(state, &AuthRequest{
+		State:        state,
+		CodeVerifier: codeVerifier,
+		RedirectURL:  redirectURL,
+		CreatedAt:    time.Now(),
+	})
+
+	// Build authorization URL with PKCE using public URL (browser-accessible)
+	authURL := fmt.Sprintf("%s/realms/%s/protocol/openid-connect/auth", h.keycloakPublicURL, h.realm)
+
+	params := url.Values{}
+	params.Add("client_id", h.clientID)
+	params.Add("redirect_uri", h.redirectURI)
+	params.Add("response_type", "code")
+	params.Add("scope", "openid profile email")
+	params.Add("state", state)
+	params.Add("code_challenge", codeChallenge)
+	params.Add("code_challenge_method", "S256")
+
+	fullAuthURL := fmt.Sprintf("%s?%s", authURL, params.Encode())
+
+	// Return the authorization URL for frontend to redirect
+	c.JSON(http.StatusOK, gin.H{
+		"authorization_url": fullAuthURL,
+		"state":             state,
+	})
+}
+
+// HandleCallback godoc
+// @Summary Handle Keycloak OAuth2 callback
+// @Description Handles the OAuth2 callback from Keycloak, exchanges authorization code for tokens using PKCE
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param code query string true "Authorization code from Keycloak"
+// @Param state query string true "State parameter for CSRF protection"
+// @Param redirect_url query string false "Frontend URL to redirect after successful authentication"
+// @Param error query string false "Error from Keycloak (if authentication failed)"
+// @Param error_description query string false "Error description from Keycloak"
+// @Success 302 "Redirects to frontend URL with tokens in URL fragment"
+// @Failure 400 {object} object{error=string} "Missing code or state, or Keycloak error"
+// @Failure 401 {object} object{error=string} "Invalid state parameter"
+// @Failure 500 {object} object{error=string} "Failed to exchange code for tokens"
+// @Router /auth/keycloak/callback [get]
+func (h *KeycloakOAuthHandler) HandleCallback(c *gin.Context) {
+	log.Debug().
+		Str("path", c.Request.URL.Path).
+		Str("query", c.Request.URL.RawQuery).
+		Msg("OAuth callback received")
+
+	// Check for errors from Keycloak first
+	keycloakError := c.Query("error")
+	if keycloakError != "" {
+		errorDescription := c.Query("error_description")
+		log.Error().
+			Str("error", keycloakError).
+			Str("error_description", errorDescription).
+			Msg("Keycloak returned error in callback")
+
+		// Redirect to homepage with error in URL
+		homeURL := "https://chat-dev.jan.ai/"
+		redirectURL := fmt.Sprintf("%s?error=%s&error_description=%s",
+			homeURL,
+			url.QueryEscape(keycloakError),
+			url.QueryEscape(errorDescription))
+		c.Redirect(http.StatusFound, redirectURL)
+		return
+	}
+
+	code := c.Query("code")
+	state := c.Query("state")
+
+	log.Debug().
+		Bool("has_code", code != "").
+		Bool("has_state", state != "").
+		Str("session_state", c.Query("session_state")).
+		Str("iss", c.Query("iss")).
+		Msg("OAuth callback parameters")
+
+	if code == "" || state == "" {
+		log.Error().Msg("Missing code or state parameter in callback")
+
+		// Redirect to homepage with error in URL
+		homeURL := "https://chat-dev.jan.ai/"
+		redirectURL := fmt.Sprintf("%s?error=%s&error_description=%s",
+			homeURL,
+			url.QueryEscape("invalid_request"),
+			url.QueryEscape("Missing code or state parameter"))
+		c.Redirect(http.StatusFound, redirectURL)
+		return
+	}
+
+	// Validate state and retrieve code_verifier from storage
+	authRequestVal, ok := authRequests.Load(state)
+	if !ok {
+		log.Error().
+			Str("state", state).
+			Msg("Invalid state parameter - not found or expired")
+
+		// Redirect to homepage with error in URL
+		homeURL := "https://chat-dev.jan.ai/"
+		redirectURL := fmt.Sprintf("%s?error=%s&error_description=%s",
+			homeURL,
+			url.QueryEscape("invalid_state"),
+			url.QueryEscape("State not found or expired"))
+		c.Redirect(http.StatusFound, redirectURL)
+		return
+	}
+
+	authRequest, ok := authRequestVal.(*AuthRequest)
+	if !ok {
+		log.Error().Msg("Invalid auth request data structure")
+
+		// Redirect to homepage with error in URL
+		homeURL := "https://chat-dev.jan.ai/"
+		redirectURL := fmt.Sprintf("%s?error=%s&error_description=%s",
+			homeURL,
+			url.QueryEscape("server_error"),
+			url.QueryEscape("Invalid auth request data"))
+		c.Redirect(http.StatusFound, redirectURL)
+		return
+	}
+
+	log.Debug().
+		Str("redirect_url", authRequest.RedirectURL).
+		Time("created_at", authRequest.CreatedAt).
+		Msg("Auth request validated successfully")
+
+	// Remove used state from storage
+	authRequests.Delete(state)
+
+	// Exchange code for tokens using PKCE
+	log.Debug().Msg("Exchanging authorization code for tokens")
+	tokenResp, err := h.exchangeCodeForTokens(code, authRequest.CodeVerifier)
+	if err != nil {
+		log.Error().
+			Err(err).
+			Msg("Failed to exchange code for tokens")
+
+		// Use stored redirect URL or fallback to homepage
+		homeURL := authRequest.RedirectURL
+		if homeURL == "" {
+			homeURL = "https://chat-dev.jan.ai/"
+		}
+		// Remove fragment from URL if exists
+		if parsedHome, parseErr := url.Parse(homeURL); parseErr == nil {
+			parsedHome.Fragment = ""
+			homeURL = parsedHome.String()
+		}
+		redirectURL := fmt.Sprintf("%s?error=%s&error_description=%s",
+			homeURL,
+			url.QueryEscape("token_exchange_failed"),
+			url.QueryEscape("Failed to exchange code for tokens"))
+		c.Redirect(http.StatusFound, redirectURL)
+		return
+	}
+
+	log.Debug().
+		Int("expires_in", tokenResp.ExpiresIn).
+		Str("token_type", tokenResp.TokenType).
+		Bool("has_id_token", tokenResp.IDToken != "").
+		Bool("has_refresh_token", tokenResp.RefreshToken != "").
+		Str("scope", tokenResp.Scope).
+		Msg("Token exchange successful")
+
+	log.Debug().
+		Int("expires_in", tokenResp.ExpiresIn).
+		Str("token_type", tokenResp.TokenType).
+		Bool("has_id_token", tokenResp.IDToken != "").
+		Bool("has_refresh_token", tokenResp.RefreshToken != "").
+		Str("scope", tokenResp.Scope).
+		Msg("Token exchange successful")
+
+	// Use the redirect URL that was stored during InitiateLogin
+	redirectURL := authRequest.RedirectURL
+	if redirectURL == "" {
+		redirectURL = "https://chat-dev.jan.ai/auth/keycloak/callback"
+	}
+
+	log.Debug().
+		Str("redirect_url", redirectURL).
+		Msg("Preparing redirect to frontend")
+
+	// Parse the redirect URL to append tokens in fragment
+	parsedURL, err := url.Parse(redirectURL)
+	if err != nil {
+		log.Error().
+			Err(err).
+			Str("redirect_url", redirectURL).
+			Msg("Invalid redirect URL")
+
+		// Fallback to homepage with error
+		homeURL := "https://chat-dev.jan.ai/"
+		fallbackURL := fmt.Sprintf("%s?error=%s&error_description=%s",
+			homeURL,
+			url.QueryEscape("invalid_redirect"),
+			url.QueryEscape("Invalid redirect URL"))
+		c.Redirect(http.StatusFound, fallbackURL)
+		return
+	}
+
+	// Build token fragment for frontend extraction
+	fragment := fmt.Sprintf("access_token=%s&refresh_token=%s&expires_in=%d&token_type=%s",
+		url.QueryEscape(tokenResp.AccessToken),
+		url.QueryEscape(tokenResp.RefreshToken),
+		tokenResp.ExpiresIn,
+		url.QueryEscape(tokenResp.TokenType),
+	)
+
+	// client don't need id_token, so we comment it out as kong rejects long redirect URLs
+	// if tokenResp.IDToken != "" {
+	// 	fragment += fmt.Sprintf("&id_token=%s", url.QueryEscape(tokenResp.IDToken))
+	// }
+
+	parsedURL.Fragment = fragment
+
+	log.Debug().
+		Str("final_redirect_url", parsedURL.String()).
+		Msg("Redirecting to frontend with tokens")
+
+	// Redirect to frontend with tokens in URL fragment
+	c.Redirect(http.StatusFound, parsedURL.String())
+}
+
+// exchangeCodeForTokens redeems an authorization code at the Keycloak token
+// endpoint and returns the parsed token response.
+//
+// The request is a form-encoded POST carrying grant_type=authorization_code,
+// the code, the handler's redirect URI and client ID, and the PKCE
+// code_verifier. client_secret is sent only when the handler has one
+// configured. The whole exchange is bounded by a 10 second timeout.
+//
+// An error is returned when the request cannot be built or sent, when the
+// response body cannot be read, when Keycloak answers with a status other
+// than 200 (the status and body are included in the error), or when the body
+// cannot be decoded into a KeycloakTokenResponse.
+func (h *KeycloakOAuthHandler) exchangeCodeForTokens(code string, codeVerifier string) (*KeycloakTokenResponse, error) {
+	tokenURL := fmt.Sprintf("%s/realms/%s/protocol/openid-connect/token",
+		h.keycloakBaseURL, h.realm)
+
+	log.Debug().
+		Str("token_url", tokenURL).
+		Str("client_id", h.clientID).
+		Bool("has_client_secret", h.clientSecret != "").
+		Str("redirect_uri", h.redirectURI).
+		Msg("Preparing token exchange request")
+
+	data := url.Values{}
+	data.Set("grant_type", "authorization_code")
+	data.Set("code", code)
+	data.Set("redirect_uri", h.redirectURI)
+	data.Set("client_id", h.clientID)
+	data.Set("code_verifier", codeVerifier) // PKCE parameter
+	if h.clientSecret != "" {
+		data.Set("client_secret", h.clientSecret)
+	}
+
+	// The http.Client below has no timeout of its own; this context is what
+	// bounds the round trip.
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, tokenURL, strings.NewReader(data.Encode()))
+	if err != nil {
+		log.Error().Err(err).Msg("Failed to create token exchange request")
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Error().Err(err).Msg("Token exchange HTTP request failed")
+		return nil, fmt.Errorf("failed to exchange code: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		log.Error().Err(err).Msg("Failed to read token response body")
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+
+	log.Debug().
+		Int("status_code", resp.StatusCode).
+		Str("content_type", resp.Header.Get("Content-Type")).
+		Int("body_length", len(body)).
+		Msg("Token exchange response received")
+
+	if resp.StatusCode != http.StatusOK {
+		log.Error().
+			Int("status_code", resp.StatusCode).
+			Str("response_body", string(body)).
+			Msg("Token exchange failed")
+		return nil, fmt.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	var tokenResp KeycloakTokenResponse
+	if err := json.Unmarshal(body, &tokenResp); err != nil {
+		// This branch is only reached for a 200 response, whose body carries
+		// the issued tokens. Never write that body to the logs, even when it
+		// fails to parse; record its size instead.
+		log.Error().
+			Err(err).
+			Int("body_length", len(body)).
+			Msg("Failed to parse token response JSON")
+		return nil, fmt.Errorf("failed to parse token response: %w", err)
+	}
+
+	log.Debug().
+		Str("token_type", tokenResp.TokenType).
+		Int("expires_in", tokenResp.ExpiresIn).
+		Bool("has_refresh_token", tokenResp.RefreshToken != "").
+		Bool("has_id_token", tokenResp.IDToken != "").
+		Str("scope", tokenResp.Scope).
+		Msg("Token response parsed successfully")
+
+	return &tokenResp, nil
+}
+
+// ProvideKeycloakOAuthHandler provides a KeycloakOAuthHandler for dependency injection
+func ProvideKeycloakOAuthHandler(cfg *config.Config) *KeycloakOAuthHandler {
+	clientSecret := ""
+	if cfg.Client == cfg.BackendClientID {
+		clientSecret = cfg.BackendClientSecret
+	}
+	return NewKeycloakOAuthHandler(
+		cfg.KeycloakBaseURL,
+		cfg.KeycloakPublicURL,
+		cfg.KeycloakRealm,
+		cfg.Client,
+		clientSecret,
+		cfg.OAuthRedirectURI,
+	)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/keycloak_refresh.go b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/keycloak_refresh.go
new file mode 100644
index 00000000..b8a7b8f8
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/keycloak_refresh.go
@@ -0,0 +1,207 @@
+package authhandler
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+)
+
+// RefreshTokenRequest is the JSON request body bound by RefreshKeycloakToken
+// and RevokeKeycloakToken. Both handlers answer 400 when binding it fails.
+type RefreshTokenRequest struct {
+	// RefreshToken is the token to refresh or revoke; tagged as required for binding.
+	RefreshToken string `json:"refresh_token" binding:"required"`
+}
+
+// RefreshTokenResponse is the token set returned to the caller after a
+// successful refresh. RefreshKeycloakToken decodes Keycloak's token endpoint
+// response body into this struct and writes it back as JSON, so only the
+// fields listed here are passed through.
+type RefreshTokenResponse struct {
+	AccessToken  string `json:"access_token"`
+	RefreshToken string `json:"refresh_token"`
+	ExpiresIn    int    `json:"expires_in"`
+	TokenType    string `json:"token_type"`
+}
+
+// RefreshKeycloakToken exchanges a refresh token for a new token set at the
+// Keycloak token endpoint.
+//
+// It expects a JSON body matching RefreshTokenRequest and responds with:
+//   - 400 when the body cannot be bound,
+//   - 500 when the upstream request cannot be built, sent or read, or when
+//     the upstream body cannot be decoded,
+//   - Keycloak's own status code (with its body under "details") when
+//     Keycloak answers with anything other than 200,
+//   - 200 with a RefreshTokenResponse on success.
+func (h *KeycloakOAuthHandler) RefreshKeycloakToken(c *gin.Context) {
+	var req RefreshTokenRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": err.Error()})
+		return
+	}
+
+	// Prepare token refresh request
+	tokenURL := fmt.Sprintf("%s/realms/%s/protocol/openid-connect/token", h.keycloakBaseURL, h.realm)
+
+	data := url.Values{}
+	data.Set("grant_type", "refresh_token")
+	data.Set("client_id", h.clientID)
+	data.Set("refresh_token", req.RefreshToken)
+
+	// Add client secret if available (for confidential clients)
+	if h.clientSecret != "" {
+		data.Set("client_secret", h.clientSecret)
+	}
+
+	// Bind the upstream call to the inbound request's context so that it is
+	// abandoned when the caller disconnects or the request is cancelled,
+	// instead of waiting on Keycloak with no bound at all.
+	httpReq, err := http.NewRequestWithContext(c.Request.Context(), http.MethodPost, tokenURL, strings.NewReader(data.Encode()))
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create refresh request"})
+		return
+	}
+
+	httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	client := &http.Client{}
+	resp, err := client.Do(httpReq)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to refresh token", "details": err.Error()})
+		return
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read response"})
+		return
+	}
+
+	// Check for errors from Keycloak; its status code is relayed unchanged.
+	if resp.StatusCode != http.StatusOK {
+		c.JSON(resp.StatusCode, gin.H{
+			"error":   "Token refresh failed",
+			"details": string(body),
+		})
+		return
+	}
+
+	// Parse token response
+	var tokenResp RefreshTokenResponse
+	if err := json.Unmarshal(body, &tokenResp); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to parse token response"})
+		return
+	}
+
+	// Return new tokens
+	c.JSON(http.StatusOK, tokenResp)
+}
+
+// ValidateAccessToken checks a bearer access token by presenting it to
+// Keycloak's userinfo endpoint.
+//
+// The token is read from the Authorization header, which must use the
+// "Bearer " prefix. Responses:
+//   - 401 when the header is missing, lacks the prefix, carries an empty
+//     token, or when Keycloak answers with anything other than 200,
+//   - 500 when the upstream request cannot be built, sent or read, or when
+//     the userinfo body is not valid JSON,
+//   - 200 with {"valid": true, "user_info": ...} on success.
+func (h *KeycloakOAuthHandler) ValidateAccessToken(c *gin.Context) {
+	// Get token from Authorization header
+	authHeader := c.GetHeader("Authorization")
+	if authHeader == "" {
+		c.JSON(http.StatusUnauthorized, gin.H{"error": "Missing Authorization header"})
+		return
+	}
+
+	// Extract Bearer token. TrimPrefix returns its input unchanged when the
+	// prefix is absent, which is how a non-Bearer header is detected. An empty
+	// token ("Bearer " with nothing after it) is rejected here as well rather
+	// than being sent upstream.
+	token := strings.TrimPrefix(authHeader, "Bearer ")
+	if token == authHeader || token == "" {
+		c.JSON(http.StatusUnauthorized, gin.H{"error": "Invalid Authorization header format"})
+		return
+	}
+
+	// Call Keycloak's userinfo endpoint to validate token
+	userinfoURL := fmt.Sprintf("%s/realms/%s/protocol/openid-connect/userinfo", h.keycloakBaseURL, h.realm)
+
+	// Bind the upstream call to the inbound request's context so that it is
+	// abandoned when the caller disconnects or the request is cancelled.
+	httpReq, err := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, userinfoURL, nil)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create validation request"})
+		return
+	}
+
+	httpReq.Header.Set("Authorization", "Bearer "+token)
+
+	client := &http.Client{}
+	resp, err := client.Do(httpReq)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to validate token", "details": err.Error()})
+		return
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read response"})
+		return
+	}
+
+	// Check validation result; any non-200 from Keycloak is reported as 401.
+	if resp.StatusCode != http.StatusOK {
+		c.JSON(http.StatusUnauthorized, gin.H{
+			"error":   "Token validation failed",
+			"details": string(body),
+		})
+		return
+	}
+
+	// Parse userinfo
+	var userInfo map[string]interface{}
+	if err := json.Unmarshal(body, &userInfo); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to parse user info"})
+		return
+	}
+
+	// Return validation success with user info
+	c.JSON(http.StatusOK, gin.H{
+		"valid":     true,
+		"user_info": userInfo,
+	})
+}
+
+// RevokeKeycloakToken revokes a refresh token at Keycloak's revocation
+// endpoint.
+//
+// It expects a JSON body matching RefreshTokenRequest and responds with:
+//   - 400 when the body cannot be bound,
+//   - 500 when the upstream request cannot be built or sent,
+//   - Keycloak's own status code (with its body under "details") when
+//     Keycloak answers with anything other than 200 or 204,
+//   - 200 with a confirmation message on success.
+func (h *KeycloakOAuthHandler) RevokeKeycloakToken(c *gin.Context) {
+	var req RefreshTokenRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body", "details": err.Error()})
+		return
+	}
+
+	// Prepare token revocation request
+	revokeURL := fmt.Sprintf("%s/realms/%s/protocol/openid-connect/revoke", h.keycloakBaseURL, h.realm)
+
+	data := url.Values{}
+	data.Set("client_id", h.clientID)
+	data.Set("token", req.RefreshToken)
+	data.Set("token_type_hint", "refresh_token")
+
+	// Add client secret if available
+	if h.clientSecret != "" {
+		data.Set("client_secret", h.clientSecret)
+	}
+
+	// Bind the upstream call to the inbound request's context so that it is
+	// abandoned when the caller disconnects or the request is cancelled,
+	// instead of waiting on Keycloak with no bound at all.
+	httpReq, err := http.NewRequestWithContext(c.Request.Context(), http.MethodPost, revokeURL, strings.NewReader(data.Encode()))
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to create revocation request"})
+		return
+	}
+
+	httpReq.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+
+	client := &http.Client{}
+	resp, err := client.Do(httpReq)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to revoke token", "details": err.Error()})
+		return
+	}
+	defer resp.Body.Close()
+
+	// Check revocation result
+	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent {
+		// The body is only used as diagnostic detail, so a read failure is
+		// tolerated and simply yields empty details.
+		body, _ := io.ReadAll(resp.Body)
+		c.JSON(resp.StatusCode, gin.H{
+			"error":   "Token revocation failed",
+			"details": string(body),
+		})
+		return
+	}
+
+	// Return success
+	c.JSON(http.StatusOK, gin.H{
+		"message": "Token revoked successfully",
+	})
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/token_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/token_handler.go
new file mode 100644
index 00000000..2d3dfeed
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/authhandler/token_handler.go
@@ -0,0 +1,263 @@
+package authhandler
+
+import (
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/infrastructure/keycloak"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/middlewares"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+)
+
+const (
+	// RefreshTokenCookieName is the name of the cookie that stores the refresh token.
+	// Logout and RefreshToken read the token from this cookie, and Logout
+	// overwrites it with an expired, empty cookie to clear it.
+	RefreshTokenCookieName = "refresh_token"
+)
+
+// TokenHandler handles token-related operations (logout, refresh, etc.)
+type TokenHandler struct {
+	// kc is the Keycloak client used to end sessions (LogoutUser) and to
+	// refresh tokens (RefreshToken).
+	kc     *keycloak.Client
+	// logger receives the handler's diagnostic output.
+	logger zerolog.Logger
+}
+
+// NewTokenHandler builds a TokenHandler that talks to Keycloak through kc and
+// writes its diagnostics to logger.
+func NewTokenHandler(kc *keycloak.Client, logger zerolog.Logger) *TokenHandler {
+	handler := new(TokenHandler)
+	handler.kc = kc
+	handler.logger = logger
+	return handler
+}
+
+// AccessTokenResponse is the JSON body returned by RefreshToken after a
+// successful refresh.
+type AccessTokenResponse struct {
+	AccessToken  string    `json:"access_token"`
+	RefreshToken string    `json:"refresh_token"`
+	TokenType    string    `json:"token_type"`
+	// ExpiresIn is the access token lifetime in seconds.
+	ExpiresIn    int       `json:"expires_in"`
+	// ExpiresAt is computed by RefreshToken as the server's current time plus ExpiresIn seconds.
+	ExpiresAt    time.Time `json:"expires_at"`
+}
+
+// GetMeResponse is the JSON body returned by GetMe. Its fields are copied
+// from the authenticated principal found in the request context.
+type GetMeResponse struct {
+	ID         string `json:"id"`
+	Username   string `json:"username,omitempty"`
+	Email      string `json:"email,omitempty"`
+	Subject    string `json:"subject"`
+	AuthMethod string `json:"auth_method"`
+	// Name is the principal's name; GetMe substitutes the username when the name is empty.
+	Name       string `json:"name,omitempty"`
+}
+
+// Logout removes authentication tokens
+// @Summary Logout
+// @Description Remove refresh tokens to perform logout and invalidate Keycloak session. Accepts refresh token from cookie, Authorization header, or request body.
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param refresh_token body string false "Refresh token to revoke"
+// @Param Authorization header string false "Bearer refresh_token"
+// @Success 200 {object} map[string]string "Successfully logged out"
+// @Failure 400 {object} responses.ErrorResponse "Bad Request"
+// @Router /v1/auth/logout [get]
+// @Router /v1/auth/logout [post]
+func (h *TokenHandler) Logout(c *gin.Context) {
+	// Flow: locate a refresh token (cookie, then JSON body, then Authorization
+	// header), ask Keycloak to end the session when one was found, then clear
+	// the refresh token cookie and answer 200. A Keycloak failure is logged
+	// but never fails the request, so local logout always completes.
+	h.logger.Info().Str("method", c.Request.Method).Str("path", c.Request.URL.Path).Msg("[LOGOUT] Processing logout request")
+
+	// tokenPreview returns at most the first 30 bytes of a token followed by
+	// "...". Tokens are client-supplied and may be shorter than 30 bytes, in
+	// which case a plain token[:30] slice expression would panic.
+	tokenPreview := func(token string) string {
+		const previewLen = 30
+		if len(token) > previewLen {
+			token = token[:previewLen]
+		}
+		return token + "..."
+	}
+
+	// Log all cookies for debugging (names only, never values)
+	allCookies := []string{}
+	for _, cookie := range c.Request.Cookies() {
+		allCookies = append(allCookies, cookie.Name)
+	}
+	h.logger.Info().
+		Strs("available_cookies", allCookies).
+		Int("cookie_count", len(allCookies)).
+		Msg("[LOGOUT] Received cookies")
+
+	var refreshToken string
+
+	// Try to get refresh token from multiple sources in order of priority:
+	// 1. From refresh_token cookie (standard flow)
+	refreshTokenCookie, err := c.Cookie(RefreshTokenCookieName)
+	if err == nil && refreshTokenCookie != "" {
+		refreshToken = refreshTokenCookie
+		h.logger.Info().
+			Str("source", "cookie").
+			Str("token_preview", tokenPreview(refreshToken)).
+			Msg("[LOGOUT] Found refresh token in cookie")
+	} else {
+		h.logger.Debug().
+			Bool("cookie_exists", err == nil).
+			Msg("[LOGOUT] No refresh token in cookie")
+	}
+
+	// 2. From request body (JSON) - Higher priority than header because body is explicit
+	if refreshToken == "" {
+		var body struct {
+			RefreshToken string `json:"refresh_token"`
+		}
+		if err := c.ShouldBindJSON(&body); err == nil && body.RefreshToken != "" {
+			refreshToken = body.RefreshToken
+			h.logger.Info().
+				Str("source", "body").
+				Str("token_preview", tokenPreview(refreshToken)).
+				Int("token_length", len(refreshToken)).
+				Msg("[LOGOUT] Found refresh token in request body")
+		} else {
+			h.logger.Debug().
+				Bool("bind_success", err == nil).
+				Str("bind_error", fmt.Sprintf("%v", err)).
+				Msg("[LOGOUT] No refresh token in request body")
+		}
+	}
+
+	// 3. From Authorization header (Bearer token) - Lower priority, may contain access token
+	// NOTE: Authorization header often contains ACCESS TOKEN, not REFRESH TOKEN
+	// Only use this as last resort
+	if refreshToken == "" {
+		authHeader := c.GetHeader("Authorization")
+		h.logger.Debug().
+			Str("auth_header_preview", func() string {
+				if len(authHeader) > 40 {
+					return authHeader[:40] + "..."
+				}
+				return authHeader
+			}()).
+			Bool("has_bearer", len(authHeader) > 7 && authHeader[:7] == "Bearer ").
+			Msg("[LOGOUT] Checking Authorization header")
+
+		if authHeader != "" && len(authHeader) > 7 && authHeader[:7] == "Bearer " {
+			refreshToken = authHeader[7:]
+			h.logger.Warn().
+				Str("source", "header").
+				Str("token_preview", tokenPreview(refreshToken)).
+				Int("token_length", len(refreshToken)).
+				Msg("[LOGOUT] Using token from Authorization header (may be access token, not refresh token)")
+		} else {
+			h.logger.Debug().
+				Bool("header_exists", authHeader != "").
+				Msg("[LOGOUT] No valid Bearer token in Authorization header")
+		}
+	}
+
+	// Call Keycloak logout endpoint to invalidate the session
+	if refreshToken != "" {
+		h.logger.Info().
+			Str("token_length", fmt.Sprintf("%d", len(refreshToken))).
+			Msg("[LOGOUT] Calling Keycloak logout endpoint")
+
+		ctx := c.Request.Context()
+		logoutErr := h.kc.LogoutUser(ctx, refreshToken)
+		if logoutErr != nil {
+			h.logger.Error().
+				Err(logoutErr).
+				Str("error_type", fmt.Sprintf("%T", logoutErr)).
+				Msg("[LOGOUT] Failed to logout from Keycloak, but continuing with local logout")
+		} else {
+			h.logger.Info().Msg("[LOGOUT] Successfully logged out from Keycloak")
+		}
+	} else {
+		h.logger.Warn().Msg("[LOGOUT] No refresh token found, skipping Keycloak logout")
+	}
+
+	// Clear the refresh token cookie locally
+	http.SetCookie(c.Writer, responses.NewCookieWithSecurity(
+		RefreshTokenCookieName,
+		"",
+		time.Unix(0, 0), // Set expiration to past time to delete cookie
+	))
+
+	c.JSON(http.StatusOK, gin.H{"status": "logged out"})
+}
+
+// GetMe returns the current authenticated user's information
+// @Summary Get user profile
+// @Description Retrieves the profile of the authenticated user
+// @Tags Authentication API
+// @Security BearerAuth
+// @Produce json
+// @Success 200 {object} GetMeResponse "Successfully retrieved user profile"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Router /v1/auth/me [get]
+func (h *TokenHandler) GetMe(c *gin.Context) {
+	// The principal is looked up in the request context; without one the
+	// request is answered with 401.
+	principal, found := middlewares.PrincipalFromContext(c)
+	if !found {
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, nil, "principal missing")
+		return
+	}
+
+	profile := GetMeResponse{
+		ID:         principal.ID,
+		Username:   principal.Username,
+		Name:       principal.Name,
+		Email:      principal.Email,
+		Subject:    principal.Subject,
+		AuthMethod: string(principal.AuthMethod),
+	}
+	// Fall back to the username when the principal carries no name.
+	if profile.Name == "" {
+		profile.Name = principal.Username
+	}
+
+	c.JSON(http.StatusOK, profile)
+}
+
+// RefreshToken exchanges a refresh token for a new access token
+// @Summary Refresh an access token
+// @Description Use a valid refresh token to obtain a new access token
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param refresh_token body string false "Refresh token (can also be in Authorization header)"
+// @Success 200 {object} AccessTokenResponse "Successfully refreshed the access token"
+// @Failure 400 {object} responses.ErrorResponse "Bad Request"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized"
+// @Router /v1/auth/refresh-token [post]
+func (h *TokenHandler) RefreshToken(c *gin.Context) {
+	// The refresh token is taken from the first source that provides one:
+	// JSON body, then the refresh token cookie, then the Authorization header.
+	// 400 is returned when none does, 401 when Keycloak rejects the refresh,
+	// and 200 with an AccessTokenResponse on success.
+	var payload struct {
+		RefreshToken string `json:"refresh_token"`
+	}
+
+	// 1. Request body. A bind failure is not fatal on its own because the
+	// token may still arrive through the cookie or the header; the error is
+	// kept so it can be reported if no source yields a token.
+	bindErr := c.ShouldBindJSON(&payload)
+
+	// 2. and 3. Fall back whenever the body did not yield a token. This
+	// includes a syntactically valid body that omits the field (such as {}),
+	// not only a body that failed to bind.
+	if payload.RefreshToken == "" {
+		if cookie, cookieErr := c.Cookie(RefreshTokenCookieName); cookieErr == nil && cookie != "" {
+			payload.RefreshToken = cookie
+		} else if authHeader := c.GetHeader("Authorization"); authHeader != "" {
+			// Remove "Bearer " prefix if present; otherwise the raw header
+			// value is used as the token.
+			if len(authHeader) > 7 && authHeader[:7] == "Bearer " {
+				payload.RefreshToken = authHeader[7:]
+			} else {
+				payload.RefreshToken = authHeader
+			}
+		}
+	}
+
+	if payload.RefreshToken == "" {
+		responses.HandleErrorWithStatus(c, http.StatusBadRequest, bindErr, "refresh_token required")
+		return
+	}
+
+	// Use Keycloak to refresh the token
+	tokens, err := h.kc.RefreshToken(c.Request.Context(), payload.RefreshToken)
+	if err != nil {
+		h.logger.Error().Err(err).Msg("failed to refresh token")
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, err, "failed to refresh token")
+		return
+	}
+
+	// Return tokens in JSON response (token-based authentication, not cookies)
+	c.JSON(http.StatusOK, AccessTokenResponse{
+		AccessToken:  tokens.AccessToken,
+		RefreshToken: tokens.RefreshToken,
+		TokenType:    tokens.TokenType,
+		ExpiresIn:    tokens.ExpiresIn,
+		// Absolute expiry derived from the server clock at response time.
+		ExpiresAt: time.Now().Add(time.Duration(tokens.ExpiresIn) * time.Second),
+	})
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/chathandler/chat_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/chathandler/chat_handler.go
new file mode 100644
index 00000000..2672cd76
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/chathandler/chat_handler.go
@@ -0,0 +1,1218 @@
+package chathandler
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/gin-gonic/gin"
+	openai "github.com/sashabaranov/go-openai"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/domain/project"
+	"jan-server/services/llm-api/internal/domain/prompt"
+	"jan-server/services/llm-api/internal/domain/usersettings"
+	"jan-server/services/llm-api/internal/infrastructure/inference"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/internal/infrastructure/mediaresolver"
+	memclient "jan-server/services/llm-api/internal/infrastructure/memory"
+	"jan-server/services/llm-api/internal/infrastructure/observability"
+	conversationHandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler"
+	modelHandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/middlewares"
+	chatrequests "jan-server/services/llm-api/internal/interfaces/httpserver/requests/chat"
+	"jan-server/services/llm-api/internal/utils/httpclients/chat"
+	"jan-server/services/llm-api/internal/utils/idgen"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ConversationReferrerContextKey is the gin context key under which a
+// conversation referrer string may be stored. CreateChatCompletion reads it
+// first and falls back to the "referrer" path and query parameters when the
+// stored value is empty.
+const ConversationReferrerContextKey = "conversation_referrer"
+
+// ChatCompletionResult wraps the response with conversation context.
+// It is the success value returned by ChatHandler.CreateChatCompletion.
+type ChatCompletionResult struct {
+	// Response is the chat completion response in go-openai form.
+	Response          *openai.ChatCompletionResponse
+	// ConversationID identifies the conversation the completion belongs to.
+	// NOTE(review): presumably empty for non-conversation completions — confirm.
+	ConversationID    string
+	// ConversationTitle is an optional conversation title; nil when not set.
+	ConversationTitle *string
+}
+
+// ChatHandler handles chat completion requests.
+// CreateChatCompletion treats userSettingsService, memoryHandler and
+// promptProcessor as optional and skips the corresponding step when they are
+// nil.
+type ChatHandler struct {
+	// inferenceProvider supplies the chat completion client for the selected provider.
+	inferenceProvider   *inference.InferenceProvider
+	// providerHandler resolves the requested model ID to a provider and provider model.
+	providerHandler     *modelHandler.ProviderHandler
+	conversationHandler *conversationHandler.ConversationHandler
+	conversationService *conversation.ConversationService
+	projectService      *project.ProjectService
+	mediaResolver       mediaresolver.Resolver
+	// promptProcessor applies prompt orchestration to outgoing messages; optional.
+	promptProcessor     *prompt.ProcessorImpl
+	// memoryHandler loads memory context for a conversation; optional.
+	memoryHandler       *MemoryHandler
+	// userSettingsService loads (or creates) per-user settings; optional.
+	userSettingsService *usersettings.Service
+}
+
+// NewChatHandler assembles a ChatHandler from its collaborators. Each
+// argument is stored on the field of the same name; nothing is validated or
+// defaulted here.
+func NewChatHandler(
+	inferenceProvider *inference.InferenceProvider,
+	providerHandler *modelHandler.ProviderHandler,
+	conversationHandler *conversationHandler.ConversationHandler,
+	conversationService *conversation.ConversationService,
+	projectService *project.ProjectService,
+	mediaResolver mediaresolver.Resolver,
+	promptProcessor *prompt.ProcessorImpl,
+	memoryHandler *MemoryHandler,
+	userSettingsService *usersettings.Service,
+) *ChatHandler {
+	handler := new(ChatHandler)
+
+	// Model and provider resolution.
+	handler.inferenceProvider = inferenceProvider
+	handler.providerHandler = providerHandler
+
+	// Conversation and project collaborators.
+	handler.conversationHandler = conversationHandler
+	handler.conversationService = conversationService
+	handler.projectService = projectService
+
+	// Message preparation collaborators.
+	handler.mediaResolver = mediaResolver
+	handler.promptProcessor = promptProcessor
+	handler.memoryHandler = memoryHandler
+	handler.userSettingsService = userSettingsService
+
+	return handler
+}
+
+// CreateChatCompletion handles chat completion requests (both streaming and non-streaming)
+func (h *ChatHandler) CreateChatCompletion(
+	ctx context.Context,
+	reqCtx *gin.Context,
+	userID uint,
+	request chatrequests.ChatCompletionRequest,
+) (*ChatCompletionResult, error) {
+	// Start OpenTelemetry span for chat completion
+	ctx, span := observability.StartSpan(ctx, "llm-api", "ChatHandler.CreateChatCompletion")
+	defer span.End()
+
+	// Track request start time for duration metrics
+	startTime := time.Now()
+
+	// Add basic attributes
+	observability.AddSpanAttributes(ctx,
+		attribute.String("chat.model", request.Model),
+		attribute.Bool("chat.stream", request.Stream),
+		attribute.Int("chat.message_count", len(request.Messages)),
+		attribute.Int("user.id", int(userID)),
+	)
+
+	var conv *conversation.Conversation
+	var conversationID string
+	var projectInstruction string
+	var err error
+	newMessages := append([]openai.ChatCompletionMessage(nil), request.Messages...)
+
+	// Extract referrer from context or query parameters
+	referrer := strings.TrimSpace(reqCtx.GetString(ConversationReferrerContextKey))
+	if referrer == "" {
+		referrer = strings.TrimSpace(reqCtx.Param("referrer"))
+	}
+	if referrer == "" {
+		referrer = strings.TrimSpace(reqCtx.Query("referrer"))
+	}
+
+	// Check if conversation.id exists in request
+	if referrer != "" || (request.Conversation != nil && !request.Conversation.IsEmpty()) {
+		observability.AddSpanEvent(ctx, "conversation_context_detected")
+
+		// Get or create conversation with referrer (referrer can be empty)
+		conv, err = h.getOrCreateConversation(ctx, userID, request.Conversation, referrer)
+		if err != nil {
+			observability.RecordError(ctx, err)
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get or create conversation")
+		}
+
+		// Auto-generate title from first message if conversation was just created
+		conv = h.updateConversationTitleFromMessages(ctx, userID, conv, request.Messages)
+
+		// Prepend conversation items to messages
+		conversationID = conv.PublicID
+		observability.AddSpanAttributes(ctx,
+			attribute.String("conversation.id", conversationID),
+		)
+		request.Messages = h.prependConversationItems(conv, request.Messages)
+
+		// Load project instruction for this conversation (if any)
+		projectInstruction = h.getProjectInstruction(ctx, userID, conv)
+	}
+	// If no conversation.id exists, bypass as non-conversation completion
+
+	// Validate messages (after prepending conversation items)
+	if len(request.Messages) == 0 {
+		err := platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "messages cannot be empty", nil, "")
+		observability.RecordError(ctx, err)
+		return nil, err
+	}
+
+	// Load memory context (best-effort) when a conversation is present
+	loadedMemory := h.collectPromptMemory(conv, reqCtx)
+
+	// Load user settings once for prompt orchestration and memory (best-effort)
+	var userSettings *usersettings.UserSettings
+	if h.userSettingsService != nil {
+		userSettings, err = h.userSettingsService.GetOrCreateSettings(ctx, userID)
+		if err != nil {
+			log := logger.GetLogger()
+			log.Warn().Err(err).Uint("user_id", userID).Msg("failed to load user settings for prompt orchestration")
+			userSettings = nil
+		}
+	}
+
+	// Load memory using memory_handler (respects MEMORY_ENABLED and user settings)
+	// Memory injection is controlled by PROMPT_ORCHESTRATION_MEMORY in the prompt processor
+	if h.memoryHandler != nil && conversationID != "" {
+		memoryContext, memErr := h.memoryHandler.LoadMemoryContext(ctx, userID, conversationID, conv, newMessages, userSettings)
+		if memErr == nil && len(memoryContext) > 0 {
+			loadedMemory = append(loadedMemory, memoryContext...)
+		}
+	}
+
+	// Get provider based on the requested model
+	observability.AddSpanEvent(ctx, "selecting_provider")
+	selectedProviderModel, selectedProvider, err := h.providerHandler.SelectProviderModelForModelPublicID(ctx, request.Model)
+	if err != nil {
+		observability.RecordError(ctx, err)
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to select provider model")
+	}
+
+	if selectedProviderModel == nil {
+		err := platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, fmt.Sprintf("model not found: %s", request.Model), nil, "")
+		observability.RecordError(ctx, err)
+		return nil, err
+	}
+
+	if selectedProvider == nil {
+		err := platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "provider not found", nil, "")
+		observability.RecordError(ctx, err)
+		return nil, err
+	}
+
+	// Add provider information to span
+	observability.AddSpanAttributes(ctx,
+		attribute.String("provider.display_name", selectedProvider.DisplayName),
+		attribute.String("provider.id", selectedProvider.PublicID),
+		attribute.String("provider.kind", string(selectedProvider.Kind)),
+		attribute.String("model.original_id", selectedProviderModel.ProviderOriginalModelID),
+	)
+
+	// Override the request model with the provider's original model ID
+	request.Model = selectedProviderModel.ProviderOriginalModelID
+
+	// Resolve jan_* media placeholders (best-effort)
+	request.Messages = h.resolveMediaPlaceholders(ctx, reqCtx, request.Messages)
+
+	// Ensure project instruction is the first system message when available
+	if projectInstruction != "" {
+		request.Messages = prompt.PrependProjectInstruction(request.Messages, projectInstruction)
+	}
+
+	// Apply prompt orchestration (if enabled)
+	if h.promptProcessor != nil {
+		observability.AddSpanEvent(ctx, "processing_prompts")
+
+		preferences := make(map[string]interface{})
+		if len(request.Tools) > 0 || request.ToolChoice != nil {
+			preferences["use_tools"] = true
+		}
+		if persona := strings.TrimSpace(reqCtx.GetHeader("X-Prompt-Persona")); persona != "" {
+			preferences["persona"] = persona
+		}
+		if persona := strings.TrimSpace(reqCtx.Query("persona")); persona != "" {
+			preferences["persona"] = persona
+		}
+
+		var profileSettings *usersettings.ProfileSettings
+		if userSettings != nil {
+			profileSettings = &userSettings.ProfileSettings
+		}
+
+		promptCtx := &prompt.Context{
+			UserID:             userID,
+			ConversationID:     conversationID,
+			Language:           strings.TrimSpace(reqCtx.GetHeader("Accept-Language")),
+			Preferences:        preferences,
+			Memory:             loadedMemory,
+			ProjectInstruction: projectInstruction,
+			Profile:            profileSettings,
+		}
+
+		processedMessages, processErr := h.promptProcessor.Process(ctx, promptCtx, request.Messages)
+		if processErr != nil {
+			// Log error but continue with original messages
+			log := logger.GetLogger()
+			log.Warn().
+				Err(processErr).
+				Str("conversation_id", conversationID).
+				Msg("failed to process prompts, using original messages")
+		} else {
+			request.Messages = processedMessages
+			if len(promptCtx.AppliedModules) > 0 {
+				reqCtx.Header("X-Applied-Prompt-Modules", strings.Join(promptCtx.AppliedModules, ","))
+			}
+			observability.AddSpanEvent(ctx, "prompts_processed")
+		}
+	}
+
+	// Get chat completion client
+	chatClient, err := h.inferenceProvider.GetChatCompletionClient(ctx, selectedProvider)
+	if err != nil {
+		observability.RecordError(ctx, err)
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to create chat client")
+	}
+
+	var response *openai.ChatCompletionResponse
+
+	// Debug: Log final messages sent to LLM
+	logInstance := logger.GetLogger()
+	if logInstance.Debug().Enabled() {
+		finalMessagesJSON, _ := json.Marshal(request.Messages)
+		logInstance.Debug().
+			RawJSON("final_messages_to_llm", finalMessagesJSON).
+			Str("model", request.Model).
+			Bool("stream", request.Stream).
+			Msg("sending request to LLM")
+	}
+
+	// Handle streaming vs non-streaming
+	observability.AddSpanEvent(ctx, "calling_llm")
+	llmStartTime := time.Now()
+	if request.Stream {
+		response, err = h.streamCompletion(ctx, reqCtx, chatClient, conv, request.ChatCompletionRequest)
+	} else {
+		response, err = h.callCompletion(ctx, chatClient, request.ChatCompletionRequest)
+	}
+	llmDuration := time.Since(llmStartTime)
+
+	// Debug: Log LLM response
+	if logInstance.Debug().Enabled() {
+		if err != nil {
+			logInstance.Debug().
+				Err(err).
+				Dur("llm_duration_ms", llmDuration).
+				Msg("LLM call failed")
+		} else if response != nil {
+			responseJSON, _ := json.Marshal(response)
+			logInstance.Debug().
+				RawJSON("llm_response", responseJSON).
+				Dur("llm_duration_ms", llmDuration).
+				Int("prompt_tokens", response.Usage.PromptTokens).
+				Int("completion_tokens", response.Usage.CompletionTokens).
+				Int("total_tokens", response.Usage.TotalTokens).
+				Msg("LLM call completed")
+		}
+	}
+
+	if err != nil {
+		observability.RecordError(ctx, err)
+		observability.AddSpanAttributes(ctx,
+			attribute.String("completion.status", "failed"),
+		)
+		return nil, err
+	}
+
+	// Add LLM response metrics
+	if response != nil && response.Usage.TotalTokens > 0 {
+		observability.AddSpanAttributes(ctx,
+			attribute.Int("completion.prompt_tokens", response.Usage.PromptTokens),
+			attribute.Int("completion.completion_tokens", response.Usage.CompletionTokens),
+			attribute.Int("completion.total_tokens", response.Usage.TotalTokens),
+			attribute.Float64("completion.llm_duration_ms", float64(llmDuration.Milliseconds())),
+			attribute.String("completion.status", "success"),
+		)
+		if len(response.Choices) > 0 {
+			observability.AddSpanAttributes(ctx,
+				attribute.String("completion.finish_reason", string(response.Choices[0].FinishReason)),
+			)
+		}
+	}
+
+	// Add request and response to conversation if conversation context was provided
+	storeConversation := true
+	if request.Store != nil {
+		storeConversation = *request.Store
+	}
+
+	if conv != nil && response != nil && storeConversation {
+		observability.AddSpanEvent(ctx, "storing_conversation")
+		var askItemID, completionItemID string
+		if id, genErr := idgen.GenerateSecureID("msg", 16); genErr == nil {
+			askItemID = id
+		} else {
+			log := logger.GetLogger()
+			log.Warn().
+				Err(genErr).
+				Str("conversation_id", conv.PublicID).
+				Msg("failed to generate ask item id")
+		}
+		if id, genErr := idgen.GenerateSecureID("msg", 16); genErr == nil {
+			completionItemID = id
+		} else {
+			log := logger.GetLogger()
+			log.Warn().
+				Err(genErr).
+				Str("conversation_id", conv.PublicID).
+				Msg("failed to generate completion item id")
+		}
+		storeReasoning := false
+		if request.StoreReasoning != nil {
+			storeReasoning = *request.StoreReasoning
+		}
+
+		if err := h.addCompletionToConversation(ctx, conv, newMessages, response, askItemID, completionItemID, storeReasoning); err != nil {
+			// Log error but don't fail the request
+			log := logger.GetLogger()
+			log.Warn().
+				Err(err).
+				Str("conversation_id", conv.PublicID).
+				Msg("failed to store completion in conversation")
+			observability.AddSpanEvent(ctx, "conversation_storage_failed",
+				attribute.String("error", err.Error()),
+			)
+		} else {
+			observability.AddSpanAttributes(ctx,
+				attribute.Bool("completion.stored", true),
+			)
+
+			// Observe conversation for memory extraction using memory_handler
+			if h.memoryHandler != nil && response != nil && len(response.Choices) > 0 {
+				finishReason := response.Choices[0].FinishReason
+				observability.AddSpanEvent(ctx, "observing_for_memory",
+					attribute.String("finish_reason", string(finishReason)),
+				)
+				go h.memoryHandler.ObserveConversation(conv, userID, newMessages, response, finishReason)
+			}
+		}
+	}
+
+	// Calculate total duration
+	totalDuration := time.Since(startTime)
+	observability.AddSpanAttributes(ctx,
+		attribute.Float64("completion.total_duration_ms", float64(totalDuration.Milliseconds())),
+	)
+
+	// Set span status to OK
+	observability.SetSpanStatus(ctx, codes.Ok, "chat completion successful")
+
+	// Prepare conversation title for response
+	var conversationTitle *string
+	if conv != nil && conv.Title != nil {
+		conversationTitle = conv.Title
+	}
+
+	return &ChatCompletionResult{
+		Response:          response,
+		ConversationID:    conversationID,
+		ConversationTitle: conversationTitle,
+	}, nil
+}
+
+// callCompletion issues one non-streaming chat completion request through
+// chatClient and returns the resulting response. A failed call is wrapped as a
+// handler-layer platform error with the message "chat completion failed".
+func (h *ChatHandler) callCompletion(
+	ctx context.Context,
+	chatClient *chat.ChatCompletionClient,
+	request openai.ChatCompletionRequest,
+) (*openai.ChatCompletionResponse, error) {
+	result, callErr := chatClient.CreateChatCompletion(ctx, "", request)
+	if callErr == nil {
+		return result, nil
+	}
+
+	return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, callErr, "chat completion failed")
+}
+
+// streamCompletion runs a streaming chat completion and writes the stream to
+// the gin request context. When conv carries a non-empty public ID, a
+// before-done callback is registered that emits one extra SSE chunk holding the
+// conversation ID (and its title, when set and non-empty). Without such a
+// conversation the callback stays nil. Streaming failures are wrapped as
+// handler-layer platform errors.
+func (h *ChatHandler) streamCompletion(
+	ctx context.Context,
+	reqCtx *gin.Context,
+	chatClient *chat.ChatCompletionClient,
+	conv *conversation.Conversation,
+	request openai.ChatCompletionRequest,
+) (*openai.ChatCompletionResponse, error) {
+	var announceConversation chat.BeforeDoneCallback
+	if conv != nil && conv.PublicID != "" {
+		announceConversation = func(streamCtx *gin.Context) error {
+			// Conversation payload: always the ID, plus the title when there is one.
+			convInfo := map[string]interface{}{"id": conv.PublicID}
+			if title := conv.Title; title != nil && *title != "" {
+				convInfo["title"] = *title
+			}
+
+			payload, marshalErr := json.Marshal(map[string]interface{}{
+				"conversation": convInfo,
+				"created":      time.Now().Unix(),
+				"id":           "", // Empty for conversation-only chunk
+				"model":        request.Model,
+				"object":       "chat.completion.chunk",
+			})
+			if marshalErr != nil {
+				return marshalErr
+			}
+
+			// Emit the conversation context as its own SSE data event.
+			return h.writeSSEData(streamCtx, string(payload))
+		}
+	}
+
+	streamed, streamErr := chatClient.StreamChatCompletionToContextWithCallback(reqCtx, "", request, announceConversation)
+	if streamErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, streamErr, "streaming completion failed")
+	}
+
+	return streamed, nil
+}
+
+// resolveMediaPlaceholders passes messages through h.mediaResolver and returns
+// the resolved messages when the resolver reports a change. It is best-effort:
+// with no resolver, no messages, or a resolver error (logged as a warning) the
+// input messages are returned as-is. When reqCtx is available, its
+// Authorization header and authenticated principal are attached to the context
+// handed to the resolver.
+func (h *ChatHandler) resolveMediaPlaceholders(ctx context.Context, reqCtx *gin.Context, messages []openai.ChatCompletionMessage) []openai.ChatCompletionMessage {
+	log := logger.GetLogger()
+
+	if len(messages) == 0 || h.mediaResolver == nil {
+		return messages
+	}
+
+	// Debug: dump what we were given.
+	if log.Debug().Enabled() {
+		inputJSON, _ := json.Marshal(messages)
+		log.Debug().RawJSON("input_messages", inputJSON).Msg("resolveMediaPlaceholders: input")
+	}
+
+	// Forward the caller's credentials to the resolver through the context.
+	if reqCtx != nil {
+		authorization := strings.TrimSpace(reqCtx.GetHeader("Authorization"))
+		if authorization != "" {
+			ctx = mediaresolver.ContextWithAuthorization(ctx, authorization)
+		}
+		principal, hasPrincipal := middlewares.PrincipalFromContext(reqCtx)
+		if hasPrincipal {
+			ctx = mediaresolver.ContextWithPrincipal(ctx, principal)
+		}
+	}
+
+	resolved, changed, resolveErr := h.mediaResolver.ResolveMessages(ctx, messages)
+	if resolveErr != nil {
+		log.Warn().Err(resolveErr).Msg("media placeholder resolution failed")
+		return messages
+	}
+
+	if !changed {
+		// Debug: nothing was rewritten.
+		if log.Debug().Enabled() {
+			log.Debug().Msg("resolveMediaPlaceholders: output (no change)")
+		}
+		return messages
+	}
+
+	observability.AddSpanEvent(ctx, "media_placeholders_resolved")
+
+	// Debug: dump the rewritten messages.
+	if log.Debug().Enabled() {
+		outputJSON, _ := json.Marshal(resolved)
+		log.Debug().RawJSON("resolved_messages", outputJSON).Msg("resolveMediaPlaceholders: output (changed)")
+	}
+
+	return resolved
+}
+
+// getProjectInstruction loads the project instruction for the conversation, falling back to the stored snapshot.
+func (h *ChatHandler) getProjectInstruction(ctx context.Context, userID uint, conv *conversation.Conversation) string {
+	if conv == nil || h.projectService == nil {
+		return ""
+	}
+	if ctx != nil && ctx.Err() != nil {
+		return ""
+	}
+
+	if conv.EffectiveInstructionSnapshot != nil {
+		if snapshot := strings.TrimSpace(*conv.EffectiveInstructionSnapshot); snapshot != "" {
+			return snapshot
+		}
+	}
+
+	if conv.ProjectPublicID == nil {
+		return ""
+	}
+
+	projectID := strings.TrimSpace(*conv.ProjectPublicID)
+	if projectID == "" {
+		return ""
+	}
+
+	proj, err := h.projectService.GetProjectByPublicIDAndUserID(ctx, projectID, userID)
+	if err != nil {
+		log := logger.GetLogger()
+		log.Warn().
+			Err(err).
+			Str("conversation_id", conv.PublicID).
+			Str("project_id", projectID).
+			Msg("failed to load project instruction")
+		return ""
+	}
+
+	if proj.Instruction == nil {
+		return ""
+	}
+
+	return strings.TrimSpace(*proj.Instruction)
+}
+
+// collectPromptMemory assembles memory hints for prompt processing. Sources, in
+// order: the semicolon-separated X-Prompt-Memory request header, then every
+// conversation metadata value whose key starts with "memory"
+// (case-insensitive). If a conversation is present and neither source yielded
+// anything, hints derived from the most recent conversation turns are used
+// instead. Blank entries are dropped and the result is never nil. Metadata
+// entries come from a map range, so their relative order is unspecified.
+func (h *ChatHandler) collectPromptMemory(conv *conversation.Conversation, reqCtx *gin.Context) []string {
+	hints := make([]string, 0)
+
+	if reqCtx != nil {
+		header := strings.TrimSpace(reqCtx.GetHeader("X-Prompt-Memory"))
+		// Splitting an empty header yields one blank part, which is skipped below.
+		for _, piece := range strings.Split(header, ";") {
+			piece = strings.TrimSpace(piece)
+			if piece == "" {
+				continue
+			}
+			hints = append(hints, piece)
+		}
+	}
+
+	if conv == nil {
+		return hints
+	}
+
+	// Ranging over a nil map is a no-op, so no explicit nil check is needed.
+	for key, val := range conv.Metadata {
+		value := strings.TrimSpace(val)
+		if value != "" && strings.HasPrefix(strings.ToLower(key), "memory") {
+			hints = append(hints, value)
+		}
+	}
+
+	if len(hints) == 0 {
+		hints = append(hints, h.recentConversationMemory(conv)...)
+	}
+
+	return hints
+}
+
+// recentConversationMemory builds lightweight context lines from the latest conversation turns.
+func (h *ChatHandler) recentConversationMemory(conv *conversation.Conversation) []string {
+	items := conv.GetActiveBranchItems()
+	if len(items) == 0 {
+		return nil
+	}
+
+	memories := make([]string, 0, 3)
+	collected := 0
+	for i := len(items) - 1; i >= 0 && collected < 3; i-- {
+		text := firstTextFromItem(items[i])
+		if text == "" {
+			continue
+		}
+		role := "user"
+		if items[i].Role != nil {
+			role = string(*items[i].Role)
+		}
+		memories = append(memories, fmt.Sprintf("Recent %s message: %s", role, text))
+		collected++
+	}
+
+	// Reverse to keep chronological order
+	for i, j := 0, len(memories)-1; i < j; i, j = i+1, j-1 {
+		memories[i], memories[j] = memories[j], memories[i]
+	}
+
+	return memories
+}
+
+// formatMemoryForPromptCtx flattens a memory load response into prompt-ready
+// lines: core memory as "User memory: …", semantic memory as "Project fact: …"
+// (or "Project fact - <title>: …" when a title is present), and episodic
+// memory as "Recent event: …", in that order. Entries with blank text are
+// skipped. Returns nil for a nil response.
+func formatMemoryForPromptCtx(resp *memclient.LoadResponse) []string {
+	if resp == nil {
+		return nil
+	}
+
+	total := len(resp.CoreMemory) + len(resp.SemanticMemory) + len(resp.EpisodicMemory)
+	lines := make([]string, 0, total)
+
+	for _, core := range resp.CoreMemory {
+		if strings.TrimSpace(core.Text) == "" {
+			continue
+		}
+		lines = append(lines, fmt.Sprintf("User memory: %s", core.Text))
+	}
+
+	for _, fact := range resp.SemanticMemory {
+		switch {
+		case strings.TrimSpace(fact.Text) == "":
+			// Nothing to say about a fact without text.
+		case strings.TrimSpace(fact.Title) != "":
+			lines = append(lines, fmt.Sprintf("Project fact - %s: %s", fact.Title, fact.Text))
+		default:
+			lines = append(lines, fmt.Sprintf("Project fact: %s", fact.Text))
+		}
+	}
+
+	for _, episode := range resp.EpisodicMemory {
+		if strings.TrimSpace(episode.Text) == "" {
+			continue
+		}
+		lines = append(lines, fmt.Sprintf("Recent event: %s", episode.Text))
+	}
+
+	return lines
+}
+
+// formatAndFilterMemory formats a memory load response into prompt-ready lines,
+// including each memory category only when the matching flag in
+// settings.MemoryConfig is enabled:
+//   - InjectUserCore → core memory, as "User memory: …"
+//   - InjectSemantic → semantic memory, as "Project fact: …" or
+//     "Project fact - <title>: …"
+//   - InjectEpisodic → episodic memory, as "Recent event: …"
+//
+// Entries with blank text are skipped. Returns nil when resp or settings is
+// nil: without settings there is no opt-in to consult, so no memory is
+// injected rather than dereferencing a nil pointer.
+func (h *ChatHandler) formatAndFilterMemory(resp *memclient.LoadResponse, settings *usersettings.UserSettings) []string {
+	if resp == nil || settings == nil {
+		return nil
+	}
+
+	memory := make([]string, 0)
+
+	// Add core memory (user preferences) if enabled
+	if settings.MemoryConfig.InjectUserCore {
+		for _, item := range resp.CoreMemory {
+			if strings.TrimSpace(item.Text) != "" {
+				memory = append(memory, fmt.Sprintf("User memory: %s", item.Text))
+			}
+		}
+	}
+
+	// Add semantic memory (project facts) if enabled
+	if settings.MemoryConfig.InjectSemantic {
+		for _, fact := range resp.SemanticMemory {
+			if strings.TrimSpace(fact.Text) == "" {
+				continue
+			}
+			if strings.TrimSpace(fact.Title) != "" {
+				memory = append(memory, fmt.Sprintf("Project fact - %s: %s", fact.Title, fact.Text))
+			} else {
+				memory = append(memory, fmt.Sprintf("Project fact: %s", fact.Text))
+			}
+		}
+	}
+
+	// Add episodic memory (conversation history) if enabled
+	if settings.MemoryConfig.InjectEpisodic {
+		for _, event := range resp.EpisodicMemory {
+			if strings.TrimSpace(event.Text) != "" {
+				memory = append(memory, fmt.Sprintf("Recent event: %s", event.Text))
+			}
+		}
+	}
+
+	return memory
+}
+
+// firstTextFromItem returns the first non-blank, whitespace-trimmed text found
+// in the item's content entries. Within each entry the fields are tried in the
+// order Text, InputText, OutputText, ReasoningContent. Returns "" when no entry
+// carries text.
+func firstTextFromItem(item conversation.Item) string {
+	for _, part := range item.Content {
+		// Gather this entry's populated text fields in priority order.
+		candidates := make([]string, 0, 4)
+		if part.Text != nil {
+			candidates = append(candidates, part.Text.Text)
+		}
+		if part.InputText != nil {
+			candidates = append(candidates, *part.InputText)
+		}
+		if part.OutputText != nil {
+			candidates = append(candidates, part.OutputText.Text)
+		}
+		if part.ReasoningContent != nil {
+			candidates = append(candidates, *part.ReasoningContent)
+		}
+
+		for _, candidate := range candidates {
+			if text := strings.TrimSpace(candidate); text != "" {
+				return text
+			}
+		}
+	}
+	return ""
+}
+
+// createConversationWithReferrer creates a new conversation for userID tagged
+// with the whitespace-trimmed referrer. A referrer that is blank after trimming
+// is rejected with a handler-layer validation error; service failures are
+// wrapped as handler-layer platform errors.
+func (h *ChatHandler) createConversationWithReferrer(ctx context.Context, userID uint, referrer string) (*conversation.Conversation, error) {
+	trimmed := strings.TrimSpace(referrer)
+	if trimmed == "" {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "referrer cannot be empty", nil, "")
+	}
+
+	created, createErr := h.conversationService.CreateConversationWithInput(ctx, conversation.CreateConversationInput{
+		UserID:   userID,
+		Referrer: &trimmed,
+	})
+	if createErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, createErr, "failed to create conversation")
+	}
+	return created, nil
+}
+
+// generateTitleFromMessage generates a conversation title from the first user message
+func (h *ChatHandler) generateTitleFromMessage(messages []openai.ChatCompletionMessage) string {
+	// Find the first user message
+	for _, msg := range messages {
+		if msg.Role == "user" && msg.Content != "" {
+			// Extract first 60 characters for title
+			content := strings.TrimSpace(msg.Content)
+			if len(content) > 60 {
+				// Find a good breaking point (end of word)
+				truncated := content[:60]
+				if lastSpace := strings.LastIndex(truncated, " "); lastSpace > 30 {
+					content = content[:lastSpace] + "..."
+				} else {
+					content = truncated + "..."
+				}
+			}
+			return content
+		}
+	}
+	return "New Conversation"
+}
+
+// updateConversationTitleFromMessages gives an untitled conversation a title
+// generated from messages. It returns the conversation as returned by the
+// update call on success, or the original conv when a title already exists, no
+// title could be generated, or the update fails (logged as a warning, never
+// surfaced to the caller). A nil conv yields nil.
+func (h *ChatHandler) updateConversationTitleFromMessages(ctx context.Context, userID uint, conv *conversation.Conversation, messages []openai.ChatCompletionMessage) *conversation.Conversation {
+	if conv == nil {
+		return nil
+	}
+
+	// A non-empty title is never overwritten.
+	if conv.Title != nil && *conv.Title != "" {
+		return conv
+	}
+
+	title := h.generateTitleFromMessage(messages)
+	if title == "" {
+		return conv
+	}
+
+	updated, updateErr := h.conversationService.UpdateConversationWithInput(
+		ctx,
+		userID,
+		conv.PublicID,
+		conversation.UpdateConversationInput{Title: &title},
+	)
+	if updateErr != nil {
+		// Title generation is cosmetic; keep serving the request.
+		log := logger.GetLogger()
+		log.Warn().
+			Err(updateErr).
+			Str("conversation_id", conv.PublicID).
+			Msg("failed to update conversation title")
+		return conv
+	}
+	return updated
+}
+
+// getOrCreateConversation resolves the conversation for a request. When convRef
+// carries an ID, the matching conversation owned by userID is fetched and
+// returned unchanged (the referrer argument is ignored for existing
+// conversations). Otherwise a new conversation is created, tagged with referrer
+// when one is supplied. Service failures are wrapped as handler-layer platform
+// errors.
+func (h *ChatHandler) getOrCreateConversation(
+	ctx context.Context,
+	userID uint,
+	convRef *chatrequests.ConversationReference,
+	referrer string,
+) (*conversation.Conversation, error) {
+	// Existing conversation requested: look it up for this user.
+	if convRef != nil {
+		if id := convRef.GetID(); id != "" {
+			existing, getErr := h.conversationService.GetConversationByPublicIDAndUserID(ctx, id, userID)
+			if getErr != nil {
+				return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, getErr, "failed to get conversation")
+			}
+			// Note: Referrer is immutable after creation - it represents the conversation's origin
+			return existing, nil
+		}
+	}
+
+	// New conversation with a referrer.
+	if referrer != "" {
+		return h.createConversationWithReferrer(ctx, userID, referrer)
+	}
+
+	// New conversation without a referrer.
+	created, createErr := h.conversationService.CreateConversationWithInput(ctx, conversation.CreateConversationInput{
+		UserID: userID,
+	})
+	if createErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, createErr, "failed to create conversation")
+	}
+	return created, nil
+}
+
+// prependConversationItems returns the stored conversation history, converted
+// to chat messages, followed by the request messages. History is read from the
+// active branch when Branches is set and ActiveBranch is non-empty, otherwise
+// from conv.Items. Items that itemToMessage rejects are skipped. With a nil
+// conversation or no stored items, messages is returned unchanged.
+func (h *ChatHandler) prependConversationItems(
+	conv *conversation.Conversation,
+	messages []openai.ChatCompletionMessage,
+) []openai.ChatCompletionMessage {
+	if conv == nil {
+		return messages
+	}
+
+	var history []conversation.Item
+	switch {
+	case conv.Branches != nil && conv.ActiveBranch != "":
+		history = conv.Branches[conv.ActiveBranch]
+	default:
+		history = conv.Items
+	}
+	if len(history) == 0 {
+		return messages
+	}
+
+	combined := make([]openai.ChatCompletionMessage, 0, len(history))
+	for idx := range history {
+		converted := h.itemToMessage(history[idx])
+		if converted == nil {
+			continue
+		}
+		combined = append(combined, *converted)
+	}
+
+	// History first, then the messages from the current request.
+	return append(combined, messages...)
+}
+
+// itemToMessage converts a stored conversation item into a chat completion
+// message. Items whose status is set and not "completed" yield nil. A missing
+// role is treated as user.
+//
+// Each content entry contributes at most one text part (non-empty Text, else
+// InputText, else OutputText) and, independently, an image part when it has a
+// non-empty image URL. If any image was seen, the message uses MultiContent
+// with every collected part; otherwise only the first text part becomes the
+// plain string Content.
+// NOTE(review): for text-only items with several text entries, entries after
+// the first are not carried into the message — confirm this is intended.
+func (h *ChatHandler) itemToMessage(item conversation.Item) *openai.ChatCompletionMessage {
+	// Skip items that aren't in completed status
+	if status := item.Status; status != nil && *status != conversation.ItemStatusCompleted {
+		return nil
+	}
+
+	itemRole := conversation.ItemRoleUser
+	if item.Role != nil {
+		itemRole = *item.Role
+	}
+	out := &openai.ChatCompletionMessage{Role: h.itemRoleToOpenAI(itemRole)}
+
+	var (
+		texts    []string
+		parts    []openai.ChatMessagePart
+		sawImage bool
+	)
+
+	// addText records a text fragment in both the plain and multi-part forms.
+	addText := func(text string) {
+		texts = append(texts, text)
+		parts = append(parts, openai.ChatMessagePart{
+			Type: openai.ChatMessagePartTypeText,
+			Text: text,
+		})
+	}
+
+	for _, entry := range item.Content {
+		// At most one text source per entry, in priority order.
+		switch {
+		case entry.Text != nil && entry.Text.Text != "":
+			addText(entry.Text.Text)
+		case entry.InputText != nil:
+			addText(*entry.InputText)
+		case entry.OutputText != nil:
+			addText(entry.OutputText.Text)
+		}
+
+		// Image content is handled independently of text.
+		if entry.Image == nil || entry.Image.URL == "" {
+			continue
+		}
+		sawImage = true
+		image := &openai.ChatMessageImageURL{URL: entry.Image.URL}
+		if entry.Image.Detail != "" {
+			image.Detail = openai.ImageURLDetail(entry.Image.Detail)
+		}
+		parts = append(parts, openai.ChatMessagePart{
+			Type:     openai.ChatMessagePartTypeImageURL,
+			ImageURL: image,
+		})
+	}
+
+	// Multimodal format when images are present, plain string otherwise.
+	if sawImage && len(parts) > 0 {
+		out.MultiContent = parts
+	} else if len(texts) > 0 {
+		out.Content = texts[0]
+	}
+
+	return out
+}
+
+// itemRoleToOpenAI maps a conversation item role onto an OpenAI chat message
+// role. System and developer both map to system; assistant and tool map to
+// their namesakes; user and any unrecognized role map to user.
+func (h *ChatHandler) itemRoleToOpenAI(role conversation.ItemRole) string {
+	if role == conversation.ItemRoleSystem || role == conversation.ItemRoleDeveloper {
+		return openai.ChatMessageRoleSystem
+	}
+	if role == conversation.ItemRoleAssistant {
+		return openai.ChatMessageRoleAssistant
+	}
+	if role == conversation.ItemRoleTool {
+		return openai.ChatMessageRoleTool
+	}
+	// ItemRoleUser and every unknown role fall back to the user role.
+	return openai.ChatMessageRoleUser
+}
+
+// addCompletionToConversation stores the latest input message and the
+// assistant's reply as items on the conversation's main branch
+// (conversation.BranchMain). It is a no-op when conv or response is nil, the
+// response has no choices, or neither message produces a storable item.
+// askItemID and completionItemID, when non-empty, become the public IDs of the
+// input and assistant items respectively. storeReasoning controls whether
+// reasoning content is kept. Storage failures are wrapped as handler-layer
+// platform errors.
+func (h *ChatHandler) addCompletionToConversation(
+	ctx context.Context,
+	conv *conversation.Conversation,
+	newMessages []openai.ChatCompletionMessage,
+	response *openai.ChatCompletionResponse,
+	askItemID string,
+	completionItemID string,
+	storeReasoning bool,
+) error {
+	if conv == nil || response == nil || len(response.Choices) == 0 {
+		return nil
+	}
+
+	pending := make([]conversation.Item, 0, 2)
+
+	ask := h.buildInputConversationItem(newMessages, storeReasoning, askItemID)
+	if ask != nil {
+		pending = append(pending, *ask)
+	}
+
+	answer := h.buildAssistantConversationItem(response, storeReasoning, completionItemID)
+	if answer != nil {
+		pending = append(pending, *answer)
+	}
+
+	if len(pending) == 0 {
+		return nil
+	}
+
+	_, addErr := h.conversationService.AddItemsToConversation(ctx, conv, conversation.BranchMain, pending)
+	if addErr == nil {
+		return nil
+	}
+	return platformerrors.AsError(ctx, platformerrors.LayerHandler, addErr, "failed to add items to conversation")
+}
+
+// buildInputConversationItem turns the last message of messages into a
+// conversation item for storage. It returns nil when there are no messages,
+// when that message maps to the system role, or when nothing storable remains
+// (no content after reasoning filtering and no text, multi-part content,
+// function call or tool calls on the message). publicID, when non-empty, is
+// assigned to the item; CreatedAt is set to the current UTC time.
+func (h *ChatHandler) buildInputConversationItem(
+	messages []openai.ChatCompletionMessage,
+	storeReasoning bool,
+	publicID string,
+) *conversation.Item {
+	if len(messages) == 0 {
+		return nil
+	}
+
+	last := messages[len(messages)-1]
+	item := h.messageToItem(last)
+
+	// System messages are never persisted as input items.
+	if item.Role != nil && *item.Role == conversation.ItemRoleSystem {
+		return nil
+	}
+
+	item.Content = h.filterReasoningContent(item.Content, storeReasoning)
+
+	hasPayload := len(item.Content) > 0 ||
+		last.Content != "" ||
+		len(last.MultiContent) > 0 ||
+		last.FunctionCall != nil ||
+		len(last.ToolCalls) > 0
+	if !hasPayload {
+		return nil
+	}
+
+	if publicID != "" {
+		item.PublicID = publicID
+	}
+	item.CreatedAt = time.Now().UTC()
+	return &item
+}
+
+// buildAssistantConversationItem turns the first choice of response into a
+// conversation item for storage. Reasoning content is dropped unless
+// storeReasoning is set. A non-empty finish reason is recorded on the first
+// remaining content entry. Returns nil for a nil response, a response without
+// choices, or a message with nothing storable. publicID, when non-empty, is
+// assigned to the item; CreatedAt is set to the current UTC time.
+func (h *ChatHandler) buildAssistantConversationItem(
+	response *openai.ChatCompletionResponse,
+	storeReasoning bool,
+	publicID string,
+) *conversation.Item {
+	if response == nil || len(response.Choices) == 0 {
+		return nil
+	}
+
+	first := response.Choices[0]
+	reply := first.Message
+
+	item := h.messageToItem(reply)
+	item.Content = h.filterReasoningContent(item.Content, storeReasoning)
+
+	reason := string(first.FinishReason)
+	if reason != "" && len(item.Content) > 0 {
+		item.Content[0].FinishReason = &reason
+	}
+
+	hasPayload := len(item.Content) > 0 ||
+		reply.Content != "" ||
+		len(reply.MultiContent) > 0 ||
+		reply.FunctionCall != nil ||
+		len(reply.ToolCalls) > 0
+	if !hasPayload {
+		return nil
+	}
+
+	if publicID != "" {
+		item.PublicID = publicID
+	}
+	item.CreatedAt = time.Now().UTC()
+	return &item
+}
+
+// filterReasoningContent returns contents without entries whose Type equals
+// "reasoning_content" (compared case-insensitively). When storeReasoning is
+// true or contents is empty, the input slice is returned as-is; otherwise a
+// new slice is built and the input is left unmodified.
+func (h *ChatHandler) filterReasoningContent(contents []conversation.Content, storeReasoning bool) []conversation.Content {
+	if len(contents) == 0 || storeReasoning {
+		return contents
+	}
+
+	kept := make([]conversation.Content, 0, len(contents))
+	for idx := range contents {
+		if !strings.EqualFold(contents[idx].Type, "reasoning_content") {
+			kept = append(kept, contents[idx])
+		}
+	}
+	return kept
+}
+
+// messageToItem converts a chat completion message into a conversation item of
+// type message with status completed and the mapped role. Content entries are
+// appended in this order: the plain string content, each multi-part entry (text
+// and image URLs), reasoning content, a legacy function call, and tool calls.
+// Text is stored as input text for user messages, as a "tool_result" entry for
+// tool messages, and as regular text otherwise. A tool call ID is attached to
+// the first content entry that has none, or stored as a standalone
+// "tool_reference" entry when no such entry exists. Item.Content is left unset
+// when nothing was produced.
+func (h *ChatHandler) messageToItem(msg openai.ChatCompletionMessage) conversation.Item {
+	itemRole := h.openAIRoleToItem(msg.Role)
+	completed := conversation.ItemStatusCompleted
+
+	item := conversation.Item{
+		Type:   conversation.ItemTypeMessage,
+		Role:   &itemRole,
+		Status: &completed,
+	}
+
+	// textContent wraps text in the content shape appropriate for the role.
+	textContent := func(text string) conversation.Content {
+		switch itemRole {
+		case conversation.ItemRoleUser:
+			return conversation.NewInputTextContent(text)
+		case conversation.ItemRoleTool:
+			return conversation.Content{
+				Type: "tool_result",
+				Text: &conversation.Text{
+					Text: text,
+				},
+			}
+		default:
+			return conversation.NewTextContent(text)
+		}
+	}
+
+	entries := make([]conversation.Content, 0, 4)
+
+	// Simple string content.
+	if msg.Content != "" {
+		entries = append(entries, textContent(msg.Content))
+	}
+
+	// Multimodal content (text + images); other part types are ignored.
+	for _, part := range msg.MultiContent {
+		if part.Type == openai.ChatMessagePartTypeText {
+			if part.Text != "" {
+				entries = append(entries, textContent(part.Text))
+			}
+		} else if part.Type == openai.ChatMessagePartTypeImageURL {
+			if part.ImageURL != nil && part.ImageURL.URL != "" {
+				entries = append(entries, conversation.NewImageContent(
+					part.ImageURL.URL,
+					"", // fileID - could be extracted from jan_* URLs if needed
+					string(part.ImageURL.Detail),
+				))
+			}
+		}
+	}
+
+	if msg.ReasoningContent != "" {
+		reasoningText := msg.ReasoningContent
+		entries = append(entries, conversation.Content{
+			Type:             "reasoning_content",
+			ReasoningContent: &reasoningText,
+		})
+	}
+
+	if call := msg.FunctionCall; call != nil {
+		entries = append(entries, conversation.Content{
+			Type: "function_call",
+			FunctionCall: &conversation.FunctionCall{
+				Name:      call.Name,
+				Arguments: call.Arguments,
+			},
+		})
+	}
+
+	if len(msg.ToolCalls) > 0 {
+		calls := make([]conversation.ToolCall, 0, len(msg.ToolCalls))
+		for _, tc := range msg.ToolCalls {
+			calls = append(calls, conversation.ToolCall{
+				ID:   tc.ID,
+				Type: string(tc.Type),
+				Function: conversation.FunctionCall{
+					Name:      tc.Function.Name,
+					Arguments: tc.Function.Arguments,
+				},
+			})
+		}
+
+		entries = append(entries, conversation.Content{
+			Type:      "tool_calls",
+			ToolCalls: calls,
+		})
+	}
+
+	if msg.ToolCallID != "" {
+		callID := msg.ToolCallID
+
+		// Find the first entry that does not yet reference a tool call.
+		target := -1
+		for idx := range entries {
+			if entries[idx].ToolCallID == nil {
+				target = idx
+				break
+			}
+		}
+
+		if target >= 0 {
+			entries[target].ToolCallID = &callID
+		} else {
+			entries = append(entries, conversation.Content{
+				Type:       "tool_reference",
+				ToolCallID: &callID,
+			})
+		}
+	}
+
+	if len(entries) > 0 {
+		item.Content = entries
+	}
+
+	return item
+}
+
+// openAIRoleToItem maps an OpenAI chat message role onto a conversation item
+// role. System, user, assistant and tool map to their namesakes; any other
+// value maps to conversation.ItemRoleUnknown.
+func (h *ChatHandler) openAIRoleToItem(role string) conversation.ItemRole {
+	if role == openai.ChatMessageRoleUser {
+		return conversation.ItemRoleUser
+	}
+	if role == openai.ChatMessageRoleAssistant {
+		return conversation.ItemRoleAssistant
+	}
+	if role == openai.ChatMessageRoleSystem {
+		return conversation.ItemRoleSystem
+	}
+	if role == openai.ChatMessageRoleTool {
+		return conversation.ItemRoleTool
+	}
+	return conversation.ItemRoleUnknown
+}
+
+// writeSSEData writes one SSE data event ("data: <data>" followed by a blank
+// line) to the response writer and flushes it. The first write error is
+// returned immediately, in which case no flush is performed.
+func (h *ChatHandler) writeSSEData(reqCtx *gin.Context, data string) error {
+	// Field prefix, payload, then the blank line that terminates the event.
+	segments := [][]byte{
+		[]byte("data: "),
+		[]byte(data),
+		[]byte("\n\n"),
+	}
+	for _, segment := range segments {
+		if _, writeErr := reqCtx.Writer.Write(segment); writeErr != nil {
+			return writeErr
+		}
+	}
+	reqCtx.Writer.Flush()
+	return nil
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/chathandler/memory_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/chathandler/memory_handler.go
new file mode 100644
index 00000000..a3685b57
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/chathandler/memory_handler.go
@@ -0,0 +1,292 @@
+package chathandler
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	openai "github.com/sashabaranov/go-openai"
+	"go.opentelemetry.io/otel/attribute"
+
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/domain/usersettings"
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	memclient "jan-server/services/llm-api/internal/infrastructure/memory"
+	"jan-server/services/llm-api/internal/infrastructure/observability"
+)
+
+// MemoryHandler loads memory for chat prompts and submits finished exchanges for observation.
+type MemoryHandler struct {
+	memoryClient        *memclient.Client     // memory service client; a nil client disables load and observe
+	memoryEnabled       bool                  // application-level switch, checked before any user setting
+	userSettingsService *usersettings.Service // source of the per-user memory settings
+}
+
+// NewMemoryHandler returns a MemoryHandler holding the given client, switch and settings service.
+func NewMemoryHandler(
+	memoryClient *memclient.Client,
+	memoryEnabled bool,
+	userSettingsService *usersettings.Service,
+) *MemoryHandler {
+	handler := new(MemoryHandler)
+	handler.memoryClient = memoryClient
+	handler.memoryEnabled = memoryEnabled
+	handler.userSettingsService = userSettingsService
+	return handler
+}
+
+// LoadMemoryContext returns the memory strings to inject into the prompt of a conversation.
+// Nothing is loaded unless the application switch, the memory client, a conversation ID and
+// the user's own memory setting all allow it. Settings passed in are reused; nil settings are
+// fetched here. Failures are logged and reported as (nil, nil); the error result is always nil.
+func (m *MemoryHandler) LoadMemoryContext(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	conv *conversation.Conversation,
+	messages []openai.ChatCompletionMessage,
+	settings *usersettings.UserSettings,
+) ([]string, error) {
+	// Application-level gate.
+	appReady := m.memoryEnabled && m.memoryClient != nil && conversationID != ""
+	if !appReady {
+		return nil, nil
+	}
+
+	// Fetch the user's settings only when the caller did not supply them.
+	if settings == nil {
+		fetched, err := m.userSettingsService.GetOrCreateSettings(ctx, userID)
+		if err != nil {
+			log := logger.GetLogger()
+			log.Warn().Err(err).Uint("user_id", userID).Msg("failed to load user settings, memory disabled")
+			return nil, nil
+		}
+		settings = fetched
+	}
+
+	// User-level gate.
+	cfg := settings.MemoryConfig
+	if !cfg.Enabled {
+		return nil, nil
+	}
+
+	observability.AddSpanEvent(ctx, "loading_memories")
+	observability.AddSpanAttributes(ctx,
+		attribute.Bool("memory.app_enabled", m.memoryEnabled),
+		attribute.Bool("memory.user_enabled", cfg.Enabled),
+		attribute.Bool("memory.inject_user_core", cfg.InjectUserCore),
+		attribute.Bool("memory.inject_semantic", cfg.InjectSemantic),
+		attribute.Bool("memory.inject_episodic", cfg.InjectEpisodic),
+		attribute.Int("memory.max_user_items", cfg.MaxUserItems),
+	)
+
+	resp, loadErr := m.loadConversationMemory(ctx, userID, conversationID, conv, messages, settings)
+	if loadErr != nil {
+		log := logger.GetLogger()
+		log.Warn().Err(loadErr).Str("conversation_id", conversationID).Msg("failed to load memories, continuing without memory")
+		return nil, nil
+	}
+	if resp == nil {
+		return nil, nil
+	}
+
+	injected := m.formatAndFilterMemory(resp, settings)
+
+	observability.AddSpanEvent(ctx, "memories_loaded",
+		attribute.Int("core_memory_count", len(resp.CoreMemory)),
+		attribute.Int("episodic_memory_count", len(resp.EpisodicMemory)),
+		attribute.Int("semantic_memory_count", len(resp.SemanticMemory)),
+		attribute.Int("injected_memory_count", len(injected)),
+	)
+
+	return injected, nil
+}
+
+// ObserveConversation submits a finished chat exchange to the memory service for extraction.
+// It is best-effort: nothing is returned and failures are only logged. Observation is skipped
+// when memory is disabled (application config or user settings), when conv is nil, or when
+// the completion did not finish with the "stop" reason.
+func (m *MemoryHandler) ObserveConversation(
+	conv *conversation.Conversation,
+	userID uint,
+	messages []openai.ChatCompletionMessage,
+	response *openai.ChatCompletionResponse,
+	finishReason openai.FinishReason,
+) {
+	// Cheap checks first: application-level switch, a usable client, and a conversation to
+	// attribute the observation to (conv is dereferenced below, so nil must not get through).
+	if !m.memoryEnabled || m.memoryClient == nil || conv == nil {
+		return
+	}
+
+	// Only completed turns are observed; check before the settings lookup so dropped turns cost nothing.
+	if finishReason != openai.FinishReasonStop {
+		return
+	}
+
+	// No context is passed in, so work from a background context; the timeout bounds both
+	// the settings lookup and the observe call.
+	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Second)
+	defer cancel()
+
+	settings, err := m.userSettingsService.GetOrCreateSettings(ctx, userID)
+	if err != nil {
+		log := logger.GetLogger()
+		log.Warn().Err(err).Uint("user_id", userID).Msg("failed to load user settings for memory observation")
+		return
+	}
+
+	// Both the user-level memory switch and the observe switch must be on.
+	if !settings.MemoryConfig.Enabled || !settings.MemoryConfig.ObserveEnabled {
+		return
+	}
+
+	conversationItems := buildMemoryConversationItems(messages, response)
+	if len(conversationItems) == 0 {
+		return
+	}
+
+	req := memclient.ObserveRequest{
+		UserID:         fmt.Sprintf("%d", userID),
+		ConversationID: conv.PublicID,
+		Messages:       conversationItems,
+	}
+	if conv.ProjectPublicID != nil {
+		req.ProjectID = *conv.ProjectPublicID
+	}
+
+	if err := m.memoryClient.Observe(ctx, req); err != nil {
+		log := logger.GetLogger()
+		log.Warn().
+			Err(err).
+			Str("conversation_id", conv.PublicID).
+			Uint("user_id", userID).
+			Msg("failed to observe conversation for memory extraction")
+	}
+}
+
+// loadConversationMemory asks the memory-tools service for memory relevant to a conversation.
+// Non-positive limits in the user's settings fall back to defaults (3 user, 5 project and
+// 3 episodic items, minimum similarity 0.5); the query comes from extractQueryFromMessages.
+func (m *MemoryHandler) loadConversationMemory(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	conv *conversation.Conversation,
+	messages []openai.ChatCompletionMessage,
+	settings *usersettings.UserSettings,
+) (*memclient.LoadResponse, error) {
+	// Nothing to do when the context has already ended.
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+
+	opts := memclient.LoadOptions{
+		MaxUserItems:     settings.MemoryConfig.MaxUserItems,
+		MaxProjectItems:  settings.MemoryConfig.MaxProjectItems,
+		MaxEpisodicItems: settings.MemoryConfig.MaxEpisodicItems,
+		MinSimilarity:    settings.MemoryConfig.MinSimilarity,
+	}
+	// Replace non-positive values with the defaults.
+	if opts.MaxUserItems <= 0 {
+		opts.MaxUserItems = 3
+	}
+	if opts.MaxProjectItems <= 0 {
+		opts.MaxProjectItems = 5
+	}
+	if opts.MaxEpisodicItems <= 0 {
+		opts.MaxEpisodicItems = 3
+	}
+	if opts.MinSimilarity <= 0 {
+		opts.MinSimilarity = 0.5
+	}
+
+	req := memclient.LoadRequest{
+		UserID:         fmt.Sprintf("%d", userID),
+		ConversationID: conversationID,
+		Query:          extractQueryFromMessages(messages),
+		Options:        opts,
+	}
+
+	if conv != nil && conv.ProjectPublicID != nil {
+		req.ProjectID = *conv.ProjectPublicID
+	}
+
+	return m.memoryClient.Load(ctx, req)
+}
+
+// formatAndFilterMemory renders loaded memory as prompt strings: core, then semantic, then
+// episodic, each only when its Inject* setting is on. Blank texts are dropped; nil resp gives nil.
+func (m *MemoryHandler) formatAndFilterMemory(resp *memclient.LoadResponse, settings *usersettings.UserSettings) []string {
+	if resp == nil {
+		return nil
+	}
+
+	lines := []string{}
+
+	if settings.MemoryConfig.InjectUserCore { // core memory (user preferences)
+		for _, core := range resp.CoreMemory {
+			if strings.TrimSpace(core.Text) == "" {
+				continue
+			}
+			lines = append(lines, fmt.Sprintf("User memory: %s", core.Text))
+		}
+	}
+
+	if settings.MemoryConfig.InjectSemantic { // semantic memory (project facts)
+		for _, fact := range resp.SemanticMemory {
+			switch {
+			case strings.TrimSpace(fact.Text) == "": // blank fact: nothing to add
+			case strings.TrimSpace(fact.Title) != "":
+				lines = append(lines, fmt.Sprintf("Project fact - %s: %s", fact.Title, fact.Text))
+			default:
+				lines = append(lines, fmt.Sprintf("Project fact: %s", fact.Text))
+			}
+		}
+	}
+
+	if settings.MemoryConfig.InjectEpisodic { // episodic memory (conversation history)
+		for _, event := range resp.EpisodicMemory {
+			if strings.TrimSpace(event.Text) == "" {
+				continue
+			}
+			lines = append(lines, fmt.Sprintf("Recent event: %s", event.Text))
+		}
+	}
+
+	return lines
+}
+
+// extractQueryFromMessages returns the content of the latest non-blank user message, or "".
+func extractQueryFromMessages(messages []openai.ChatCompletionMessage) string {
+	for idx := len(messages); idx > 0; idx-- {
+		if msg := messages[idx-1]; msg.Role == openai.ChatMessageRoleUser && strings.TrimSpace(msg.Content) != "" {
+			return msg.Content
+		}
+	}
+	return ""
+}
+
+// buildMemoryConversationItems converts the request messages, plus the first choice of the
+// response when there is one, into memory client items stamped with the current time.
+func buildMemoryConversationItems(messages []openai.ChatCompletionMessage, response *openai.ChatCompletionResponse) []memclient.ConversationItem {
+	items := make([]memclient.ConversationItem, 0, len(messages)+1)
+	push := func(role, content string) {
+		items = append(items, memclient.ConversationItem{
+			Role:      role,
+			Content:   content,
+			CreatedAt: time.Now(),
+		})
+	}
+
+	for i := range messages {
+		push(string(messages[i].Role), messages[i].Content)
+	}
+	if response == nil || len(response.Choices) == 0 {
+		return items
+	}
+
+	push("assistant", response.Choices[0].Message.Content)
+	return items
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler/conversation_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler/conversation_handler.go
new file mode 100644
index 00000000..57722cb5
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler/conversation_handler.go
@@ -0,0 +1,412 @@
+package conversationhandler
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/llm-api/internal/domain/conversation"
+	"jan-server/services/llm-api/internal/domain/project"
+	"jan-server/services/llm-api/internal/domain/query"
+	authhandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	conversationrequests "jan-server/services/llm-api/internal/interfaces/httpserver/requests/conversation"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	conversationresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/conversation"
+	"jan-server/services/llm-api/internal/utils/idgen"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ConversationContextKey names a value that ConversationMiddleware reads or stores.
+type ConversationContextKey string
+
+const (
+	ConversationContextKeyPublicID ConversationContextKey = "conv_public_id"            // route parameter holding the conversation public ID
+	ConversationContextEntity      ConversationContextKey = "ConversationContextEntity" // gin context key under which the loaded conversation is stored
+)
+
+// ConversationHandler implements the conversation and conversation-item operations of the API.
+type ConversationHandler struct {
+	conversationService *conversation.ConversationService // conversation and item lookups and mutations
+	projectService      *project.ProjectService           // resolves project_id when a conversation is created
+	itemValidator       *conversation.ItemValidator       // validates items before they are added
+}
+
+// NewConversationHandler wires the handler to its two services and equips it with an item
+// validator built from conversation.DefaultItemValidationConfig().
+func NewConversationHandler(
+	conversationService *conversation.ConversationService,
+	projectService *project.ProjectService,
+) *ConversationHandler {
+	h := &ConversationHandler{conversationService: conversationService, projectService: projectService}
+	h.itemValidator = conversation.NewItemValidator(conversation.DefaultItemValidationConfig())
+
+	return h
+}
+
+// CreateConversation validates the request items, resolves the optional project and creates
+// the conversation for userID, then stores the initial items on it.
+func (h *ConversationHandler) CreateConversation(
+	ctx context.Context,
+	userID uint,
+	req conversationrequests.CreateConversationRequest,
+) (*conversationresponses.ConversationResponse, error) {
+	// At most 20 initial items are accepted (per OpenAI spec).
+	if len(req.Items) > 20 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation,
+			"cannot add more than 20 items at a time", nil, "items")
+	}
+
+	// Stop at the first invalid item, before anything is created.
+	for idx := range req.Items {
+		if err := h.itemValidator.ValidateItem(req.Items[idx]); err != nil {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation,
+				fmt.Sprintf("item validation failed at index %d", idx), err, fmt.Sprintf("items[%d]", idx))
+		}
+	}
+
+	// A non-empty project_id must name a project this user can access.
+	var (
+		projectRef      *uint
+		projectPublicID *string
+	)
+	if req.ProjectID != nil && *req.ProjectID != "" {
+		proj, err := h.projectService.GetProjectByPublicIDAndUserID(ctx, *req.ProjectID, userID)
+		if err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "invalid or inaccessible project_id")
+		}
+		projectRef, projectPublicID = &proj.ID, &proj.PublicID
+	}
+
+	conv, err := h.conversationService.CreateConversationWithInput(ctx, conversation.CreateConversationInput{
+		UserID:          userID,
+		Title:           req.Title,
+		Metadata:        req.Metadata,
+		Referrer:        req.Referrer,
+		ProjectID:       projectRef,
+		ProjectPublicID: projectPublicID,
+	})
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to create conversation")
+	}
+
+	// Without initial items the new conversation is returned as is.
+	if len(req.Items) == 0 {
+		return conversationresponses.NewConversationResponse(conv), nil
+	}
+
+	if err := h.addItemsToConversation(ctx, conv, req.Items); err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to add items")
+	}
+
+	return conversationresponses.NewConversationResponse(conv), nil
+}
+
+// GetConversation looks up the conversation with the given public ID on behalf of userID.
+func (h *ConversationHandler) GetConversation(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+) (*conversationresponses.ConversationResponse, error) {
+	found, lookupErr := h.conversationService.GetConversationByPublicIDAndUserID(ctx, conversationID, userID)
+	if lookupErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, lookupErr, "failed to get conversation")
+	}
+
+	return conversationresponses.NewConversationResponse(found), nil
+}
+
+// ResolveConversationPublicIDToNumericID translates a conversation's public ID into its
+// numeric ID. Cursor-based pagination needs this: the API exposes public IDs while the
+// underlying pagination works on numeric IDs. The conversation must belong to userID.
+func (h *ConversationHandler) ResolveConversationPublicIDToNumericID(
+	ctx context.Context,
+	userID uint,
+	publicID string,
+) (*uint, error) {
+	found, lookupErr := h.conversationService.GetConversationByPublicIDAndUserID(ctx, publicID, userID)
+	if lookupErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, lookupErr, "failed to resolve conversation ID")
+	}
+	return &found.ID, nil
+}
+
+// UpdateConversation applies the title, metadata and referrer from the request to the
+// conversation identified by conversationID on behalf of userID.
+func (h *ConversationHandler) UpdateConversation(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	req conversationrequests.UpdateConversationRequest,
+) (*conversationresponses.ConversationResponse, error) {
+	updated, err := h.conversationService.UpdateConversationWithInput(ctx, userID, conversationID,
+		conversation.UpdateConversationInput{
+			Title:    req.Title,
+			Metadata: req.Metadata,
+			Referrer: req.Referrer,
+		})
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to update conversation")
+	}
+
+	return conversationresponses.NewConversationResponse(updated), nil
+}
+
+// ListConversations lists conversations matching the optional user and referrer filters.
+// When a limit is set, one extra row is requested so hasMore can be derived; the caller's
+// pagination.Limit is put back before returning.
+func (h *ConversationHandler) ListConversations(
+	ctx context.Context,
+	userID *uint,
+	referrer *string,
+	pagination *query.Pagination,
+) (*conversationresponses.ConversationListResponse, error) {
+	filter := conversation.ConversationFilter{}
+
+	if userID != nil {
+		filter.UserID = userID
+	}
+
+	if referrer != nil && *referrer != "" {
+		filter.Referrer = referrer
+	}
+
+	// Fetch limit+1 rows so an extra row signals another page. The limit lives behind the
+	// caller's pointer, so the original is restored on every return path instead of leaking
+	// the bumped value back to the caller.
+	var requestedLimit *int
+	if pagination != nil && pagination.Limit != nil {
+		requestedLimit = pagination.Limit
+		extraLimit := *requestedLimit + 1
+		pagination.Limit = &extraLimit
+		defer func() { pagination.Limit = requestedLimit }()
+	}
+
+	conversations, total, err := h.conversationService.FindConversationsByFilter(ctx, filter, pagination)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to list conversations")
+	}
+
+	// More rows than requested means there is a further page; drop the extra row.
+	hasMore := false
+	if requestedLimit != nil && len(conversations) > *requestedLimit {
+		hasMore = true
+		conversations = conversations[:*requestedLimit]
+	}
+
+	return conversationresponses.NewConversationListResponse(conversations, hasMore, total), nil
+}
+
+// DeleteConversation deletes the conversation on behalf of userID and echoes its ID back.
+func (h *ConversationHandler) DeleteConversation(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+) (*conversationresponses.ConversationDeletedResponse, error) {
+	err := h.conversationService.DeleteConversationByID(ctx, userID, conversationID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to delete conversation")
+	}
+	return conversationresponses.NewConversationDeletedResponse(conversationID), nil
+}
+
+// ListItems returns the items on the active branch of a conversation belonging to userID,
+// paged according to pagination.
+func (h *ConversationHandler) ListItems(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	pagination *query.Pagination,
+) ([]conversation.Item, error) {
+	// The user-scoped lookup doubles as the ownership check.
+	owned, err := h.conversationService.GetConversationByPublicIDAndUserID(ctx, conversationID, userID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get conversation")
+	}
+
+	branchItems, err := h.conversationService.GetConversationItems(ctx, owned, owned.ActiveBranch, pagination)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to list items")
+	}
+
+	return branchItems, nil
+}
+
+// ResolveItemPublicIDToNumericID translates an item's public ID into its numeric ID.
+// Cursor-based pagination needs this: the API exposes public IDs while the underlying
+// pagination works on numeric IDs. The conversation must belong to userID.
+func (h *ConversationHandler) ResolveItemPublicIDToNumericID(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	itemPublicID string,
+) (*uint, error) {
+	owned, err := h.conversationService.GetConversationByPublicIDAndUserID(ctx, conversationID, userID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get conversation")
+	}
+
+	resolved, err := h.conversationService.GetConversationItem(ctx, owned, itemPublicID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to resolve item ID")
+	}
+
+	return &resolved.ID, nil
+}
+
+// CreateItems validates up to 20 items and adds them to the conversation's active branch.
+func (h *ConversationHandler) CreateItems(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	req conversationrequests.CreateItemsRequest,
+) (*conversationresponses.ConversationItemCreatedResponse, error) {
+	// The user-scoped lookup doubles as the ownership check.
+	owned, err := h.conversationService.GetConversationByPublicIDAndUserID(ctx, conversationID, userID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get conversation")
+	}
+
+	// Enforce the 20-item cap, then validate every item, before anything is written.
+	if len(req.Items) > 20 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation,
+			"cannot add more than 20 items at a time", nil, "items")
+	}
+
+	for idx := range req.Items {
+		if err := h.itemValidator.ValidateItem(req.Items[idx]); err != nil {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation,
+				fmt.Sprintf("item validation failed at index %d", idx), err, fmt.Sprintf("items[%d]", idx))
+		}
+	}
+
+	created, err := h.conversationService.AddItemsToConversation(ctx, owned, owned.ActiveBranch, req.Items)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to add items")
+	}
+
+	return conversationresponses.NewConversationItemCreatedResponse(created), nil
+}
+
+// GetItem returns one item, identified by itemID, from a conversation that belongs to
+// userID.
+func (h *ConversationHandler) GetItem(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	itemID string,
+) (*conversationresponses.ItemResponse, error) {
+	// The user-scoped lookup doubles as the ownership check.
+	owned, err := h.conversationService.GetConversationByPublicIDAndUserID(ctx, conversationID, userID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get conversation")
+	}
+
+	found, err := h.conversationService.GetConversationItem(ctx, owned, itemID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get item")
+	}
+
+	return found, nil
+}
+
+// DeleteItem removes one item, identified by itemID, from a conversation that belongs to
+// userID and returns the conversation.
+func (h *ConversationHandler) DeleteItem(
+	ctx context.Context,
+	userID uint,
+	conversationID string,
+	itemID string,
+) (*conversationresponses.ConversationResponse, error) {
+	owned, err := h.conversationService.GetConversationByPublicIDAndUserID(ctx, conversationID, userID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get conversation")
+	}
+
+	err = h.conversationService.DeleteConversationItem(ctx, owned, itemID)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to delete item")
+	}
+
+	// Per OpenAI spec the conversation is returned; this is the value loaded before the delete.
+	return conversationresponses.NewConversationResponse(owned), nil
+}
+
+// Helper functions
+
+// addItemsToConversation gives every item without a public ID a generated "msg" ID, marks each
+// item as a "conversation.item" (both in the caller's slice) and adds them to the MAIN branch.
+func (h *ConversationHandler) addItemsToConversation(ctx context.Context, conv *conversation.Conversation, items []conversation.Item) error {
+	if len(items) == 0 {
+		return nil
+	}
+
+	for idx := range items {
+		entry := &items[idx]
+		if entry.PublicID == "" {
+			generated, err := idgen.GenerateSecureID("msg", 16)
+			if err != nil {
+				return platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to generate item ID")
+			}
+			entry.PublicID = generated
+		}
+		entry.Object = "conversation.item"
+	}
+
+	// Items are always added to the default branch (MAIN) here, whatever the conversation's
+	// active branch is.
+	if _, err := h.conversationService.AddItemsToConversation(ctx, conv, conversation.BranchMain, items); err != nil {
+		return platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to add items to conversation")
+	}
+	return nil
+}
+
+// ===============================================
+// Middleware Functions
+// ===============================================
+// ConversationMiddleware loads the conversation named by the route parameter for the
+// authenticated user and stores it in the gin context; on failure the chain is aborted.
+func (h *ConversationHandler) ConversationMiddleware() gin.HandlerFunc {
+	return func(reqCtx *gin.Context) {
+		publicID := reqCtx.Param(string(ConversationContextKeyPublicID))
+		if publicID == "" {
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "missing conversation public ID", "")
+			reqCtx.Abort() // keeps later handlers from running; a no-op if the helper already aborted
+			return
+		}
+
+		user, ok := authhandler.GetUserFromContext(reqCtx)
+		if !ok {
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "")
+			reqCtx.Abort()
+			return
+		}
+
+		conv, err := h.conversationService.GetConversationByPublicIDAndUserID(reqCtx.Request.Context(), publicID, user.ID)
+		if err != nil {
+			responses.HandleError(reqCtx, err, "Failed to retrieve conversation")
+			reqCtx.Abort()
+			return
+		}
+		SetConversationToContext(reqCtx, conv)
+		reqCtx.Next()
+	}
+}
+
+// SetConversationToContext stores conv on the gin context under the ConversationContextEntity key.
+func SetConversationToContext(reqCtx *gin.Context, conv *conversation.Conversation) {
+	reqCtx.Set(string(ConversationContextEntity), conv)
+}
+
+// GetConversationFromContext returns the conversation stored on the gin context; the
+// boolean is false when nothing is stored or the stored value has another type.
+func GetConversationFromContext(reqCtx *gin.Context) (*conversation.Conversation, bool) {
+	stored, found := reqCtx.Get(string(ConversationContextEntity))
+	if !found {
+		return nil, false
+	}
+	if conv, isConv := stored.(*conversation.Conversation); isConv {
+		return conv, true
+	}
+	return nil, false
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/guesthandler/guest_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/guesthandler/guest_handler.go
new file mode 100644
index 00000000..57d22665
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/guesthandler/guest_handler.go
@@ -0,0 +1,93 @@
+package guestauth
+
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/domain"
+	"jan-server/services/llm-api/internal/infrastructure/keycloak"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/middlewares"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+)
+
+const (
+	// RefreshTokenCookieName is the name of the cookie that stores the refresh token. It is not referenced elsewhere in this file.
+	RefreshTokenCookieName = "refresh_token"
+)
+
+// GuestHandler handles guest authentication flows (see CreateGuest).
+type GuestHandler struct {
+	kc     *keycloak.Client // Keycloak client used to create guest users
+	logger zerolog.Logger   // receives guest provisioning failures
+}
+
+// NewGuestHandler returns a GuestHandler that uses the given Keycloak client and logger.
+func NewGuestHandler(kc *keycloak.Client, logger zerolog.Logger) *GuestHandler {
+	return &GuestHandler{kc: kc, logger: logger}
+}
+
+// UpgradeHandler handles user upgrade flows (see Upgrade).
+type UpgradeHandler struct {
+	kc     *keycloak.Client // Keycloak client used to upgrade the user
+	logger zerolog.Logger   // receives upgrade failures
+}
+
+// NewUpgradeHandler returns an UpgradeHandler that uses the given Keycloak client and logger.
+func NewUpgradeHandler(kc *keycloak.Client, logger zerolog.Logger) *UpgradeHandler {
+	return &UpgradeHandler{kc: kc, logger: logger}
+}
+
+// CreateGuest handles POST /auth/guest-login requests. The new guest's identifiers and
+// tokens are returned in the JSON body (token-based authentication, not cookies).
+func (h *GuestHandler) CreateGuest(c *gin.Context) {
+	guest, err := h.kc.CreateGuest(c.Request.Context())
+	if err != nil {
+		h.logger.Error().Err(err).Msg("create guest user")
+		responses.HandleErrorWithStatus(c, http.StatusBadGateway, err, "failed to provision guest")
+		return
+	}
+
+	body := gin.H{
+		"user_id":       guest.UserID,
+		"username":      guest.Username,
+		"principal_id":  guest.PrincipalID,
+		"access_token":  guest.Tokens.AccessToken,
+		"refresh_token": guest.Tokens.RefreshToken,
+		"token_type":    guest.Tokens.TokenType,
+		"expires_in":    guest.Tokens.ExpiresIn,
+	}
+	c.JSON(http.StatusCreated, body)
+}
+
+// Upgrade processes POST /auth/upgrade for the principal attached to the request.
+func (h *UpgradeHandler) Upgrade(c *gin.Context) {
+	principal, ok := middlewares.PrincipalFromContext(c)
+	if !ok || principal.ID == "" {
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, nil, "principal missing")
+		return
+	}
+
+	var payload keycloak.UpgradePayload
+	if err := c.ShouldBindJSON(&payload); err != nil {
+		responses.HandleErrorWithStatus(c, http.StatusBadRequest, err, "invalid payload")
+		return
+	}
+
+	// Log the identifier actually sent to Keycloak; principal.Subject alone may be empty.
+	subject := subjectFromPrincipal(principal)
+	if err := h.kc.UpgradeUser(c.Request.Context(), subject, payload); err != nil {
+		h.logger.Error().Err(err).Str("subject", subject).Msg("upgrade user failed")
+		responses.HandleErrorWithStatus(c, http.StatusBadGateway, err, "failed to upgrade user")
+		return
+	}
+	c.JSON(http.StatusOK, gin.H{"status": "upgraded"})
+}
+
+func subjectFromPrincipal(p domain.Principal) string { // returns p.Subject, or p.ID when Subject is empty
+	if p.Subject == "" {
+		return p.ID
+	}
+	return p.Subject
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/handlerprovider.go b/services/llm-api/internal/interfaces/httpserver/handlers/handlerprovider.go
new file mode 100644
index 00000000..45309de5
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/handlerprovider.go
@@ -0,0 +1,39 @@
+package handlers
+
+import (
+	"github.com/google/wire"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/domain/usersettings"
+	"jan-server/services/llm-api/internal/infrastructure/memory"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/apikeyhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/chathandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler"
+	guestauth "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/guesthandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+)
+
+// ProvideMemoryHandler builds the chat MemoryHandler, passing cfg.MemoryEnabled as its application-level switch.
+func ProvideMemoryHandler(
+	memoryClient *memory.Client,
+	cfg *config.Config,
+	userSettingsService *usersettings.Service,
+) *chathandler.MemoryHandler {
+	return chathandler.NewMemoryHandler(memoryClient, cfg.MemoryEnabled, userSettingsService)
+}
+
+var HandlerProvider = wire.NewSet( // Wire provider set listing the handler constructors
+	authhandler.NewAuthHandler,
+	authhandler.NewTokenHandler,
+	apikeyhandler.NewHandler,
+	guestauth.NewGuestHandler,
+	guestauth.NewUpgradeHandler,
+	ProvideMemoryHandler, // wraps chathandler.NewMemoryHandler with the value of cfg.MemoryEnabled
+	chathandler.NewChatHandler,
+	conversationhandler.NewConversationHandler,
+	modelhandler.NewModelHandler,
+	modelhandler.NewProviderHandler,
+	modelhandler.NewModelCatalogHandler,
+	modelhandler.NewProviderModelHandler,
+)
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/model_catalog_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/model_catalog_handler.go
new file mode 100644
index 00000000..74feacdc
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/model_catalog_handler.go
@@ -0,0 +1,252 @@
+package modelhandler
+
+import (
+	"context"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/domain/query"
+	requestmodels "jan-server/services/llm-api/internal/interfaces/httpserver/requests/models"
+	modelresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/model"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ModelCatalogHandler serves model-catalog operations (get, update, list and
+// bulk toggle) on top of the domain services it holds.
+type ModelCatalogHandler struct {
+	// modelCatalogService reads and writes model catalog entries.
+	modelCatalogService  *domainmodel.ModelCatalogService
+	// providerModelService is used by BulkToggleCatalogs to count and
+	// deactivate the provider models attached to a catalog.
+	providerModelService *domainmodel.ProviderModelService
+}
+
+// NewModelCatalogHandler returns a ModelCatalogHandler wired to the given
+// catalog and provider-model services.
+func NewModelCatalogHandler(
+	modelCatalogService *domainmodel.ModelCatalogService,
+	providerModelService *domainmodel.ProviderModelService,
+) *ModelCatalogHandler {
+	handler := new(ModelCatalogHandler)
+	handler.modelCatalogService = modelCatalogService
+	handler.providerModelService = providerModelService
+	return handler
+}
+
+// GetCatalog looks up the model catalog identified by publicID and returns its
+// response representation. An empty publicID produces a validation error; a
+// failed lookup is returned as a handler-layer error.
+func (h *ModelCatalogHandler) GetCatalog(ctx context.Context, publicID string) (*modelresponses.ModelCatalogResponse, error) {
+	if len(publicID) == 0 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "model catalog public ID is required", nil, "c9076125-ba1b-496d-b55f-c1711af98eaa")
+	}
+
+	found, lookupErr := h.modelCatalogService.FindByPublicID(ctx, publicID)
+	if lookupErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, lookupErr, "failed to get model catalog")
+	}
+
+	body := modelresponses.BuildModelCatalogResponse(found)
+	return &body, nil
+}
+
+// UpdateCatalog applies the non-nil fields of req to the catalog identified by
+// publicID, sets its status to ModelCatalogStatusUpdated, persists it and
+// returns the stored entry as a response. An empty publicID produces a
+// validation error.
+func (h *ModelCatalogHandler) UpdateCatalog(
+	ctx context.Context,
+	publicID string,
+	req requestmodels.UpdateModelCatalogRequest,
+) (*modelresponses.ModelCatalogResponse, error) {
+	if len(publicID) == 0 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "model catalog public ID is required", nil, "b371fe0d-7c3e-41b2-b98f-fb7b2b6cf54a")
+	}
+
+	target, findErr := h.modelCatalogService.FindByPublicID(ctx, publicID)
+	if findErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, findErr, "failed to get model catalog")
+	}
+
+	// Patch semantics: a nil request field leaves the stored value untouched.
+	if params := req.SupportedParameters; params != nil {
+		target.SupportedParameters = *params
+	}
+	if arch := req.Architecture; arch != nil {
+		target.Architecture = *arch
+	}
+	if tags := req.Tags; tags != nil {
+		target.Tags = *tags
+	}
+	if notes := req.Notes; notes != nil {
+		target.Notes = notes
+	}
+	if moderated := req.IsModerated; moderated != nil {
+		target.IsModerated = moderated
+	}
+	if extras := req.Extras; extras != nil {
+		target.Extras = *extras
+	}
+
+	// Flag the entry as manually updated; per the original note this keeps
+	// auto-sync from overwriting the admin's changes.
+	target.Status = domainmodel.ModelCatalogStatusUpdated
+
+	saved, saveErr := h.modelCatalogService.Update(ctx, target)
+	if saveErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, saveErr, "failed to update model catalog")
+	}
+
+	body := modelresponses.BuildModelCatalogResponse(saved)
+	return &body, nil
+}
+
+// ListCatalogs returns one page of model catalogs matching filterParams along
+// with the total number of catalogs matching the same filter. The count is
+// taken before the page is fetched.
+func (h *ModelCatalogHandler) ListCatalogs(
+	ctx context.Context,
+	filterParams requestmodels.ModelCatalogFilterParams,
+	pagination *query.Pagination,
+) ([]modelresponses.ModelCatalogResponse, int64, error) {
+	// Pointer filters are copied as-is: nil means "not filtered on".
+	filter := domainmodel.ModelCatalogFilter{
+		IsModerated: filterParams.IsModerated,
+		Active:      filterParams.Active,
+	}
+	if rawStatus := filterParams.Status; rawStatus != nil {
+		converted := domainmodel.ModelCatalogStatus(*rawStatus)
+		filter.Status = &converted
+	}
+
+	matching, countErr := h.modelCatalogService.Count(ctx, filter)
+	if countErr != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerHandler, countErr, "failed to count model catalogs")
+	}
+
+	page, listErr := h.modelCatalogService.FindByFilter(ctx, filter, pagination)
+	if listErr != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerHandler, listErr, "failed to list model catalogs")
+	}
+
+	responses := make([]modelresponses.ModelCatalogResponse, 0, len(page))
+	for i := range page {
+		responses = append(responses, modelresponses.BuildModelCatalogResponse(page[i]))
+	}
+
+	return responses, matching, nil
+}
+
+// BulkToggleCatalogs sets the active flag of several model catalogs at once.
+//
+// Catalog selection: when req.CatalogIDs is non-empty those public IDs are
+// resolved and every one of them must be found, otherwise a not-found error is
+// returned; when it is empty, every catalog returned by an unfiltered
+// FindByFilter is targeted.
+//
+// Provider models: when disabling, the provider models attached to each
+// targeted catalog are deactivated too, except those whose ModelPublicID is
+// listed in req.ExceptModels (counted as skipped). When enabling, provider
+// models are left untouched and are all counted as checked and skipped.
+//
+// In the response, UpdatedCount is the sum of updated catalogs and updated
+// provider models, while SkippedCount and TotalChecked count provider models
+// only. FailedCount is always 0 because any failure aborts with an error.
+func (h *ModelCatalogHandler) BulkToggleCatalogs(ctx context.Context, req requestmodels.BulkToggleCatalogsRequest) (*modelresponses.BulkOperationResponse, error) {
+	// Validate and normalize request
+	if req.Enable == nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "enable field is required", nil, "6080cd28-097d-4ba1-b740-9e50a1345461")
+	}
+	req.Normalize()
+
+	var catalogIDs []uint
+	// Set of model public IDs that must keep their current active state.
+	exceptModelKeys := make(map[string]bool)
+	for _, key := range req.ExceptModels {
+		exceptModelKeys[key] = true
+	}
+
+	if len(req.CatalogIDs) > 0 {
+		// Use batch method to fetch all catalogs in a single query
+		catalogsMap, err := h.modelCatalogService.FindByPublicIDs(ctx, req.CatalogIDs)
+		if err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to find catalogs")
+		}
+
+		// Validate all requested catalog IDs were found
+		// NOTE(review): this compares lengths, so it assumes req.CatalogIDs has
+		// no duplicates after Normalize — confirm.
+		if len(catalogsMap) != len(req.CatalogIDs) {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "some catalog(s) not found", nil, "e8b4a9b6-dc74-4b53-9ca5-9652bbd96b80")
+		}
+		// Extract the internal IDs of the resolved catalogs
+		catalogIDs = make([]uint, 0, len(catalogsMap))
+		for _, catalog := range catalogsMap {
+			catalogIDs = append(catalogIDs, catalog.ID)
+		}
+	} else {
+		// No explicit selection: target every catalog (empty filter, no pagination).
+		allCatalogs, err := h.modelCatalogService.FindByFilter(ctx, domainmodel.ModelCatalogFilter{}, nil)
+		if err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to retrieve all catalogs")
+		}
+		catalogIDs = make([]uint, 0, len(allCatalogs))
+		for _, catalog := range allCatalogs {
+			catalogIDs = append(catalogIDs, catalog.ID)
+		}
+	}
+
+	// Nothing to toggle: report an all-zero result instead of issuing updates.
+	if len(catalogIDs) == 0 {
+		return &modelresponses.BulkOperationResponse{
+			UpdatedCount: 0,
+			SkippedCount: 0,
+			TotalChecked: 0,
+		}, nil
+	}
+
+	enableValue := *req.Enable
+
+	// Step 1: flip the active flag on the catalogs themselves.
+	catalogFilter := domainmodel.ModelCatalogFilter{
+		IDs: &catalogIDs,
+	}
+	catalogsUpdated, err := h.modelCatalogService.BatchUpdateActive(ctx, catalogFilter, enableValue)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to batch update catalogs")
+	}
+
+	var modelsUpdated int64
+	var totalModelsChecked int64
+	var skippedCount int64
+
+	// Step 2: handle the provider models attached to each catalog.
+	if !enableValue {
+		// Disabling cascades to provider models, one catalog at a time.
+		for _, catalogID := range catalogIDs {
+			filter := domainmodel.ProviderModelFilter{
+				ModelCatalogID: &catalogID,
+			}
+
+			count, err := h.providerModelService.Count(ctx, filter)
+			if err != nil {
+				return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to count provider models for catalog")
+			}
+			totalModelsChecked += count
+
+			if len(exceptModelKeys) > 0 {
+				// With exceptions, load the models so excluded ones can be
+				// filtered out before updating by explicit ID.
+				allModels, err := h.providerModelService.FindByFilter(ctx, filter)
+				if err != nil {
+					return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to retrieve provider models for catalog")
+				}
+
+				idsToUpdate := make([]uint, 0)
+				for _, pm := range allModels {
+					if exceptModelKeys[pm.ModelPublicID] {
+						skippedCount++
+						continue
+					}
+					idsToUpdate = append(idsToUpdate, pm.ID)
+				}
+
+				if len(idsToUpdate) > 0 {
+					updateFilter := domainmodel.ProviderModelFilter{
+						IDs: &idsToUpdate,
+					}
+					updated, err := h.providerModelService.BatchUpdateActive(ctx, updateFilter, false)
+					if err != nil {
+						return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to batch update provider models")
+					}
+					modelsUpdated += updated
+				}
+			} else {
+				// No exceptions: deactivate everything attached to the catalog.
+				updated, err := h.providerModelService.BatchUpdateActive(ctx, filter, false)
+				if err != nil {
+					return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to batch update provider models")
+				}
+				modelsUpdated += updated
+			}
+		}
+	} else {
+		// Enabling does not cascade: provider models are only counted, and
+		// every counted model is reported as skipped.
+		for _, catalogID := range catalogIDs {
+			filter := domainmodel.ProviderModelFilter{
+				ModelCatalogID: &catalogID,
+			}
+			count, err := h.providerModelService.Count(ctx, filter)
+			if err != nil {
+				return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to count provider models")
+			}
+			totalModelsChecked += count
+			skippedCount += count
+		}
+	}
+
+	return &modelresponses.BulkOperationResponse{
+		UpdatedCount: int(catalogsUpdated + modelsUpdated),
+		SkippedCount: int(skippedCount),
+		FailedCount:  0,
+		TotalChecked: int(totalModelsChecked),
+	}, nil
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/model_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/model_handler.go
new file mode 100644
index 00000000..10f6ef1c
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/model_handler.go
@@ -0,0 +1,155 @@
+package modelhandler
+
+import (
+	"context"
+	"sort"
+	"strings"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+)
+
+// ModelHandler assembles the set of models reachable through active providers
+// and merges duplicates of the same model offered by several providers.
+type ModelHandler struct {
+	// provider lists the active providers.
+	provider             *domainmodel.ProviderService
+	// providerModelService lists the active provider models of those providers.
+	providerModelService *domainmodel.ProviderModelService
+}
+
+// NewModelHandler returns a ModelHandler wired to the given provider and
+// provider-model services.
+func NewModelHandler(
+	provider *domainmodel.ProviderService,
+	providerModelService *domainmodel.ProviderModelService,
+) *ModelHandler {
+	handler := new(ModelHandler)
+	handler.provider = provider
+	handler.providerModelService = providerModelService
+	return handler
+}
+
+// BuildAccessibleProviderModels loads every active provider and the active
+// provider models that belong to them, and returns both lists together.
+// Service errors are returned unchanged.
+func (modelHandler *ModelHandler) BuildAccessibleProviderModels(ctx context.Context) (*domainmodel.AccessibleModels, error) {
+	activeProviders, err := modelHandler.provider.FindAllActiveProviders(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	// Collect provider IDs, ignoring nil entries in the provider list.
+	ids := make([]uint, 0, len(activeProviders))
+	for i := range activeProviders {
+		if current := activeProviders[i]; current != nil {
+			ids = append(ids, current.ID)
+		}
+	}
+
+	activeModels, err := modelHandler.providerModelService.ListActiveProviderByIDs(ctx, ids)
+	if err != nil {
+		return nil, err
+	}
+
+	return &domainmodel.AccessibleModels{
+		Providers:      activeProviders,
+		ProviderModels: activeModels,
+	}, nil
+}
+
+// modelAggregate is the per-model working record used by MergeModels and
+// shouldReplaceModel to decide which provider's copy of a model is kept.
+type modelAggregate struct {
+	// response is a copy of the provider model this record was built from.
+	response      domainmodel.ProviderModel
+	// providerKind is the Kind of the provider offering the model.
+	providerKind  domainmodel.ProviderKind
+	// hasPricing is false when the model has no pricing lines.
+	hasPricing    bool
+	// cheapestPrice is the lowest pricing-line amount; only meaningful when
+	// hasPricing is true.
+	cheapestPrice domainmodel.MicroUSD
+}
+
+// MergeModels collapses providerModels into at most one entry per model public
+// ID (compared case-insensitively) and returns the survivors sorted with Jan
+// provider models first, then by ascending ID.
+//
+// Entries that are nil, or whose ProviderID has no non-nil entry in
+// providerByID, are ignored. When several providers offer the same model,
+// shouldReplaceModel decides which copy is kept. Each returned pointer refers
+// to its own copy of the provider model, never to the caller's input.
+func (modelHandler *ModelHandler) MergeModels(
+	providerModels []*domainmodel.ProviderModel,
+	providerByID map[uint]*domainmodel.Provider,
+) []*domainmodel.ProviderModel {
+	aggregated := make(map[string]modelAggregate, len(providerModels))
+
+	for _, pm := range providerModels {
+		if pm == nil {
+			continue
+		}
+		provider := providerByID[pm.ProviderID]
+		if provider == nil {
+			continue
+		}
+		cheapestPrice, hasPricing := lowestPricingAmount(pm.Pricing)
+		incoming := modelAggregate{
+			response:      *pm,
+			providerKind:  provider.Kind,
+			hasPricing:    hasPricing,
+			cheapestPrice: cheapestPrice,
+		}
+
+		// Use the same normalized key for lookup and store. Looking up by the
+		// lowercased ID but storing under the raw ID meant mixed-case IDs never
+		// matched, so the last provider won without any comparison.
+		key := strings.ToLower(pm.ModelPublicID)
+		if existing, ok := aggregated[key]; ok && !shouldReplaceModel(existing, incoming) {
+			continue
+		}
+
+		aggregated[key] = incoming
+	}
+
+	candidates := make([]modelAggregate, 0, len(aggregated))
+	for _, model := range aggregated {
+		candidates = append(candidates, model)
+	}
+
+	sort.Slice(candidates, func(i, j int) bool {
+		// Jan models first, then by ID
+		iIsJan := candidates[i].providerKind == domainmodel.ProviderJan
+		jIsJan := candidates[j].providerKind == domainmodel.ProviderJan
+		if iIsJan != jIsJan {
+			return iIsJan
+		}
+		return candidates[i].response.ID < candidates[j].response.ID
+	})
+
+	// Take the address of the slice element, not of the range variable: before
+	// Go 1.22 the range variable is shared across iterations, which would make
+	// every result pointer alias the last candidate.
+	result := make([]*domainmodel.ProviderModel, len(candidates))
+	for idx := range candidates {
+		result[idx] = &candidates[idx].response
+	}
+
+	return result
+}
+
+func lowestPricingAmount(pricing domainmodel.Pricing) (domainmodel.MicroUSD, bool) {
+	if len(pricing.Lines) == 0 {
+		return 0, false
+	}
+	lowest := pricing.Lines[0].Amount
+	for _, line := range pricing.Lines[1:] {
+		if line.Amount < lowest {
+			lowest = line.Amount
+		}
+	}
+	return lowest, true
+}
+
+// shouldReplaceModel reports whether incoming should take the place of
+// existing for the same model. The rules are applied in this order:
+//  1. when both have pricing, the strictly cheaper one wins;
+//  2. an entry with pricing beats one without;
+//  3. otherwise a Jan provider beats a non-Jan provider.
+//
+// On a complete tie the existing entry is kept.
+func shouldReplaceModel(existing, incoming modelAggregate) bool {
+	bothPriced := incoming.hasPricing && existing.hasPricing
+
+	switch {
+	case bothPriced && incoming.cheapestPrice < existing.cheapestPrice:
+		return true
+	case bothPriced && incoming.cheapestPrice > existing.cheapestPrice:
+		return false
+	case incoming.hasPricing != existing.hasPricing:
+		// Exactly one side is priced: that side wins.
+		return incoming.hasPricing
+	}
+
+	// Equal prices or no pricing on either side: only a Jan incoming entry
+	// displaces a non-Jan existing one.
+	incomingIsJan := incoming.providerKind == domainmodel.ProviderJan
+	existingIsJan := existing.providerKind == domainmodel.ProviderJan
+	return incomingIsJan && !existingIsJan
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/provider_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/provider_handler.go
new file mode 100644
index 00000000..ee1fed93
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/provider_handler.go
@@ -0,0 +1,259 @@
+package modelhandler
+
+import (
+	"context"
+	"strings"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/infrastructure/inference"
+	requestmodels "jan-server/services/llm-api/internal/interfaces/httpserver/requests/models"
+	modelresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/model"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ProviderHandler registers, lists and updates providers, and selects which
+// provider should serve a given model.
+type ProviderHandler struct {
+	// providerService manages provider records and model syncing.
+	providerService      *domainmodel.ProviderService
+	// providerModelService looks up and counts provider models.
+	providerModelService *domainmodel.ProviderModelService
+	// inferenceProvider lists the models of a newly registered provider.
+	inferenceProvider    *inference.InferenceProvider
+}
+
+// NewProviderHandler returns a ProviderHandler wired to the given provider
+// service, provider-model service and inference provider.
+func NewProviderHandler(
+	providerService *domainmodel.ProviderService,
+	providerModelService *domainmodel.ProviderModelService,
+	inferenceProvider *inference.InferenceProvider,
+) *ProviderHandler {
+	handler := new(ProviderHandler)
+	handler.providerService = providerService
+	handler.providerModelService = providerModelService
+	handler.inferenceProvider = inferenceProvider
+	return handler
+}
+
+// RegisterProvider registers a new provider from addProviderRequest, lists the
+// models it exposes through the inference provider, hands them to
+// SyncProviderModelsWithOptions and returns the provider with the synced
+// models.
+//
+// A conflict error is returned when a provider whose Kind is not
+// ProviderCustom is already registered for the requested vendor. Active
+// defaults to true when the request omits it. Every failure is returned as a
+// handler-layer platform error.
+//
+// Note that ctx is the second parameter here, unlike the other handlers; the
+// order is kept so existing callers continue to compile.
+func (providerHandler *ProviderHandler) RegisterProvider(addProviderRequest requestmodels.AddProviderRequest, ctx context.Context) (*modelresponses.ProviderWithModelsResponse, error) {
+
+	// Reject a second registration for a vendor that already has a non-custom
+	// provider. A lookup error is not treated as fatal here: registration
+	// proceeds (NOTE(review): presumably "not found" surfaces as an error —
+	// confirm that other lookup failures should not abort).
+	provider, err := providerHandler.providerService.FindProviderByVendor(ctx, addProviderRequest.Vendor)
+	if err == nil && provider != nil && provider.Kind != domainmodel.ProviderCustom {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeConflict, "provider with vendor already exists", nil, "30c583bd-82f0-41c7-83ca-c2bf071cb018")
+	}
+
+	active := true
+	if addProviderRequest.Active != nil {
+		active = *addProviderRequest.Active
+	}
+
+	result, err := providerHandler.providerService.RegisterProvider(ctx, domainmodel.RegisterProviderInput{
+		Name:     addProviderRequest.Name,
+		Vendor:   addProviderRequest.Vendor,
+		BaseURL:  addProviderRequest.BaseURL,
+		APIKey:   addProviderRequest.APIKey,
+		Metadata: addProviderRequest.Metadata,
+		Active:   active,
+	})
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "register provider failed")
+	}
+
+	// Wrap the remaining failures like every other error path in this file so
+	// callers always receive a handler-layer platform error.
+	models, err := providerHandler.inferenceProvider.ListModels(ctx, result)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to list provider models")
+	}
+	syncModels, err := providerHandler.providerService.SyncProviderModelsWithOptions(ctx, result, models, true)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to sync provider models")
+	}
+
+	return modelresponses.BuildProviderResponseWithModels(result, syncModels), nil
+}
+
+// GetAllProviders returns every provider together with its total and active
+// model counts. When no provider exists an empty, non-nil slice is returned
+// and the count queries are not issued.
+func (providerHandler *ProviderHandler) GetAllProviders(ctx context.Context) ([]modelresponses.ProviderWithModelCountResponse, error) {
+	all, err := providerHandler.providerService.FindAllProviders(ctx)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get providers")
+	}
+	if len(all) == 0 {
+		return []modelresponses.ProviderWithModelCountResponse{}, nil
+	}
+
+	ids := make([]uint, 0, len(all))
+	for _, current := range all {
+		ids = append(ids, current.ID)
+	}
+
+	totals, err := providerHandler.providerModelService.FindModelCountsByProviderIDs(ctx, ids)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get model counts")
+	}
+
+	actives, err := providerHandler.providerModelService.FindActiveModelCountsByProviderIDs(ctx, ids)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get active model counts")
+	}
+
+	// A provider missing from either count map gets that map's zero value.
+	responses := make([]modelresponses.ProviderWithModelCountResponse, len(all))
+	for i, current := range all {
+		responses[i] = modelresponses.BuildProviderWithModelCountResponse(current, totals[current.ID], actives[current.ID])
+	}
+
+	return responses, nil
+}
+
+func (providerHandler *ProviderHandler) SelectProviderModelForModelPublicID(ctx context.Context, modelPublicID string) (*domainmodel.ProviderModel, *domainmodel.Provider, error) {
+	if strings.TrimSpace(modelPublicID) == "" {
+		return nil, nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "model key is required", nil, "abeb247f-ef80-44bf-921b-6e2c92ffca73")
+	}
+	var providerModels []*domainmodel.ProviderModel
+
+	providerModels, err := providerHandler.providerModelService.FindActiveByModelKey(ctx, modelPublicID)
+	if err != nil {
+		return nil, nil, err
+	}
+	if len(providerModels) == 0 {
+		return nil, nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "model not found in accessible providers", nil, "caa8476d-1b95-42a7-a96b-18b0c11b2f64")
+	}
+
+	selectedProviderModel := providerHandler.selectBestProvider(providerModels)
+	if selectedProviderModel == nil {
+		return nil, nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "no valid provider found for model", nil, "265747b1-0aee-4a99-863e-99a7af8ada5e")
+	}
+	selectedProvider, err := providerHandler.providerService.GetByID(ctx, selectedProviderModel.ProviderID)
+	if err != nil {
+		return nil, nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to get provider details")
+	}
+	return selectedProviderModel, selectedProvider, nil
+}
+
+// selectBestProvider picks one provider model out of providerModels, ignoring
+// nil entries. Candidates are compared against the current best in order:
+//  1. lowest price, when both have pricing data;
+//  2. having pricing data at all, when only one of them does;
+//  3. being offered by the Jan provider, when the above do not decide;
+//  4. otherwise the earlier entry in the slice is kept.
+//
+// It returns nil when the slice is empty or holds only nil entries.
+func (providerHandler *ProviderHandler) selectBestProvider(
+	providerModels []*domainmodel.ProviderModel,
+) *domainmodel.ProviderModel {
+	var (
+		best       *domainmodel.ProviderModel
+		bestPrice  domainmodel.MicroUSD
+		bestPriced bool
+		bestIsJan  bool
+	)
+
+	for _, current := range providerModels {
+		if current == nil {
+			continue
+		}
+
+		price, priced := calculateLowestPrice(current.Pricing)
+		isJan := current.Kind == domainmodel.ProviderJan
+
+		// The first non-nil entry becomes the initial best unconditionally.
+		takeOver := best == nil
+		if !takeOver {
+			switch {
+			case priced && bestPriced && price < bestPrice:
+				takeOver = true
+			case priced && bestPriced && price > bestPrice:
+				takeOver = false
+			case priced != bestPriced:
+				// Exactly one side has pricing: that side wins.
+				takeOver = priced
+			default:
+				// Equal prices or no pricing on either side: prefer Jan.
+				takeOver = isJan && !bestIsJan
+			}
+		}
+
+		if takeOver {
+			best, bestPrice, bestPriced, bestIsJan = current, price, priced, isJan
+		}
+	}
+
+	return best
+}
+
+// UpdateProvider applies req to the provider identified by publicID and
+// returns the updated provider as a response. An empty publicID yields a
+// validation error and an unknown one a not-found error.
+func (h *ProviderHandler) UpdateProvider(
+	ctx context.Context,
+	publicID string,
+	req requestmodels.UpdateProviderRequest,
+) (*modelresponses.ProviderResponse, error) {
+	if len(publicID) == 0 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "provider public ID is required", nil, "140e34cf-ed9f-4008-9d9b-c3e7b9d183b8")
+	}
+
+	current, findErr := h.providerService.FindByPublicID(ctx, publicID)
+	switch {
+	case findErr != nil:
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, findErr, "failed to find provider")
+	case current == nil:
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "provider not found", nil, "0d77a312-f914-492d-8dbc-7f1ba9d14da9")
+	}
+
+	// Request fields are passed through as-is; the service decides how each
+	// one is applied.
+	saved, saveErr := h.providerService.UpdateProvider(ctx, current, domainmodel.UpdateProviderInput{
+		Name:     req.Name,
+		BaseURL:  req.BaseURL,
+		APIKey:   req.APIKey,
+		Metadata: req.Metadata,
+		Active:   req.Active,
+	})
+	if saveErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, saveErr, "failed to update provider")
+	}
+
+	body := modelresponses.BuildProviderResponse(saved)
+	return &body, nil
+}
+
+// calculateLowestPrice returns the smallest Amount among pricing.Lines, or
+// (0, false) when there are no pricing lines.
+//
+// It delegates to lowestPricingAmount in this package, which implements the
+// same rule, so the "cheapest price" logic lives in one place.
+//
+// TODO(pricing): Remove pricing calculation from model handler
+// Pricing logic should be handled by a dedicated billing domain, not in the
+// model management layer. Consider removing this once pricing is moved to the
+// billing domain.
+// Related: See TODO in internal/domain/model/provider_model.go
+func calculateLowestPrice(pricing domainmodel.Pricing) (domainmodel.MicroUSD, bool) {
+	return lowestPricingAmount(pricing)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/provider_model_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/provider_model_handler.go
new file mode 100644
index 00000000..35f48cfe
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler/provider_model_handler.go
@@ -0,0 +1,300 @@
+package modelhandler
+
+import (
+	"context"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/domain/query"
+	requestmodels "jan-server/services/llm-api/internal/interfaces/httpserver/requests/models"
+	modelresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/model"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ProviderModelHandler serves provider-model operations: get, list, update
+// and bulk enable/disable.
+type ProviderModelHandler struct {
+	// providerModelService reads and writes provider models.
+	providerModelService *domainmodel.ProviderModelService
+	// providerService resolves the provider that owns a provider model.
+	providerService      *domainmodel.ProviderService
+	// modelCatalogService resolves the catalog entry linked to a provider model.
+	modelCatalogService  *domainmodel.ModelCatalogService
+}
+
+// NewProviderModelHandler returns a ProviderModelHandler wired to the given
+// provider-model, provider and model-catalog services.
+func NewProviderModelHandler(
+	providerModelService *domainmodel.ProviderModelService,
+	providerService *domainmodel.ProviderService,
+	modelCatalogService *domainmodel.ModelCatalogService,
+) *ProviderModelHandler {
+	handler := new(ProviderModelHandler)
+	handler.providerModelService = providerModelService
+	handler.providerService = providerService
+	handler.modelCatalogService = modelCatalogService
+	return handler
+}
+
+// GetProviderModel returns the provider model identified by publicID together
+// with its provider and, when one is linked, its model catalog. An empty
+// publicID yields a validation error and an unknown one a not-found error.
+func (h *ProviderModelHandler) GetProviderModel(ctx context.Context, publicID string) (*modelresponses.ProviderModelResponse, error) {
+	if len(publicID) == 0 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "provider model public ID is required", nil, "14c0d733-6143-4eac-b09f-f5475895fec1")
+	}
+
+	found, findErr := h.providerModelService.FindByPublicID(ctx, publicID)
+	switch {
+	case findErr != nil:
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, findErr, "failed to get provider model")
+	case found == nil:
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "provider model not found", nil, "9820aaa1-2f72-4a92-ba9f-e84d4bb103ca")
+	}
+
+	owner, ownerErr := h.providerService.GetByID(ctx, found.ProviderID)
+	if ownerErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, ownerErr, "failed to get provider")
+	}
+
+	var catalog *domainmodel.ModelCatalog
+	if catalogID := found.ModelCatalogID; catalogID != nil {
+		// The lookup error is discarded, so a failed catalog lookup does not
+		// fail the request. NOTE(review): confirm this is intended.
+		catalog, _ = h.modelCatalogService.FindByID(ctx, *catalogID)
+	}
+
+	body := modelresponses.BuildProviderModelResponse(found, owner, catalog)
+	return &body, nil
+}
+
+// ListProviderModels returns one page of provider models matching
+// filterParams, each combined with its provider and (when linked) its model
+// catalog, plus the total number of provider models matching the filter.
+//
+// When filterParams.ProviderPublicID is set but does not resolve to a
+// provider, an empty page with a total of 0 is returned: no model can belong
+// to an unknown provider, and dropping the filter would list the models of
+// every provider instead. Providers and catalogs for the page are loaded in
+// two batch lookups. A provider model whose provider is missing from the
+// batch result is left out of the page, so the page can be shorter than the
+// total suggests.
+func (h *ProviderModelHandler) ListProviderModels(
+	ctx context.Context,
+	filterParams requestmodels.ProviderModelFilterParams,
+	pagination *query.Pagination,
+) ([]modelresponses.ProviderModelResponse, int64, error) {
+	// Pointer filters are copied as-is: nil means "not filtered on".
+	filter := domainmodel.ProviderModelFilter{
+		ModelPublicID:  filterParams.ModelKey,
+		Active:         filterParams.Active,
+		SupportsImages: filterParams.SupportsImages,
+	}
+
+	if filterParams.ProviderPublicID != nil {
+		provider, err := h.providerService.FindByPublicID(ctx, *filterParams.ProviderPublicID)
+		if err != nil {
+			return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to find provider")
+		}
+		if provider == nil {
+			// Unknown provider: nothing can match, so do not fall back to an
+			// unfiltered listing.
+			return []modelresponses.ProviderModelResponse{}, 0, nil
+		}
+		filter.ProviderID = &provider.ID
+	}
+
+	total, err := h.providerModelService.Count(ctx, filter)
+	if err != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to count provider models")
+	}
+
+	providerModels, err := h.providerModelService.FindByFilterWithPagination(ctx, filter, pagination)
+	if err != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to list provider models")
+	}
+
+	// An empty page needs no provider or catalog lookups.
+	if len(providerModels) == 0 {
+		return []modelresponses.ProviderModelResponse{}, total, nil
+	}
+
+	// Collect the distinct provider and catalog IDs referenced by the page.
+	providerIDs := make(map[uint]bool)
+	catalogIDs := make(map[uint]bool)
+	for _, pm := range providerModels {
+		providerIDs[pm.ProviderID] = true
+		if pm.ModelCatalogID != nil {
+			catalogIDs[*pm.ModelCatalogID] = true
+		}
+	}
+
+	// Convert maps to slices for batch lookup
+	providerIDSlice := make([]uint, 0, len(providerIDs))
+	for id := range providerIDs {
+		providerIDSlice = append(providerIDSlice, id)
+	}
+
+	catalogIDSlice := make([]uint, 0, len(catalogIDs))
+	for id := range catalogIDs {
+		catalogIDSlice = append(catalogIDSlice, id)
+	}
+
+	// Batch fetch providers and catalogs
+	providerMap, err := h.providerService.GetByIDs(ctx, providerIDSlice)
+	if err != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to fetch providers")
+	}
+
+	catalogMap, err := h.modelCatalogService.FindByIDs(ctx, catalogIDSlice)
+	if err != nil {
+		return nil, 0, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to fetch catalogs")
+	}
+
+	result := make([]modelresponses.ProviderModelResponse, 0, len(providerModels))
+	for _, pm := range providerModels {
+		provider := providerMap[pm.ProviderID]
+		if provider == nil {
+			// Provider not returned by the batch lookup: skip this model.
+			continue
+		}
+
+		var catalog *domainmodel.ModelCatalog
+		if pm.ModelCatalogID != nil {
+			catalog = catalogMap[*pm.ModelCatalogID]
+		}
+
+		result = append(result, modelresponses.BuildProviderModelResponse(pm, provider, catalog))
+	}
+
+	return result, total, nil
+}
+
+// UpdateProviderModel applies the non-nil fields of req to the provider model
+// identified by publicID, persists it and returns the stored model together
+// with its provider and, when linked, its model catalog. An empty publicID
+// yields a validation error and an unknown one a not-found error.
+func (h *ProviderModelHandler) UpdateProviderModel(
+	ctx context.Context,
+	publicID string,
+	req requestmodels.UpdateProviderModelRequest,
+) (*modelresponses.ProviderModelResponse, error) {
+	if len(publicID) == 0 {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "provider model public ID is required", nil, "794588fc-4a61-4f1f-bec7-7041091da4d3")
+	}
+
+	target, findErr := h.providerModelService.FindByPublicID(ctx, publicID)
+	switch {
+	case findErr != nil:
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, findErr, "failed to get provider model")
+	case target == nil:
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "provider model not found", nil, "bef76423-07b2-438a-9899-c7f8ecac3e09")
+	}
+
+	// Patch semantics: a nil request field leaves the stored value untouched.
+	if name := req.DisplayName; name != nil {
+		target.DisplayName = *name
+	}
+	if pricing := req.Pricing; pricing != nil {
+		target.Pricing = *pricing
+	}
+	if limits := req.TokenLimits; limits != nil {
+		target.TokenLimits = limits
+	}
+	if family := req.Family; family != nil {
+		target.Family = family
+	}
+
+	// Capability flags.
+	if flag := req.SupportsImages; flag != nil {
+		target.SupportsImages = *flag
+	}
+	if flag := req.SupportsEmbeddings; flag != nil {
+		target.SupportsEmbeddings = *flag
+	}
+	if flag := req.SupportsReasoning; flag != nil {
+		target.SupportsReasoning = *flag
+	}
+	if flag := req.SupportsAudio; flag != nil {
+		target.SupportsAudio = *flag
+	}
+	if flag := req.SupportsVideo; flag != nil {
+		target.SupportsVideo = *flag
+	}
+	if flag := req.Active; flag != nil {
+		target.Active = *flag
+	}
+
+	saved, saveErr := h.providerModelService.Update(ctx, target)
+	if saveErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, saveErr, "failed to update provider model")
+	}
+
+	owner, ownerErr := h.providerService.GetByID(ctx, saved.ProviderID)
+	if ownerErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, ownerErr, "failed to get provider")
+	}
+
+	var catalog *domainmodel.ModelCatalog
+	if catalogID := saved.ModelCatalogID; catalogID != nil {
+		// The lookup error is discarded, so a failed catalog lookup does not
+		// fail the request. NOTE(review): confirm this is intended.
+		catalog, _ = h.modelCatalogService.FindByID(ctx, *catalogID)
+	}
+
+	body := modelresponses.BuildProviderModelResponse(saved, owner, catalog)
+	return &body, nil
+}
+
+// BulkEnableDisableProviderModels sets the active flag of many provider models
+// to *req.Enable in one operation.
+//
+// Scope: when req.ProviderID is a non-empty public ID only that provider's
+// models are targeted, and a not-found error is returned if the provider does
+// not exist. Otherwise the filter is left empty (presumably matching every
+// provider model — confirm against the service).
+//
+// Exceptions: models whose ModelPublicID is listed in req.ExceptModels are
+// never changed. When exceptions are given, models already in the requested
+// state are also left alone; both groups are counted as skipped.
+//
+// In the response, TotalChecked is the number of models matching the scope,
+// and FailedCount is always 0 because any failure aborts with an error.
+//
+// Example use cases:
+//   - "Disable all models except production whitelist"
+//   - "Enable all OpenAI models except experimental ones"
+//   - "Disable all models from a specific provider"
+func (h *ProviderModelHandler) BulkEnableDisableProviderModels(
+	ctx context.Context,
+	req requestmodels.BulkEnableModelsRequest,
+) (*modelresponses.BulkOperationResponse, error) {
+	// Validate and normalize request
+	if req.Enable == nil {
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "enable field is required", nil, "3219f31b-1585-4e81-8976-06861cbf4358")
+	}
+	req.Normalize()
+
+	filter := domainmodel.ProviderModelFilter{}
+
+	// Optional scoping to a single provider, resolved from its public ID.
+	if req.ProviderID != nil && *req.ProviderID != "" {
+		provider, err := h.providerService.FindByPublicID(ctx, *req.ProviderID)
+		if err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to find provider")
+		}
+		if provider == nil {
+			return nil, platformerrors.NewError(ctx, platformerrors.LayerHandler, platformerrors.ErrorTypeNotFound, "provider not found", nil, "03a1267a-1daf-499e-b926-343c9429b5f5")
+		}
+		filter.ProviderID = &provider.ID
+	}
+
+	enableValue := *req.Enable
+	// Set of model public IDs that must keep their current active state.
+	exceptModelKeys := make(map[string]bool)
+	for _, key := range req.ExceptModels {
+		exceptModelKeys[key] = true
+	}
+
+	// Counted before any update so TotalChecked reflects the whole scope.
+	totalCount, err := h.providerModelService.Count(ctx, filter)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to count provider models")
+	}
+
+	var modelsUpdated int64
+	var skippedCount int64
+
+	if len(exceptModelKeys) > 0 {
+		// With exceptions, load the models so the ones to change can be picked
+		// individually and updated by explicit ID.
+		allModels, err := h.providerModelService.FindByFilter(ctx, filter)
+		if err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to list provider models")
+		}
+
+		idsToUpdate := make([]uint, 0)
+		for _, model := range allModels {
+			// Explicitly excluded by the caller.
+			if exceptModelKeys[model.ModelPublicID] {
+				skippedCount++
+				continue
+			}
+			// Already in the requested state: nothing to change.
+			if model.Active == enableValue {
+				skippedCount++
+				continue
+			}
+			idsToUpdate = append(idsToUpdate, model.ID)
+		}
+
+		if len(idsToUpdate) > 0 {
+			updateFilter := domainmodel.ProviderModelFilter{
+				IDs: &idsToUpdate,
+			}
+			updated, err := h.providerModelService.BatchUpdateActive(ctx, updateFilter, enableValue)
+			if err != nil {
+				return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to batch update provider models")
+			}
+			modelsUpdated = updated
+		}
+	} else {
+		// No exceptions: update the whole scope in one batch call.
+		modelsUpdated, err = h.providerModelService.BatchUpdateActive(ctx, filter, enableValue)
+		if err != nil {
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to batch update provider models")
+		}
+		// Whatever the batch call did not report as updated counts as skipped.
+		skippedCount = totalCount - modelsUpdated
+	}
+
+	return &modelresponses.BulkOperationResponse{
+		UpdatedCount: int(modelsUpdated),
+		SkippedCount: int(skippedCount),
+		FailedCount:  0,
+		TotalChecked: int(totalCount),
+	}, nil
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/projecthandler/project_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/projecthandler/project_handler.go
new file mode 100644
index 00000000..c2ce650b
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/projecthandler/project_handler.go
@@ -0,0 +1,161 @@
+package projecthandler
+
+import (
+	"context"
+	"strconv"
+	"strings"
+	"time"
+
+	"jan-server/services/llm-api/internal/domain/project"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/requests/projectreq"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses/projectres"
+	"jan-server/services/llm-api/internal/utils/idgen"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ProjectHandler groups the project use cases (create, get, list, update, delete) on top of ProjectService.
+type ProjectHandler struct {
+	projectService *project.ProjectService
+}
+
+// NewProjectHandler returns a handler that forwards every call to projectService.
+func NewProjectHandler(projectService *project.ProjectService) *ProjectHandler {
+	return &ProjectHandler{projectService: projectService}
+}
+
+// CreateProject builds a project owned by userID from req and stores it through the project service.
+// Name and Instruction are whitespace-trimmed first; the public ID comes from idgen.GenerateSecureID("proj", 16).
+func (h *ProjectHandler) CreateProject(
+	ctx context.Context,
+	userID uint,
+	req projectreq.CreateProjectRequest,
+) (*projectres.ProjectResponse, error) {
+	name := strings.TrimSpace(req.Name)
+	instruction := req.Instruction
+	if instruction != nil {
+		cleaned := strings.TrimSpace(*instruction)
+		instruction = &cleaned
+	}
+
+	// The ID is generated before the entity is built, so a generator failure returns early.
+	publicID, genErr := idgen.GenerateSecureID("proj", 16)
+	if genErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, genErr, "failed to generate project ID")
+	}
+
+	// Assemble the entity, then hand it to the service for persistence.
+	draft := project.NewProject(publicID, userID, name, instruction)
+
+	created, saveErr := h.projectService.CreateProject(ctx, draft)
+	if saveErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, saveErr, "failed to create project")
+	}
+
+	return projectres.NewProjectResponse(created), nil
+}
+
+// GetProject looks up the project with public ID projectID for userID and returns it as a response DTO.
+func (h *ProjectHandler) GetProject(
+	ctx context.Context,
+	userID uint,
+	projectID string,
+) (*projectres.ProjectResponse, error) {
+	found, lookupErr := h.projectService.GetProjectByPublicIDAndUserID(ctx, projectID, userID)
+	if lookupErr == nil {
+		return projectres.NewProjectResponse(found), nil
+	}
+	// Service failures are passed through platformerrors.AsError with the handler layer tag.
+	return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, lookupErr, "failed to get project")
+}
+
+// ListProjects returns one page of a user's projects together with has-more and next-cursor metadata.
+func (h *ProjectHandler) ListProjects(
+	ctx context.Context,
+	userID uint,
+	pagination *query.Pagination,
+) (*projectres.ProjectListResponse, error) {
+	// Fetch limit+1 rows to detect a further page; a non-positive limit is passed through as-is.
+	var requestedLimit *int
+	if pagination != nil && pagination.Limit != nil && *pagination.Limit > 0 {
+		requestedLimit = pagination.Limit
+		extraLimit := *pagination.Limit + 1
+		pagination.Limit = &extraLimit
+		defer func() { pagination.Limit = requestedLimit }() // give the caller its own limit back
+	}
+
+	projects, total, err := h.projectService.ListProjectsByUserID(ctx, userID, pagination)
+	if err != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, err, "failed to list projects")
+	}
+
+	// The extra row only signals another page: cursor on the last kept row, then trim it off.
+	hasMore := false
+	var nextCursor *string
+	if requestedLimit != nil && len(projects) > *requestedLimit {
+		hasMore = true
+		cursorValue := strconv.FormatUint(uint64(projects[*requestedLimit-1].ID), 10)
+		nextCursor = &cursorValue
+		projects = projects[:*requestedLimit]
+	}
+
+	return projectres.NewProjectListResponse(projects, hasMore, nextCursor, total), nil
+}
+
+// UpdateProject applies the non-nil fields of req to the user's project and saves it.
+// Name and Instruction are trimmed; Archived=true stamps ArchivedAt, Archived=false clears it.
+func (h *ProjectHandler) UpdateProject(
+	ctx context.Context,
+	userID uint,
+	projectID string,
+	req projectreq.UpdateProjectRequest,
+) (*projectres.ProjectResponse, error) {
+	current, loadErr := h.projectService.GetProjectByPublicIDAndUserID(ctx, projectID, userID)
+	if loadErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, loadErr, "failed to get project")
+	}
+
+	// Each pointer field is optional: nil means "leave the stored value alone".
+	if name := req.Name; name != nil {
+		current.Name = strings.TrimSpace(*name)
+	}
+	if instruction := req.Instruction; instruction != nil {
+		cleaned := strings.TrimSpace(*instruction)
+		current.Instruction = &cleaned
+	}
+	if favorite := req.Favorite; favorite != nil {
+		current.Favorite = *favorite
+	}
+	switch archived := req.Archived; {
+	case archived == nil:
+		// Archive state is not part of this update.
+	case *archived:
+		archivedAt := time.Now()
+		current.ArchivedAt = &archivedAt
+	default:
+		current.ArchivedAt = nil
+	}
+
+	current.UpdatedAt = time.Now()
+
+	saved, saveErr := h.projectService.UpdateProject(ctx, current)
+	if saveErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, saveErr, "failed to update project")
+	}
+
+	return projectres.NewProjectResponse(saved), nil
+}
+
+// DeleteProject removes the user's project identified by projectID and echoes that ID in the response.
+func (h *ProjectHandler) DeleteProject(
+	ctx context.Context,
+	userID uint,
+	projectID string,
+) (*projectres.ProjectDeletedResponse, error) {
+	if delErr := h.projectService.DeleteProject(ctx, projectID, userID); delErr != nil {
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, delErr, "failed to delete project")
+	}
+
+	// The response is built from the requested ID alone; nothing is read back from the service.
+	return projectres.NewProjectDeletedResponse(projectID), nil
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/handlers/usersettingshandler/user_settings_handler.go b/services/llm-api/internal/interfaces/httpserver/handlers/usersettingshandler/user_settings_handler.go
new file mode 100644
index 00000000..dd31bcc7
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/handlers/usersettingshandler/user_settings_handler.go
@@ -0,0 +1,147 @@
+package usersettingshandler
+
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/domain/usersettings"
+	authhandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+)
+
+// UserSettingsHandler serves the /v1/users/me/settings endpoints.
+// It delegates reads and updates to the usersettings service and logs service failures.
+type UserSettingsHandler struct {
+	service *usersettings.Service
+	logger  zerolog.Logger
+}
+
+// NewUserSettingsHandler wires a handler to the given settings service and logger.
+func NewUserSettingsHandler(service *usersettings.Service, logger zerolog.Logger) *UserSettingsHandler {
+	handler := new(UserSettingsHandler)
+	handler.service, handler.logger = service, logger
+	return handler
+}
+
+// GetSettings handles GET /v1/users/me/settings
+// @Summary Get user settings
+// @Description Retrieve current user's settings including memory preferences
+// @Tags User Settings
+// @Security BearerAuth
+// @Produce json
+// @Success 200 {object} UserSettingsResponse
+// @Failure 401 {object} responses.ErrorResponse
+// @Failure 500 {object} responses.ErrorResponse
+// @Router /v1/users/me/settings [get]
+func (h *UserSettingsHandler) GetSettings(c *gin.Context) {
+	// The user is presumably placed on the gin context by the auth layer — confirm against the router setup.
+	currentUser, authenticated := authhandler.GetUserFromContext(c)
+	if !authenticated {
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, nil, "user not authenticated")
+		return
+	}
+
+	stored, svcErr := h.service.GetOrCreateSettings(c.Request.Context(), currentUser.ID)
+	if svcErr == nil {
+		c.JSON(http.StatusOK, toResponse(stored))
+		return
+	}
+	h.logger.Error().Err(svcErr).Uint("user_id", currentUser.ID).Msg("failed to get user settings")
+	responses.HandleErrorWithStatus(c, http.StatusInternalServerError, svcErr, "failed to retrieve settings")
+}
+
+// UpdateSettings handles PATCH /v1/users/me/settings
+// @Summary Update user settings
+// @Description Update current user's settings (partial update supported)
+// @Tags User Settings
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param settings body usersettings.UpdateRequest true "Settings to update"
+// @Success 200 {object} UserSettingsResponse
+// @Failure 400 {object} responses.ErrorResponse
+// @Failure 401 {object} responses.ErrorResponse
+// @Failure 500 {object} responses.ErrorResponse
+// @Router /v1/users/me/settings [patch]
+func (h *UserSettingsHandler) UpdateSettings(c *gin.Context) {
+	currentUser, authenticated := authhandler.GetUserFromContext(c)
+	if !authenticated {
+		responses.HandleErrorWithStatus(c, http.StatusUnauthorized, nil, "user not authenticated")
+		return
+	}
+
+	var req usersettings.UpdateRequest
+	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
+		responses.HandleErrorWithStatus(c, http.StatusBadRequest, bindErr, "invalid request body")
+		return
+	}
+
+	// violation holds the first failed validation message; empty means the request is acceptable.
+	violation := ""
+
+	// An empty base_style is accepted as "not provided"; anything else must pass IsValid.
+	if profile := req.ProfileSettings; profile != nil {
+		if profile.BaseStyle != "" && !profile.BaseStyle.IsValid() {
+			violation = "profile_settings.base_style must be one of: Concise, Friendly, Professional"
+		}
+	}
+
+	// Memory limits are checked in a fixed order, so the first out-of-range field decides the message.
+	if mem := req.MemoryConfig; violation == "" && mem != nil {
+		switch {
+		case mem.MaxUserItems < 0 || mem.MaxUserItems > 20:
+			violation = "memory_config.max_user_items must be between 0 and 20"
+		case mem.MaxProjectItems < 0 || mem.MaxProjectItems > 50:
+			violation = "memory_config.max_project_items must be between 0 and 50"
+		case mem.MaxEpisodicItems < 0 || mem.MaxEpisodicItems > 20:
+			violation = "memory_config.max_episodic_items must be between 0 and 20"
+		case mem.MinSimilarity < 0.0 || mem.MinSimilarity > 1.0:
+			violation = "memory_config.min_similarity must be between 0.0 and 1.0"
+		}
+	}
+
+	if violation != "" {
+		responses.HandleErrorWithStatus(c, http.StatusBadRequest, nil, violation)
+		return
+	}
+
+	updated, svcErr := h.service.UpdateSettings(c.Request.Context(), currentUser.ID, req)
+	if svcErr != nil {
+		h.logger.Error().Err(svcErr).Uint("user_id", currentUser.ID).Msg("failed to update user settings")
+		responses.HandleErrorWithStatus(c, http.StatusInternalServerError, svcErr, "failed to update settings")
+		return
+	}
+
+	c.JSON(http.StatusOK, toResponse(updated))
+}
+
+// UserSettingsResponse is the JSON body written by GetSettings and UpdateSettings (built by toResponse).
+type UserSettingsResponse struct {
+	ID               uint                          `json:"id"`
+	UserID           uint                          `json:"user_id"`
+	MemoryConfig     usersettings.MemoryConfig     `json:"memory_config"`
+	ProfileSettings  usersettings.ProfileSettings  `json:"profile_settings"`
+	AdvancedSettings usersettings.AdvancedSettings `json:"advanced_settings"`
+	EnableTrace      bool                          `json:"enable_trace"`
+	EnableTools      bool                          `json:"enable_tools"`
+	Preferences      map[string]interface{}        `json:"preferences"`
+	CreatedAt        string                        `json:"created_at"` // toResponse formats this with layout "2006-01-02T15:04:05Z07:00"
+	UpdatedAt        string                        `json:"updated_at"` // same layout as CreatedAt
+}
+
+// toResponse copies a settings entity into its JSON DTO, rendering both timestamps as RFC 3339 strings.
+func toResponse(settings *usersettings.UserSettings) UserSettingsResponse {
+	const timestampLayout = "2006-01-02T15:04:05Z07:00"
+	var dto UserSettingsResponse
+	dto.ID, dto.UserID = settings.ID, settings.UserID
+	dto.MemoryConfig = settings.MemoryConfig
+	dto.ProfileSettings = settings.ProfileSettings
+	dto.AdvancedSettings = settings.AdvancedSettings
+	dto.EnableTrace, dto.EnableTools = settings.EnableTrace, settings.EnableTools
+	dto.Preferences = settings.Preferences
+	dto.CreatedAt = settings.CreatedAt.Format(timestampLayout)
+	dto.UpdatedAt = settings.UpdatedAt.Format(timestampLayout)
+	return dto
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/http_server.go b/services/llm-api/internal/interfaces/httpserver/http_server.go
new file mode 100644
index 00000000..f4ea4b44
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/http_server.go
@@ -0,0 +1,110 @@
+package httpserver
+
+import (
+	"fmt"
+	"net/http"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/infrastructure"
+	middleware "jan-server/services/llm-api/internal/interfaces/httpserver/middlewares"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/auth"
+	v1 "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1"
+
+	"github.com/gin-gonic/gin"
+	swaggerFiles "github.com/swaggo/files"
+	ginSwagger "github.com/swaggo/gin-swagger"
+
+	_ "jan-server/services/llm-api/docs/swagger"
+)
+
+type HTTPServer struct {
+	engine    *gin.Engine                    // gin router that middleware and routes are attached to
+	infra     *infrastructure.Infrastructure // supplies the logger and Keycloak validator used by the middleware
+	v1Route   *v1.V1Route                    // registered on the protected groups in Run
+	authRoute *auth.AuthRoute                // registered with both the public and the protected groups in Run
+	config    *config.Config                 // read for ServiceName, Issuer and HTTPPort
+}
+
+// bindSwagger mounts the Swagger UI at /api/swagger/*any, answering doc.json with the combined spec handler.
+func (s *HTTPServer) bindSwagger() {
+	swaggerRoutes := s.engine.Group("/")
+	swaggerRoutes.GET("/api/swagger/*any", func(c *gin.Context) {
+		switch c.Param("any") {
+		case "/doc.json":
+			// The spec document comes from ServeCombinedSwagger rather than the bundled assets.
+			ServeCombinedSwagger()(c)
+		default:
+			// Every other path under the prefix is handed to gin-swagger.
+			ginSwagger.WrapHandler(swaggerFiles.Handler)(c)
+		}
+	})
+}
+
+// NewHttpServer builds a release-mode gin engine with the shared middleware chain
+// (request ID, tracing, logging, CORS), the unauthenticated probe endpoints and Swagger.
+// API routes are not registered here; Run attaches them.
+func NewHttpServer(
+	v1Route *v1.V1Route,
+	authRoute *auth.AuthRoute,
+	infra *infrastructure.Infrastructure,
+	cfg *config.Config,
+) *HTTPServer {
+	gin.SetMode(gin.ReleaseMode)
+
+	server := &HTTPServer{
+		engine:    gin.New(),
+		infra:     infra,
+		v1Route:   v1Route,
+		authRoute: authRoute,
+		config:    cfg,
+	}
+
+	// Registration order is execution order: request ID, then tracing, logging and CORS.
+	server.engine.Use(
+		middleware.RequestID(),
+		middleware.TracingMiddleware(cfg.ServiceName),
+		middleware.LoggingMiddleware(infra.Logger),
+		middleware.CORSMiddleware(),
+	)
+
+	// Probe endpoints; /healthcheck answers with the bare JSON string "ok".
+	server.engine.GET("/healthz", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"status": "ok"}) })
+	server.engine.GET("/readyz", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"status": "ready"}) })
+	server.engine.GET("/healthcheck", func(c *gin.Context) { c.JSON(http.StatusOK, "ok") })
+
+	server.bindSwagger()
+	return server
+}
+
+// Run registers the auth and v1 routes twice — at "/" and under "/llm" — and then
+// serves on config.HTTPPort, returning whatever error the gin engine's Run reports.
+func (httpServer *HTTPServer) Run() error {
+	engine, infra, issuer := httpServer.engine, httpServer.infra, httpServer.config.Issuer
+
+	// secured derives a sub-group of parent guarded by the auth middleware followed by CORS.
+	secured := func(parent *gin.RouterGroup) *gin.RouterGroup {
+		group := parent.Group("/")
+		group.Use(
+			middleware.AuthMiddleware(infra.KeycloakValidator, infra.Logger, issuer),
+			middleware.CORSMiddleware(),
+		)
+		return group
+	}
+
+	// Public group (no auth) and its protected sibling at the root.
+	root := engine.Group("/")
+	protected := secured(&engine.RouterGroup)
+	// The /llm tree mirrors the root tree for Kong proxy paths.
+	llmRoot := engine.Group("/llm")
+	llmProtected := secured(llmRoot)
+
+	// Auth routes receive both the public and the protected group.
+	httpServer.authRoute.RegisterRouter(root, protected)
+	httpServer.authRoute.RegisterRouter(llmRoot, llmProtected)
+
+	// v1 routes sit entirely behind the auth middleware.
+	httpServer.v1Route.RegisterRouter(protected)
+	httpServer.v1Route.RegisterRouter(llmProtected)
+
+	return engine.Run(fmt.Sprintf(":%d", httpServer.config.HTTPPort))
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/middlewares/auth.go b/services/llm-api/internal/interfaces/httpserver/middlewares/auth.go
new file mode 100644
index 00000000..e177485f
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/middlewares/auth.go
@@ -0,0 +1,331 @@
+package middlewares
+
+import (
+	"errors"
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/llm-api/internal/domain"
+	authvalidator "jan-server/services/llm-api/internal/infrastructure/auth"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+)
+
+const principalContextKey = "principal" // gin context key under which setPrincipal stores the domain.Principal
+
+// AuthMiddleware authenticates via Kong-injected API key headers and/or a Keycloak JWT; failures answer 401 and abort the chain.
+func AuthMiddleware(validator *authvalidator.KeycloakValidator, logger zerolog.Logger, fallbackIssuer string) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		apiPrincipal, hasAPIKey := principalFromAPIKey(c, fallbackIssuer)
+		jwtPrincipal, hasJWT, jwtErr := principalFromJWT(c, validator)
+
+		if jwtErr != nil && !errors.Is(jwtErr, http.ErrNoCookie) { // ErrNoCookie is the "no bearer token" sentinel
+			logger.Error().Err(jwtErr).Msg("jwt validation failed")
+			responses.HandleErrorWithStatus(c, http.StatusUnauthorized, jwtErr, "unauthorized")
+			c.Abort() // returning alone would still let gin run the remaining handlers in the chain
+			return
+		}
+
+		switch {
+		case hasAPIKey && hasJWT:
+			merged, err := mergePrincipals(apiPrincipal, jwtPrincipal)
+			if err != nil {
+				logger.Warn().Err(err).Msg("principal mismatch between JWT and API key")
+				responses.HandleErrorWithStatus(c, http.StatusUnauthorized, err, "conflicting credentials")
+				c.Abort()
+				return
+			}
+			setPrincipal(c, merged)
+		case hasJWT:
+			setPrincipal(c, jwtPrincipal)
+		case hasAPIKey:
+			setPrincipal(c, apiPrincipal)
+		default:
+			logger.Warn().Str("path", c.FullPath()).Str("method", c.Request.Method).Msg("unauthenticated request")
+			responses.HandleErrorWithStatus(c, http.StatusUnauthorized, errors.New("authentication required"), "unauthorized")
+			c.Abort()
+			return
+		}
+
+		c.Next()
+	}
+}
+
+// PrincipalFromContext returns the principal stored by setPrincipal; ok is false when none is set or the value has another type.
+func PrincipalFromContext(c *gin.Context) (domain.Principal, bool) {
+	if stored, found := c.Get(principalContextKey); found {
+		// A failed assertion yields the zero Principal and false, same as a missing key.
+		typed, isPrincipal := stored.(domain.Principal)
+		return typed, isPrincipal
+	}
+	return domain.Principal{}, false
+}
+
+// setPrincipal stores principal in the gin context and mirrors its identity onto both the
+// request and response headers. X-Principal-Id and X-Auth-Method are always written; the
+// X-User-* and X-Scopes headers are written only when the corresponding value is non-empty.
+func setPrincipal(c *gin.Context, principal domain.Principal) {
+	c.Set(principalContextKey, principal)
+
+	inbound, outbound := c.Request.Header, c.Writer.Header()
+	mirror := func(name, value string) {
+		inbound.Set(name, value)
+		outbound.Set(name, value)
+	}
+
+	mirror("X-Principal-Id", principal.ID)
+	mirror("X-Auth-Method", string(principal.AuthMethod))
+	optional := [...][2]string{
+		{"X-User-ID", principal.ID},
+		{"X-User-Subject", principal.Subject},
+		{"X-User-Username", principal.Username},
+		{"X-User-Email", principal.Email},
+	}
+	for _, header := range optional {
+		if header[1] != "" {
+			mirror(header[0], header[1])
+		}
+	}
+	if len(principal.Scopes) > 0 {
+		mirror("X-Scopes", strings.Join(principal.Scopes, " "))
+	}
+}
+
+// principalFromAPIKey derives an API-key principal from request headers. Gateway-plugin headers
+// (principalFromGatewayHeaders) win; otherwise the classic Kong consumer headers are used, which
+// requires both X-Credential-Identifier and X-Consumer-ID. ok is false when neither source applies.
+func principalFromAPIKey(c *gin.Context, fallbackIssuer string) (domain.Principal, bool) {
+	headers := c.Request.Header
+	none := domain.Principal{}
+
+	// Prefer the identity carried by the custom gateway plugin headers.
+	if fromGateway, ok := principalFromGatewayHeaders(headers, fallbackIssuer); ok {
+		return fromGateway, true
+	}
+
+	credentialID := headers.Get("X-Credential-Identifier")
+	consumerID := headers.Get("X-Consumer-ID")
+	if credentialID == "" || consumerID == "" {
+		return none, false
+	}
+
+	username := headers.Get("X-Consumer-Username")
+	customID := headers.Get("X-Consumer-Custom-ID")
+
+	// Identity preference: custom ID, then username, then the Kong consumer ID.
+	principalID := firstNonEmpty(customID, username, consumerID)
+	if principalID == "" {
+		return none, false
+	}
+
+	// The consumer_* entries are recorded even when empty; the credential ID is non-empty here.
+	credentials := map[string]string{
+		"consumer_id":           consumerID,
+		"consumer_custom_id":    customID,
+		"consumer_username":     username,
+		"credential_identifier": credentialID,
+	}
+	if routeID := headers.Get("X-Route-Id"); routeID != "" {
+		credentials["route_id"] = routeID
+	}
+
+	principal := domain.Principal{
+		ID:          principalID,
+		AuthMethod:  domain.AuthMethodAPIKey,
+		Subject:     principalID,
+		Issuer:      fallbackIssuer,
+		Username:    username,
+		Scopes:      parseScopes(headers.Get("X-Consumer-Groups")),
+		Credentials: credentials,
+	}
+	return principal, true
+}
+
+// principalFromJWT validates the request's bearer token and maps its claims to a principal.
+// http.ErrNoCookie is returned as a "no usable bearer token" sentinel (nil validator, missing or
+// non-Bearer Authorization header, empty token); any other error comes from validator.Validate.
+func principalFromJWT(c *gin.Context, validator *authvalidator.KeycloakValidator) (domain.Principal, bool, error) {
+	absent := func() (domain.Principal, bool, error) { return domain.Principal{}, false, http.ErrNoCookie }
+	if validator == nil {
+		return absent()
+	}
+
+	fields := strings.SplitN(c.GetHeader("Authorization"), " ", 2)
+	if len(fields) != 2 || !strings.EqualFold(fields[0], "Bearer") {
+		return absent()
+	}
+	bearer := strings.TrimSpace(fields[1])
+	if bearer == "" {
+		return absent()
+	}
+	claims, err := validator.Validate(c.Request.Context(), bearer)
+	if err != nil {
+		return domain.Principal{}, false, err
+	}
+
+	// token_id is always recorded; the remaining entries only when the claim is non-empty.
+	credentials := map[string]string{"token_id": claims.TokenID}
+	for key, value := range map[string]string{
+		"issuer":           claims.Issuer,
+		"picture":          claims.Picture,
+		"authorized_party": claims.AuthorizedParty,
+	} {
+		if value != "" {
+			credentials[key] = value
+		}
+	}
+
+	return domain.Principal{
+		ID:              claims.Subject,
+		AuthMethod:      domain.AuthMethodJWT,
+		Subject:         claims.Subject,
+		Issuer:          claims.Issuer,
+		AuthorizedParty: claims.AuthorizedParty,
+		Audience:        claims.Audience,
+		Username:        claims.PreferredUsername,
+		Email:           claims.Email,
+		Name:            claims.Name,
+		Scopes:          claims.Scopes,
+		Credentials:     credentials,
+	}, true, nil
+}
+
+// mergePrincipals overlays API-key data onto a JWT principal: credentials are merged (API key wins), empty
+// Username/Email/Name are back-filled, scopes unioned. Differing non-empty subjects (case-insensitive) are an error.
+func mergePrincipals(apiPrincipal, jwtPrincipal domain.Principal) (domain.Principal, error) {
+	if apiPrincipal.Subject != "" && jwtPrincipal.Subject != "" && !strings.EqualFold(apiPrincipal.Subject, jwtPrincipal.Subject) {
+		return domain.Principal{}, errors.New("principal subjects mismatch")
+	}
+
+	combined := make(map[string]string) // fresh map: neither input's Credentials is written to
+	for _, source := range []map[string]string{jwtPrincipal.Credentials, apiPrincipal.Credentials} {
+		for key, value := range source {
+			combined[key] = value
+		}
+	}
+	combined["authenticated_via"] = "jwt+api_key"
+	combined["api_key_subject"] = apiPrincipal.Subject
+	combined["api_key_consumer_id"] = apiPrincipal.Credentials["consumer_id"]
+	combined["api_key_username"] = apiPrincipal.Username
+
+	merged := jwtPrincipal
+	merged.AuthMethod = domain.AuthMethodJWT
+	merged.Credentials = combined
+	if merged.Username == "" {
+		merged.Username = apiPrincipal.Username
+	}
+	if merged.Email == "" {
+		merged.Email = apiPrincipal.Email
+	}
+	if merged.Name == "" {
+		merged.Name = apiPrincipal.Name
+	}
+	merged.Scopes = mergeScopes(jwtPrincipal.Scopes, apiPrincipal.Scopes)
+	return merged, nil
+}
+
+// mergeScopes returns primary followed by the scopes of secondary that are not already present.
+// Entries are whitespace-trimmed, blanks dropped and duplicates removed — except that an empty
+// secondary short-circuits and returns primary exactly as given.
+// The result is nil when secondary is non-empty but every entry is blank.
+func mergeScopes(primary, secondary []string) []string {
+	if len(secondary) == 0 {
+		return primary
+	}
+
+	var merged []string
+	seen := make(map[string]struct{}, len(primary)+len(secondary))
+	// Walk primary first so its entries keep their position at the front of the result.
+	for _, group := range [2][]string{primary, secondary} {
+		for _, raw := range group {
+			scope := strings.TrimSpace(raw)
+			if scope == "" {
+				continue
+			}
+			if _, dup := seen[scope]; dup {
+				continue
+			}
+			seen[scope] = struct{}{}
+			merged = append(merged, scope)
+		}
+	}
+
+	return merged
+}
+
+// principalFromGatewayHeaders builds an API-key principal from the X-User-* / X-Consumer-* request headers.
+// It applies when X-User-ID or X-User-Subject is set, or X-Auth-Method names the API-key method;
+// ok is false otherwise, or when no identity can be derived from the headers.
+// NOTE(review): these are ordinary request headers — confirm the gateway strips client-supplied copies.
+func principalFromGatewayHeaders(headers http.Header, fallbackIssuer string) (domain.Principal, bool) {
+	userID := strings.TrimSpace(headers.Get("X-User-ID"))
+	subject := strings.TrimSpace(headers.Get("X-User-Subject"))
+	authMethod := strings.TrimSpace(headers.Get("X-Auth-Method"))
+	// Consumer headers are kept verbatim here; firstNonEmpty trims them when picking an identity.
+	consumerID := headers.Get("X-Consumer-ID")
+	consumerCustomID := headers.Get("X-Consumer-Custom-ID")
+	consumerUsername := headers.Get("X-Consumer-Username")
+
+	claimsAPIKey := strings.EqualFold(authMethod, string(domain.AuthMethodAPIKey))
+	if userID == "" && subject == "" && !claimsAPIKey {
+		return domain.Principal{}, false
+	}
+
+	principalID := firstNonEmpty(userID, subject, consumerCustomID, consumerID)
+	if principalID == "" {
+		return domain.Principal{}, false
+	}
+
+	// Only non-empty values are recorded as credentials.
+	credentials := map[string]string{}
+	recorded := [...][2]string{
+		{"gateway_user_id", userID},
+		{"gateway_subject", subject},
+		{"consumer_id", consumerID},
+		{"consumer_custom_id", consumerCustomID},
+		{"consumer_username", consumerUsername},
+		{"credential_identifier", headers.Get("X-Credential-Identifier")},
+	}
+	for _, entry := range recorded {
+		if entry[1] != "" {
+			credentials[entry[0]] = entry[1]
+		}
+	}
+
+	return domain.Principal{
+		ID:          principalID,
+		AuthMethod:  domain.AuthMethodAPIKey,
+		Subject:     firstNonEmpty(subject, principalID),
+		Issuer:      fallbackIssuer,
+		Username:    firstNonEmpty(headers.Get("X-User-Username"), consumerUsername),
+		Email:       headers.Get("X-User-Email"),
+		Scopes:      parseScopes(headers.Get("X-Consumer-Groups")),
+		Credentials: credentials,
+	}, true
+}
+
+// firstNonEmpty returns the first value that is non-blank after trimming, in trimmed form; "" when there is none.
+func firstNonEmpty(values ...string) string {
+	result := ""
+	for i := 0; i < len(values) && result == ""; i++ {
+		result = strings.TrimSpace(values[i])
+	}
+	return result
+}
+
+// parseScopes splits a comma-separated list into trimmed, non-empty entries.
+// It returns nil when raw is empty or contains only blanks and commas.
+func parseScopes(raw string) []string {
+	var scopes []string
+	// Splitting "" yields one empty field, which the blank filter drops, so no special case is needed.
+	for _, field := range strings.Split(raw, ",") {
+		trimmed := strings.TrimSpace(field)
+		if trimmed == "" {
+			continue
+		}
+		scopes = append(scopes, trimmed)
+	}
+	return scopes
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/middlewares/cors.go b/services/llm-api/internal/interfaces/httpserver/middlewares/cors.go
new file mode 100644
index 00000000..b18941f7
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/middlewares/cors.go
@@ -0,0 +1,39 @@
+package middlewares
+
+import (
+	"github.com/gin-gonic/gin"
+)
+
+// CORSMiddleware answers CORS preflights and decorates responses for a fixed set of local origins.
+// Allowed origins are echoed back with credentials enabled; other origins get no allow-origin header.
+func CORSMiddleware() gin.HandlerFunc {
+	allowedOrigins := map[string]bool{
+		"http://localhost":      true,
+		"http://localhost:3000": true,
+		"http://localhost:8080": true,
+		"http://127.0.0.1":      true,
+	}
+
+	return func(c *gin.Context) {
+		headers := c.Writer.Header()
+		// The allow-origin header depends on the request's Origin, so caches must key on it.
+		headers.Add("Vary", "Origin")
+		if origin := c.Request.Header.Get("Origin"); allowedOrigins[origin] {
+			headers.Set("Access-Control-Allow-Origin", origin)
+			headers.Set("Access-Control-Allow-Credentials", "true")
+		}
+
+		headers.Set("Access-Control-Allow-Methods", "GET, POST, PUT, PATCH, DELETE, OPTIONS")
+		headers.Set("Access-Control-Allow-Headers", "Authorization, Content-Type, X-API-Key, Idempotency-Key, X-Request-Id, Mcp-Session-Id")
+		headers.Set("Access-Control-Expose-Headers", "X-Request-Id")
+		headers.Set("Access-Control-Max-Age", "3600")
+
+		// Preflight requests end here with 204 and never reach the route handlers.
+		if c.Request.Method == "OPTIONS" {
+			c.AbortWithStatus(204)
+			return
+		}
+
+		c.Next()
+	}
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/middlewares/logging.go b/services/llm-api/internal/interfaces/httpserver/middlewares/logging.go
new file mode 100644
index 00000000..de975824
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/middlewares/logging.go
@@ -0,0 +1,60 @@
+package middlewares
+
+import (
+	"time"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+	"go.opentelemetry.io/otel/trace"
+)
+
+// LoggingMiddleware logs one event per request after the chain has run, with trace and request IDs when present.
+func LoggingMiddleware(logger zerolog.Logger) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		start := time.Now()
+		path := c.Request.URL.Path
+		raw := c.Request.URL.RawQuery
+
+		// Run the rest of the chain first; everything below describes the finished request.
+		c.Next()
+
+		// Calculate latency
+		latency := time.Since(start)
+		clientIP := c.ClientIP()
+		method := c.Request.Method
+		statusCode := c.Writer.Status()
+		errorMessage := c.Errors.ByType(gin.ErrorTypePrivate).String()
+
+		// Level follows the status class: 5xx -> error, 4xx -> warn, everything else -> info.
+		logEvent := logger.Info()
+		if statusCode >= 500 {
+			logEvent = logger.Error()
+		} else if statusCode >= 400 {
+			logEvent = logger.Warn()
+		}
+
+		// Add OpenTelemetry trace context if available
+		span := trace.SpanFromContext(c.Request.Context())
+		if span.SpanContext().IsValid() {
+			logEvent = logEvent.
+				Str("trace_id", span.SpanContext().TraceID().String()).
+				Str("span_id", span.SpanContext().SpanID().String())
+		}
+
+		// RequestID stores the ID under its header-name key, so read it through the package accessor.
+		if requestID := RequestIDFromContext(c); requestID != "" {
+			logEvent = logEvent.Str("request_id", requestID)
+		}
+
+		// Emit the event; the message is the joined private gin errors (empty when there are none).
+		logEvent.
+			Str("client_ip", clientIP).
+			Str("method", method).
+			Str("path", path).
+			Str("query", raw).
+			Int("status", statusCode).
+			Dur("latency", latency).
+			Str("user_agent", c.Request.UserAgent()).
+			Msg(errorMessage)
+	}
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/middlewares/request_id.go b/services/llm-api/internal/interfaces/httpserver/middlewares/request_id.go
new file mode 100644
index 00000000..ccfcf721
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/middlewares/request_id.go
@@ -0,0 +1,32 @@
+package middlewares
+
+import (
+	"github.com/gin-gonic/gin"
+	"github.com/google/uuid"
+)
+
+const requestIDHeader = "X-Request-Id" // header name; RequestID also uses it as the gin context key
+
+// RequestID reuses the caller's X-Request-Id or generates a UUID, then exposes it on the request, the response and the gin context.
+func RequestID() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		id := c.GetHeader(requestIDHeader)
+		if len(id) == 0 {
+			id = uuid.NewString()
+			c.Request.Header.Set(requestIDHeader, id)
+		}
+		c.Set(requestIDHeader, id) // the context key is the header name itself
+		c.Writer.Header().Set(requestIDHeader, id)
+		c.Next()
+	}
+}
+
+// RequestIDFromContext returns the ID stored by RequestID, or "" when it is absent or not a string.
+func RequestIDFromContext(c *gin.Context) string {
+	stored, found := c.Get(requestIDHeader)
+	if !found {
+		return ""
+	}
+	id, _ := stored.(string)
+	return id
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/middlewares/sse.go b/services/llm-api/internal/interfaces/httpserver/middlewares/sse.go
new file mode 100644
index 00000000..047a08aa
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/middlewares/sse.go
@@ -0,0 +1,17 @@
+package middlewares
+
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+)
+
+// PrepareSSE sets the Server-Sent Events response headers and reports whether the writer supports http.Flusher.
+func PrepareSSE(c *gin.Context) (http.Flusher, bool) {
+	c.Writer.Header().Set("Content-Type", "text/event-stream")
+	c.Writer.Header().Set("Cache-Control", "no-cache")
+	c.Writer.Header().Set("Connection", "keep-alive")
+	c.Writer.Header().Set("Transfer-Encoding", "chunked") // NOTE(review): net/http usually manages this header itself — confirm it is needed
+	flusher, ok := c.Writer.(http.Flusher)
+	return flusher, ok // ok is false when the writer cannot flush; flusher is then nil
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/middlewares/tracing.go b/services/llm-api/internal/interfaces/httpserver/middlewares/tracing.go
new file mode 100644
index 00000000..570e39a1
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/middlewares/tracing.go
@@ -0,0 +1,70 @@
+package middlewares
+
+import (
+	"github.com/gin-gonic/gin"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/propagation"
+	semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
+	"go.opentelemetry.io/otel/trace"
+)
+
+// TracingMiddleware starts a server span per request (continuing any propagated trace) and records the response status on it.
+func TracingMiddleware(serviceName string) gin.HandlerFunc {
+	tracer := otel.Tracer(serviceName)
+	propagator := otel.GetTextMapPropagator()
+
+	return func(c *gin.Context) {
+		// Extract trace context from incoming request headers
+		ctx := propagator.Extract(c.Request.Context(), propagation.HeaderCarrier(c.Request.Header))
+
+		// Name the span after the route template; without one, fall back to the raw URL path.
+		spanName := c.Request.Method + " " + c.FullPath()
+		if c.FullPath() == "" {
+			spanName = c.Request.Method + " " + c.Request.URL.Path
+		}
+
+		ctx, span := tracer.Start(
+			ctx,
+			spanName,
+			trace.WithSpanKind(trace.SpanKindServer),
+			trace.WithAttributes(
+				semconv.HTTPMethod(c.Request.Method),
+				semconv.HTTPRoute(c.FullPath()),
+				semconv.HTTPURL(c.Request.URL.String()),
+				semconv.HTTPTarget(c.Request.URL.Path),
+				semconv.HTTPScheme(c.Request.URL.Scheme),
+				semconv.NetHostName(c.Request.Host),
+				semconv.HTTPUserAgent(c.Request.UserAgent()),
+				attribute.String("http.client_ip", c.ClientIP()),
+			),
+		)
+		defer span.End()
+
+		// Store span in context for use in handlers
+		c.Request = c.Request.WithContext(ctx)
+
+		// RequestID stores the ID under its header-name key; it is set only if RequestID ran earlier in the chain.
+		if requestID := RequestIDFromContext(c); requestID != "" {
+			span.SetAttributes(attribute.String("request.id", requestID))
+		}
+
+		// Process request
+		c.Next()
+
+		// Record response status
+		status := c.Writer.Status()
+		span.SetAttributes(semconv.HTTPStatusCode(status))
+
+		// Any 4xx/5xx marks the span as an error and attaches the last gin error, if one was recorded.
+		if status >= 400 {
+			span.SetStatus(codes.Error, c.Errors.String())
+			if len(c.Errors) > 0 {
+				span.RecordError(c.Errors.Last())
+			}
+		} else {
+			span.SetStatus(codes.Ok, "")
+		}
+	}
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/requests/chat/chat.go b/services/llm-api/internal/interfaces/httpserver/requests/chat/chat.go
new file mode 100644
index 00000000..ccf3b662
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/requests/chat/chat.go
@@ -0,0 +1,92 @@
+package chatrequests
+
+import (
+	"encoding/json"
+
+	"jan-server/services/llm-api/internal/domain/conversation"
+
+	openai "github.com/sashabaranov/go-openai"
+)
+
+// ChatCompletionRequest extends OpenAI's ChatCompletionRequest with conversation support.
+// The OpenAI request is embedded; the fields declared below are optional
+// extensions and remain nil when the corresponding JSON key is absent.
+type ChatCompletionRequest struct {
+	openai.ChatCompletionRequest
+
+	// Conversation can be either a string (conversation ID) or a conversation object
+	// Items from this conversation are prepended to Messages for this response request.
+	// Input items and output items from this response are automatically added to this conversation after completion.
+	Conversation *ConversationReference `json:"conversation,omitempty"`
+	// Store controls whether the latest input and generated response should be persisted
+	Store *bool `json:"store,omitempty"`
+	// StoreReasoning controls whether reasoning content (if present) should also be persisted
+	StoreReasoning *bool `json:"store_reasoning,omitempty"`
+}
+
+// ConversationReference holds the value of a "conversation" JSON field that
+// may be sent either as a bare string (the conversation ID) or as a full
+// conversation object. After a successful unmarshal at most one of ID and
+// Object is non-nil.
+type ConversationReference struct {
+	ID     *string                    `json:"-"` // Conversation ID when provided as string
+	Object *conversation.Conversation `json:"-"` // Conversation object when provided as object
+}
+
+// UnmarshalJSON implements custom unmarshaling to support both string and object types
+// This is required because OpenAI's API allows conversation to be either:
+//   - A string: "conversation": "conv_abc123"
+//   - An object: "conversation": {"id": "conv_abc123", ...}
+//
+// A JSON null is treated as a no-op, following the encoding/json convention
+// for Unmarshaler implementations. Whichever form is decoded replaces any
+// previously stored value so a reused reference never carries both.
+func (c *ConversationReference) UnmarshalJSON(data []byte) error {
+	// json.Unmarshal of null into a string succeeds without touching the
+	// target, which would otherwise be recorded as an empty-string ID.
+	if string(data) == "null" {
+		return nil
+	}
+
+	// Try to unmarshal as string first
+	var str string
+	if err := json.Unmarshal(data, &str); err == nil {
+		c.ID = &str
+		c.Object = nil
+		return nil
+	}
+
+	// If not a string, try to unmarshal as conversation object
+	var obj conversation.Conversation
+	if err := json.Unmarshal(data, &obj); err != nil {
+		return err
+	}
+	c.Object = &obj
+	c.ID = nil
+	return nil
+}
+
+// MarshalJSON implements custom marshaling: the ID is written as a JSON
+// string when set, otherwise the conversation object, otherwise null.
+// A nil receiver also marshals to null.
+func (c *ConversationReference) MarshalJSON() ([]byte, error) {
+	if c == nil {
+		return json.Marshal(nil)
+	}
+	if c.ID != nil {
+		return json.Marshal(*c.ID)
+	}
+	if c.Object != nil {
+		return json.Marshal(*c.Object)
+	}
+	return json.Marshal(nil)
+}
+
+// IsEmpty returns true if the conversation reference is empty, i.e. the
+// receiver is nil or neither an ID nor an object is set.
+// It is safe to call on a nil *ConversationReference.
+func (c *ConversationReference) IsEmpty() bool {
+	return c == nil || (c.ID == nil && c.Object == nil)
+}
+
+// GetID returns the conversation ID, whether it was provided directly or from an object
+// Returns empty string if the reference is nil or has no ID.
+func (c *ConversationReference) GetID() string {
+	if c == nil {
+		return ""
+	}
+	if c.ID != nil {
+		return *c.ID
+	}
+	if c.Object != nil {
+		return c.Object.PublicID
+	}
+	return ""
+}
+
+// GetConversation returns the conversation object if provided
+// Returns nil if the reference is nil or contains only an ID string.
+func (c *ConversationReference) GetConversation() *conversation.Conversation {
+	if c == nil {
+		return nil
+	}
+	return c.Object
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/requests/conversation/conversation.go b/services/llm-api/internal/interfaces/httpserver/requests/conversation/conversation.go
new file mode 100644
index 00000000..dcb3f85e
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/requests/conversation/conversation.go
@@ -0,0 +1,46 @@
+package conversationrequests
+
+import "jan-server/services/llm-api/internal/domain/conversation"
+
+// CreateConversationRequest represents the request to create a conversation.
+// No field carries a binding rule, so every field may be omitted; pointer
+// fields are nil when their JSON key is absent.
+type CreateConversationRequest struct {
+	Title     *string             `json:"title,omitempty"`
+	Items     []conversation.Item `json:"items,omitempty"`
+	Metadata  map[string]string   `json:"metadata,omitempty"`
+	Referrer  *string             `json:"referrer,omitempty"`
+	ProjectID *string             `json:"project_id,omitempty"`
+}
+
+// UpdateConversationRequest represents the request to update a conversation.
+// All fields are optional; a nil pointer or nil map means the key was not
+// supplied in the JSON body.
+type UpdateConversationRequest struct {
+	Title    *string           `json:"title,omitempty"`
+	Metadata map[string]string `json:"metadata,omitempty"`
+	Referrer *string           `json:"referrer,omitempty"`
+}
+
+// CreateItemsRequest represents the request to create items in a conversation.
+// Items is tagged binding:"required", so request binding is expected to
+// reject a body that omits it.
+type CreateItemsRequest struct {
+	Items []conversation.Item `json:"items" binding:"required"`
+}
+
+// ListConversationsQueryParams represents query parameters for listing conversations.
+// Fields are mapped through `form` tags and are pointers, so a parameter that
+// was not supplied stays nil.
+type ListConversationsQueryParams struct {
+	Referrer *string `form:"referrer"`
+	Limit    *int    `form:"limit"`
+	Order    *string `form:"order"`
+	After    *string `form:"after"`
+	Scope    *string `form:"scope"`
+}
+
+// ListItemsQueryParams represents query parameters for listing items.
+// Fields are mapped through `form` tags; pointer fields stay nil when the
+// parameter is absent, and Include is a slice so it can hold several values.
+type ListItemsQueryParams struct {
+	After   *string  `form:"after"`
+	Include []string `form:"include"`
+	Limit   *int     `form:"limit"`
+	Order   *string  `form:"order"`
+}
+
+// GetItemQueryParams represents query parameters for getting a single item.
+// Include is mapped from the `include` form/query key and may hold several values.
+type GetItemQueryParams struct {
+	Include []string `form:"include"`
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/requests/models/filters.go b/services/llm-api/internal/interfaces/httpserver/requests/models/filters.go
new file mode 100644
index 00000000..38ad4d86
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/requests/models/filters.go
@@ -0,0 +1,14 @@
+package requestmodels
+
+// ModelCatalogFilterParams holds optional filters for model catalog queries.
+// Fields are mapped through `form` tags; a nil pointer means the filter was
+// not supplied.
+type ModelCatalogFilterParams struct {
+	Status      *string `form:"status"`
+	IsModerated *bool   `form:"is_moderated"`
+	Active      *bool   `form:"active"`
+}
+
+// ProviderModelFilterParams holds optional filters for provider model queries.
+// Fields are mapped through `form` tags; a nil pointer means the filter was
+// not supplied. Note that ProviderPublicID is read from the `provider_id` key.
+type ProviderModelFilterParams struct {
+	ProviderPublicID *string `form:"provider_id"`
+	ModelKey         *string `form:"model_key"`
+	Active           *bool   `form:"active"`
+	SupportsImages   *bool   `form:"supports_images"`
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/requests/models/model.go b/services/llm-api/internal/interfaces/httpserver/requests/models/model.go
new file mode 100644
index 00000000..e5742143
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/requests/models/model.go
@@ -0,0 +1,138 @@
+package requestmodels
+
+import (
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+)
+
+// AddProviderRequest is the JSON body for registering a provider.
+// Name, Vendor and BaseURL are tagged binding:"required"; APIKey, Metadata and
+// Active are optional (Active is a pointer so an omitted value is nil rather
+// than false).
+type AddProviderRequest struct {
+	Name     string            `json:"name" binding:"required"`
+	Vendor   string            `json:"vendor" binding:"required"`
+	BaseURL  string            `json:"base_url" binding:"required"`
+	APIKey   string            `json:"api_key"`
+	Metadata map[string]string `json:"metadata"`
+	Active   *bool             `json:"active"`
+}
+
+// UpdateProviderRequest is the JSON body for a partial provider update.
+// Every field is a pointer, so a nil value means the key was absent from the
+// body (presumably "leave unchanged" — confirm against the handler).
+type UpdateProviderRequest struct {
+	Name     *string            `json:"name"`
+	BaseURL  *string            `json:"base_url"`
+	APIKey   *string            `json:"api_key"`
+	Metadata *map[string]string `json:"metadata"`
+	Active   *bool              `json:"active"`
+}
+
+// UpdateModelCatalogRequest is the JSON body for a partial model catalog update.
+// Every field is a pointer, so a nil value means the key was absent from the
+// body (presumably "leave unchanged" — confirm against the handler).
+type UpdateModelCatalogRequest struct {
+	SupportedParameters *domainmodel.SupportedParameters `json:"supported_parameters"`
+	Architecture        *domainmodel.Architecture        `json:"architecture"`
+	Tags                *[]string                        `json:"tags"`
+	Notes               *string                          `json:"notes"`
+	IsModerated         *bool                            `json:"is_moderated"`
+	Extras              *map[string]any                  `json:"extras"`
+}
+
+// UpdateProviderModelRequest is the JSON body for a partial provider model update.
+// Every field is a pointer, so a nil value means the key was absent from the
+// body (presumably "leave unchanged" — confirm against the handler).
+type UpdateProviderModelRequest struct {
+	DisplayName        *string                  `json:"display_name"`
+	Pricing            *domainmodel.Pricing     `json:"pricing"`
+	TokenLimits        *domainmodel.TokenLimits `json:"token_limits"`
+	Family             *string                  `json:"family"`
+	SupportsImages     *bool                    `json:"supports_images"`
+	SupportsEmbeddings *bool                    `json:"supports_embeddings"`
+	SupportsReasoning  *bool                    `json:"supports_reasoning"`
+	SupportsAudio      *bool                    `json:"supports_audio"`
+	SupportsVideo      *bool                    `json:"supports_video"`
+	Active             *bool                    `json:"active"`
+}
+
+// BulkEnableModelsRequest is the JSON body for enabling or disabling many
+// models at once.
+type BulkEnableModelsRequest struct {
+	Enable       *bool    `json:"enable" binding:"required"`             // Required: true to enable, false to disable
+	ExceptModels []string `json:"except_models"`                         // List of model keys to exclude
+	ProviderID   *string  `json:"provider_id" binding:"omitempty,min=1"` // Optional: filter by provider
+}
+
+// Normalize removes duplicates and trims whitespace from model keys.
+// Blank entries are dropped and the first occurrence of each key keeps its
+// position. An empty or nil ExceptModels is left untouched.
+func (r *BulkEnableModelsRequest) Normalize() {
+	r.ExceptModels = dedupeTrimmedKeys(r.ExceptModels)
+}
+
+// BulkToggleCatalogsRequest is the JSON body for enabling or disabling many
+// model catalogs at once.
+type BulkToggleCatalogsRequest struct {
+	Enable       *bool    `json:"enable" binding:"required"`        // Required: true to enable, false to disable
+	CatalogIDs   []string `json:"catalog_ids"  binding:"omitempty"` // Optional: specific catalog public IDs. If empty, applies to all catalogs
+	ExceptModels []string `json:"except_models"`                    // List of model keys to exclude from the operation
+}
+
+// Normalize removes duplicates and trims whitespace from catalog IDs and model keys.
+// Each list is processed independently with the same rules as
+// BulkEnableModelsRequest.Normalize.
+func (r *BulkToggleCatalogsRequest) Normalize() {
+	r.CatalogIDs = dedupeTrimmedKeys(r.CatalogIDs)
+	r.ExceptModels = dedupeTrimmedKeys(r.ExceptModels)
+}
+
+// dedupeTrimmedKeys returns keys with surrounding whitespace trimmed, blank
+// entries removed and duplicates dropped (first occurrence wins, order kept).
+// An empty or nil input is returned as-is so callers keep their nil-ness.
+func dedupeTrimmedKeys(keys []string) []string {
+	if len(keys) == 0 {
+		return keys
+	}
+
+	seen := make(map[string]struct{}, len(keys))
+	normalized := make([]string, 0, len(keys))
+	for _, key := range keys {
+		trimmed := trimWhitespace(key)
+		if trimmed == "" {
+			continue
+		}
+		if _, duplicate := seen[trimmed]; duplicate {
+			continue
+		}
+		seen[trimmed] = struct{}{}
+		normalized = append(normalized, trimmed)
+	}
+	return normalized
+}
+
+// trimWhitespace is a helper function to trim leading and trailing
+// whitespace. Only the ASCII characters space, tab, newline and carriage
+// return are stripped; other Unicode spaces are preserved.
+func trimWhitespace(input string) string {
+	isSpace := func(b byte) bool {
+		return b == ' ' || b == '\t' || b == '\n' || b == '\r'
+	}
+
+	// start is inclusive, end is exclusive.
+	start, end := 0, len(input)
+	for start < end && isSpace(input[start]) {
+		start++
+	}
+	for end > start && isSpace(input[end-1]) {
+		end--
+	}
+	return input[start:end]
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/requests/projectreq/requests.go b/services/llm-api/internal/interfaces/httpserver/requests/projectreq/requests.go
new file mode 100644
index 00000000..f07a74c9
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/requests/projectreq/requests.go
@@ -0,0 +1,15 @@
+package projectreq
+
+// CreateProjectRequest represents the request to create a project.
+// Name is tagged binding:"required"; Instruction is optional and nil when omitted.
+type CreateProjectRequest struct {
+	Name        string  `json:"name" binding:"required"`
+	Instruction *string `json:"instruction,omitempty"`
+}
+
+// UpdateProjectRequest represents the request to update a project.
+// Every field is an optional pointer that is nil when its key is absent.
+// Note the JSON keys for Archived and Favorite are "is_archived" and
+// "is_favorite".
+type UpdateProjectRequest struct {
+	Name        *string `json:"name,omitempty"`
+	Instruction *string `json:"instruction,omitempty"`
+	Archived    *bool   `json:"is_archived,omitempty"`
+	Favorite    *bool   `json:"is_favorite,omitempty"`
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/requests/query.go b/services/llm-api/internal/interfaces/httpserver/requests/query.go
new file mode 100644
index 00000000..9a57d233
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/requests/query.go
@@ -0,0 +1,72 @@
+package requests
+
+import (
+	"strconv"
+
+	"github.com/gin-gonic/gin"
+	"jan-server/services/llm-api/internal/domain/query"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// GetCursorPaginationFromQuery builds a query.Pagination from the request's
+// query string.
+//
+// Recognised parameters:
+//   - limit:  integer >= 1, defaults to "20"; an explicitly empty value leaves Limit nil
+//   - offset: integer; when present it takes precedence over after/cursor
+//   - after:  pagination cursor; "cursor" is accepted as an alias when after is empty
+//   - order:  "asc" or "desc", defaults to "desc"
+//
+// The cursor is resolved through findByLastID when that callback is non-nil;
+// otherwise it is parsed as a base-10 unsigned integer that must fit in uint.
+// Any invalid parameter yields a validation error built by
+// platformerrors.NewError.
+func GetCursorPaginationFromQuery(reqCtx *gin.Context, findByLastID func(string) (*uint, error)) (*query.Pagination, error) {
+	limitStr := reqCtx.DefaultQuery("limit", "20")
+	offsetStr := reqCtx.Query("offset")
+	order := reqCtx.DefaultQuery("order", "desc")
+	afterStr := reqCtx.DefaultQuery("after", "")
+	if afterStr == "" {
+		if cursor := reqCtx.Query("cursor"); cursor != "" {
+			afterStr = cursor
+		}
+	}
+
+	var limit *int
+	if limitStr != "" {
+		limitInt, err := strconv.Atoi(limitStr)
+		if err != nil || limitInt < 1 {
+			return nil, platformerrors.NewError(reqCtx.Request.Context(), platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "invalid limit number", nil, "04aecd25-bd32-428b-864d-aeb7ecb06e53")
+		}
+		limit = &limitInt
+	}
+
+	var offset *int
+	var after *uint
+	if offsetStr != "" {
+		offsetInt, err := strconv.Atoi(offsetStr)
+		if err != nil {
+			return nil, platformerrors.NewError(reqCtx.Request.Context(), platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "invalid offset number", nil, "a3e0ea22-afc6-45df-b686-a194868af415")
+		}
+		offset = &offsetInt
+	} else if afterStr != "" {
+		if findByLastID != nil {
+			lastID, err := findByLastID(afterStr)
+			if err != nil {
+				// This branch handles the cursor, not the offset: report it
+				// as such and keep the underlying cause, as the numeric
+				// branch below does.
+				return nil, platformerrors.NewError(reqCtx.Request.Context(), platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "invalid pagination cursor", err, "1f9ee4ee-56ed-448e-9296-d978c9a03726")
+			}
+			after = lastID
+		} else {
+			// Parse with the platform's uint width so an oversized value is
+			// rejected instead of being silently truncated by the conversion.
+			parsedID, err := strconv.ParseUint(afterStr, 10, strconv.IntSize)
+			if err != nil {
+				return nil, platformerrors.NewError(reqCtx.Request.Context(), platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "invalid pagination cursor", err, "9a5c2c48-5c59-4f40-9f27-5861e9c62d2f")
+			}
+			tempID := uint(parsedID)
+			after = &tempID
+		}
+	}
+
+	if order != "asc" && order != "desc" {
+		return nil, platformerrors.NewError(reqCtx.Request.Context(), platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "invalid order", nil, "c3598493-7770-4e94-b44f-f571aabf2bdd")
+	}
+
+	return &query.Pagination{
+		Limit:  limit,
+		Offset: offset,
+		Order:  order,
+		After:  after,
+	}, nil
+}
+
+// GetPaginationFromQuery parses pagination parameters like
+// GetCursorPaginationFromQuery, but with a cursor resolver that always fails:
+// any request that reaches cursor resolution gets a validation error.
+func GetPaginationFromQuery(reqCtx *gin.Context) (*query.Pagination, error) {
+	rejectCursor := func(_ string) (*uint, error) {
+		return nil, platformerrors.NewError(reqCtx.Request.Context(), platformerrors.LayerHandler, platformerrors.ErrorTypeValidation, "invalid query parameter: last", nil, "6b72a4af-ea95-4fbc-b141-486f4da86e79")
+	}
+	return GetCursorPaginationFromQuery(reqCtx, rejectCursor)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/responses/chat/chat.go b/services/llm-api/internal/interfaces/httpserver/responses/chat/chat.go
new file mode 100644
index 00000000..9cf605c5
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/responses/chat/chat.go
@@ -0,0 +1,33 @@
+package chatresponses
+
+import (
+	openai "github.com/sashabaranov/go-openai"
+)
+
+// ChatCompletionResponse extends OpenAI's ChatCompletionResponse with conversation context.
+// The OpenAI response is embedded; Conversation is omitted from the JSON
+// output when nil.
+type ChatCompletionResponse struct {
+	openai.ChatCompletionResponse
+	Conversation *ConversationContext `json:"conversation,omitempty"`
+}
+
+// ConversationContext represents the conversation associated with this response.
+// ID is always serialized; Title is left out of the JSON when nil.
+type ConversationContext struct {
+	ID    string  `json:"id"`              // The unique ID of the conversation
+	Title *string `json:"title,omitempty"` // The title of the conversation (optional)
+}
+
+// NewChatCompletionResponse creates a response with optional conversation context
+func NewChatCompletionResponse(openaiResp *openai.ChatCompletionResponse, conversationID string, conversationTitle *string) *ChatCompletionResponse {
+	resp := &ChatCompletionResponse{
+		ChatCompletionResponse: *openaiResp,
+	}
+
+	if conversationID != "" {
+		resp.Conversation = &ConversationContext{
+			ID:    conversationID,
+			Title: conversationTitle,
+		}
+	}
+
+	return resp
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/responses/conversation/conversation.go b/services/llm-api/internal/interfaces/httpserver/responses/conversation/conversation.go
new file mode 100644
index 00000000..4d091ed3
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/responses/conversation/conversation.go
@@ -0,0 +1,149 @@
+package conversationresponses
+
+import (
+	"jan-server/services/llm-api/internal/domain/conversation"
+)
+
+// ConversationResponse represents the OpenAI-compatible conversation response.
+// CreatedAt is an integer timestamp (NewConversationResponse fills it with
+// Unix seconds); optional fields are omitted from the JSON when empty.
+type ConversationResponse struct {
+	ID        string            `json:"id"`
+	Object    string            `json:"object"`
+	Title     *string           `json:"title,omitempty"`
+	CreatedAt int64             `json:"created_at"`
+	Metadata  map[string]string `json:"metadata,omitempty"`
+	Referrer  *string           `json:"referrer,omitempty"`
+	ProjectID *string           `json:"project_id,omitempty"`
+}
+
+// ConversationListResponse represents a paginated list of conversations.
+// FirstID and LastID mirror the IDs of the first and last entries of Data
+// (empty strings when Data is empty, see NewConversationListResponse).
+type ConversationListResponse struct {
+	Object  string                 `json:"object"`
+	Data    []ConversationResponse `json:"data"`
+	FirstID string                 `json:"first_id"`
+	LastID  string                 `json:"last_id"`
+	HasMore bool                   `json:"has_more"`
+	Total   int64                  `json:"total"`
+}
+
+// ConversationDeletedResponse represents the delete confirmation response.
+// NewConversationDeletedResponse fills Object with "conversation.deleted"
+// and Deleted with true.
+type ConversationDeletedResponse struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Deleted bool   `json:"deleted"`
+}
+
+// ItemListResponse represents the OpenAI-compatible item list response.
+// FirstID and LastID mirror the public IDs of the first and last entries of
+// Data (empty strings when Data is empty, see NewItemListResponse).
+type ItemListResponse struct {
+	Object  string              `json:"object"`
+	Data    []conversation.Item `json:"data"`
+	FirstID string              `json:"first_id"`
+	LastID  string              `json:"last_id"`
+	HasMore bool                `json:"has_more"`
+}
+
+// NewConversationResponse maps a domain conversation onto its API
+// representation. CreatedAt is converted to Unix seconds and Object is fixed
+// to "conversation". conv must not be nil.
+func NewConversationResponse(conv *conversation.Conversation) *ConversationResponse {
+	return &ConversationResponse{
+		ID:        conv.PublicID,
+		Object:    "conversation",
+		Title:     conv.Title,
+		CreatedAt: conv.CreatedAt.Unix(),
+		Metadata:  conv.Metadata,
+		Referrer:  conv.Referrer,
+		ProjectID: conv.ProjectPublicID,
+	}
+}
+
+// NewConversationListResponse creates a conversation list response
+func NewConversationListResponse(conversations []*conversation.Conversation, hasMore bool, total int64) *ConversationListResponse {
+	data := make([]ConversationResponse, 0, len(conversations))
+	for _, conv := range conversations {
+		if conv == nil {
+			continue
+		}
+		resp := NewConversationResponse(conv)
+		if resp != nil {
+			data = append(data, *resp)
+		}
+	}
+
+	firstID := ""
+	lastID := ""
+	if len(data) > 0 {
+		firstID = data[0].ID
+		lastID = data[len(data)-1].ID
+	}
+
+	return &ConversationListResponse{
+		Object:  "list",
+		Data:    data,
+		FirstID: firstID,
+		LastID:  lastID,
+		HasMore: hasMore,
+		Total:   total,
+	}
+}
+
+// NewConversationDeletedResponse builds the confirmation payload for a
+// deleted conversation identified by publicID.
+func NewConversationDeletedResponse(publicID string) *ConversationDeletedResponse {
+	deleted := new(ConversationDeletedResponse)
+	deleted.ID = publicID
+	deleted.Object = "conversation.deleted"
+	deleted.Deleted = true
+	return deleted
+}
+
+// NewItemListResponse wraps items in a list response. For an empty input the
+// result carries an empty (non-nil) Data slice, empty IDs and HasMore=false
+// regardless of hasMore; otherwise the IDs come from the first and last item.
+func NewItemListResponse(items []conversation.Item, hasMore bool) *ItemListResponse {
+	list := &ItemListResponse{
+		Object: "list",
+		Data:   []conversation.Item{},
+	}
+
+	if count := len(items); count > 0 {
+		list.Data = items
+		list.FirstID = items[0].PublicID
+		list.LastID = items[count-1].PublicID
+		list.HasMore = hasMore
+	}
+
+	return list
+}
+
+// ItemResponse is just the item itself (OpenAI compatibility).
+// It is declared as a type alias, so ItemResponse and conversation.Item are
+// the same type and can be used interchangeably.
+type ItemResponse = conversation.Item
+
+// ConversationItemCreatedResponse represents the response after adding items.
+// It has the same shape as a list response; NewConversationItemCreatedResponse
+// always sets HasMore to false.
+type ConversationItemCreatedResponse struct {
+	Object  string              `json:"object"`
+	Data    []conversation.Item `json:"data"`
+	FirstID string              `json:"first_id"`
+	LastID  string              `json:"last_id"`
+	HasMore bool                `json:"has_more"`
+}
+
+// NewConversationItemCreatedResponse wraps freshly created items in a list
+// payload. HasMore is always false. For an empty input Data is an empty
+// (non-nil) slice and both IDs are empty strings.
+func NewConversationItemCreatedResponse(items []conversation.Item) *ConversationItemCreatedResponse {
+	created := &ConversationItemCreatedResponse{
+		Object:  "list",
+		Data:    []conversation.Item{},
+		HasMore: false,
+	}
+
+	if count := len(items); count > 0 {
+		created.Data = items
+		created.FirstID = items[0].PublicID
+		created.LastID = items[count-1].PublicID
+	}
+
+	return created
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/responses/model/model.go b/services/llm-api/internal/interfaces/httpserver/responses/model/model.go
new file mode 100644
index 00000000..a7459df9
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/responses/model/model.go
@@ -0,0 +1,300 @@
+package modelresponses
+
+import (
+	"strings"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+)
+
+// ModelResponse is the JSON representation of a single model entry.
+// The builders in this file set Object to "model", Created to Unix seconds
+// and OwnedBy to the provider's display name.
+type ModelResponse struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Created int64  `json:"created"`
+	OwnedBy string `json:"owned_by"`
+}
+
+// ModelResponseList is the list envelope for ModelResponse entries.
+type ModelResponseList struct {
+	Object string          `json:"object"`
+	Data   []ModelResponse `json:"data"`
+}
+
+// ModelResponseWithProvider is a model entry that additionally identifies the
+// provider serving it (public ID, vendor and display name).
+type ModelResponseWithProvider struct {
+	ID             string `json:"id"`
+	Object         string `json:"object"`
+	Created        int64  `json:"created"`
+	OwnedBy        string `json:"owned_by"`
+	ProviderID     string `json:"provider_id"`
+	ProviderVendor string `json:"provider_vendor"`
+	ProviderName   string `json:"provider_name"`
+}
+
+// ModelWithProviderResponseList is the list envelope for
+// ModelResponseWithProvider entries.
+type ModelWithProviderResponseList struct {
+	Object string                      `json:"object"`
+	Data   []ModelResponseWithProvider `json:"data"`
+}
+
+// ProviderResponse is the JSON representation of a provider.
+// Metadata is omitted from the output when empty. No API key field is exposed.
+type ProviderResponse struct {
+	ID       string            `json:"id"`
+	Name     string            `json:"name"`
+	Vendor   string            `json:"vendor"`
+	BaseURL  string            `json:"base_url"`
+	Active   bool              `json:"active"`
+	Metadata map[string]string `json:"metadata,omitempty"`
+}
+
+// ProviderWithModelCountResponse is a provider representation extended with
+// two counters: ModelCount and ModelActiveCount.
+type ProviderWithModelCountResponse struct {
+	ID               string            `json:"id"`
+	Name             string            `json:"name"`
+	Vendor           string            `json:"vendor"`
+	BaseURL          string            `json:"base_url"`
+	Active           bool              `json:"active"`
+	ModelCount       int64             `json:"model_count"`
+	ModelActiveCount int64             `json:"model_active_count"`
+	Metadata         map[string]string `json:"metadata,omitempty"`
+}
+
+// ProviderWithModelsResponse is a provider representation that embeds the
+// list of its models under the "models" key.
+type ProviderWithModelsResponse struct {
+	ID       string            `json:"id"`
+	Name     string            `json:"name"`
+	Vendor   string            `json:"vendor"`
+	BaseURL  string            `json:"base_url"`
+	Models   []ModelResponse   `json:"models"`
+	Active   bool              `json:"active"`
+	Metadata map[string]string `json:"metadata,omitempty"`
+}
+
+// ProviderResponseList is the list envelope for ProviderResponse entries.
+type ProviderResponseList struct {
+	Object string             `json:"object"`
+	Data   []ProviderResponse `json:"data"`
+}
+
+func BuildModelResponseListWithProvider(
+	providerModels []*domainmodel.ProviderModel,
+	providerByID map[uint]*domainmodel.Provider,
+) []ModelResponseWithProvider {
+	items := make([]ModelResponseWithProvider, 0, len(providerModels))
+
+	for _, pm := range providerModels {
+		if pm == nil {
+			continue
+		}
+		provider := providerByID[pm.ProviderID]
+		if provider == nil {
+			continue
+		}
+		items = append(items, ModelResponseWithProvider{
+			ID:             pm.ModelPublicID,
+			Object:         "model",
+			Created:        pm.CreatedAt.Unix(),
+			OwnedBy:        provider.DisplayName,
+			ProviderID:     provider.PublicID,
+			ProviderVendor: strings.ToLower(string(provider.Kind)),
+			ProviderName:   provider.DisplayName,
+		})
+	}
+
+	return items
+}
+
+func BuildModelResponseList(
+	providerModels []*domainmodel.ProviderModel,
+	providerByID map[uint]*domainmodel.Provider,
+) []ModelResponse {
+	items := make([]ModelResponse, 0, len(providerModels))
+
+	for _, pm := range providerModels {
+		if pm == nil {
+			continue
+		}
+		provider := providerByID[pm.ProviderID]
+		if provider == nil {
+			continue
+		}
+		items = append(items, ModelResponse{
+			ID:      pm.ModelPublicID,
+			Object:  "model",
+			Created: pm.CreatedAt.Unix(),
+			OwnedBy: provider.DisplayName,
+		})
+	}
+
+	return items
+}
+
+func BuildProviderResponse(provider *domainmodel.Provider) ProviderResponse {
+	return ProviderResponse{
+		ID:       provider.PublicID,
+		Name:     provider.DisplayName,
+		Vendor:   strings.ToLower(string(provider.Kind)),
+		BaseURL:  provider.BaseURL,
+		Active:   provider.Active,
+		Metadata: provider.Metadata,
+	}
+}
+
+func BuildProviderWithModelCountResponse(
+	provider *domainmodel.Provider,
+	modelCount int64,
+	activeCount int64,
+) ProviderWithModelCountResponse {
+	return ProviderWithModelCountResponse{
+		ID:               provider.PublicID,
+		Name:             provider.DisplayName,
+		Vendor:           strings.ToLower(string(provider.Kind)),
+		BaseURL:          provider.BaseURL,
+		Active:           provider.Active,
+		ModelCount:       modelCount,
+		ModelActiveCount: activeCount,
+		Metadata:         provider.Metadata,
+	}
+}
+
+// BuildProviderWithModelsResponse maps a provider together with its models
+// onto the API representation. It returns nil for a nil provider; nil model
+// entries are skipped. Models is always a non-nil slice in the result.
+func BuildProviderWithModelsResponse(
+	provider *domainmodel.Provider,
+	models []*domainmodel.ProviderModel,
+) *ProviderWithModelsResponse {
+	if provider == nil {
+		return nil
+	}
+
+	resp := &ProviderWithModelsResponse{
+		ID:       provider.PublicID,
+		Name:     provider.DisplayName,
+		Vendor:   strings.ToLower(string(provider.Kind)),
+		BaseURL:  provider.BaseURL,
+		Models:   make([]ModelResponse, 0, len(models)),
+		Active:   provider.Active,
+		Metadata: provider.Metadata,
+	}
+
+	for idx := range models {
+		entry := models[idx]
+		if entry == nil {
+			continue
+		}
+		resp.Models = append(resp.Models, ModelResponse{
+			ID:      entry.ModelPublicID,
+			Object:  "model",
+			Created: entry.CreatedAt.Unix(),
+			OwnedBy: provider.DisplayName,
+		})
+	}
+
+	return resp
+}
+
+// BuildProviderResponseWithModels is an alternate name for
+// BuildProviderWithModelsResponse: it forwards both arguments unchanged and
+// returns that function's result.
+func BuildProviderResponseWithModels(
+	provider *domainmodel.Provider,
+	models []*domainmodel.ProviderModel,
+) *ProviderWithModelsResponse {
+	return BuildProviderWithModelsResponse(provider, models)
+}
+
+// BuildProviderResponseList converts providers into response entries via
+// BuildProviderResponse, skipping nil providers. The result is never nil.
+func BuildProviderResponseList(providers []*domainmodel.Provider) []ProviderResponse {
+	result := make([]ProviderResponse, 0, len(providers))
+
+	for idx := range providers {
+		if current := providers[idx]; current != nil {
+			result = append(result, BuildProviderResponse(current))
+		}
+	}
+
+	return result
+}
+
+// ModelCatalogResponse is the JSON representation of a model catalog entry.
+// Timestamps are integers (BuildModelCatalogResponse fills them with Unix
+// seconds); LastSyncedAt is nil, and omitted from the JSON, when the catalog
+// has no sync time.
+type ModelCatalogResponse struct {
+	ID                  string                          `json:"id"`
+	SupportedParameters domainmodel.SupportedParameters `json:"supported_parameters"`
+	Architecture        domainmodel.Architecture        `json:"architecture"`
+	Tags                []string                        `json:"tags,omitempty"`
+	Notes               *string                         `json:"notes,omitempty"`
+	IsModerated         *bool                           `json:"is_moderated,omitempty"`
+	Active              *bool                           `json:"active,omitempty"`
+	Extras              map[string]any                  `json:"extras,omitempty"`
+	Status              domainmodel.ModelCatalogStatus  `json:"status"`
+	LastSyncedAt        *int64                          `json:"last_synced_at,omitempty"`
+	CreatedAt           int64                           `json:"created_at"`
+	UpdatedAt           int64                           `json:"updated_at"`
+}
+
+// ProviderModelResponse is the JSON representation of a provider model.
+// ModelCatalogID is nil, and omitted from the JSON, when no catalog entry is
+// linked. Timestamps are integers (BuildProviderModelResponse fills them with
+// Unix seconds).
+type ProviderModelResponse struct {
+	ID                      string                   `json:"id"`
+	ProviderID              string                   `json:"provider_id"`
+	ProviderVendor          string                   `json:"provider_vendor"`
+	ModelCatalogID          *string                  `json:"model_catalog_id,omitempty"`
+	ModelPublicID           string                   `json:"model_public_id"`
+	ProviderOriginalModelID string                   `json:"provider_original_model_id"`
+	DisplayName             string                   `json:"display_name"`
+	Pricing                 domainmodel.Pricing      `json:"pricing"`
+	TokenLimits             *domainmodel.TokenLimits `json:"token_limits,omitempty"`
+	Family                  *string                  `json:"family,omitempty"`
+	SupportsImages          bool                     `json:"supports_images"`
+	SupportsEmbeddings      bool                     `json:"supports_embeddings"`
+	SupportsReasoning       bool                     `json:"supports_reasoning"`
+	SupportsAudio           bool                     `json:"supports_audio"`
+	SupportsVideo           bool                     `json:"supports_video"`
+	Active                  bool                     `json:"active"`
+	CreatedAt               int64                    `json:"created_at"`
+	UpdatedAt               int64                    `json:"updated_at"`
+}
+
+func BuildModelCatalogResponse(catalog *domainmodel.ModelCatalog) ModelCatalogResponse {
+	var lastSyncedAt *int64
+	if catalog.LastSyncedAt != nil {
+		ts := catalog.LastSyncedAt.Unix()
+		lastSyncedAt = &ts
+	}
+
+	return ModelCatalogResponse{
+		ID:                  catalog.PublicID,
+		SupportedParameters: catalog.SupportedParameters,
+		Architecture:        catalog.Architecture,
+		Tags:                catalog.Tags,
+		Notes:               catalog.Notes,
+		IsModerated:         catalog.IsModerated,
+		Active:              catalog.Active,
+		Extras:              catalog.Extras,
+		Status:              catalog.Status,
+		LastSyncedAt:        lastSyncedAt,
+		CreatedAt:           catalog.CreatedAt.Unix(),
+		UpdatedAt:           catalog.UpdatedAt.Unix(),
+	}
+}
+
+// BuildProviderModelResponse maps a provider model, its provider and an
+// optional catalog entry onto the API representation.
+//
+// providerModel and provider must not be nil. modelCatalog may be nil, in
+// which case ModelCatalogID is left nil. The vendor is the lower-cased
+// provider kind and timestamps are converted to Unix seconds.
+func BuildProviderModelResponse(
+	providerModel *domainmodel.ProviderModel,
+	provider *domainmodel.Provider,
+	modelCatalog *domainmodel.ModelCatalog,
+) ProviderModelResponse {
+	var modelCatalogID *string
+	if modelCatalog != nil {
+		// Copy the ID so the response does not hold a pointer into the
+		// domain object (later changes to the catalog must not leak into an
+		// already built response).
+		catalogID := modelCatalog.PublicID
+		modelCatalogID = &catalogID
+	}
+
+	return ProviderModelResponse{
+		ID:                      providerModel.PublicID,
+		ProviderID:              provider.PublicID,
+		ProviderVendor:          strings.ToLower(string(provider.Kind)),
+		ModelCatalogID:          modelCatalogID,
+		ModelPublicID:           providerModel.ModelPublicID,
+		ProviderOriginalModelID: providerModel.ProviderOriginalModelID,
+		DisplayName:             providerModel.DisplayName,
+		Pricing:                 providerModel.Pricing,
+		TokenLimits:             providerModel.TokenLimits,
+		Family:                  providerModel.Family,
+		SupportsImages:          providerModel.SupportsImages,
+		SupportsEmbeddings:      providerModel.SupportsEmbeddings,
+		SupportsReasoning:       providerModel.SupportsReasoning,
+		SupportsAudio:           providerModel.SupportsAudio,
+		SupportsVideo:           providerModel.SupportsVideo,
+		Active:                  providerModel.Active,
+		CreatedAt:               providerModel.CreatedAt.Unix(),
+		UpdatedAt:               providerModel.UpdatedAt.Unix(),
+	}
+}
+
+// BulkOperationResponse summarizes the outcome of a bulk operation.
+// UpdatedCount is always serialized; the remaining counters and FailedModels
+// are omitted from the JSON when they are zero or empty.
+type BulkOperationResponse struct {
+	UpdatedCount int      `json:"updated_count"`
+	SkippedCount int      `json:"skipped_count,omitempty"`
+	FailedCount  int      `json:"failed_count,omitempty"`
+	TotalChecked int      `json:"total_checked,omitempty"`
+	FailedModels []string `json:"failed_models,omitempty"`
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/responses/projectres/responses.go b/services/llm-api/internal/interfaces/httpserver/responses/projectres/responses.go
new file mode 100644
index 00000000..8c4a18e1
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/responses/projectres/responses.go
@@ -0,0 +1,89 @@
+package projectres
+
+import (
+	"jan-server/services/llm-api/internal/domain/project"
+)
+
+// ProjectResponse is the JSON representation of a single project (built by NewProjectResponse)
+type ProjectResponse struct {
+	ID          string  `json:"id"`                    // public ID of the project
+	Object      string  `json:"object"`                // set to "project" by NewProjectResponse
+	Name        string  `json:"name"`                  // project name
+	Instruction *string `json:"instruction,omitempty"` // omitted from JSON when nil
+	Favorite    bool    `json:"is_favorite"`           // note the JSON key is is_favorite
+	IsArchived  bool    `json:"is_archived"`           // true exactly when the project has an archive timestamp
+	ArchivedAt  *int64  `json:"archived_at,omitempty"` // Unix seconds; omitted from JSON when nil
+	CreatedAt   int64   `json:"created_at"`            // Unix seconds
+	UpdatedAt   int64   `json:"updated_at"`            // Unix seconds
+}
+
+// ProjectListResponse is the JSON envelope for one page of projects (built by NewProjectListResponse)
+type ProjectListResponse struct {
+	Object     string            `json:"object"`
+	Data       []ProjectResponse `json:"data"`
+	FirstID    string            `json:"first_id,omitempty"`
+	LastID     string            `json:"last_id,omitempty"`
+	NextCursor *string           `json:"next_cursor,omitempty"`
+	HasMore    bool              `json:"has_more"`
+	Total      int64             `json:"total"`
+}
+
+// ProjectDeletedResponse is the JSON body confirming that a project was deleted
+type ProjectDeletedResponse struct {
+	ID      string `json:"id"`      // public ID passed to NewProjectDeletedResponse
+	Object  string `json:"object"`  // set to "project" by NewProjectDeletedResponse
+	Deleted bool   `json:"deleted"` // set to true by NewProjectDeletedResponse
+}
+
+// NewProjectResponse creates a response from a domain project
+func NewProjectResponse(proj *project.Project) *ProjectResponse {
+	resp := &ProjectResponse{
+		ID:          proj.PublicID,
+		Object:      "project",
+		Name:        proj.Name,
+		Instruction: proj.Instruction,
+		Favorite:    proj.Favorite,
+		IsArchived:  proj.ArchivedAt != nil,
+		CreatedAt:   proj.CreatedAt.Unix(),
+		UpdatedAt:   proj.UpdatedAt.Unix(),
+	}
+
+	if proj.ArchivedAt != nil {
+		archivedUnix := proj.ArchivedAt.Unix()
+		resp.ArchivedAt = &archivedUnix
+	}
+
+	return resp
+}
+
+// NewProjectListResponse wraps domain projects in a list envelope; FirstID and LastID are set only for a non-empty page
+func NewProjectListResponse(projects []*project.Project, hasMore bool, nextCursor *string, total int64) *ProjectListResponse {
+	// Start from an empty, non-nil slice so an empty page still serializes as [] rather than null.
+	items := make([]ProjectResponse, 0, len(projects))
+	for _, p := range projects {
+		items = append(items, *NewProjectResponse(p))
+	}
+
+	list := &ProjectListResponse{
+		Object:     "list",
+		Data:       items,
+		NextCursor: nextCursor,
+		HasMore:    hasMore,
+		Total:      total,
+	}
+	if n := len(items); n > 0 {
+		list.FirstID = items[0].ID
+		list.LastID = items[n-1].ID
+	}
+
+	return list
+}
+
+// NewProjectDeletedResponse builds the delete confirmation for publicID; Object is always "project" and Deleted is always true
+func NewProjectDeletedResponse(publicID string) *ProjectDeletedResponse {
+	confirmation := new(ProjectDeletedResponse)
+	confirmation.ID = publicID
+	confirmation.Object = "project"
+	confirmation.Deleted = true
+	return confirmation
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/responses/response.go b/services/llm-api/internal/interfaces/httpserver/responses/response.go
new file mode 100644
index 00000000..dd4814a2
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/responses/response.go
@@ -0,0 +1,177 @@
+package responses
+
+import (
+	"errors"
+	"net/http"
+	"time"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+)
+
+type ErrorResponse struct { // JSON error body written by the error helpers in this file
+	Code          string `json:"code"`                 // UUID from PlatformError
+	Error         string `json:"error"`                // always serialized
+	Message       string `json:"message,omitempty"`    // omitted from JSON when empty
+	ErrorInstance error  `json:"-"`                    // underlying error; never serialized
+	RequestID     string `json:"request_id,omitempty"` // omitted from JSON when empty
+}
+
+func NewInternalServerError(reqCtx *gin.Context, errResp ErrorResponse) {
+	if errResp.Message == "" {
+		errResp.Message = errResp.Error // fall back to the short error text
+	}
+	if cause := errResp.ErrorInstance; cause != nil {
+		reqCtx.Error(cause) // attach the underlying error to the gin context
+	}
+	reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, errResp) // always answers HTTP 500
+}
+
+// HandleError handles domain errors and returns appropriate HTTP responses.
+// The message parameter is used directly as both the "error" and "message"
+// fields of the JSON body. When err's chain contains a *PlatformError, the
+// status code is derived from its error type and the body also carries its
+// UUID and request ID; any other error is answered with HTTP 500.
+// The request is aborted in both cases.
+func HandleError(reqCtx *gin.Context, err error, message string) {
+	// Start from the generic shape used for non-domain errors.
+	status := http.StatusInternalServerError
+	body := ErrorResponse{
+		Error:         message,
+		Message:       message,
+		ErrorInstance: err,
+	}
+
+	var platformErr *platformerrors.PlatformError
+	if errors.As(err, &platformErr) {
+		// Domain error: map its type to a status and expose its tracking data.
+		status = platformerrors.ErrorTypeToHTTPStatus(platformErr.GetErrorType())
+		body.Code = platformErr.GetUUID()
+		body.ErrorInstance = platformErr
+		body.RequestID = platformErr.GetRequestID()
+	}
+
+	// ErrorInstance is tagged json:"-", so it never reaches the client.
+	// NOTE(review): unlike NewInternalServerError, err is not recorded via reqCtx.Error here.
+	reqCtx.AbortWithStatusJSON(status, body)
+}
+
+// HandleErrorWithStatus handles domain errors with a custom status code.
+// Use this when you need to override the default status code mapping: the
+// given statusCode is sent whether or not err is a platform error. The
+// message parameter fills both the "error" and "message" fields of the body.
+// When err's chain contains a *PlatformError, its UUID and request ID are
+// included as well. The request is aborted in both cases.
+func HandleErrorWithStatus(reqCtx *gin.Context, statusCode int, err error, message string) {
+	// Start from the generic shape used for non-domain errors.
+	body := ErrorResponse{
+		Error:         message,
+		Message:       message,
+		ErrorInstance: err,
+	}
+
+	var platformErr *platformerrors.PlatformError
+	if errors.As(err, &platformErr) {
+		// Domain error: expose its tracking data alongside the message.
+		body.Code = platformErr.GetUUID()
+		body.ErrorInstance = platformErr
+		body.RequestID = platformErr.GetRequestID()
+	}
+
+	// ErrorInstance is tagged json:"-", so it never reaches the client.
+	reqCtx.AbortWithStatusJSON(statusCode, body)
+}
+
+// HandleNewError creates a new typed error at the route layer and handles it.
+// This is a convenience function for route-level validations and errors.
+// The uuid parameter should be provided from the route for error tracking.
+// The response status is derived from the new error's type, and message is
+// used for both the "error" and "message" fields of the JSON body.
+func HandleNewError(reqCtx *gin.Context, errorType platformerrors.ErrorType, message string, uuid string) {
+	requestCtx := reqCtx.Request.Context()
+	// The nil argument is handed to NewError as-is; its meaning is defined by that function.
+	routeErr := platformerrors.NewError(requestCtx, platformerrors.LayerRoute, errorType, message, nil, uuid)
+	status := platformerrors.ErrorTypeToHTTPStatus(routeErr.GetErrorType())
+
+	// ErrorInstance is tagged json:"-", so it never reaches the client.
+	reqCtx.AbortWithStatusJSON(status, ErrorResponse{
+		Code:          routeErr.GetUUID(),
+		Error:         message,
+		Message:       message,
+		ErrorInstance: routeErr,
+		RequestID:     routeErr.GetRequestID(),
+	})
+}
+
+type GeneralResponse[T any] struct { // generic envelope: a status string plus a typed result
+	Status string `json:"status"`
+	Result T      `json:"result"` // payload of the caller-chosen type T
+}
+
+type ListResponse[T any] struct { // generic list envelope with first/last ID paging fields
+	Total   int64   `json:"total"`
+	Results []T     `json:"results"`
+	FirstID *string `json:"first_id"` // no omitempty: serialized as null when nil
+	LastID  *string `json:"last_id"`  // no omitempty: serialized as null when nil
+	HasMore bool    `json:"has_more"`
+}
+
+type PageCursor struct { // paging metadata produced by BuildCursorPage; carries no JSON tags
+	FirstID *string // ID of the first item on the page; left nil for an empty page
+	LastID  *string // ID of the last item on the page; left nil for an empty page
+	HasMore bool    // true when the has-more probe returned at least one record
+	Total   int64   // value reported by the count callback
+}
+
+// BuildCursorPage derives paging metadata for items: first/last ID via getID, HasMore via hasMoreFunc, Total via CountFunc.
+func BuildCursorPage[T any](
+	items []*T,
+	getID func(*T) *string,
+	hasMoreFunc func() ([]*T, error),
+	CountFunc func() (int64, error),
+) (*PageCursor, error) {
+	page := &PageCursor{}
+	if n := len(items); n > 0 {
+		page.FirstID = getID(items[0])
+		page.LastID = getID(items[n-1])
+		following, err := hasMoreFunc() // only probed for a non-empty page
+		if err != nil {
+			return nil, err
+		}
+		page.HasMore = len(following) > 0
+	}
+	// CountFunc runs even for an empty page; on failure the partially filled page is returned with the error.
+	total, err := CountFunc()
+	if err != nil {
+		return page, err
+	}
+	page.Total = total
+	return page, nil
+}
+
+// NewCookieWithSecurity returns a cookie with the given name, value and expiry
+// that is HttpOnly, Secure, scoped to "/" and marked SameSite=None.
+func NewCookieWithSecurity(name string, value string, expires time.Time) *http.Cookie {
+	// For cross-origin requests (e.g., frontend at different domain), we need SameSite=None with Secure
+	// This is required for both dev and production when the frontend is on a different origin
+	cookie := &http.Cookie{
+		Name:     name,
+		Value:    value,
+		Expires:  expires,
+		HttpOnly: true,
+		Secure:   true,
+		Path:     "/",
+		SameSite: http.SameSiteNoneMode,
+	}
+
+	// NOTE(review): the dev and non-dev paths return identical attributes, so the
+	// environment check below does not change the result today. It is kept so the
+	// function still consults config.IsDev() exactly as before; collapse it only
+	// together with a decision about the config dependency.
+	if config.IsDev() {
+		return cookie
+	}
+	return cookie
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/auth/auth_route.go b/services/llm-api/internal/interfaces/httpserver/routes/auth/auth_route.go
new file mode 100644
index 00000000..c4ea855b
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/auth/auth_route.go
@@ -0,0 +1,279 @@
+package auth
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/apikeyhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	guestauth "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/guesthandler"
+)
+
+// AuthRoute wires the /auth endpoints to the handlers that implement them
+type AuthRoute struct {
+	guestHandler         *guestauth.GuestHandler           // used by CreateGuestLogin
+	upgradeHandler       *guestauth.UpgradeHandler         // used by UpgradeAccount
+	tokenHandler         *authhandler.TokenHandler         // used by RefreshToken, Logout and GetMe
+	apiKeyHandler        *apikeyhandler.Handler            // used by the API key create/list/delete/validate endpoints
+	authHandler          *authhandler.AuthHandler          // supplies WithAppUserAuthChain for the api-keys routes
+	keycloakOAuthHandler *authhandler.KeycloakOAuthHandler // may be nil; the Keycloak endpoints then answer 500
+}
+
+// NewAuthRoute creates a new auth route from its handlers; keycloakOAuthHandler may be nil (the route methods check for it)
+func NewAuthRoute(
+	guestHandler *guestauth.GuestHandler,
+	upgradeHandler *guestauth.UpgradeHandler,
+	tokenHandler *authhandler.TokenHandler,
+	apiKeyHandler *apikeyhandler.Handler,
+	authHandler *authhandler.AuthHandler,
+	keycloakOAuthHandler *authhandler.KeycloakOAuthHandler,
+) *AuthRoute {
+	route := new(AuthRoute)
+	route.guestHandler = guestHandler
+	route.upgradeHandler = upgradeHandler
+	route.tokenHandler = tokenHandler
+	route.apiKeyHandler = apiKeyHandler
+	route.authHandler = authHandler
+	route.keycloakOAuthHandler = keycloakOAuthHandler
+	return route
+}
+
+// RegisterRouter registers the public auth routes on router and the remaining auth routes on protectedRouter
+func (a *AuthRoute) RegisterRouter(router gin.IRouter, protectedRouter gin.IRouter) {
+	// Public routes - guest login, token refresh and logout
+	router.POST("/auth/guest-login", a.CreateGuestLogin)
+	router.POST("/auth/refresh-token", a.RefreshToken)
+	router.GET("/auth/logout", a.Logout)
+	router.POST("/auth/logout", a.Logout) // Support both GET and POST for logout
+
+	// Public routes - Keycloak OAuth2/OIDC (simplified); each answers 500 when no Keycloak handler is set
+	router.GET("/auth/login", a.KeycloakLogin)
+	router.GET("/auth/callback", a.KeycloakCallback)
+	router.POST("/auth/validate", a.ValidateKeycloakToken)
+	router.POST("/auth/revoke", a.RevokeKeycloakToken)
+
+	// API key validation endpoint (for Kong plugin); registered on the public router
+	router.POST("/auth/validate-api-key", a.ValidateAPIKey)
+
+	// Protected routes (require authentication); the api-keys routes also run the WithAppUserAuthChain handlers
+	protectedRouter.POST("/auth/upgrade", a.UpgradeAccount)
+	protectedRouter.GET("/auth/me", a.GetMe)
+	protectedRouter.POST("/auth/api-keys", a.authHandler.WithAppUserAuthChain(a.CreateAPIKey)...)
+	protectedRouter.GET("/auth/api-keys", a.authHandler.WithAppUserAuthChain(a.ListAPIKeys)...)
+	protectedRouter.DELETE("/auth/api-keys/:id", a.authHandler.WithAppUserAuthChain(a.DeleteAPIKey)...)
+}
+
+// CreateGuestLogin godoc
+// @Summary Create guest user account
+// @Description Creates a temporary guest user account and returns JWT tokens. Guest users have limited access and can be upgraded to full accounts later.
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Success 200 {object} object "Guest user created with access and refresh tokens"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error - failed to create guest user"
+// @Router /auth/guest-login [post]
+func (a *AuthRoute) CreateGuestLogin(c *gin.Context) {
+	a.guestHandler.CreateGuest(c) // delegates entirely to the guest handler
+}
+
+// RefreshToken godoc
+// @Summary Refresh access token
+// @Description Exchanges a valid refresh token for a new access token. Refresh token must be provided in Authorization header or refresh_token cookie.
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param refresh_token body string false "Refresh token (can also be in Authorization header)"
+// @Success 200 {object} object "New access token and refresh token"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or expired refresh token"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/refresh-token [post]
+func (a *AuthRoute) RefreshToken(c *gin.Context) {
+	a.tokenHandler.RefreshToken(c) // delegates entirely to the token handler
+}
+
+// Logout godoc
+// @Summary Logout user
+// @Description Revokes the current access token and clears authentication cookies. After logout, the user must re-authenticate.
+// @Tags Authentication API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Success 200 {object} object "Successfully logged out"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid token"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/logout [get]
+func (a *AuthRoute) Logout(c *gin.Context) {
+	a.tokenHandler.Logout(c) // same handler serves both the GET and POST /auth/logout routes
+}
+
+// UpgradeAccount godoc
+// @Summary Upgrade guest to permanent account
+// @Description Converts a guest user account to a permanent account with email/password credentials. Guest flag is removed and user gains full access.
+// @Tags Authentication API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param request body object true "Upgrade request with email and password"
+// @Success 200 {object} object "Account upgraded successfully with new tokens"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - missing email or password"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - not a guest user or invalid token"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/upgrade [post]
+func (a *AuthRoute) UpgradeAccount(c *gin.Context) {
+	a.upgradeHandler.Upgrade(c) // delegates entirely to the upgrade handler
+}
+
+// GetMe godoc
+// @Summary Get current user information
+// @Description Returns the authenticated user's profile information including user ID, email, roles, and guest status.
+// @Tags Authentication API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Success 200 {object} object "User profile information"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or expired token"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/me [get]
+func (a *AuthRoute) GetMe(c *gin.Context) {
+	a.tokenHandler.GetMe(c) // delegates entirely to the token handler
+}
+
+// CreateAPIKey godoc
+// @Summary Create API key
+// @Description Creates a new API key for the authenticated user. API keys provide programmatic access without requiring user credentials.
+// @Tags Authentication API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param request body object true "API key creation request with name and optional scopes"
+// @Success 201 {object} object "API key created successfully with key value"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - missing required fields"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or expired token"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/api-keys [post]
+func (a *AuthRoute) CreateAPIKey(c *gin.Context) {
+	a.apiKeyHandler.Create(c) // delegates entirely to the API key handler
+}
+
+// ListAPIKeys godoc
+// @Summary List user's API keys
+// @Description Returns all API keys created by the authenticated user. Key values are not returned, only metadata.
+// @Tags Authentication API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Success 200 {object} object "List of API keys with metadata"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or expired token"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/api-keys [get]
+func (a *AuthRoute) ListAPIKeys(c *gin.Context) {
+	a.apiKeyHandler.List(c) // delegates entirely to the API key handler
+}
+
+// DeleteAPIKey godoc
+// @Summary Delete API key
+// @Description Revokes and deletes an API key by ID. Deleted keys can no longer be used for authentication.
+// @Tags Authentication API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param id path string true "API key ID"
+// @Success 204 "API key deleted successfully"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - invalid or expired token"
+// @Failure 404 {object} responses.ErrorResponse "API key not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/api-keys/{id} [delete]
+func (a *AuthRoute) DeleteAPIKey(c *gin.Context) {
+	a.apiKeyHandler.Delete(c) // delegates entirely to the API key handler
+}
+
+// ValidateAPIKey godoc
+// @Summary Validate API key (Kong Plugin)
+// @Description Internal endpoint used by Kong API Gateway to validate API keys. Not intended for direct client use.
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param request body object true "API key validation request"
+// @Success 200 {object} object "API key is valid with user information"
+// @Failure 401 {object} responses.ErrorResponse "Invalid API key"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /auth/validate-api-key [post]
+func (a *AuthRoute) ValidateAPIKey(c *gin.Context) {
+	a.apiKeyHandler.Validate(c) // delegates entirely to the API key handler
+}
+
+// KeycloakLogin godoc
+// @Summary Initiate Keycloak OAuth2 login
+// @Description Returns the Keycloak authorization URL for frontend to redirect users. Supports OAuth2 authorization code flow with PKCE.
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param redirect_url query string false "URL to redirect after successful login"
+// @Success 200 {object} object{authorization_url=string,state=string} "Authorization URL and state parameter"
+// @Failure 500 {object} responses.ErrorResponse "Failed to initiate login"
+// @Router /auth/login [get]
+func (a *AuthRoute) KeycloakLogin(c *gin.Context) {
+	if a.keycloakOAuthHandler == nil { // no Keycloak handler was supplied to this route
+		c.JSON(500, gin.H{"error": "Keycloak OAuth is not configured"})
+		return
+	}
+	a.keycloakOAuthHandler.InitiateLogin(c)
+}
+
+// KeycloakCallback godoc
+// @Summary Handle Keycloak OAuth2 callback
+// @Description Handles the OAuth2 callback from Keycloak, exchanges authorization code for JWT tokens
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param code query string true "Authorization code from Keycloak"
+// @Param state query string true "State parameter for CSRF protection"
+// @Success 200 {object} object{access_token=string,refresh_token=string,expires_in=int,token_type=string} "JWT tokens"
+// @Failure 400 {object} responses.ErrorResponse "Missing code or state"
+// @Failure 401 {object} responses.ErrorResponse "Invalid state parameter"
+// @Failure 500 {object} responses.ErrorResponse "Failed to exchange code for tokens"
+// @Router /auth/callback [get]
+func (a *AuthRoute) KeycloakCallback(c *gin.Context) {
+	if a.keycloakOAuthHandler == nil { // no Keycloak handler was supplied to this route
+		c.JSON(500, gin.H{"error": "Keycloak OAuth is not configured"})
+		return
+	}
+	a.keycloakOAuthHandler.HandleCallback(c)
+}
+
+// ValidateKeycloakToken godoc
+// @Summary Validate Keycloak access token
+// @Description Validates an access token against Keycloak's userinfo endpoint
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param Authorization header string true "Bearer token"
+// @Success 200 {object} object{valid=bool,user_info=object} "Token is valid with user information"
+// @Failure 401 {object} responses.ErrorResponse "Invalid or expired token"
+// @Failure 500 {object} responses.ErrorResponse "Keycloak OAuth is not configured"
+// @Router /auth/validate [post]
+func (a *AuthRoute) ValidateKeycloakToken(c *gin.Context) {
+	if a.keycloakOAuthHandler == nil { // no Keycloak handler was supplied to this route
+		c.JSON(500, gin.H{"error": "Keycloak OAuth is not configured"})
+		return
+	}
+	a.keycloakOAuthHandler.ValidateAccessToken(c)
+}
+
+// RevokeKeycloakToken godoc
+// @Summary Revoke Keycloak refresh token
+// @Description Revokes a refresh token to invalidate it
+// @Tags Authentication API
+// @Accept json
+// @Produce json
+// @Param request body object{refresh_token=string} true "Token to revoke"
+// @Success 200 {object} object{message=string} "Token revoked successfully"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request body"
+// @Failure 500 {object} responses.ErrorResponse "Keycloak OAuth is not configured"
+// @Router /auth/revoke [post]
+func (a *AuthRoute) RevokeKeycloakToken(c *gin.Context) {
+	if a.keycloakOAuthHandler == nil { // no Keycloak handler was supplied to this route
+		c.JSON(500, gin.H{"error": "Keycloak OAuth is not configured"})
+		return
+	}
+	a.keycloakOAuthHandler.RevokeKeycloakToken(c)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/routes_provider.go b/services/llm-api/internal/interfaces/httpserver/routes/routes_provider.go
new file mode 100644
index 00000000..18552ef8
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/routes_provider.go
@@ -0,0 +1,59 @@
+package routes
+
+import (
+	"github.com/google/wire"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/apikeyhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/chathandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler"
+	guestauth "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/guesthandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/projecthandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/usersettingshandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/auth"
+	v1 "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin"
+	adminModel "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/model"
+	adminProvider "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/provider"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/chat"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/conversation"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/llm/projects"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/model"
+	modelProvider "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/model/provider"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/users"
+)
+
+var RouteProvider = wire.NewSet( // wire provider set listing every handler and route constructor below
+	// Handlers: constructors/providers for the HTTP handlers
+	authhandler.NewAuthHandler,
+	authhandler.NewTokenHandler,
+	authhandler.ProvideKeycloakOAuthHandler,
+	apikeyhandler.NewHandler,
+	handlers.ProvideMemoryHandler,
+	chathandler.NewChatHandler,
+	conversationhandler.NewConversationHandler,
+	guestauth.NewGuestHandler,
+	guestauth.NewUpgradeHandler,
+	modelhandler.NewProviderHandler,
+	modelhandler.NewModelHandler,
+	modelhandler.NewModelCatalogHandler,
+	modelhandler.NewProviderModelHandler,
+	projecthandler.NewProjectHandler,
+	usersettingshandler.NewUserSettingsHandler,
+
+	// Routes: constructors for the route types built on top of those handlers
+	auth.NewAuthRoute,
+	v1.NewV1Route,
+	admin.NewAdminRoute,
+	adminModel.NewAdminModelRoute,
+	adminProvider.NewAdminProviderRoute,
+	chat.NewChatRoute,
+	chat.NewChatCompletionRoute,
+	conversation.NewConversationRoute,
+	projects.NewProjectRoute,
+	model.NewModelRoute,
+	modelProvider.NewModelProviderRoute,
+	users.NewUsersRoute,
+)
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/admin_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/admin_route.go
new file mode 100644
index 00000000..ca89687f
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/admin_route.go
@@ -0,0 +1,34 @@
+package admin
+
+import (
+	adminmodel "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/model"
+	adminprovider "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/provider"
+
+	"github.com/gin-gonic/gin"
+)
+
+// AdminRoute aggregates all admin sub-routes so they can be mounted under one /admin group
+type AdminRoute struct {
+	adminModelRoute    *adminmodel.AdminModelRoute       // registered first by RegisterRouter
+	adminProviderRoute *adminprovider.AdminProviderRoute // registered second by RegisterRouter
+}
+
+// NewAdminRoute creates a new AdminRoute from the model and provider admin sub-routes
+func NewAdminRoute(
+	adminModelRoute *adminmodel.AdminModelRoute,
+	adminProviderRoute *adminprovider.AdminProviderRoute,
+) *AdminRoute {
+	route := new(AdminRoute)
+	route.adminModelRoute = adminModelRoute
+	route.adminProviderRoute = adminProviderRoute
+	return route
+}
+
+// RegisterRouter registers admin routes under /admin prefix.
+// Both sub-routes share a single group: the model routes are registered
+// first, then the provider routes.
+func (r *AdminRoute) RegisterRouter(router gin.IRouter) {
+	group := router.Group("/admin")
+	r.adminModelRoute.RegisterRouter(group)
+	r.adminProviderRoute.RegisterRouter(group)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/model/admin_model_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/model/admin_model_route.go
new file mode 100644
index 00000000..83606e29
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/model/admin_model_route.go
@@ -0,0 +1,413 @@
+package model
+
+import (
+	modelHandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/requests"
+	requestmodels "jan-server/services/llm-api/internal/interfaces/httpserver/requests/models"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+	"net/http"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+)
+
+// HeaderIncludeProviderData is the name of the "X-PROVIDER-DATA" HTTP header.
+// NOTE(review): not referenced anywhere in this file — presumably consumed by
+// other packages; confirm before removing.
+const HeaderIncludeProviderData = "X-PROVIDER-DATA"
+// MaxExceptModelsLimit is the largest except_models list accepted by the
+// bulk-toggle endpoints in this file; longer lists are rejected with a
+// validation error before the handler is called.
+const MaxExceptModelsLimit = 1000
+
+// AdminModelRoute exposes the admin HTTP endpoints for model catalogs and
+// provider models and forwards each request to the matching handler.
+type AdminModelRoute struct {
+	// modelHandler is stored by the constructor; no method in this file calls it.
+	modelHandler         *modelHandler.ModelHandler
+	// modelCatalogHandler serves the ".../models/catalogs" endpoints.
+	modelCatalogHandler  *modelHandler.ModelCatalogHandler
+	// providerModelHandler serves the ".../models/provider-models" endpoints.
+	providerModelHandler *modelHandler.ProviderModelHandler
+}
+
+// NewAdminModelRoute builds an AdminModelRoute wired to the three handlers it
+// delegates to.
+func NewAdminModelRoute(
+	modelHandler *modelHandler.ModelHandler,
+	modelCatalogHandler *modelHandler.ModelCatalogHandler,
+	providerModelHandler *modelHandler.ProviderModelHandler,
+) *AdminModelRoute {
+	created := new(AdminModelRoute)
+	created.modelHandler = modelHandler
+	created.modelCatalogHandler = modelCatalogHandler
+	created.providerModelHandler = providerModelHandler
+	return created
+}
+
+// RegisterRouter mounts the admin model endpoints on router under "models":
+// catalog endpoints under "models/catalogs" and provider-model endpoints under
+// "models/provider-models".
+func (route *AdminModelRoute) RegisterRouter(router *gin.RouterGroup) {
+	models := router.Group("models")
+
+	// Model catalog endpoints. Catalog IDs are captured with a catch-all
+	// segment; the handlers strip the leading "/" from the captured value.
+	catalogs := models.Group("catalogs")
+	{
+		catalogs.GET("", route.ListModelCatalogs)
+		catalogs.POST("/bulk-toggle", route.BulkToggleModelCatalogs)
+		catalogs.GET("/*model_public_id", route.GetModelCatalog)
+		catalogs.PATCH("/*model_public_id", route.UpdateModelCatalog)
+	}
+
+	// Provider model endpoints.
+	providerModels := models.Group("provider-models")
+	{
+		providerModels.GET("", route.ListProviderModels)
+		providerModels.GET("/:provider_model_public_id", route.GetProviderModel)
+		providerModels.PATCH("/:provider_model_public_id", route.UpdateProviderModel)
+		providerModels.POST("/bulk-toggle", route.BulkToggleProviderModels)
+	}
+}
+
+// ListModelCatalogs reads pagination and filter values from the query string,
+// asks the catalog handler for the matching page, and replies 200 with a JSON
+// object holding "data", "total" and "limit". Any failure is passed to
+// responses.HandleError.
+// @Summary List all model catalogs
+// @Description Retrieves a paginated list of model catalogs with optional filtering and searching
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Produce json
+// @Param limit query int false "Number of records to return (default: 20, max: 100)"
+// @Param offset query int false "Number of records to skip for pagination"
+// @Param order query string false "Sort order: asc or desc (default: desc)"
+// @Param status query string false "Filter by status: init, filled, updated"
+// @Param is_moderated query bool false "Filter by moderation status"
+// @Success 200 {object} modelresponses.ModelCatalogResponse "List of model catalogs"
+// @Failure 400 {object} responses.ErrorResponse "Invalid query parameters"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/admin/models/catalogs [get]
+func (route *AdminModelRoute) ListModelCatalogs(reqCtx *gin.Context) {
+	requestContext := reqCtx.Request.Context()
+
+	page, err := requests.GetPaginationFromQuery(reqCtx)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Invalid pagination parameters")
+		return
+	}
+
+	items, count, err := route.modelCatalogHandler.ListCatalogs(
+		requestContext,
+		route.buildModelCatalogFilter(reqCtx),
+		page,
+	)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to retrieve model catalogs")
+		return
+	}
+
+	body := gin.H{
+		"data":  items,
+		"total": count,
+		"limit": page.Limit,
+	}
+	reqCtx.JSON(http.StatusOK, body)
+}
+
+// GetModelCatalog looks up one catalog entry by the public ID captured from the
+// catch-all path segment and replies 200 with the entry as JSON. Lookup
+// failures are passed to responses.HandleError.
+// @Summary Get a model catalog entry
+// @Description Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes)
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Produce json
+// @Param model_public_id path string true "Model Catalog Public ID (can contain slashes)"
+// @Success 200 {object} modelresponses.ModelCatalogResponse "Model catalog details"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Failure 404 {object} responses.ErrorResponse "Model catalog not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/admin/models/catalogs/{model_public_id} [get]
+func (route *AdminModelRoute) GetModelCatalog(reqCtx *gin.Context) {
+	// Drop the single leading "/" carried by the captured path segment.
+	rawSegment := reqCtx.Param("model_public_id")
+	catalogID := strings.TrimPrefix(rawSegment, "/")
+
+	entry, lookupErr := route.modelCatalogHandler.GetCatalog(reqCtx.Request.Context(), catalogID)
+	if lookupErr != nil {
+		responses.HandleError(reqCtx, lookupErr, "Failed to retrieve model catalog")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, entry)
+}
+
+// UpdateModelCatalog binds the JSON body to an UpdateModelCatalogRequest,
+// applies it to the catalog entry named by the catch-all path segment, and
+// replies 200 with the updated entry. Bind and update failures are passed to
+// responses.HandleError.
+// @Summary Update a model catalog entry
+// @Description Updates metadata for a model catalog entry. Marks it as manually updated to prevent auto-sync overwrites.
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param model_public_id path string true "Model Catalog Public ID (can contain slashes)"
+// @Param payload body requestmodels.UpdateModelCatalogRequest true "Update payload"
+// @Success 200 {object} modelresponses.ModelCatalogResponse "Updated model catalog"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
+// @Failure 404 {object} responses.ErrorResponse "Model catalog not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/admin/models/catalogs/{model_public_id} [patch]
+func (route *AdminModelRoute) UpdateModelCatalog(reqCtx *gin.Context) {
+	// Drop the single leading "/" carried by the captured path segment.
+	catalogID := strings.TrimPrefix(reqCtx.Param("model_public_id"), "/")
+
+	var payload requestmodels.UpdateModelCatalogRequest
+	bindErr := reqCtx.ShouldBindJSON(&payload)
+	if bindErr != nil {
+		responses.HandleError(reqCtx, bindErr, "Invalid request body")
+		return
+	}
+
+	updated, updateErr := route.modelCatalogHandler.UpdateCatalog(reqCtx.Request.Context(), catalogID, payload)
+	if updateErr != nil {
+		responses.HandleError(reqCtx, updateErr, "Failed to update model catalog")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, updated)
+}
+
+// BulkToggleModelCatalogs performs bulk enable/disable operations on provider
+// models associated with model catalogs.
+//
+// This route only binds the JSON body, rejects except_models lists longer than
+// MaxExceptModelsLimit, and forwards the request to the catalog handler's
+// BulkToggleCatalogs. The modes below describe the handler's documented
+// behavior; they are not implemented in this function.
+//
+// This operation supports two modes:
+//
+// Mode 1: Specific Catalogs (catalog_ids provided)
+//  1. Looks up all specified catalog IDs and validates they exist
+//  2. Queries all provider models associated with those catalogs
+//  3. Filters out models in the exception list
+//  4. Skips models already in the desired state (optimization)
+//  5. Updates remaining models and tracks success/failure metrics
+//  6. Returns detailed results including counts and any failures
+//
+// Mode 2: All Catalogs (catalog_ids empty or omitted)
+//  1. Queries ALL model catalogs in the system
+//  2. Queries all provider models for all those catalogs
+//  3. Filters out models in the exception list
+//  4. Skips models already in the desired state (optimization)
+//  5. Updates remaining models and tracks success/failure metrics
+//  6. Returns detailed results including counts and any failures
+//
+// The operation is designed to be fault-tolerant: if individual model updates fail,
+// the operation continues and reports the failures in the response.
+//
+// Supported patterns:
+//   - Enable all models in specific catalogs: {"enable": true, "catalog_ids": ["cat1", "cat2"]}
+//   - Disable all models in specific catalogs: {"enable": false, "catalog_ids": ["cat1", "cat2"]}
+//   - Enable all catalog models except some: {"enable": true, "catalog_ids": ["cat1"], "except_models": ["model1"]}
+//   - Disable all catalog models except some: {"enable": false, "catalog_ids": ["cat1"], "except_models": ["model1"]}
+//   - Enable ALL catalog models globally: {"enable": true}
+//   - Disable ALL catalog models globally: {"enable": false}
+//   - Disable ALL catalog models except specific ones: {"enable": false, "except_models": ["model1", "model2", "model3"]}
+//   - Enable ALL catalog models except specific ones: {"enable": true, "except_models": ["model1", "model2", "model3"]}
+//
+// Example use cases:
+//   - "Enable all GPT-4 models except GPT-4-vision" (provide GPT-4 catalog ID)
+//   - "Disable all Claude models except Claude-3-Opus" (provide Claude catalog IDs)
+//   - "Disable ALL catalog models except 3 specific ones" (no catalog_ids, use except_models)
+//   - "Enable all models in the system" (no catalog_ids, no except_models)
+//
+// @Summary Bulk enable/disable provider models by catalog IDs or all catalogs
+// @Description Enable or disable provider models for specific catalogs or ALL catalogs, with optional exception list. Supports "enable/disable all except" patterns globally or scoped to catalogs.
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param request body requestmodels.BulkToggleCatalogsRequest true "Bulk toggle request. If catalog_ids is empty, applies to ALL catalogs. Use except_models to exclude specific models."
+// @Success 200 {object} modelresponses.BulkOperationResponse "Bulk operation result with counts and status"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - exceeds limits or validation error"
+// @Failure 404 {object} responses.ErrorResponse "One or more catalog IDs not found (when catalog_ids provided)"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error during bulk operation"
+// @Router /v1/admin/models/catalogs/bulk-toggle [post]
+func (route *AdminModelRoute) BulkToggleModelCatalogs(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+
+	var payload requestmodels.BulkToggleCatalogsRequest
+	bindErr := reqCtx.ShouldBindJSON(&payload)
+	if bindErr != nil {
+		responses.HandleError(reqCtx, bindErr, "Invalid request body")
+		return
+	}
+
+	// Reject oversized exception lists before handing the request to the handler.
+	if exceptCount := len(payload.ExceptModels); exceptCount > MaxExceptModelsLimit {
+		limitErr := platformerrors.NewError(
+			ctx,
+			platformerrors.LayerHandler,
+			platformerrors.ErrorTypeValidation,
+			"except_models list exceeds maximum limit",
+			nil,
+			"e0e2b831-9ce4-4fff-8fb8-8aef01979d5f",
+		)
+		responses.HandleError(reqCtx, limitErr, "Validation error: except_models list exceeds maximum limit")
+		return
+	}
+
+	result, toggleErr := route.modelCatalogHandler.BulkToggleCatalogs(ctx, payload)
+	if toggleErr != nil {
+		responses.HandleError(reqCtx, toggleErr, "Failed to perform bulk toggle operation on model catalogs")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, result)
+}
+
+// buildModelCatalogFilter translates the optional "status" and "is_moderated"
+// query parameters into a ModelCatalogFilterParams. A parameter that is absent
+// or empty leaves the corresponding filter field nil.
+//
+// is_moderated is matched case-insensitively against "true", so "True" and
+// "TRUE" are no longer silently treated as false. Any other non-empty value
+// still yields a false filter.
+func (route *AdminModelRoute) buildModelCatalogFilter(reqCtx *gin.Context) requestmodels.ModelCatalogFilterParams {
+	var filter requestmodels.ModelCatalogFilterParams
+
+	if status := reqCtx.Query("status"); status != "" {
+		filter.Status = &status
+	}
+
+	if isModeratedStr := reqCtx.Query("is_moderated"); isModeratedStr != "" {
+		// EqualFold accepts the capitalised spellings some HTTP clients emit for booleans.
+		isModerated := strings.EqualFold(isModeratedStr, "true")
+		filter.IsModerated = &isModerated
+	}
+
+	return filter
+}
+
+// buildProviderModelFilter translates the optional "provider_id", "model_key",
+// "active" and "supports_images" query parameters into a
+// ProviderModelFilterParams. A parameter that is absent or empty leaves the
+// corresponding filter field nil.
+//
+// The two boolean parameters are matched case-insensitively against "true", so
+// "True" and "TRUE" are no longer silently treated as false. Any other
+// non-empty value still yields a false filter.
+func (route *AdminModelRoute) buildProviderModelFilter(reqCtx *gin.Context) requestmodels.ProviderModelFilterParams {
+	var filter requestmodels.ProviderModelFilterParams
+
+	if providerID := reqCtx.Query("provider_id"); providerID != "" {
+		filter.ProviderPublicID = &providerID
+	}
+
+	if modelKey := reqCtx.Query("model_key"); modelKey != "" {
+		filter.ModelKey = &modelKey
+	}
+
+	if activeStr := reqCtx.Query("active"); activeStr != "" {
+		// EqualFold accepts the capitalised spellings some HTTP clients emit for booleans.
+		active := strings.EqualFold(activeStr, "true")
+		filter.Active = &active
+	}
+
+	if supportsImagesStr := reqCtx.Query("supports_images"); supportsImagesStr != "" {
+		supportsImages := strings.EqualFold(supportsImagesStr, "true")
+		filter.SupportsImages = &supportsImages
+	}
+
+	return filter
+}
+
+// ListProviderModels reads pagination and filter values from the query string,
+// asks the provider-model handler for the matching page, and replies 200 with
+// a JSON object holding "data", "total" and "limit". Any failure is passed to
+// responses.HandleError.
+// @Summary List all provider models
+// @Description Retrieves a paginated list of provider models with optional filtering
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Produce json
+// @Param limit query int false "Number of records to return (default: 20, max: 100)"
+// @Param offset query int false "Number of records to skip for pagination"
+// @Param order query string false "Sort order: asc or desc (default: desc)"
+// @Param provider_id query string false "Filter by provider public ID"
+// @Param model_key query string false "Filter by model key"
+// @Param active query bool false "Filter by active status"
+// @Param supports_images query bool false "Filter by image support"
+// @Success 200 {object} modelresponses.ProviderModelResponse "List of provider models"
+// @Failure 400 {object} responses.ErrorResponse "Invalid query parameters"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/admin/models/provider-models [get]
+func (route *AdminModelRoute) ListProviderModels(reqCtx *gin.Context) {
+	requestContext := reqCtx.Request.Context()
+
+	page, err := requests.GetPaginationFromQuery(reqCtx)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Invalid pagination parameters")
+		return
+	}
+
+	items, count, err := route.providerModelHandler.ListProviderModels(
+		requestContext,
+		route.buildProviderModelFilter(reqCtx),
+		page,
+	)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to retrieve provider models")
+		return
+	}
+
+	body := gin.H{
+		"data":  items,
+		"total": count,
+		"limit": page.Limit,
+	}
+	reqCtx.JSON(http.StatusOK, body)
+}
+
+// GetProviderModel looks up one provider model by the public ID in the path
+// and replies 200 with it as JSON. Lookup failures are passed to
+// responses.HandleError.
+// @Summary Get a provider model
+// @Description Retrieves detailed information about a provider model by its public ID
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Produce json
+// @Param provider_model_public_id path string true "Provider Model Public ID"
+// @Success 200 {object} modelresponses.ProviderModelResponse "Provider model details"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Failure 404 {object} responses.ErrorResponse "Provider model not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/admin/models/provider-models/{provider_model_public_id} [get]
+func (route *AdminModelRoute) GetProviderModel(reqCtx *gin.Context) {
+	providerModelID := reqCtx.Param("provider_model_public_id")
+
+	found, lookupErr := route.providerModelHandler.GetProviderModel(reqCtx.Request.Context(), providerModelID)
+	if lookupErr != nil {
+		responses.HandleError(reqCtx, lookupErr, "Failed to retrieve provider model")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, found)
+}
+
+// UpdateProviderModel binds the JSON body to an UpdateProviderModelRequest,
+// applies it to the provider model named in the path, and replies 200 with the
+// updated model. Bind and update failures are passed to responses.HandleError.
+// @Summary Update a provider model
+// @Description Updates configuration for a provider model including pricing, limits, and feature flags
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param provider_model_public_id path string true "Provider Model Public ID"
+// @Param payload body requestmodels.UpdateProviderModelRequest true "Update payload"
+// @Success 200 {object} modelresponses.ProviderModelResponse "Updated provider model"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
+// @Failure 404 {object} responses.ErrorResponse "Provider model not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/admin/models/provider-models/{provider_model_public_id} [patch]
+func (route *AdminModelRoute) UpdateProviderModel(reqCtx *gin.Context) {
+	providerModelID := reqCtx.Param("provider_model_public_id")
+
+	var payload requestmodels.UpdateProviderModelRequest
+	bindErr := reqCtx.ShouldBindJSON(&payload)
+	if bindErr != nil {
+		responses.HandleError(reqCtx, bindErr, "Invalid request body")
+		return
+	}
+
+	updated, updateErr := route.providerModelHandler.UpdateProviderModel(reqCtx.Request.Context(), providerModelID, payload)
+	if updateErr != nil {
+		responses.HandleError(reqCtx, updateErr, "Failed to update provider model")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, updated)
+}
+
+// BulkToggleProviderModels binds the JSON body, rejects except_models lists
+// longer than MaxExceptModelsLimit, and forwards the request to the
+// provider-model handler's BulkEnableDisableProviderModels, replying 200 with
+// its result. Failures are passed to responses.HandleError.
+// @Summary Bulk enable or disable provider models
+// @Description Enables or disables provider models with flexible patterns: enable all, disable all, enable all except, or disable all except. Optionally filter by provider.
+// @Tags Admin Model API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param payload body requestmodels.BulkEnableModelsRequest true "Bulk toggle payload with enable flag, optional provider filter, and exception list"
+// @Success 200 {object} modelresponses.BulkOperationResponse "Bulk operation result with counts and status"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/admin/models/provider-models/bulk-toggle [post]
+//
+// Supported patterns:
+//   - Enable all: {"enable": true, "except_models": []}
+//   - Disable all: {"enable": false, "except_models": []}
+//   - Enable all except: {"enable": true, "except_models": ["model-key-1", "model-key-2"]}
+//   - Disable all except: {"enable": false, "except_models": ["model-key-1", "model-key-2"]}
+//   - Enable all for provider except: {"enable": true, "provider_id": "prov_abc", "except_models": ["model-x"]}
+//
+// Example use cases:
+//   - "Enable all models": {"enable": true, "except_models": []}
+//   - "Disable all except production whitelist": {"enable": false, "except_models": ["gpt-4o", "claude-3-opus"]}
+//   - "Enable all OpenAI models except experimental": {"enable": true, "provider_id": "prov_openai_123", "except_models": ["gpt-5-preview"]}
+//   - "Disable all models from specific provider": {"enable": false, "provider_id": "prov_xyz_789", "except_models": []}
+func (route *AdminModelRoute) BulkToggleProviderModels(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+
+	var payload requestmodels.BulkEnableModelsRequest
+	bindErr := reqCtx.ShouldBindJSON(&payload)
+	if bindErr != nil {
+		responses.HandleError(reqCtx, bindErr, "Invalid request body")
+		return
+	}
+
+	// Reject oversized exception lists before handing the request to the handler.
+	if exceptCount := len(payload.ExceptModels); exceptCount > MaxExceptModelsLimit {
+		limitErr := platformerrors.NewError(
+			ctx,
+			platformerrors.LayerHandler,
+			platformerrors.ErrorTypeValidation,
+			"except_models list exceeds maximum limit",
+			nil,
+			"5e2bfc34-7433-4022-8996-928852526723",
+		)
+		responses.HandleError(reqCtx, limitErr, "Validation error: except_models list exceeds maximum limit")
+		return
+	}
+
+	result, toggleErr := route.providerModelHandler.BulkEnableDisableProviderModels(ctx, payload)
+	if toggleErr != nil {
+		responses.HandleError(reqCtx, toggleErr, "Failed to perform bulk toggle operation on provider models")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, result)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/provider/admin_provider_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/provider/admin_provider_route.go
new file mode 100644
index 00000000..4e07e83e
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/admin/provider/admin_provider_route.go
@@ -0,0 +1,116 @@
+package provider
+
+import (
+	modelHandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+	requestmodels "jan-server/services/llm-api/internal/interfaces/httpserver/requests/models"
+	"net/http"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+
+	"github.com/gin-gonic/gin"
+)
+
+// AdminProviderRoute exposes the admin HTTP endpoints for listing,
+// registering and updating providers.
+type AdminProviderRoute struct {
+	// providerHandler performs the provider operations each endpoint delegates to.
+	providerHandler *modelHandler.ProviderHandler
+}
+
+// NewAdminProviderRoute builds an AdminProviderRoute backed by providerHandler.
+func NewAdminProviderRoute(
+	providerHandler *modelHandler.ProviderHandler,
+) *AdminProviderRoute {
+	created := new(AdminProviderRoute)
+	created.providerHandler = providerHandler
+	return created
+}
+
+// RegisterRouter mounts the provider admin endpoints on a "providers" group
+// created from router: GET and POST on the group root, and PATCH on
+// "/:provider_public_id".
+//
+// The receiver is named route, like the other methods on this type, so that it
+// no longer shadows the AdminProviderRoute type name inside the method.
+func (route *AdminProviderRoute) RegisterRouter(router *gin.RouterGroup) {
+	providerRoute := router.Group("providers")
+
+	providerRoute.GET("", route.GetAllProviders)
+	providerRoute.POST("", route.RegisterProvider)
+	providerRoute.PATCH("/:provider_public_id", route.UpdateProvider)
+}
+
+// GetAllProviders asks the provider handler for every provider and replies 200
+// with the result as JSON. Failures are passed to responses.HandleError.
+// @Summary Get all providers
+// @Description Retrieves all providers with their model counts
+// @Tags Admin Provider API
+// @Security BearerAuth
+// @Produce json
+// @Success 200 {array} modelresponses.ProviderWithModelCountResponse "List of providers with model counts"
+// @Failure 500 {object} responses.ErrorResponse "Failed to retrieve providers"
+// @Router /v1/admin/providers [get]
+func (route *AdminProviderRoute) GetAllProviders(reqCtx *gin.Context) {
+	providers, listErr := route.providerHandler.GetAllProviders(reqCtx.Request.Context())
+	if listErr != nil {
+		responses.HandleError(reqCtx, listErr, "Failed to retrieve providers")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, providers)
+}
+
+// RegisterProvider binds the JSON body to an AddProviderRequest, passes it to
+// the provider handler, and replies 200 with the handler's result. Bind and
+// registration failures are passed to responses.HandleError.
+// @Summary Register a provider
+// @Description Registers a new provider and synchronizes its available models.
+// @Tags Admin Provider API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param payload body requestmodels.AddProviderRequest true "Provider registration payload"
+// @Success 200 {object} modelresponses.ProviderWithModelsResponse "Registered provider with synced models"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
+// @Failure 500 {object} responses.ErrorResponse "Failed to register provider"
+// @Router /v1/admin/providers [post]
+func (route *AdminProviderRoute) RegisterProvider(reqCtx *gin.Context) {
+	var payload requestmodels.AddProviderRequest
+	bindErr := reqCtx.ShouldBindJSON(&payload)
+	if bindErr != nil {
+		responses.HandleError(reqCtx, bindErr, "Invalid request body")
+		return
+	}
+
+	// The handler takes the request first and the context second.
+	registered, registerErr := route.providerHandler.RegisterProvider(payload, reqCtx.Request.Context())
+	if registerErr != nil {
+		responses.HandleError(reqCtx, registerErr, "Failed to register provider")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, registered)
+}
+
+// UpdateProvider binds the JSON body to an UpdateProviderRequest, applies it to
+// the provider named in the path, and replies 200 with the handler's result.
+// Bind and update failures are passed to responses.HandleError.
+// @Summary Update a provider
+// @Description Updates an existing provider's configuration
+// @Tags Admin Provider API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param provider_public_id path string true "Provider public ID"
+// @Param payload body requestmodels.UpdateProviderRequest true "Provider update payload"
+// @Success 200 {object} modelresponses.ProviderResponse "Updated provider"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload"
+// @Failure 404 {object} responses.ErrorResponse "Provider not found"
+// @Failure 500 {object} responses.ErrorResponse "Failed to update provider"
+// @Router /v1/admin/providers/{provider_public_id} [patch]
+func (route *AdminProviderRoute) UpdateProvider(reqCtx *gin.Context) {
+	providerID := reqCtx.Param("provider_public_id")
+
+	var payload requestmodels.UpdateProviderRequest
+	bindErr := reqCtx.ShouldBindJSON(&payload)
+	if bindErr != nil {
+		responses.HandleError(reqCtx, bindErr, "Invalid request body")
+		return
+	}
+
+	updated, updateErr := route.providerHandler.UpdateProvider(reqCtx.Request.Context(), providerID, payload)
+	if updateErr != nil {
+		responses.HandleError(reqCtx, updateErr, "Failed to update provider")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, updated)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/chat/chat_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/chat/chat_route.go
new file mode 100644
index 00000000..66461964
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/chat/chat_route.go
@@ -0,0 +1,22 @@
+package chat
+
+import (
+	"github.com/gin-gonic/gin"
+)
+
+// ChatRoute groups the chat endpoints under the "/chat" path prefix.
+type ChatRoute struct {
+	// completionAPI is mounted on the "/chat" group by RegisterRouter.
+	completionAPI *ChatCompletionRoute
+}
+
+// NewChatRoute builds a ChatRoute that will mount completionAPI.
+func NewChatRoute(
+	completionAPI *ChatCompletionRoute,
+) *ChatRoute {
+	created := new(ChatRoute)
+	created.completionAPI = completionAPI
+	return created
+}
+
+// RegisterRouter creates the "/chat" group on router and mounts the completion
+// route on it.
+func (chatRoute *ChatRoute) RegisterRouter(router gin.IRouter) {
+	chatRoute.completionAPI.RegisterRouter(router.Group("/chat"))
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/chat/completion_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/chat/completion_route.go
new file mode 100644
index 00000000..e4f2198e
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/chat/completion_route.go
@@ -0,0 +1,103 @@
+package chat
+
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/chathandler"
+	chatrequests "jan-server/services/llm-api/internal/interfaces/httpserver/requests/chat"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	chatresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/chat"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ChatCompletionRoute handles chat completion requests with streaming support by delegating to the chat handler.
+type ChatCompletionRoute struct {
+	// chatHandler creates the completion for each authenticated request.
+	chatHandler *chathandler.ChatHandler
+	// authHandler supplies the middleware chain wrapped around the completion endpoint.
+	authHandler *authhandler.AuthHandler
+}
+
+// NewChatCompletionRoute builds a ChatCompletionRoute from the chat handler it
+// delegates to and the auth handler that guards its endpoint.
+func NewChatCompletionRoute(
+	chatHandler *chathandler.ChatHandler,
+	authHandler *authhandler.AuthHandler,
+) *ChatCompletionRoute {
+	created := new(ChatCompletionRoute)
+	created.chatHandler = chatHandler
+	created.authHandler = authHandler
+	return created
+}
+
+// RegisterRouter registers POST "/completions" on router, with PostCompletion
+// wrapped in the handler chain returned by the auth handler's
+// WithAppUserAuthChain.
+func (chatCompletionRoute *ChatCompletionRoute) RegisterRouter(router *gin.RouterGroup) {
+	guarded := chatCompletionRoute.authHandler.WithAppUserAuthChain(chatCompletionRoute.PostCompletion)
+	router.POST("/completions", guarded...)
+}
+
+// PostCompletion requires a user on the request context, binds the JSON body
+// to a ChatCompletionRequest and delegates to the chat handler. For
+// non-streaming requests it replies 200 with the wrapped completion; for
+// streaming requests it writes nothing further itself.
+// @Summary Create a chat completion
+// @Description Generates a model response for the given chat conversation. This is a standard chat completion API that supports both streaming and non-streaming modes without conversation persistence.
+// @Description
+// @Description **Streaming Mode (stream=true):**
+// @Description - Returns Server-Sent Events (SSE) with real-time streaming
+// @Description - Streams completion chunks directly from the inference model
+// @Description - Final event contains "[DONE]" marker
+// @Description
+// @Description **Non-Streaming Mode (stream=false or omitted):**
+// @Description - Returns single JSON response with complete completion
+// @Description - Standard OpenAI ChatCompletionResponse format
+// @Description
+// @Description **Storage Options:**
+// @Description - `store=true`: Persist the latest input message and assistant response to the active conversation
+// @Description - `store_reasoning=true`: Additionally persist reasoning content provided by the model
+// @Description - When `store` is omitted or false, the conversation remains read-only
+// @Description
+// @Description **Features:**
+// @Description - Supports all OpenAI ChatCompletionRequest parameters
+// @Description - Optional conversation context for conversation persistence
+// @Description - User authentication required
+// @Description - Direct inference model integration
+// @Tags Chat Completions API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Produce text/event-stream
+// @Param request body chatrequests.ChatCompletionRequest true "Chat completion request with streaming options and optional conversation"
+// @Success 200 {object} chatresponses.ChatCompletionResponse "Successful non-streaming response (when stream=false)"
+// @Success 200 {string} string "Successful streaming response (when stream=true) - SSE format with data: {json} events"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request payload, empty messages, or inference failure"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/chat/completions [post]
+func (chatCompletionRoute *ChatCompletionRoute) PostCompletion(reqCtx *gin.Context) {
+	// Without a user on the context the request is rejected as unauthorized.
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "81b47b8b-ddaa-4819-a7b4-a29042c60100")
+		return
+	}
+
+	var payload chatrequests.ChatCompletionRequest
+	bindErr := reqCtx.ShouldBindJSON(&payload)
+	if bindErr != nil {
+		responses.HandleError(reqCtx, bindErr, "Invalid request body")
+		return
+	}
+
+	// The handler receives both the request context and the gin context.
+	outcome, completionErr := chatCompletionRoute.chatHandler.CreateChatCompletion(reqCtx.Request.Context(), reqCtx, caller.ID, payload)
+	if completionErr != nil {
+		responses.HandleError(reqCtx, completionErr, "Failed to complete chat request")
+		return
+	}
+
+	// Streaming requests get no JSON body from this function.
+	if payload.Stream {
+		return
+	}
+
+	// Non-streaming: wrap the response with the conversation ID and title.
+	wrapped := chatresponses.NewChatCompletionResponse(outcome.Response, outcome.ConversationID, outcome.ConversationTitle)
+	reqCtx.JSON(http.StatusOK, wrapped)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/conversation/conversation_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/conversation/conversation_route.go
new file mode 100644
index 00000000..b4023fd8
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/conversation/conversation_route.go
@@ -0,0 +1,607 @@
+package conversation
+
+import (
+	"net/http"
+	"strings"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/conversationhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/requests"
+	conversationrequests "jan-server/services/llm-api/internal/interfaces/httpserver/requests/conversation"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	conversationresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/conversation"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+)
+
+// ConversationRoute binds the /conversations HTTP endpoints to the conversation
+// handler. The auth handler supplies the middleware chain that RegisterRouter
+// wraps around every endpoint.
+type ConversationRoute struct {
+	handler     *conversationhandler.ConversationHandler
+	authHandler *authhandler.AuthHandler
+}
+
+// NewConversationRoute builds a ConversationRoute from the two collaborators it
+// needs. The pointers are stored as given; no nil check is performed here.
+func NewConversationRoute(handler *conversationhandler.ConversationHandler, authHandler *authhandler.AuthHandler) *ConversationRoute {
+	built := ConversationRoute{handler: handler, authHandler: authHandler}
+	return &built
+}
+
+func (route *ConversationRoute) RegisterRouter(router gin.IRouter) {
+	group, secured := router.Group("/conversations"), route.authHandler.WithAppUserAuthChain
+	group.GET("", secured(route.listConversations)...)
+	group.POST("", secured(route.createConversation)...)
+	group.GET("/:conv_public_id", secured(route.handler.ConversationMiddleware(), route.getConversation)...) // ConversationMiddleware is passed ahead of each per-conversation handler
+	group.POST("/:conv_public_id", secured(route.handler.ConversationMiddleware(), route.updateConversation)...)
+	group.DELETE("/:conv_public_id", secured(route.handler.ConversationMiddleware(), route.deleteConversation)...)
+	group.GET("/:conv_public_id/items", secured(route.handler.ConversationMiddleware(), route.listItems)...)
+	group.POST("/:conv_public_id/items", secured(route.handler.ConversationMiddleware(), route.createItems)...)
+	group.GET("/:conv_public_id/items/:item_id", secured(route.handler.ConversationMiddleware(), route.getItem)...)
+	group.DELETE("/:conv_public_id/items/:item_id", secured(route.handler.ConversationMiddleware(), route.deleteItem)...)
+}
+
+// listConversations godoc
+// @Summary List conversations
+// @Description List conversations for the authenticated user with optional referrer filtering.
+// @Tags Conversations API
+// @Security BearerAuth
+// @Produce json
+// @Param referrer query string false "Referrer filter"
+// @Param limit query int false "Maximum number of conversations to return"
+// @Param after query string false "Cursor: public ID of the conversation after which to continue listing"
+// @Param order query string false "Sort order (asc or desc)"
+// @Param scope query string false "Set to 'all' to list conversations across the workspace (requires elevated permissions)"
+// @Success 200 {object} conversationresponses.ConversationListResponse "Successfully retrieved conversations"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request parameters"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/conversations [get]
+func (route *ConversationRoute) listConversations(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "3296ce86-783b-4c05-9fdb-930d3713024e")
+		return
+	}
+
+	var query conversationrequests.ListConversationsQueryParams
+	if bindErr := reqCtx.ShouldBindQuery(&query); bindErr != nil {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid query parameters", "f8a3d4e2-6b9c-4d7e-a1f3-2c5e8d9f0b4a")
+		return
+	}
+
+	// The shared cursor-pagination helper takes a callback that turns a public ID
+	// into a numeric one; here that lookup is done for the caller's conversations.
+	resolveCursor := func(publicID string) (*uint, error) {
+		numericID, lookupErr := route.handler.ResolveConversationPublicIDToNumericID(ctx, caller.ID, publicID)
+		if lookupErr == nil {
+			return numericID, nil
+		}
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, lookupErr, "invalid cursor: conversation not found or not accessible")
+	}
+	page, err := requests.GetCursorPaginationFromQuery(reqCtx, resolveCursor)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to process pagination")
+		return
+	}
+
+	// A limit present in the bound params replaces the helper's value; zero and
+	// negative limits are rejected with a validation error.
+	if limit := query.Limit; limit != nil {
+		if *limit <= 0 {
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "limit must be greater than zero", "a7b2c3d4-e5f6-4g7h-8i9j-0k1l2m3n4o5p")
+			return
+		}
+		page.Limit = limit
+	}
+
+	// The order value is trimmed and lower-cased but not checked against asc/desc here.
+	if query.Order != nil {
+		page.Order = strings.ToLower(strings.TrimSpace(*query.Order))
+	}
+
+	// A missing or whitespace-only referrer means "no referrer filter" (nil).
+	var referrer *string
+	if query.Referrer != nil {
+		if cleaned := strings.TrimSpace(*query.Referrer); cleaned != "" {
+			referrer = &cleaned
+		}
+	}
+
+	// NOTE(review): the listing is always requested for the caller's own ID; the
+	// `scope` query parameter in the godoc above is not read in this function. Confirm.
+	listing, err := route.handler.ListConversations(ctx, &caller.ID, referrer, page)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to list conversations")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, listing)
+}
+
+// createConversation godoc
+// @Summary Create a conversation
+// @Description Create a new conversation to store and retrieve conversation state across Response API calls
+// @Description
+// @Description **Features:**
+// @Description - Create conversation with optional metadata (max 16 key-value pairs)
+// @Description - Add up to 20 initial items to the conversation
+// @Description - Returns conversation ID with `conv_` prefix
+// @Description - Supports OpenAI Conversations API format
+// @Description
+// @Description **Metadata Constraints:**
+// @Description - Maximum 16 key-value pairs
+// @Description - Keys: max 64 characters
+// @Description - Values: max 512 characters
+// @Tags Conversations API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param request body conversationrequests.CreateConversationRequest true "Create conversation request with optional items and metadata"
+// @Success 200 {object} conversationresponses.ConversationResponse "Successfully created conversation"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - validation failed or too many items"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error - conversation creation failed"
+// @Router /v1/conversations [post]
+func (route *ConversationRoute) createConversation(reqCtx *gin.Context) {
+	// The trailing error code appears intended to pinpoint the call site, so it is unique to createConversation.
+	user, ok := authhandler.GetUserFromContext(reqCtx)
+	if !ok {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "5d0e7c1a-94b3-4f26-8a7e-c3b1f6d29e48")
+		return
+	}
+
+	// A body that fails to bind is reported as a generic validation error.
+	var req conversationrequests.CreateConversationRequest
+	if err := reqCtx.ShouldBindJSON(&req); err != nil {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid request body", "b9c8d7e6-f5a4-4d3e-a1b2-0c9d8e7f6g5h")
+		return
+	}
+	response, err := route.handler.CreateConversation(reqCtx.Request.Context(), user.ID, req)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to create conversation")
+		return
+	}
+	reqCtx.JSON(http.StatusOK, response)
+}
+
+// getConversation godoc
+// @Summary Get a conversation
+// @Description Retrieve a conversation by ID with ownership verification
+// @Description
+// @Description **Features:**
+// @Description - Retrieves conversation metadata including creation timestamp
+// @Description - Automatic ownership verification (user can only access their own conversations)
+// @Description - Returns OpenAI-compatible conversation object
+// @Description
+// @Description **Response Fields:**
+// @Description - `id`: Conversation ID with `conv_` prefix
+// @Description - `object`: Always "conversation"
+// @Description - `created_at`: Unix timestamp
+// @Description - `metadata`: User-defined key-value pairs
+// @Tags Conversations API
+// @Security BearerAuth
+// @Produce json
+// @Param conv_public_id path string true "Conversation ID (format: conv_xxxxx)"
+// @Success 200 {object} conversationresponses.ConversationResponse "Successfully retrieved conversation"
+// @Failure 400 {object} responses.ErrorResponse "Invalid conversation ID format"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 404 {object} responses.ErrorResponse "Conversation not found or access denied"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/conversations/{conv_public_id} [get]
+func (route *ConversationRoute) getConversation(reqCtx *gin.Context) {
+	// ConversationMiddleware is registered ahead of this handler and is expected to have
+	// stored the conversation on the gin context; its absence is reported as an internal error.
+	conversation, found := conversationhandler.GetConversationFromContext(reqCtx)
+	if found {
+		reqCtx.JSON(http.StatusOK, conversationresponses.NewConversationResponse(conversation))
+		return
+	}
+
+	responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "conversation not found in context", "c1d2e3f4-a5b6-4c7d-8e9f-0a1b2c3d4e5f")
+}
+
+// updateConversation godoc
+// @Summary Update a conversation
+// @Description Update a conversation's metadata while preserving existing items
+// @Description
+// @Description **Features:**
+// @Description - Update metadata key-value pairs
+// @Description - Replaces entire metadata object (not merged)
+// @Description - Items remain unchanged
+// @Description - Automatic ownership verification
+// @Description
+// @Description **Metadata Constraints:**
+// @Description - Maximum 16 key-value pairs
+// @Description - Keys: max 64 characters
+// @Description - Values: max 512 characters
+// @Tags Conversations API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param conv_public_id path string true "Conversation ID (format: conv_xxxxx)"
+// @Param request body conversationrequests.UpdateConversationRequest true "Update conversation request with new metadata"
+// @Success 200 {object} conversationresponses.ConversationResponse "Successfully updated conversation"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - validation failed or invalid metadata"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 404 {object} responses.ErrorResponse "Conversation not found or access denied"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error - update failed"
+// @Router /v1/conversations/{conv_public_id} [post]
+func (route *ConversationRoute) updateConversation(reqCtx *gin.Context) {
+	// Conversation first, then user: both are read back from the gin context.
+	conversation, found := conversationhandler.GetConversationFromContext(reqCtx)
+	if !found {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "conversation not found in context", "d2e3f4a5-b6c7-4d8e-9f0a-1b2c3d4e5f6g")
+		return
+	}
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "e3f4a5b6-c7d8-4e9f-0a1b-2c3d4e5f6g7h")
+		return
+	}
+
+	// A body that cannot be bound is reported as a generic validation error.
+	var payload conversationrequests.UpdateConversationRequest
+	if bindErr := reqCtx.ShouldBindJSON(&payload); bindErr != nil {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid request body", "f4a5b6c7-d8e9-4f0a-1b2c-3d4e5f6g7h8i")
+		return
+	}
+
+	// The update is addressed by the conversation's public ID on behalf of the caller.
+	updated, err := route.handler.UpdateConversation(reqCtx.Request.Context(), caller.ID, conversation.PublicID, payload)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to update conversation")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, updated)
+}
+
+// deleteConversation godoc
+// @Summary Delete a conversation
+// @Description Delete a conversation (soft delete). Items in the conversation will not be deleted but will be inaccessible.
+// @Description
+// @Description **Features:**
+// @Description - Soft delete (conversation marked as deleted, not physically removed)
+// @Description - Items remain in database but become inaccessible
+// @Description - Automatic ownership verification
+// @Description - Returns deletion confirmation with conversation ID
+// @Description
+// @Description **Response:**
+// @Description - `id`: Deleted conversation ID
+// @Description - `object`: Always "conversation.deleted"
+// @Description - `deleted`: Always true
+// @Tags Conversations API
+// @Security BearerAuth
+// @Produce json
+// @Param conv_public_id path string true "Conversation ID (format: conv_xxxxx)"
+// @Success 200 {object} conversationresponses.ConversationDeletedResponse "Successfully deleted conversation"
+// @Failure 400 {object} responses.ErrorResponse "Invalid conversation ID format"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 404 {object} responses.ErrorResponse "Conversation not found or access denied"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error - deletion failed"
+// @Router /v1/conversations/{conv_public_id} [delete]
+func (route *ConversationRoute) deleteConversation(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+
+	// The target conversation is taken from the gin context rather than re-read from the path.
+	conversation, found := conversationhandler.GetConversationFromContext(reqCtx)
+	if !found {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "conversation not found in context", "a5b6c7d8-e9f0-4a1b-2c3d-4e5f6g7h8i9j")
+		return
+	}
+
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "b6c7d8e9-f0a1-4b2c-3d4e-5f6g7h8i9j0k")
+		return
+	}
+
+	deleted, err := route.handler.DeleteConversation(ctx, caller.ID, conversation.PublicID)
+	if err == nil {
+		reqCtx.JSON(http.StatusOK, deleted)
+		return
+	}
+	responses.HandleError(reqCtx, err, "Failed to delete conversation")
+}
+
+// listItems godoc
+// @Summary List conversation items
+// @Description List all items in a conversation with cursor-based pagination support
+// @Description
+// @Description **Features:**
+// @Description - Cursor-based pagination using item IDs
+// @Description - Configurable page size (1-100 items, default 20)
+// @Description - Sort order control (ascending or descending)
+// @Description - Optional include parameter for additional fields
+// @Description - Returns paginated list with navigation cursors
+// @Description
+// @Description **Pagination:**
+// @Description - Use `after` cursor from previous response for next page
+// @Description - `has_more` indicates if more items are available
+// @Description - `first_id` and `last_id` provide cursor references
+// @Description
+// @Description **Query Parameters:**
+// @Description - `limit`: Number of items (1-100, default 20)
+// @Description - `order`: Sort order ("asc" or "desc", default "desc")
+// @Description - `after`: Item ID cursor for pagination
+// @Description - `include`: Additional fields to include (optional)
+// @Tags Conversations API
+// @Security BearerAuth
+// @Produce json
+// @Param conv_public_id path string true "Conversation ID (format: conv_xxxxx)"
+// @Param after query string false "Item ID cursor to list items after (pagination)"
+// @Param limit query integer false "Number of items to return (1-100)" default(20) minimum(1) maximum(100)
+// @Param order query string false "Sort order: asc or desc" default(desc) Enums(asc, desc)
+// @Param include query []string false "Additional fields to include in response"
+// @Success 200 {object} conversationresponses.ItemListResponse "Successfully retrieved items list"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - invalid parameters or conversation ID"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 404 {object} responses.ErrorResponse "Conversation not found or access denied"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error - listing failed"
+// @Router /v1/conversations/{conv_public_id}/items [get]
+func (route *ConversationRoute) listItems(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+
+	// Conversation and user are read back from the gin context; a missing
+	// conversation is an internal error, a missing user is an auth failure.
+	conversation, found := conversationhandler.GetConversationFromContext(reqCtx)
+	if !found {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "conversation not found in context", "c7d8e9f0-a1b2-4c3d-4e5f-6g7h8i9j0k1l")
+		return
+	}
+
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "d8e9f0a1-b2c3-4d4e-5f6g-7h8i9j0k1l2m")
+		return
+	}
+
+	// Binding only serves to reject a malformed query string: the bound values
+	// are not read again in this function.
+	var query conversationrequests.ListItemsQueryParams
+	if bindErr := reqCtx.ShouldBindQuery(&query); bindErr != nil {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid query parameters", "e9f0a1b2-c3d4-4e5f-6g7h-8i9j0k1l2m3n")
+		return
+	}
+
+	// Callback for the shared cursor helper: turns an item public ID into its
+	// numeric ID, looked up within this conversation on behalf of the caller.
+	resolveCursor := func(itemPublicID string) (*uint, error) {
+		numericID, lookupErr := route.handler.ResolveItemPublicIDToNumericID(ctx, caller.ID, conversation.PublicID, itemPublicID)
+		if lookupErr == nil {
+			return numericID, nil
+		}
+		return nil, platformerrors.AsError(ctx, platformerrors.LayerHandler, lookupErr, "invalid cursor: item not found or not accessible")
+	}
+	page, err := requests.GetCursorPaginationFromQuery(reqCtx, resolveCursor)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to process pagination")
+		return
+	}
+
+	// Page size: 20 when the helper returned no limit, otherwise the helper's
+	// value clamped into the range [1, 100]. Out-of-range values are not an error.
+	pageSize := 20
+	if page.Limit != nil {
+		pageSize = *page.Limit
+	}
+	switch {
+	case pageSize < 1:
+		pageSize = 1
+	case pageSize > 100:
+		pageSize = 100
+	}
+
+	// The response is assembled by hand rather than through the generic cursor-page
+	// builder: the OpenAI-style item list carries no totals, and asking for one
+	// row beyond the page size reveals whether more items exist without a
+	// second query.
+	probeSize := pageSize + 1
+	page.Limit = &probeSize
+
+	items, err := route.handler.ListItems(ctx, caller.ID, conversation.PublicID, page)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to list items")
+		return
+	}
+
+	// More rows than the page size means the probe row came back: report it and trim.
+	hasMore := false
+	if len(items) > pageSize {
+		hasMore = true
+		items = items[:pageSize]
+	}
+
+	// Cursor IDs are the first and last public IDs on the page; both stay empty
+	// strings when the page has no items.
+	var firstID, lastID string
+	if last := len(items) - 1; last >= 0 {
+		firstID = items[0].PublicID
+		lastID = items[last].PublicID
+	}
+
+	// The response shape follows the OpenAI list format.
+	reqCtx.JSON(http.StatusOK, conversationresponses.ItemListResponse{
+		Object:  "list",
+		Data:    items,
+		FirstID: firstID,
+		LastID:  lastID,
+		HasMore: hasMore,
+	})
+}
+
+// createItems godoc
+// @Summary Create conversation items
+// @Description Add items to a conversation. You may add up to 20 items at a time.
+// @Description
+// @Description **Features:**
+// @Description - Bulk item creation (max 20 items per request)
+// @Description - Automatic item ID generation with `msg_` prefix
+// @Description - Items added to conversation's active branch (default: MAIN)
+// @Description - Returns list of created items with generated IDs
+// @Description
+// @Description **Item Types:**
+// @Description - `message`: User or assistant messages
+// @Description - `tool_call`: Tool/function call items
+// @Description - `tool_response`: Tool/function response items
+// @Description - Other OpenAI-compatible item types
+// @Description
+// @Description **Constraints:**
+// @Description - Maximum 20 items per request
+// @Description - Each item must have valid type and content
+// @Description - Items are immutable after creation
+// @Tags Conversations API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param conv_public_id path string true "Conversation ID (format: conv_xxxxx)"
+// @Param include query []string false "Additional fields to include in response"
+// @Param request body conversationrequests.CreateItemsRequest true "Create items request with array of items"
+// @Success 200 {object} conversationresponses.ConversationItemCreatedResponse "Successfully created items"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request - too many items, invalid format, or validation failed"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 404 {object} responses.ErrorResponse "Conversation not found or access denied"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error - item creation failed"
+// @Router /v1/conversations/{conv_public_id}/items [post]
+func (route *ConversationRoute) createItems(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+
+	// The conversation the items belong to is read back from the gin context.
+	conversation, found := conversationhandler.GetConversationFromContext(reqCtx)
+	if !found {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "conversation not found in context", "f0a1b2c3-d4e5-4f6g-7h8i-9j0k1l2m3n4o")
+		return
+	}
+
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "a1b2c3d4-e5f6-4g7h-8i9j-0k1l2m3n4o5p")
+		return
+	}
+
+	var payload conversationrequests.CreateItemsRequest
+	if bindErr := reqCtx.ShouldBindJSON(&payload); bindErr != nil {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid request body", "b2c3d4e5-f6g7-4h8i-9j0k-1l2m3n4o5p6q")
+		return
+	}
+
+	created, err := route.handler.CreateItems(ctx, caller.ID, conversation.PublicID, payload)
+	if err == nil {
+		reqCtx.JSON(http.StatusOK, created)
+		return
+	}
+	responses.HandleError(reqCtx, err, "Failed to create items")
+}
+
+// getItem godoc
+// @Summary Get a conversation item
+// @Description Retrieve a single item from a conversation by item ID
+// @Description
+// @Description **Features:**
+// @Description - Retrieve specific item by ID
+// @Description - Returns complete item with all content
+// @Description - Automatic ownership verification via conversation
+// @Description - Optional include parameter for additional fields
+// @Description
+// @Description **Response Fields:**
+// @Description - `id`: Item ID with `msg_` prefix
+// @Description - `type`: Item type (message, tool_call, etc.)
+// @Description - `role`: Role for message items (user, assistant)
+// @Description - `content`: Item content array
+// @Description - `status`: Item status (completed, incomplete, etc.)
+// @Description - `created_at`: Unix timestamp
+// @Tags Conversations API
+// @Security BearerAuth
+// @Produce json
+// @Param conv_public_id path string true "Conversation ID (format: conv_xxxxx)"
+// @Param item_id path string true "Item ID (format: msg_xxxxx)"
+// @Param include query []string false "Additional fields to include in response"
+// @Success 200 {object} conversationresponses.ItemResponse "Successfully retrieved item"
+// @Failure 400 {object} responses.ErrorResponse "Invalid conversation ID or item ID format"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 404 {object} responses.ErrorResponse "Conversation or item not found, or access denied"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/conversations/{conv_public_id}/items/{item_id} [get]
+func (route *ConversationRoute) getItem(reqCtx *gin.Context) {
+	// The owning conversation is read back from the gin context; if it is missing
+	// the request fails with an internal error before any item lookup happens.
+	conversation, found := conversationhandler.GetConversationFromContext(reqCtx)
+	if !found {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "conversation not found in context", "c3d4e5f6-g7h8-4i9j-0k1l-2m3n4o5p6q7r")
+		return
+	}
+
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "d4e5f6g7-h8i9-4j0k-1l2m-3n4o5p6q7r8s")
+		return
+	}
+
+	// The item ID is handed over exactly as it appears in the path; it is not validated here.
+	item, err := route.handler.GetItem(reqCtx.Request.Context(), caller.ID, conversation.PublicID, reqCtx.Param("item_id"))
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to get item")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, item)
+}
+
+// deleteItem godoc
+// @Summary Delete a conversation item
+// @Description Delete an item from a conversation. The item will be removed from the conversation.
+// @Description
+// @Description **Features:**
+// @Description - Remove specific item from conversation
+// @Description - Automatic ownership verification
+// @Description - Returns updated conversation object after deletion
+// @Description - Items are permanently removed (not soft delete)
+// @Description
+// @Description **Important:**
+// @Description - Deleting an item may affect conversation flow
+// @Description - Item IDs are not reused after deletion
+// @Description - Other items in conversation remain unchanged
+// @Description - Consider creating a new branch instead of deleting items
+// @Description
+// @Description **Response:**
+// @Description Returns the conversation object (not the deleted item)
+// @Tags Conversations API
+// @Security BearerAuth
+// @Produce json
+// @Param conv_public_id path string true "Conversation ID (format: conv_xxxxx)"
+// @Param item_id path string true "Item ID to delete (format: msg_xxxxx)"
+// @Success 200 {object} conversationresponses.ConversationResponse "Successfully deleted item, returns conversation"
+// @Failure 400 {object} responses.ErrorResponse "Invalid conversation ID or item ID format"
+// @Failure 401 {object} responses.ErrorResponse "Unauthorized - missing or invalid authentication"
+// @Failure 404 {object} responses.ErrorResponse "Conversation or item not found, or access denied"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error - deletion failed"
+// @Router /v1/conversations/{conv_public_id}/items/{item_id} [delete]
+func (route *ConversationRoute) deleteItem(reqCtx *gin.Context) {
+	ctx := reqCtx.Request.Context()
+
+	conversation, found := conversationhandler.GetConversationFromContext(reqCtx)
+	if !found {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "conversation not found in context", "e5f6g7h8-i9j0-4k1l-2m3n-4o5p6q7r8s9t")
+		return
+	}
+
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "f6g7h8i9-j0k1-4l2m-3n4o-5p6q7r8s9t0u")
+		return
+	}
+
+	// Whatever the handler returns on success is serialized as the 200 response body.
+	targetItemID := reqCtx.Param("item_id")
+	result, err := route.handler.DeleteItem(ctx, caller.ID, conversation.PublicID, targetItemID)
+	if err == nil {
+		reqCtx.JSON(http.StatusOK, result)
+		return
+	}
+	responses.HandleError(reqCtx, err, "Failed to delete item")
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/llm/projects/routes.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/llm/projects/routes.go
new file mode 100644
index 00000000..1bac962b
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/llm/projects/routes.go
@@ -0,0 +1,213 @@
+package projects
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/projecthandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/requests"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/requests/projectreq"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+)
+
+// ProjectRoute groups the HTTP endpoints that RegisterRoutes mounts under
+// "/projects".
+type ProjectRoute struct {
+	handler     *projecthandler.ProjectHandler // performs the project operations the endpoints delegate to
+	authHandler *authhandler.AuthHandler // produces the middleware chain wrapped around each endpoint
+}
+
+// NewProjectRoute returns a ProjectRoute that stores the given project handler
+// and auth handler for use by its endpoints.
+func NewProjectRoute(handler *projecthandler.ProjectHandler, authHandler *authhandler.AuthHandler) *ProjectRoute {
+	route := new(ProjectRoute)
+	route.handler = handler
+	route.authHandler = authHandler
+	return route
+}
+
+// RegisterRoutes registers project routes
+func (r *ProjectRoute) RegisterRoutes(rg *gin.RouterGroup) {
+	projects := rg.Group("/projects")
+	projects.POST("", r.authHandler.WithAppUserAuthChain(r.createProject)...)
+	projects.GET("", r.authHandler.WithAppUserAuthChain(r.listProjects)...)
+	projects.GET("/:project_id", r.authHandler.WithAppUserAuthChain(r.getProject)...)
+	projects.PATCH("/:project_id", r.authHandler.WithAppUserAuthChain(r.updateProject)...)
+	projects.DELETE("/:project_id", r.authHandler.WithAppUserAuthChain(r.deleteProject)...)
+}
+
+// createProject godoc
+// @Summary Create project
+// @Description Create a new project for grouping conversations
+// @Tags Projects API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param request body projectreq.CreateProjectRequest true "Create project request"
+// @Success 201 {object} projectres.ProjectResponse
+// @Failure 400 {object} responses.ErrorResponse
+// @Failure 401 {object} responses.ErrorResponse
+// @Failure 500 {object} responses.ErrorResponse
+// @Router /v1/projects [post]
+func (r *ProjectRoute) createProject(reqCtx *gin.Context) {
+	// The user is presumably placed on the context by the auth chain; without
+	// it the request is answered as unauthorized.
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "proj-create-001")
+		return
+	}
+
+	// Decode the JSON body; the binding error itself is not forwarded to the client.
+	var payload projectreq.CreateProjectRequest
+	if bindErr := reqCtx.ShouldBindJSON(&payload); bindErr != nil {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid request body", "proj-create-002")
+		return
+	}
+
+	created, err := r.handler.CreateProject(reqCtx.Request.Context(), caller.ID, payload)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to create project")
+		return
+	}
+
+	// 201 Created, matching the @Success annotation above.
+	reqCtx.JSON(201, created)
+}
+
+// listProjects godoc
+// @Summary List projects
+// @Description List all projects for the authenticated user
+// @Tags Projects API
+// @Security BearerAuth
+// @Produce json
+// @Param limit query int false "Maximum number of projects to return"
+// @Param after query string false "Return projects after the given numeric ID"
+// @Param order query string false "Sort order (asc or desc)"
+// @Success 200 {object} projectres.ProjectListResponse
+// @Failure 401 {object} responses.ErrorResponse
+// @Failure 500 {object} responses.ErrorResponse
+// @Router /v1/projects [get]
+func (r *ProjectRoute) listProjects(reqCtx *gin.Context) {
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "proj-list-001")
+		return
+	}
+
+	// Pagination settings come from the query string; failures are reported
+	// before the handler is ever called.
+	page, err := requests.GetCursorPaginationFromQuery(reqCtx, nil)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to process pagination")
+		return
+	}
+
+	// Only the caller's own user ID is passed down to the listing.
+	listing, err := r.handler.ListProjects(reqCtx.Request.Context(), caller.ID, page)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to list projects")
+		return
+	}
+
+	reqCtx.JSON(200, listing)
+}
+
+// getProject godoc
+// @Summary Get project
+// @Description Get a single project by ID
+// @Tags Projects API
+// @Security BearerAuth
+// @Produce json
+// @Param project_id path string true "Project ID"
+// @Success 200 {object} projectres.ProjectResponse
+// @Failure 401 {object} responses.ErrorResponse
+// @Failure 404 {object} responses.ErrorResponse
+// @Failure 500 {object} responses.ErrorResponse
+// @Router /v1/projects/{project_id} [get]
+func (r *ProjectRoute) getProject(reqCtx *gin.Context) {
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "proj-get-001")
+		return
+	}
+
+	// The project_id path segment is handed to the handler as-is, together
+	// with the caller's user ID.
+	project, err := r.handler.GetProject(reqCtx.Request.Context(), caller.ID, reqCtx.Param("project_id"))
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to get project")
+		return
+	}
+
+	reqCtx.JSON(200, project)
+}
+
+// updateProject godoc
+// @Summary Update project
+// @Description Update project name, instruction, or archived status
+// @Tags Projects API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param project_id path string true "Project ID"
+// @Param request body projectreq.UpdateProjectRequest true "Update request"
+// @Success 200 {object} projectres.ProjectResponse
+// @Failure 400 {object} responses.ErrorResponse
+// @Failure 401 {object} responses.ErrorResponse
+// @Failure 404 {object} responses.ErrorResponse
+// @Failure 500 {object} responses.ErrorResponse
+// @Router /v1/projects/{project_id} [patch]
+func (r *ProjectRoute) updateProject(reqCtx *gin.Context) {
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "proj-update-001")
+		return
+	}
+
+	// Decode the JSON body before touching the handler; a malformed body is a
+	// validation error and the binding detail is not sent to the client.
+	var patch projectreq.UpdateProjectRequest
+	if bindErr := reqCtx.ShouldBindJSON(&patch); bindErr != nil {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid request body", "proj-update-002")
+		return
+	}
+
+	targetID := reqCtx.Param("project_id")
+	updated, err := r.handler.UpdateProject(reqCtx.Request.Context(), caller.ID, targetID, patch)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to update project")
+		return
+	}
+
+	reqCtx.JSON(200, updated)
+}
+
+// deleteProject godoc
+// @Summary Delete project
+// @Description Soft-delete a project
+// @Tags Projects API
+// @Security BearerAuth
+// @Produce json
+// @Param project_id path string true "Project ID"
+// @Success 200 {object} projectres.ProjectDeletedResponse
+// @Failure 401 {object} responses.ErrorResponse
+// @Failure 404 {object} responses.ErrorResponse
+// @Failure 500 {object} responses.ErrorResponse
+// @Router /v1/projects/{project_id} [delete]
+func (r *ProjectRoute) deleteProject(reqCtx *gin.Context) {
+	caller, authenticated := authhandler.GetUserFromContext(reqCtx)
+	if !authenticated {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeUnauthorized, "authentication required", "proj-delete-001")
+		return
+	}
+
+	// The handler receives the caller's user ID and the raw project_id path
+	// segment; whatever it returns is sent back as the response body.
+	outcome, err := r.handler.DeleteProject(reqCtx.Request.Context(), caller.ID, reqCtx.Param("project_id"))
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to delete project")
+		return
+	}
+
+	reqCtx.JSON(200, outcome)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/model/model_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/model/model_route.go
new file mode 100644
index 00000000..2648ff23
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/model/model_route.go
@@ -0,0 +1,131 @@
+package model
+
+import (
+	"net/http"
+	"strings"
+
+	domainmodel "jan-server/services/llm-api/internal/domain/model"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	modelHandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	modelresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/model"
+	modelProvider "jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/model/provider"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+)
+
+// HeaderIncludeProviderData is the request header GetModels inspects: when its
+// value equals "true" (compared case-insensitively) the response includes
+// provider metadata.
+const HeaderIncludeProviderData = "X-PROVIDER-DATA"
+
+// ModelRoute bundles what the "models" endpoints need: the model handler used
+// for listing, the catalog handler used for catalog lookups, the nested
+// provider route, and the auth handler that supplies the middleware chain.
+type ModelRoute struct {
+	modelHandler        *modelHandler.ModelHandler
+	modelCatalogHandler *modelHandler.ModelCatalogHandler
+	modelProvider       *modelProvider.ModelProviderRoute
+	authHandler         *authhandler.AuthHandler
+}
+
+// NewModelRoute returns a ModelRoute holding the given handlers and the nested
+// provider route.
+func NewModelRoute(
+	modelHandler *modelHandler.ModelHandler,
+	modelCatalogHandler *modelHandler.ModelCatalogHandler,
+	modelProvider *modelProvider.ModelProviderRoute,
+	authHandler *authhandler.AuthHandler,
+) *ModelRoute {
+	route := new(ModelRoute)
+	route.modelHandler = modelHandler
+	route.modelCatalogHandler = modelCatalogHandler
+	route.modelProvider = modelProvider
+	route.authHandler = authHandler
+	return route
+}
+
+// RegisterRouter mounts the model endpoints on a "models" group of router and
+// then lets the provider route register itself on that same group.
+func (route *ModelRoute) RegisterRouter(router *gin.RouterGroup) {
+	group := router.Group("models")
+
+	// Listing models goes through authHandler.WithAppUserAuthChain.
+	listChain := route.authHandler.WithAppUserAuthChain(route.GetModels)
+	group.GET("", listChain...)
+
+	// The wildcard segment lets public IDs that contain slashes match.
+	// NOTE(review): this route is registered without the auth chain although
+	// its swagger annotations declare BearerAuth — confirm this is intended.
+	group.GET("/catalogs/*model_public_id", route.GetModelCatalog)
+
+	route.modelProvider.RegisterRouter(group)
+}
+
+// GetModels godoc
+// @Summary List available models
+// @Description Retrieves a list of available models that can be used for chat completions or other tasks. Returns either simple model list or detailed list with provider metadata based on X-PROVIDER-DATA header.
+// @Tags Chat Completions API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Param X-PROVIDER-DATA header string false "Set to 'true' to include provider metadata in response" Enums(true, false)
+// @Success 200 {object} modelresponses.ModelResponseList "List of models (when X-PROVIDER-DATA header is not true)"
+// @Success 200 {object} modelresponses.ModelWithProviderResponseList "List of models with provider metadata (when X-PROVIDER-DATA=true)"
+// @Failure 404 {object} responses.ErrorResponse "Models or providers not found"
+// @Failure 500 {object} responses.ErrorResponse "Failed to retrieve models"
+// @Router /v1/models [get]
+func (route *ModelRoute) GetModels(reqCtx *gin.Context) {
+	// The header value is compared ignoring case, so "TRUE" also enables provider data.
+	withProviders := strings.EqualFold(reqCtx.GetHeader(HeaderIncludeProviderData), "true")
+
+	accessible, err := route.modelHandler.BuildAccessibleProviderModels(reqCtx.Request.Context())
+	// NOTE(review): a nil result with a nil error also takes this branch, so
+	// HandleError can be called with a nil err — confirm it handles that.
+	if err != nil || accessible == nil {
+		responses.HandleError(reqCtx, err, "Failed to retrieve accessible models")
+		return
+	}
+
+	// Either list being empty is reported as not found.
+	if len(accessible.ProviderModels) == 0 || len(accessible.Providers) == 0 {
+		responses.HandleNewError(reqCtx, platformerrors.ErrorTypeNotFound, "no models or providers found", "92597a6f-3846-451e-b2de-f41bf1fbff68")
+		return
+	}
+
+	// Index the providers by ID for the response builders; nil entries are dropped.
+	providerByID := make(map[uint]*domainmodel.Provider, len(accessible.Providers))
+	for _, candidate := range accessible.Providers {
+		if candidate != nil {
+			providerByID[candidate.ID] = candidate
+		}
+	}
+
+	if withProviders {
+		// Detailed form: one entry per provider model, no merging.
+		reqCtx.JSON(http.StatusOK, modelresponses.ModelWithProviderResponseList{
+			Object: "list",
+			Data:   modelresponses.BuildModelResponseListWithProvider(accessible.ProviderModels, providerByID),
+		})
+		return
+	}
+
+	// Simple form: provider models are merged by the handler before the list is built.
+	merged := route.modelHandler.MergeModels(accessible.ProviderModels, providerByID)
+	reqCtx.JSON(http.StatusOK, modelresponses.ModelResponseList{
+		Object: "list",
+		Data:   modelresponses.BuildModelResponseList(merged, providerByID),
+	})
+}
+
+// GetModelCatalog
+// @Summary Get a model catalog entry
+// @Description Retrieves detailed information about a model catalog entry by its public ID (supports IDs with slashes like openrouter/nova-lite-v1)
+// @Tags Model API
+// @Security BearerAuth
+// @Produce json
+// @Param model_public_id path string true "Model Catalog Public ID (can contain slashes)"
+// @Success 200 {object} modelresponses.ModelCatalogResponse "Model catalog details"
+// @Failure 400 {object} responses.ErrorResponse "Invalid request"
+// @Failure 404 {object} responses.ErrorResponse "Model catalog not found"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/models/catalogs/{model_public_id} [get]
+func (route *ModelRoute) GetModelCatalog(reqCtx *gin.Context) {
+	// The wildcard parameter arrives with its leading "/", which is not part
+	// of the public ID; only that single prefix is removed.
+	rawID := reqCtx.Param("model_public_id")
+	catalogID := strings.TrimPrefix(rawID, "/")
+
+	entry, err := route.modelCatalogHandler.GetCatalog(reqCtx.Request.Context(), catalogID)
+	if err != nil {
+		responses.HandleError(reqCtx, err, "Failed to retrieve model catalog")
+		return
+	}
+
+	reqCtx.JSON(http.StatusOK, entry)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/model/provider/model_provider_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/model/provider/model_provider_route.go
new file mode 100644
index 00000000..2d252a50
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/model/provider/model_provider_route.go
@@ -0,0 +1,45 @@
+package provider
+
+import (
+	"net/http"
+
+	modelHandler "jan-server/services/llm-api/internal/interfaces/httpserver/handlers/modelhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/responses"
+	modelresponses "jan-server/services/llm-api/internal/interfaces/httpserver/responses/model"
+
+	"github.com/gin-gonic/gin"
+)
+
+// ModelProviderRoute serves the "providers" endpoint; it relies on the model
+// handler to build the set of accessible providers.
+type ModelProviderRoute struct {
+	modelHandler *modelHandler.ModelHandler
+}
+
+// NewModelProviderRoute returns a ModelProviderRoute backed by modelHandler.
+func NewModelProviderRoute(modelHandler *modelHandler.ModelHandler) *ModelProviderRoute {
+	route := new(ModelProviderRoute)
+	route.modelHandler = modelHandler
+	return route
+}
+
+// RegisterRouter mounts GET on a "providers" group of router, served by
+// listProviders.
+// NOTE(review): no auth middleware is attached here although listProviders'
+// swagger annotations declare BearerAuth — confirm the caller's group covers it.
+func (modelProviderRoute *ModelProviderRoute) RegisterRouter(router *gin.RouterGroup) {
+	router.Group("providers").GET("", modelProviderRoute.listProviders)
+}
+
+// listProviders godoc
+// @Summary List model providers
+// @Description Retrieves a list of available model providers that can be used for inference.
+// @Tags Model API
+// @Security BearerAuth
+// @Accept json
+// @Produce json
+// @Success 200 {object} modelresponses.ProviderResponseList "List of providers"
+// @Failure 500 {object} responses.ErrorResponse "Failed to retrieve providers"
+// @Router /v1/models/providers [get]
+func (modelProviderRoute *ModelProviderRoute) listProviders(reqCtx *gin.Context) {
+	// Pass the request's own context instead of the *gin.Context. A gin.Context
+	// only forwards cancellation and request-scoped values to the underlying
+	// request context when the engine enables ContextWithFallback, so handing
+	// it over directly can drop both.
+	ctx := reqCtx.Request.Context()
+
+	accessibleModels, err := modelProviderRoute.modelHandler.BuildAccessibleProviderModels(ctx)
+	// A nil result is treated as a failure even when err is nil.
+	if err != nil || accessibleModels == nil {
+		responses.HandleError(reqCtx, err, "Failed to retrieve providers")
+		return
+	}
+	reqCtx.JSON(http.StatusOK, modelresponses.BuildProviderResponseList(accessibleModels.Providers))
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/users/users_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/users/users_route.go
new file mode 100644
index 00000000..cf51b17e
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/users/users_route.go
@@ -0,0 +1,38 @@
+package users
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/authhandler"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/handlers/usersettingshandler"
+)
+
+// UsersRoute handles /v1/users routes.
+// It holds the settings handler whose GetSettings/UpdateSettings methods serve
+// the endpoints, and the auth handler that wraps them in a middleware chain.
+type UsersRoute struct {
+	settingsHandler *usersettingshandler.UserSettingsHandler
+	authHandler     *authhandler.AuthHandler
+}
+
+// NewUsersRoute returns a UsersRoute that stores the given settings handler
+// and auth handler.
+func NewUsersRoute(
+	settingsHandler *usersettingshandler.UserSettingsHandler,
+	authHandler *authhandler.AuthHandler,
+) *UsersRoute {
+	route := new(UsersRoute)
+	route.settingsHandler = settingsHandler
+	route.authHandler = authHandler
+	return route
+}
+
+// RegisterRouter mounts GET and PATCH "/users/me/settings" on router. Both
+// endpoints are wrapped with authHandler.WithAppUserAuthChain.
+func (r *UsersRoute) RegisterRouter(router gin.IRouter) {
+	secured := r.authHandler.WithAppUserAuthChain
+
+	// /users/me is built as two nested groups, as before.
+	me := router.Group("/users").Group("/me")
+	me.GET("/settings", secured(r.settingsHandler.GetSettings)...)
+	me.PATCH("/settings", secured(r.settingsHandler.UpdateSettings)...)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/routes/v1/v1_route.go b/services/llm-api/internal/interfaces/httpserver/routes/v1/v1_route.go
new file mode 100644
index 00000000..6a805e5b
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/routes/v1/v1_route.go
@@ -0,0 +1,92 @@
+package v1
+
+import (
+	"net/http"
+
+	"jan-server/services/llm-api/internal/config"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/admin"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/chat"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/conversation"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/llm/projects"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/model"
+	"jan-server/services/llm-api/internal/interfaces/httpserver/routes/v1/users"
+
+	"github.com/gin-gonic/gin"
+)
+
+// V1Route aggregates the feature routes that RegisterRouter mounts under "/v1".
+type V1Route struct {
+	model        *model.ModelRoute
+	chat         *chat.ChatRoute
+	conversation *conversation.ConversationRoute
+	project      *projects.ProjectRoute
+	adminRoute   *admin.AdminRoute
+	users        *users.UsersRoute
+}
+
+// NewV1Route returns a V1Route holding the given feature routes.
+func NewV1Route(
+	model *model.ModelRoute,
+	chat *chat.ChatRoute,
+	conversation *conversation.ConversationRoute,
+	project *projects.ProjectRoute,
+	adminRoute *admin.AdminRoute,
+	users *users.UsersRoute) *V1Route {
+	// Keyed fields keep the mapping explicit if the struct is ever reordered.
+	return &V1Route{
+		model:        model,
+		chat:         chat,
+		conversation: conversation,
+		project:      project,
+		adminRoute:   adminRoute,
+		users:        users,
+	}
+}
+
+// RegisterRouter creates the "/v1" group on router, registers the version and
+// health endpoints on it, and then lets every feature route register itself.
+func (v1Route *V1Route) RegisterRouter(router gin.IRouter) {
+	group := router.Group("/v1")
+
+	// Service endpoints, registered here without any auth chain.
+	group.GET("/version", GetVersion)
+	group.GET("/healthz", GetHealthz)
+	group.GET("/readyz", GetReadyz)
+
+	// Feature routes; the registration order is unchanged.
+	v1Route.adminRoute.RegisterRouter(group)
+	v1Route.model.RegisterRouter(group)
+	v1Route.chat.RegisterRouter(group)
+	v1Route.conversation.RegisterRouter(group)
+	v1Route.project.RegisterRoutes(group)
+	v1Route.users.RegisterRouter(group)
+}
+
+// GetVersion godoc
+// @Summary Get API build version
+// @Description Returns the current build version of the API server and environment reload timestamp.
+// @Tags Server API
+// @Produce json
+// @Success 200 {object} map[string]string "Version information including version number and environment reload timestamp"
+// @Router /v1/version [get]
+func GetVersion(c *gin.Context) {
+	// The layout string below is the same as time.RFC3339.
+	reloadedAt := config.GetEnvReloadedAt().Format("2006-01-02T15:04:05Z07:00")
+
+	payload := gin.H{}
+	payload["version"] = config.Version
+	payload["env_reloaded_at"] = reloadedAt
+	c.JSON(http.StatusOK, payload)
+}
+
+// GetHealthz godoc
+// @Summary Health check endpoint
+// @Description Returns the health status of the API server. Used by orchestrators and monitoring systems.
+// @Tags Server API
+// @Produce json
+// @Success 200 {object} map[string]string "Health status OK"
+// @Router /v1/healthz [get]
+func GetHealthz(c *gin.Context) {
+	// Always answers 200 with a fixed body; nothing is probed here.
+	body := gin.H{"status": "ok"}
+	c.JSON(http.StatusOK, body)
+}
+
+// GetReadyz godoc
+// @Summary Readiness check endpoint
+// @Description Returns the readiness status of the API server. Indicates if the service is ready to accept traffic.
+// @Tags Server API
+// @Produce json
+// @Success 200 {object} map[string]string "Readiness status ready"
+// @Router /v1/readyz [get]
+func GetReadyz(c *gin.Context) {
+	// Always answers 200 with a fixed body; no dependency is checked here.
+	body := gin.H{"status": "ready"}
+	c.JSON(http.StatusOK, body)
+}
diff --git a/services/llm-api/internal/interfaces/httpserver/swagger_handler.go b/services/llm-api/internal/interfaces/httpserver/swagger_handler.go
new file mode 100644
index 00000000..8dc0f60a
--- /dev/null
+++ b/services/llm-api/internal/interfaces/httpserver/swagger_handler.go
@@ -0,0 +1,56 @@
+package httpserver
+
+import (
+	"encoding/json"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog/log"
+)
+
+// ServeCombinedSwagger returns a handler that serves
+// docs/swagger/swagger-combined.json when os.Stat succeeds on it, and
+// docs/swagger/swagger.json otherwise. Both paths are relative to the process
+// working directory. The file is read and parsed on every request; read or
+// parse failures are logged and answered with a 500 JSON error.
+func ServeCombinedSwagger() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		// Start from the regular spec and switch to the combined one if present.
+		specPath := filepath.Join(".", "docs", "swagger", "swagger.json")
+		label := "swagger"
+
+		combinedPath := filepath.Join(".", "docs", "swagger", "swagger-combined.json")
+		if _, statErr := os.Stat(combinedPath); statErr == nil {
+			specPath, label = combinedPath, "combined swagger"
+		}
+
+		raw, err := ioutil.ReadFile(specPath)
+		if err != nil {
+			log.Error().Err(err).Msg("Failed to read " + label)
+			c.JSON(500, gin.H{"error": "Failed to load API documentation"})
+			return
+		}
+
+		// Decode into a generic map so that only a valid JSON object is re-emitted.
+		var spec map[string]interface{}
+		if err := json.Unmarshal(raw, &spec); err != nil {
+			log.Error().Err(err).Msg("Failed to parse " + label)
+			c.JSON(500, gin.H{"error": "Failed to parse API documentation"})
+			return
+		}
+
+		c.JSON(200, spec)
+	}
+}
diff --git a/services/llm-api/internal/interfaces/interfaceproviders.go b/services/llm-api/internal/interfaces/interfaceproviders.go
new file mode 100644
index 00000000..98e5221b
--- /dev/null
+++ b/services/llm-api/internal/interfaces/interfaceproviders.go
@@ -0,0 +1,11 @@
+package interfaces
+
+import (
+	"jan-server/services/llm-api/internal/interfaces/httpserver"
+
+	"github.com/google/wire"
+)
+
+// InterfacesProvider is the wire provider set for the interfaces layer. It
+// currently contains only the HTTP server constructor.
+var InterfacesProvider = wire.NewSet(
+	httpserver.NewHttpServer,
+)
diff --git a/services/llm-api/internal/utils/crypto/crypto.go b/services/llm-api/internal/utils/crypto/crypto.go
new file mode 100644
index 00000000..edc69e74
--- /dev/null
+++ b/services/llm-api/internal/utils/crypto/crypto.go
@@ -0,0 +1,94 @@
+package crypto
+
+import (
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/rand"
+	"encoding/base64"
+	"errors"
+	"io"
+)
+
+// EncryptString encrypts plaintext with AES-256-GCM and returns
+// base64(nonce || ciphertext || tag) in standard encoding.
+//
+// The key is the first 32 bytes of secret, zero-padded when secret is shorter.
+// An empty secret is rejected. A fresh random nonce is drawn for every call,
+// so encrypting the same input twice yields different output.
+//
+// NOTE(review): padding/truncating the secret means no key-derivation function
+// is applied, and secrets sharing their first 32 bytes are interchangeable.
+func EncryptString(secret, plaintext string) (string, error) {
+	if len(secret) == 0 {
+		return "", errors.New("secret key cannot be empty")
+	}
+
+	// copy stops at the shorter operand: short secrets leave trailing zero
+	// bytes, long secrets are cut at 32 bytes.
+	key := make([]byte, 32)
+	copy(key, secret)
+
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return "", err
+	}
+	aead, err := cipher.NewGCM(block)
+	if err != nil {
+		return "", err
+	}
+
+	nonce := make([]byte, aead.NonceSize())
+	if _, err = io.ReadFull(rand.Reader, nonce); err != nil {
+		return "", err
+	}
+
+	// Seal appends to its first argument, so the result starts with the nonce.
+	sealed := aead.Seal(nonce, nonce, []byte(plaintext), nil)
+	return base64.StdEncoding.EncodeToString(sealed), nil
+}
+
+// DecryptString reverses EncryptString: it base64-decodes ciphertext, splits
+// off the leading nonce and opens the remainder with AES-256-GCM.
+//
+// The key is derived from secret exactly as in EncryptString (first 32 bytes,
+// zero-padded when shorter). Errors are returned for an empty secret, invalid
+// base64, input shorter than the nonce, and failed authentication.
+func DecryptString(secret, ciphertext string) (string, error) {
+	if len(secret) == 0 {
+		return "", errors.New("secret key cannot be empty")
+	}
+
+	// copy stops at the shorter operand, which pads or truncates to 32 bytes.
+	key := make([]byte, 32)
+	copy(key, secret)
+
+	sealed, err := base64.StdEncoding.DecodeString(ciphertext)
+	if err != nil {
+		return "", err
+	}
+
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return "", err
+	}
+	aead, err := cipher.NewGCM(block)
+	if err != nil {
+		return "", err
+	}
+
+	// The nonce is stored in front of the encrypted payload.
+	n := aead.NonceSize()
+	if len(sealed) < n {
+		return "", errors.New("ciphertext too short")
+	}
+
+	opened, err := aead.Open(nil, sealed[:n], sealed[n:], nil)
+	if err != nil {
+		return "", err
+	}
+	return string(opened), nil
+}
diff --git a/services/llm-api/internal/utils/functional/functional.go b/services/llm-api/internal/utils/functional/functional.go
new file mode 100644
index 00000000..21662d4c
--- /dev/null
+++ b/services/llm-api/internal/utils/functional/functional.go
@@ -0,0 +1,61 @@
+package functional
+
+// Map calls fn on every element of slice, in order, and returns the results
+// as a new slice of the same length. The result is never nil, even for a nil
+// or empty input.
+func Map[T any, U any](slice []T, fn func(T) U) []U {
+	mapped := make([]U, 0, len(slice))
+	for idx := 0; idx < len(slice); idx++ {
+		mapped = append(mapped, fn(slice[idx]))
+	}
+	return mapped
+}
+
+// Filter returns a new slice with the elements of slice for which predicate
+// reports true, in their original order. The result is never nil.
+func Filter[T any](slice []T, predicate func(T) bool) []T {
+	kept := []T{}
+	for idx := 0; idx < len(slice); idx++ {
+		if candidate := slice[idx]; predicate(candidate) {
+			kept = append(kept, candidate)
+		}
+	}
+	return kept
+}
+
+// Reduce folds slice from left to right: starting from initial, each element
+// is combined into the running value with fn. An empty slice returns initial.
+func Reduce[T any, U any](slice []T, initial U, fn func(U, T) U) U {
+	folded := initial
+	for idx := 0; idx < len(slice); idx++ {
+		folded = fn(folded, slice[idx])
+	}
+	return folded
+}
+
+// Find returns the first element of slice for which predicate reports true,
+// together with true. When no element matches it returns the zero value of T
+// and false.
+func Find[T any](slice []T, predicate func(T) bool) (T, bool) {
+	for idx := 0; idx < len(slice); idx++ {
+		if candidate := slice[idx]; predicate(candidate) {
+			return candidate, true
+		}
+	}
+	var none T
+	return none, false
+}
+
+// Any reports whether predicate is true for at least one element of slice.
+// Evaluation stops at the first match; an empty slice yields false.
+func Any[T any](slice []T, predicate func(T) bool) bool {
+	for idx := 0; idx < len(slice); idx++ {
+		if predicate(slice[idx]) {
+			return true
+		}
+	}
+	return false
+}
+
+// All reports whether predicate is true for every element of slice.
+// Evaluation stops at the first failure; an empty slice yields true.
+func All[T any](slice []T, predicate func(T) bool) bool {
+	for idx := 0; idx < len(slice); idx++ {
+		if ok := predicate(slice[idx]); !ok {
+			return false
+		}
+	}
+	return true
+}
diff --git a/services/llm-api/internal/utils/httpclients/chat/chat_completion_client.go b/services/llm-api/internal/utils/httpclients/chat/chat_completion_client.go
new file mode 100644
index 00000000..493b6bfc
--- /dev/null
+++ b/services/llm-api/internal/utils/httpclients/chat/chat_completion_client.go
@@ -0,0 +1,770 @@
+package chat
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"strings"
+	"sync"
+	"time"
+
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+	"github.com/sashabaranov/go-openai"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/codes"
+	"go.opentelemetry.io/otel/trace"
+	"resty.dev/v3"
+)
+
+// Tunables and wire-format literals used by the streaming chat client.
+const (
+	// requestTimeout is the deadline applied to the context of a streaming completion.
+	requestTimeout       = 120 * time.Second
+	// channelBufferSize is the capacity of the channel that carries upstream lines.
+	channelBufferSize    = 100
+	// errorBufferSize is the capacity of the channel that carries asynchronous stream errors.
+	errorBufferSize      = 10
+	// dataPrefix is the prefix that identifies a data line in the upstream stream.
+	dataPrefix           = "data: "
+	// doneMarker is the data payload that marks the end of the stream.
+	doneMarker           = "[DONE]"
+	// newlineChar is appended to every line written to the client.
+	newlineChar          = "\n"
+	// scannerInitialBuffer and scannerMaxBuffer are the initial and maximum
+	// buffer sizes of the line scanner that reads the upstream response body.
+	scannerInitialBuffer = 12 * 1024        // 12KB
+	scannerMaxBuffer     = 10 * 1024 * 1024 // 10MB
+)
+
+// StreamOption customizes the outgoing resty request of a streaming call. Nil
+// options are skipped when the options are applied.
+type StreamOption func(*resty.Request)
+
+// BeforeDoneCallback is called before writing [DONE] marker. A returned error
+// is logged by the streaming loop and does not stop the stream.
+type BeforeDoneCallback func(*gin.Context) error
+
+// TokenUsage holds the token counts decoded from the "usage" object of a
+// stream chunk.
+type TokenUsage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
+// ChoiceDelta is the incremental payload of one streamed choice: text content,
+// reasoning content, and fragments of a function call or of tool calls.
+type ChoiceDelta struct {
+	Content          string               `json:"content"`
+	ReasoningContent string               `json:"reasoning_content"`
+	FunctionCall     *openai.FunctionCall `json:"function_call,omitempty"`
+	ToolCalls        []openai.ToolCall    `json:"tool_calls,omitempty"`
+}
+
+// StreamChoice is one entry of the "choices" array of a stream chunk; only the
+// "delta" member is decoded.
+type StreamChoice struct {
+	Delta ChoiceDelta `json:"delta"`
+}
+
+// WithHeader returns a StreamOption that sets the request header key to value.
+// The option does nothing when key is empty or consists only of whitespace.
+// An empty value is handed to SetHeader as is.
+func WithHeader(key, value string) StreamOption {
+	return func(r *resty.Request) {
+		if strings.TrimSpace(key) != "" {
+			r.SetHeader(key, value)
+		}
+	}
+}
+
+// WithAcceptEncodingIdentity returns a StreamOption that sets the
+// Accept-Encoding request header to "identity".
+func WithAcceptEncodingIdentity() StreamOption {
+	return WithHeader("Accept-Encoding", "identity")
+}
+
+// ChatCompletionClient calls the /chat/completions endpoint of one upstream
+// provider, both in blocking and in streaming mode.
+type ChatCompletionClient struct {
+	// client is the resty client used to issue the HTTP requests.
+	client  *resty.Client
+	// baseURL is the provider base URL, normalized by the constructor.
+	baseURL string
+	// name labels the provider in trace attributes and log entries.
+	name    string
+}
+
+// functionCallAccumulator collects the fragments of a streamed function call.
+// Complete is set once a name is known and the accumulated arguments end
+// with "}".
+type functionCallAccumulator struct {
+	Name      string
+	Arguments string
+	Complete  bool
+}
+
+// toolCallAccumulator collects the fragments of one streamed tool call,
+// identified by its Index within the stream. Complete is set once a function
+// name is known and the accumulated arguments end with "}".
+type toolCallAccumulator struct {
+	ID       string
+	Type     string
+	Index    int
+	Function struct {
+		Name      string
+		Arguments string
+	}
+	Complete bool
+}
+
+// NewChatCompletionClient creates a client for the provider called name that
+// sends its requests through client. baseURL is stored in normalized form
+// (see normalizeBaseURL).
+func NewChatCompletionClient(client *resty.Client, name, baseURL string) *ChatCompletionClient {
+	completionClient := new(ChatCompletionClient)
+	completionClient.client = client
+	completionClient.name = name
+	completionClient.baseURL = normalizeBaseURL(baseURL)
+	return completionClient
+}
+
+// CreateChatCompletion posts request to the provider's /chat/completions
+// endpoint and returns the decoded response. apiKey, when not blank, is sent
+// as a bearer token.
+//
+// The call is wrapped in a client span that records the provider, model,
+// sampling parameters, duration, HTTP status, token usage and finish reason.
+// A transport error is returned unchanged; an error status is converted with
+// errorFromResponse.
+func (c *ChatCompletionClient) CreateChatCompletion(ctx context.Context, apiKey string, request openai.ChatCompletionRequest) (*openai.ChatCompletionResponse, error) {
+	tracer := otel.Tracer("chat-completion-client")
+	ctx, span := tracer.Start(ctx, "CreateChatCompletion",
+		trace.WithSpanKind(trace.SpanKindClient),
+		trace.WithAttributes(
+			attribute.String("llm.provider", c.name),
+			attribute.String("llm.model", request.Model),
+			attribute.Int("llm.message_count", len(request.Messages)),
+			attribute.Bool("llm.stream", false),
+		),
+	)
+	defer span.End()
+
+	// Sampling parameters are only recorded when the caller set them.
+	if request.Temperature != 0 {
+		span.SetAttributes(attribute.Float64("llm.temperature", float64(request.Temperature)))
+	}
+	if request.MaxTokens != 0 {
+		span.SetAttributes(attribute.Int("llm.max_tokens", request.MaxTokens))
+	}
+	if request.TopP != 0 {
+		span.SetAttributes(attribute.Float64("llm.top_p", float64(request.TopP)))
+	}
+
+	startedAt := time.Now()
+
+	var completion openai.ChatCompletionResponse
+	httpReq := c.prepareRequest(ctx, apiKey).
+		SetBody(request).
+		SetResult(&completion)
+	resp, err := httpReq.Post(c.endpoint("/chat/completions"))
+
+	durationAttr := attribute.Int64("llm.duration_ms", time.Since(startedAt).Milliseconds())
+
+	if err != nil {
+		span.RecordError(err)
+		span.SetStatus(codes.Error, err.Error())
+		span.SetAttributes(durationAttr)
+		return nil, err
+	}
+
+	statusAttr := attribute.Int("http.status_code", resp.StatusCode())
+	if resp.IsError() {
+		reqErr := c.errorFromResponse(ctx, resp, "request failed")
+		span.RecordError(reqErr)
+		span.SetStatus(codes.Error, reqErr.Error())
+		span.SetAttributes(statusAttr, durationAttr)
+		return nil, reqErr
+	}
+
+	// Token usage and timing of the successful call.
+	usage := completion.Usage
+	span.SetAttributes(
+		attribute.Int("llm.usage.prompt_tokens", usage.PromptTokens),
+		attribute.Int("llm.usage.completion_tokens", usage.CompletionTokens),
+		attribute.Int("llm.usage.total_tokens", usage.TotalTokens),
+		durationAttr,
+		statusAttr,
+	)
+
+	// The finish reason of the first choice, when there is one.
+	if len(completion.Choices) > 0 {
+		span.SetAttributes(attribute.String("llm.finish_reason", string(completion.Choices[0].FinishReason)))
+	}
+
+	// Reasoning tokens are reported only when the provider sent a positive count.
+	if details := usage.CompletionTokensDetails; details != nil && details.ReasoningTokens > 0 {
+		span.SetAttributes(attribute.Int("llm.usage.reasoning_tokens", details.ReasoningTokens))
+	}
+
+	span.SetStatus(codes.Ok, "completion successful")
+	span.AddEvent("chat_completion_completed", trace.WithAttributes(
+		attribute.Int("response.choice_count", len(completion.Choices)),
+	))
+
+	return &completion, nil
+}
+
+// CreateChatCompletionStream starts a streaming completion and returns a
+// reader over the raw upstream response body. A goroutine copies the body
+// into a pipe; a copy failure is delivered to the reader as its read error,
+// and the upstream body is closed when the copy ends.
+func (c *ChatCompletionClient) CreateChatCompletionStream(ctx context.Context, apiKey string, request openai.ChatCompletionRequest, opts ...StreamOption) (io.ReadCloser, error) {
+	resp, err := c.doStreamingRequest(ctx, apiKey, request, opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	pipeReader, pipeWriter := io.Pipe()
+	upstream := resp.RawResponse.Body
+
+	go func() {
+		defer func() {
+			closeErr := upstream.Close()
+			if closeErr == nil {
+				return
+			}
+			log := logger.GetLogger()
+			log.Error().Err(closeErr).Str("client", c.name).Msg("unable to close response body")
+		}()
+
+		_, copyErr := io.Copy(pipeWriter, upstream)
+		if copyErr == nil {
+			_ = pipeWriter.Close()
+			return
+		}
+		_ = pipeWriter.CloseWithError(copyErr)
+	}()
+
+	return pipeReader, nil
+}
+
+// StreamChatCompletionToContext streams a completion to the client behind
+// reqCtx without a before-done callback. It delegates to
+// StreamChatCompletionToContextWithCallback with a nil callback.
+func (c *ChatCompletionClient) StreamChatCompletionToContext(reqCtx *gin.Context, apiKey string, request openai.ChatCompletionRequest, opts ...StreamOption) (*openai.ChatCompletionResponse, error) {
+	return c.StreamChatCompletionToContextWithCallback(reqCtx, apiKey, request, nil, opts...)
+}
+
+// StreamChatCompletionToContextWithCallback sends request upstream as a
+// streaming chat completion, relays every upstream line to the client behind
+// reqCtx, and returns a response assembled from the streamed deltas.
+//
+// request.StreamOptions is overwritten so that the upstream is asked to
+// include usage. When beforeDone is non-nil it is invoked after the last data
+// chunk and before the "[DONE]" marker is written; a callback error is logged
+// and does not abort the stream. The whole exchange is bounded by
+// requestTimeout.
+//
+// The stream ends successfully on the "[DONE]" marker, or when the upstream
+// closes the body without reporting an error (in that case no "[DONE]" line
+// is written and beforeDone is not invoked). An error is returned when a
+// write to the client fails, the upstream request or read fails, the timeout
+// elapses, or the client request is cancelled.
+//
+// The Usage of the returned response carries the counts reported by the
+// upstream when it reported any; otherwise it is the estimate computed by
+// buildCompleteResponse.
+func (c *ChatCompletionClient) StreamChatCompletionToContextWithCallback(reqCtx *gin.Context, apiKey string, request openai.ChatCompletionRequest, beforeDone BeforeDoneCallback, opts ...StreamOption) (*openai.ChatCompletionResponse, error) {
+	// Start OpenTelemetry span for tracking streaming completion
+	ctx := reqCtx.Request.Context()
+	ctx, span := otel.Tracer("chat-completion-client").Start(ctx, "StreamChatCompletion",
+		trace.WithSpanKind(trace.SpanKindClient),
+		trace.WithAttributes(
+			attribute.String("llm.provider", c.name),
+			attribute.String("llm.model", request.Model),
+			attribute.Int("llm.message_count", len(request.Messages)),
+			attribute.Bool("llm.stream", true),
+		),
+	)
+	defer span.End()
+
+	// Add optional parameters as attributes
+	if request.Temperature != 0 {
+		span.SetAttributes(attribute.Float64("llm.temperature", float64(request.Temperature)))
+	}
+	if request.MaxTokens != 0 {
+		span.SetAttributes(attribute.Int("llm.max_tokens", request.MaxTokens))
+	}
+	if request.TopP != 0 {
+		span.SetAttributes(attribute.Float64("llm.top_p", float64(request.TopP)))
+	}
+
+	start := time.Now()
+
+	// force to true to collect tokens
+	request.StreamOptions = &openai.StreamOptions{
+		IncludeUsage: true,
+	}
+
+	streamCtx, cancel := context.WithTimeout(ctx, requestTimeout)
+	defer cancel()
+
+	c.SetupSSEHeaders(reqCtx)
+
+	dataChan := make(chan string, channelBufferSize)
+	errChan := make(chan error, errorBufferSize)
+
+	var wg sync.WaitGroup
+	wg.Add(1)
+
+	// The producer does not close dataChan itself. Closing it here, once the
+	// producer has returned, lets the loop below notice an upstream that ends
+	// without a [DONE] marker instead of blocking until the timeout fires.
+	// This goroutine is the only place where dataChan is closed.
+	go func() {
+		defer close(dataChan)
+		c.streamResponseToChannel(streamCtx, apiKey, request, dataChan, errChan, &wg, opts)
+	}()
+
+	var contentBuilder strings.Builder
+	var reasoningBuilder strings.Builder
+	functionCalls := make(map[int]*functionCallAccumulator)
+	toolCalls := make(map[int]*toolCallAccumulator)
+
+	// Track streaming metrics
+	var chunksReceived int
+	var totalUsage *TokenUsage
+
+	streamingComplete := false
+
+	for !streamingComplete {
+		select {
+		case line, ok := <-dataChan:
+			if !ok {
+				// The producer has returned and every buffered line has been
+				// consumed. It reports failures on errChan before returning,
+				// so a pending error wins over a clean end of stream.
+				select {
+				case err := <-errChan:
+					if err != nil {
+						cancel()
+						wg.Wait()
+						span.RecordError(err)
+						span.SetStatus(codes.Error, "streaming error")
+						return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "streaming error")
+					}
+				default:
+				}
+				streamingComplete = true
+				break
+			}
+
+			// Check if this is the [DONE] marker BEFORE writing it
+			if data, found := strings.CutPrefix(line, dataPrefix); found {
+				if data == doneMarker {
+					// Call the beforeDone callback BEFORE sending [DONE]
+					if beforeDone != nil {
+						if err := beforeDone(reqCtx); err != nil {
+							log := logger.GetLogger()
+							log.Warn().Err(err).Msg("beforeDone callback failed")
+						}
+					}
+					// Now write the [DONE] marker
+					if err := c.writeSSELine(reqCtx, line); err != nil {
+						cancel()
+						wg.Wait()
+						span.RecordError(err)
+						span.SetStatus(codes.Error, "failed to write SSE done marker")
+						return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "unable to write SSE line")
+					}
+					streamingComplete = true
+					cancel()
+					break
+				}
+			}
+
+			// Write the line for non-[DONE] events
+			if err := c.writeSSELine(reqCtx, line); err != nil {
+				cancel()
+				wg.Wait()
+				span.RecordError(err)
+				span.SetStatus(codes.Error, "failed to write SSE line")
+				return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "unable to write SSE line")
+			}
+
+			// Process the data chunk
+			if data, found := strings.CutPrefix(line, dataPrefix); found {
+				chunksReceived++
+
+				choice, usage := c.processStreamChunkForChannel(data)
+
+				// Capture final usage if available
+				if usage != nil {
+					totalUsage = usage
+				}
+
+				if choice != nil {
+					if choice.Delta.Content != "" {
+						contentBuilder.WriteString(choice.Delta.Content)
+					}
+
+					if choice.Delta.ReasoningContent != "" {
+						reasoningBuilder.WriteString(choice.Delta.ReasoningContent)
+					}
+
+					if choice.Delta.FunctionCall != nil {
+						c.handleStreamingFunctionCall(choice.Delta.FunctionCall, functionCalls)
+					}
+
+					// A single chunk may carry fragments for several tool
+					// calls; accumulate every one of them, not only the first.
+					for i := range choice.Delta.ToolCalls {
+						c.handleStreamingToolCall(&choice.Delta.ToolCalls[i], toolCalls)
+					}
+				}
+			}
+
+		case err, ok := <-errChan:
+			if ok && err != nil {
+				cancel()
+				wg.Wait()
+				span.RecordError(err)
+				span.SetStatus(codes.Error, "streaming error")
+				return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, err, "streaming error")
+			}
+
+		case <-streamCtx.Done():
+			wg.Wait()
+			span.RecordError(streamCtx.Err())
+			span.SetStatus(codes.Error, "streaming context cancelled")
+			return nil, platformerrors.AsError(ctx, platformerrors.LayerDomain, streamCtx.Err(), "streaming context cancelled")
+
+		case <-reqCtx.Request.Context().Done():
+			cancel()
+			wg.Wait()
+			span.RecordError(reqCtx.Request.Context().Err())
+			span.SetStatus(codes.Error, "client request cancelled")
+			return nil, platformerrors.AsError(reqCtx.Request.Context(), platformerrors.LayerDomain, reqCtx.Request.Context().Err(), "client request cancelled")
+		}
+	}
+
+	// Stop the producer and wait for it; it may still be blocked on a send.
+	cancel()
+	wg.Wait()
+
+	duration := time.Since(start)
+
+	response := c.buildCompleteResponse(
+		contentBuilder.String(),
+		reasoningBuilder.String(),
+		functionCalls,
+		toolCalls,
+		request.Model,
+		request,
+	)
+
+	// Prefer the usage reported by the upstream over the local estimate, but
+	// keep the estimate when the upstream sent an all-zero usage object.
+	if totalUsage != nil && (totalUsage.PromptTokens > 0 || totalUsage.CompletionTokens > 0 || totalUsage.TotalTokens > 0) {
+		response.Usage.PromptTokens = totalUsage.PromptTokens
+		response.Usage.CompletionTokens = totalUsage.CompletionTokens
+		response.Usage.TotalTokens = totalUsage.TotalTokens
+	}
+
+	// Record streaming metrics in span
+	span.SetAttributes(
+		attribute.Int("llm.streaming.chunks_received", chunksReceived),
+		attribute.Int64("llm.duration_ms", duration.Milliseconds()),
+	)
+
+	// Add token usage if available from streaming
+	if totalUsage != nil {
+		span.SetAttributes(
+			attribute.Int("llm.usage.prompt_tokens", totalUsage.PromptTokens),
+			attribute.Int("llm.usage.completion_tokens", totalUsage.CompletionTokens),
+			attribute.Int("llm.usage.total_tokens", totalUsage.TotalTokens),
+		)
+	} else {
+		// Use estimated usage from response
+		span.SetAttributes(
+			attribute.Int("llm.usage.prompt_tokens", response.Usage.PromptTokens),
+			attribute.Int("llm.usage.completion_tokens", response.Usage.CompletionTokens),
+			attribute.Int("llm.usage.total_tokens", response.Usage.TotalTokens),
+		)
+	}
+
+	// Add finish reason if available
+	if len(response.Choices) > 0 {
+		span.SetAttributes(attribute.String("llm.finish_reason", string(response.Choices[0].FinishReason)))
+	}
+
+	span.SetStatus(codes.Ok, "streaming completion successful")
+	span.AddEvent("streaming_completed", trace.WithAttributes(
+		attribute.Int("chunks.total", chunksReceived),
+		attribute.Int("content.length", contentBuilder.Len()),
+	))
+
+	return &response, nil
+}
+
+// SetupSSEHeaders sets the response headers used for the event stream on
+// reqCtx and writes the header immediately. A nil reqCtx is ignored.
+func (c *ChatCompletionClient) SetupSSEHeaders(reqCtx *gin.Context) {
+	if reqCtx == nil {
+		return
+	}
+
+	sseHeaders := [][2]string{
+		{"Content-Type", "text/event-stream"},
+		{"Cache-Control", "no-cache"},
+		{"Connection", "keep-alive"},
+		{"Access-Control-Allow-Origin", "*"},
+		{"Access-Control-Allow-Headers", "Cache-Control"},
+		{"Transfer-Encoding", "chunked"},
+	}
+	for _, header := range sseHeaders {
+		reqCtx.Header(header[0], header[1])
+	}
+	reqCtx.Writer.WriteHeaderNow()
+}
+
+// prepareRequest creates a resty request bound to ctx with a JSON content
+// type. When apiKey is not blank it is sent, untrimmed, as a bearer token in
+// the Authorization header.
+func (c *ChatCompletionClient) prepareRequest(ctx context.Context, apiKey string) *resty.Request {
+	req := c.client.R().
+		SetContext(ctx).
+		SetHeader("Content-Type", "application/json")
+	if strings.TrimSpace(apiKey) == "" {
+		return req
+	}
+	return req.SetHeader("Authorization", fmt.Sprintf("Bearer %s", apiKey))
+}
+
+// endpoint resolves path against the client's base URL. An empty path yields
+// the base URL; an absolute http(s) URL, or any path when no base URL is
+// configured, is returned unchanged; otherwise base URL and path are joined
+// with a "/" between them unless path already starts with one.
+func (c *ChatCompletionClient) endpoint(path string) string {
+	switch {
+	case path == "":
+		return c.baseURL
+	case strings.HasPrefix(path, "http://"), strings.HasPrefix(path, "https://"):
+		return path
+	case c.baseURL == "":
+		return path
+	case strings.HasPrefix(path, "/"):
+		return c.baseURL + path
+	default:
+		return c.baseURL + "/" + path
+	}
+}
+
+// errorFromResponse turns an error response into an external-type platform
+// error. When the response body can be read and is not blank, the trimmed
+// body is appended to message; in every other case the error carries message
+// alone. The body is closed before returning.
+func (c *ChatCompletionClient) errorFromResponse(ctx context.Context, resp *resty.Response, message string) error {
+	if resp == nil || resp.RawResponse == nil || resp.RawResponse.Body == nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, message, nil, "3476dd55-5fc0-4653-bd10-665895ecc099")
+	}
+
+	rawBody := resp.RawResponse.Body
+	defer rawBody.Close()
+
+	payload, readErr := io.ReadAll(rawBody)
+	switch detail := strings.TrimSpace(string(payload)); {
+	case readErr != nil:
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, message, nil, "8cd2cae7-9ad9-40fe-ac00-8f9b24251064")
+	case detail == "":
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, message, nil, "b8797de4-38cb-4bd9-9ae8-b9a04e70f6ab")
+	default:
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, fmt.Sprintf("%s: %s", message, detail), nil, "a1f46e0d-4017-4411-ac05-987946c3066d")
+	}
+}
+
+// doStreamingRequest posts request to /chat/completions with response parsing
+// disabled, so the caller reads the raw body and is responsible for closing
+// it. Non-nil opts are applied in order before sending; if none of them set
+// Accept-Encoding it defaults to "identity". An error is returned for a
+// transport failure, an error status, or a missing response body.
+func (c *ChatCompletionClient) doStreamingRequest(ctx context.Context, apiKey string, request openai.ChatCompletionRequest, opts ...StreamOption) (*resty.Response, error) {
+	req := c.prepareRequest(ctx, apiKey).SetBody(request).SetDoNotParseResponse(true)
+
+	for _, apply := range opts {
+		if apply != nil {
+			apply(req)
+		}
+	}
+
+	if req.Header.Get("Accept-Encoding") == "" {
+		req.SetHeader("Accept-Encoding", "identity")
+	}
+
+	resp, err := req.Post(c.endpoint("/chat/completions"))
+	switch {
+	case err != nil:
+		return nil, err
+	case resp.IsError():
+		return nil, c.errorFromResponse(ctx, resp, "streaming request failed")
+	case resp.RawResponse == nil || resp.RawResponse.Body == nil:
+		return nil, platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, "streaming request failed: empty response body", nil, "1b3ab461-dbf9-4034-8abb-dfc6ea8486c5")
+	}
+
+	return resp, nil
+}
+
+// streamResponseToChannel performs the streaming request and forwards the
+// response body, line by line, to dataChan. It stops when the body is
+// exhausted, when reading fails, or when ctx is done; failures, including the
+// context error, are reported on errChan without blocking. wg.Done is called
+// on return. dataChan is not closed by this function.
+func (c *ChatCompletionClient) streamResponseToChannel(ctx context.Context, apiKey string, request openai.ChatCompletionRequest, dataChan chan<- string, errChan chan<- error, wg *sync.WaitGroup, opts []StreamOption) {
+	defer wg.Done()
+
+	resp, err := c.doStreamingRequest(ctx, apiKey, request, opts...)
+	if err != nil {
+		c.sendAsyncError(errChan, err)
+		return
+	}
+
+	defer func() {
+		closeErr := resp.RawResponse.Body.Close()
+		if closeErr == nil {
+			return
+		}
+		log := logger.GetLogger()
+		log.Error().Err(closeErr).Str("client", c.name).Msg("unable to close response body")
+	}()
+
+	lines := bufio.NewScanner(resp.RawResponse.Body)
+	lines.Buffer(make([]byte, 0, scannerInitialBuffer), scannerMaxBuffer)
+
+	for lines.Scan() {
+		// Give up before forwarding when the context is already done.
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			c.sendAsyncError(errChan, ctxErr)
+			return
+		}
+
+		// The send blocks while the channel is full, so keep watching ctx.
+		select {
+		case dataChan <- lines.Text():
+		case <-ctx.Done():
+			c.sendAsyncError(errChan, ctx.Err())
+			return
+		}
+	}
+
+	if scanErr := lines.Err(); scanErr != nil {
+		c.sendAsyncError(errChan, scanErr)
+	}
+}
+
+// writeSSELine writes line followed by a newline to the response writer of
+// reqCtx and flushes it. It returns a validation error when reqCtx is nil and
+// the write error when writing fails.
+func (c *ChatCompletionClient) writeSSELine(reqCtx *gin.Context, line string) error {
+	if reqCtx == nil {
+		return platformerrors.NewError(context.Background(), platformerrors.LayerDomain, platformerrors.ErrorTypeValidation, "nil gin context provided", nil, "8ee6e88f-07e9-49e5-9c7a-6e1dfe151456")
+	}
+	payload := []byte(line + newlineChar)
+	if _, writeErr := reqCtx.Writer.Write(payload); writeErr != nil {
+		return writeErr
+	}
+	reqCtx.Writer.Flush()
+	return nil
+}
+
+// processStreamChunkForChannel decodes one stream chunk and merges all of its
+// choices into a single StreamChoice: content and reasoning content are
+// concatenated in choice order, while the function call and the tool calls
+// are taken from the last choice that carries them. The chunk's usage object
+// is returned alongside and is nil when the chunk has none. When data is not
+// valid JSON the failure is logged and (nil, nil) is returned.
+func (c *ChatCompletionClient) processStreamChunkForChannel(data string) (*StreamChoice, *TokenUsage) {
+	var chunk struct {
+		Choices []StreamChoice `json:"choices"`
+		Usage   *TokenUsage    `json:"usage"`
+	}
+
+	if err := json.Unmarshal([]byte(data), &chunk); err != nil {
+		log := logger.GetLogger()
+		log.Error().Err(err).Str("client", c.name).Str("data", data).Msg("failed to parse stream chunk JSON")
+		return nil, nil
+	}
+
+	merged := new(StreamChoice)
+	for i := range chunk.Choices {
+		delta := &chunk.Choices[i].Delta
+
+		// Appending an empty string is a no-op, so no emptiness guard is needed.
+		merged.Delta.Content += delta.Content
+		merged.Delta.ReasoningContent += delta.ReasoningContent
+
+		if delta.FunctionCall != nil {
+			merged.Delta.FunctionCall = delta.FunctionCall
+		}
+
+		// TODO: Handle multiple tool calls if needed
+		if len(delta.ToolCalls) > 0 {
+			merged.Delta.ToolCalls = delta.ToolCalls
+		}
+	}
+
+	return merged, chunk.Usage
+}
+
+func (c *ChatCompletionClient) handleStreamingFunctionCall(functionCall *openai.FunctionCall, accumulator map[int]*functionCallAccumulator) {
+	if functionCall == nil {
+		return
+	}
+
+	index := 0
+	if accumulator[index] == nil {
+		accumulator[index] = &functionCallAccumulator{}
+	}
+
+	if functionCall.Name != "" {
+		accumulator[index].Name = functionCall.Name
+	}
+	if functionCall.Arguments != "" {
+		accumulator[index].Arguments += functionCall.Arguments
+	}
+
+	if accumulator[index].Name != "" && accumulator[index].Arguments != "" && strings.HasSuffix(accumulator[index].Arguments, "}") {
+		accumulator[index].Complete = true
+	}
+}
+
+// handleStreamingToolCall folds one tool-call fragment into the accumulator
+// entry for the fragment's index. A fragment that is nil or has no index is
+// ignored.
+//
+// The ID and type are taken from the first fragment that carries a non-empty
+// value, so they are not lost when they arrive after the fragment that
+// created the entry. A non-empty function name replaces the stored name and
+// argument text is appended. The entry is marked complete once it has a
+// function name and its arguments end with "}".
+func (c *ChatCompletionClient) handleStreamingToolCall(toolCall *openai.ToolCall, accumulator map[int]*toolCallAccumulator) {
+	if toolCall == nil || toolCall.Index == nil {
+		return
+	}
+
+	index := *toolCall.Index
+	acc := accumulator[index]
+	if acc == nil {
+		acc = &toolCallAccumulator{Index: index}
+		accumulator[index] = acc
+	}
+
+	// Backfill identity fields from whichever fragment provides them first.
+	if acc.ID == "" && toolCall.ID != "" {
+		acc.ID = toolCall.ID
+	}
+	if acc.Type == "" && toolCall.Type != "" {
+		acc.Type = string(toolCall.Type)
+	}
+
+	if toolCall.Function.Name != "" {
+		acc.Function.Name = toolCall.Function.Name
+	}
+	if toolCall.Function.Arguments != "" {
+		acc.Function.Arguments += toolCall.Function.Arguments
+	}
+
+	if acc.Function.Name != "" && acc.Function.Arguments != "" && strings.HasSuffix(acc.Function.Arguments, "}") {
+		acc.Complete = true
+	}
+}
+
+// buildCompleteResponse assembles a non-streaming style response from the
+// pieces collected while streaming.
+//
+// The single choice holds an assistant message with content and, when not
+// empty, reasoning. A completed function call sets the finish reason to
+// function_call. Completed tool calls are attached in ascending stream-index
+// order and set the finish reason to tool_calls; incomplete accumulators are
+// dropped. Usage is a whitespace-word estimate computed by estimateTokens
+// from request.Messages and the assembled message. The response ID is left
+// empty and Created is the current time.
+func (c *ChatCompletionClient) buildCompleteResponse(content string, reasoning string, functionCallAccumulator map[int]*functionCallAccumulator, toolCallAccumulator map[int]*toolCallAccumulator, model string, request openai.ChatCompletionRequest) openai.ChatCompletionResponse {
+	message := openai.ChatCompletionMessage{
+		Role:    openai.ChatMessageRoleAssistant,
+		Content: content,
+	}
+
+	if reasoning != "" {
+		message.ReasoningContent = reasoning
+	}
+
+	finishReason := openai.FinishReasonStop
+
+	for _, acc := range functionCallAccumulator {
+		if acc != nil && acc.Complete {
+			message.FunctionCall = &openai.FunctionCall{
+				Name:      acc.Name,
+				Arguments: acc.Arguments,
+			}
+			finishReason = openai.FinishReasonFunctionCall
+			break
+		}
+	}
+
+	if len(toolCallAccumulator) > 0 {
+		// Map iteration order is random, so visit the entries by ascending
+		// index to return the tool calls in the order the model produced them.
+		indices := make([]int, 0, len(toolCallAccumulator))
+		for index := range toolCallAccumulator {
+			indices = append(indices, index)
+		}
+		// Insertion sort: the list is tiny and this needs no extra import.
+		for i := 1; i < len(indices); i++ {
+			for j := i; j > 0 && indices[j-1] > indices[j]; j-- {
+				indices[j-1], indices[j] = indices[j], indices[j-1]
+			}
+		}
+
+		var toolCalls []openai.ToolCall
+		for _, index := range indices {
+			acc := toolCallAccumulator[index]
+			if acc == nil || !acc.Complete {
+				continue
+			}
+			toolCalls = append(toolCalls, openai.ToolCall{
+				ID:   acc.ID,
+				Type: openai.ToolType(acc.Type),
+				Function: openai.FunctionCall{
+					Name:      acc.Function.Name,
+					Arguments: acc.Function.Arguments,
+				},
+			})
+		}
+
+		if len(toolCalls) > 0 {
+			message.ToolCalls = toolCalls
+			finishReason = openai.FinishReasonToolCalls
+		}
+	}
+
+	choices := []openai.ChatCompletionChoice{
+		{
+			Index:        0,
+			Message:      message,
+			FinishReason: finishReason,
+		},
+	}
+
+	promptTokens := c.estimateTokens(request.Messages)
+	completionTokens := c.estimateTokens([]openai.ChatCompletionMessage{message})
+	totalTokens := promptTokens + completionTokens
+
+	return openai.ChatCompletionResponse{
+		ID:      "",
+		Object:  "chat.completion",
+		Created: time.Now().Unix(),
+		Model:   model,
+		Choices: choices,
+		Usage: openai.Usage{
+			PromptTokens:     promptTokens,
+			CompletionTokens: completionTokens,
+			TotalTokens:      totalTokens,
+		},
+	}
+}
+
+func (c *ChatCompletionClient) estimateTokens(messages []openai.ChatCompletionMessage) int {
+	var allText strings.Builder
+
+	for _, msg := range messages {
+		allText.WriteString(msg.Content)
+		allText.WriteString(" ")
+
+		if msg.FunctionCall != nil {
+			allText.WriteString(msg.FunctionCall.Name)
+			allText.WriteString(" ")
+			allText.WriteString(msg.FunctionCall.Arguments)
+			allText.WriteString(" ")
+		}
+
+		for _, toolCall := range msg.ToolCalls {
+			allText.WriteString(toolCall.ID)
+			allText.WriteString(" ")
+			allText.WriteString(toolCall.Function.Name)
+			allText.WriteString(" ")
+			allText.WriteString(toolCall.Function.Arguments)
+			allText.WriteString(" ")
+		}
+	}
+
+	normalized := strings.Join(strings.Fields(allText.String()), " ")
+	words := strings.Fields(normalized)
+	return len(words)
+}
+
+// sendAsyncError offers err to errChan without blocking: a nil err is
+// ignored, and the error is dropped when the channel cannot accept it right
+// away.
+func (c *ChatCompletionClient) sendAsyncError(errChan chan<- error, err error) {
+	if err != nil {
+		select {
+		case errChan <- err:
+		default:
+		}
+	}
+}
+
+// BaseURL returns the base URL stored on the client.
+func (c *ChatCompletionClient) BaseURL() string {
+	return c.baseURL
+}
+
+// normalizeBaseURL strips surrounding whitespace from baseURL and then removes
+// every trailing "/".
+func normalizeBaseURL(baseURL string) string {
+	return strings.TrimRight(strings.TrimSpace(baseURL), "/")
+}
+
+// statusCode returns the HTTP status code of resp, or 0 when resp is nil.
+func statusCode(resp *resty.Response) int {
+	code := 0
+	if resp != nil {
+		code = resp.StatusCode()
+	}
+	return code
+}
diff --git a/services/llm-api/internal/utils/httpclients/chat/chat_model_client.go b/services/llm-api/internal/utils/httpclients/chat/chat_model_client.go
new file mode 100644
index 00000000..f77e8303
--- /dev/null
+++ b/services/llm-api/internal/utils/httpclients/chat/chat_model_client.go
@@ -0,0 +1,136 @@
+package chat
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"strings"
+
+	"jan-server/services/llm-api/internal/utils/platformerrors"
+
+	"resty.dev/v3"
+)
+
+// ChatModelClient queries the /models endpoint of one upstream provider.
+type ChatModelClient struct {
+	// client is the resty client used to issue the HTTP requests.
+	client  *resty.Client
+	// baseURL is the provider base URL, normalized by the constructor.
+	baseURL string
+	// name is the provider name given to the constructor.
+	name    string
+}
+
+// ModelsResponse is the decoded body of a model listing: an object type and
+// the list of models.
+type ModelsResponse struct {
+	Object string  `json:"object"`
+	Data   []Model `json:"data"`
+}
+
+// Model describes one entry of a model listing. Decoding goes through the
+// custom UnmarshalJSON, which also fills Raw and supplies a fallback for an
+// empty DisplayName.
+type Model struct {
+	ID            string         `json:"id"`
+	Object        string         `json:"object"`
+	OwnedBy       string         `json:"owned_by"`
+	Created       int            `json:"created"`
+	DisplayName   string         `json:"display_name"`
+	Name          string         `json:"name"`
+	CanonicalSlug string         `json:"canonical_slug"`
+	// Raw holds the complete decoded JSON object; it is excluded from JSON encoding.
+	Raw           map[string]any `json:"-"`
+}
+
+// UnmarshalJSON decodes data twice: once into the typed fields and once into
+// a generic map that is kept in Raw. An empty DisplayName then falls back to
+// the raw "display_name", the raw "name", or the ID, in that order. Empty
+// Name, OwnedBy and CanonicalSlug are taken from the raw map when it holds a
+// string for them, and a numeric raw "created" overrides Created.
+func (m *Model) UnmarshalJSON(data []byte) error {
+	// plain has the fields of Model but none of its methods, which keeps
+	// json.Unmarshal from calling this method again.
+	type plain Model
+	var decoded plain
+	if err := json.Unmarshal(data, &decoded); err != nil {
+		return err
+	}
+	var raw map[string]any
+	if err := json.Unmarshal(data, &raw); err != nil {
+		return err
+	}
+	*m = Model(decoded)
+	m.Raw = raw
+
+	// rawString reports the string stored under key, if any.
+	rawString := func(key string) (string, bool) {
+		text, isString := raw[key].(string)
+		return text, isString
+	}
+
+	if m.DisplayName == "" {
+		display, _ := rawString("display_name")
+		name, _ := rawString("name")
+		switch {
+		case display != "":
+			m.DisplayName = display
+		case name != "":
+			m.DisplayName = name
+		default:
+			m.DisplayName = m.ID
+		}
+	}
+	if m.Name == "" {
+		if name, ok := rawString("name"); ok {
+			m.Name = name
+		}
+	}
+	if m.OwnedBy == "" {
+		if ownedBy, ok := rawString("owned_by"); ok {
+			m.OwnedBy = ownedBy
+		}
+	}
+	if m.CanonicalSlug == "" {
+		if slug, ok := rawString("canonical_slug"); ok {
+			m.CanonicalSlug = slug
+		}
+	}
+
+	// A missing key yields a nil interface, which matches neither case.
+	switch created := raw["created"].(type) {
+	case float64:
+		m.Created = int(created)
+	case int:
+		m.Created = created
+	}
+	return nil
+}
+
+// NewChatModelClient creates a model-listing client for the provider called
+// name that sends its requests through client. baseURL is stored in
+// normalized form (see normalizeBaseURL).
+func NewChatModelClient(client *resty.Client, name, baseURL string) *ChatModelClient {
+	modelClient := new(ChatModelClient)
+	modelClient.client = client
+	modelClient.name = name
+	modelClient.baseURL = normalizeBaseURL(baseURL)
+	return modelClient
+}
+
+func (c *ChatModelClient) ListModels(ctx context.Context) (*ModelsResponse, error) {
+	var respBody ModelsResponse
+	resp, err := c.client.R().
+		SetContext(ctx).
+		SetResult(&respBody).
+		Get(c.endpoint("/models"))
+	if err != nil {
+		return nil, err
+	}
+	if resp.IsError() {
+		return nil, c.errorFromResponse(ctx, resp, "list models request failed")
+	}
+	return &respBody, nil
+}
+
+// endpoint resolves path against the client's base URL. An empty path yields
+// the base URL; an absolute http(s) URL, or any path when no base URL is
+// configured, is returned unchanged; otherwise base URL and path are joined
+// with exactly one "/" contributed by the join.
+func (c *ChatModelClient) endpoint(path string) string {
+	if path == "" {
+		return c.baseURL
+	}
+	isAbsolute := strings.HasPrefix(path, "http://") || strings.HasPrefix(path, "https://")
+	if isAbsolute || c.baseURL == "" {
+		return path
+	}
+	// Dropping one leading "/" and re-adding it covers both relative forms.
+	return c.baseURL + "/" + strings.TrimPrefix(path, "/")
+}
+
+// errorFromResponse turns an error response into an external-type platform
+// error whose text is "<message> with status <code>". When the response body
+// can be read and is not blank, the trimmed body is appended after ": ". The
+// body is closed before returning.
+func (c *ChatModelClient) errorFromResponse(ctx context.Context, resp *resty.Response, message string) error {
+	// statusCode tolerates a nil response, so the summary can be built up front.
+	summary := fmt.Sprintf("%s with status %d", message, statusCode(resp))
+
+	if resp == nil || resp.RawResponse == nil || resp.RawResponse.Body == nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, summary, nil, "f4ea9b1a-e011-47f5-8704-4552e4901532")
+	}
+
+	rawBody := resp.RawResponse.Body
+	defer rawBody.Close()
+
+	payload, readErr := io.ReadAll(rawBody)
+	if readErr != nil {
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, summary, nil, "bb39f602-d488-4ed2-89ef-0c37b24ebe0e")
+	}
+
+	detail := strings.TrimSpace(string(payload))
+	if detail == "" {
+		return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, summary, nil, "0d526244-69a7-4d93-82f5-8bfbcb3dbf57")
+	}
+	return platformerrors.NewError(ctx, platformerrors.LayerDomain, platformerrors.ErrorTypeExternal, summary+": "+detail, nil, "1d3cd5df-956e-46e7-80de-8dca838b91eb")
+}
diff --git a/services/llm-api/internal/utils/httpclients/resty.go b/services/llm-api/internal/utils/httpclients/resty.go
new file mode 100644
index 00000000..279cdedc
--- /dev/null
+++ b/services/llm-api/internal/utils/httpclients/resty.go
@@ -0,0 +1,55 @@
+package httpclients
+
+import (
+	"context"
+	"time"
+
+	"jan-server/services/llm-api/internal/infrastructure/logger"
+
+	"resty.dev/v3"
+)
+
+// RequestID is the context key under which the response middleware looks up
+// the request ID string.
+type RequestID struct{}
+
+// HTTPClientStartsAt is the context key under which the request middleware
+// stores the time at which the request started.
+type HTTPClientStartsAt struct{}
+
+// HTTPClientRequestBody is the context key under which the request middleware
+// stores the request body for later logging.
+type HTTPClientRequestBody struct{}
+
+// NewClient creates a resty client that logs every response at info level
+// under the given clientName. A request middleware stores the start time and
+// the request body in the request context; a response middleware reads them
+// back and logs the request ID, status, method, path, query, both bodies and
+// the latency. The response body is logged only when response parsing is
+// enabled for the request.
+func NewClient(clientName string) *resty.Client {
+	client := resty.New()
+
+	client.AddRequestMiddleware(func(_ *resty.Client, req *resty.Request) error {
+		ctx := context.WithValue(req.Context(), HTTPClientStartsAt{}, time.Now())
+		ctx = context.WithValue(ctx, HTTPClientRequestBody{}, req.Body)
+		req.SetContext(ctx)
+		return nil
+	})
+
+	client.AddResponseMiddleware(func(_ *resty.Client, resp *resty.Response) error {
+		log := logger.GetLogger()
+		reqCtx := resp.Request.Context()
+
+		// A missing or non-string request ID is logged as an empty string.
+		requestID, _ := reqCtx.Value(RequestID{}).(string)
+		startedAt, _ := reqCtx.Value(HTTPClientStartsAt{}).(time.Time)
+		requestBody := reqCtx.Value(HTTPClientRequestBody{})
+		latency := time.Since(startedAt)
+
+		var responseBody any
+		if !resp.Request.DoNotParseResponse {
+			responseBody = resp.Result()
+		}
+
+		rawReq := resp.Request.RawRequest
+		log.Info().
+			Str("request_id", requestID).
+			Str("client", clientName).
+			Int("status", resp.StatusCode()).
+			Str("method", rawReq.Method).
+			Str("path", rawReq.URL.Path).
+			Str("query", rawReq.URL.RawQuery).
+			Interface("req_body", requestBody).
+			Interface("resp_body", responseBody).
+			Dur("latency", latency).
+			Msg("HTTP client request")
+		return nil
+	})
+
+	return client
+}
diff --git a/apps/jan-api-gateway/application/app/utils/idgen/generator.go b/services/llm-api/internal/utils/idgen/generator.go
similarity index 89%
rename from apps/jan-api-gateway/application/app/utils/idgen/generator.go
rename to services/llm-api/internal/utils/idgen/generator.go
index 59e8c04d..139d3d57 100644
--- a/apps/jan-api-gateway/application/app/utils/idgen/generator.go
+++ b/services/llm-api/internal/utils/idgen/generator.go
@@ -7,8 +7,6 @@ import (
 	"encoding/hex"
 	"fmt"
 	"strings"
-
-	"menlo.ai/jan-api-gateway/config/environment_variables"
 )
 
 // GenerateSecureID generates a cryptographically secure ID with the given prefix and length
@@ -52,13 +50,12 @@ func ValidateIDFormat(id, expectedPrefix string) bool {
 			return false
 		}
 	}
-
 	return true
 }
 
-func HashKey(key string) string {
-	h := hmac.New(sha256.New, []byte(environment_variables.EnvironmentVariables.APIKEY_SECRET))
+// HashKey256 generates HMAC-SHA256 hash of a key with a secret
+func HashKey256(key string, secret []byte) string {
+	h := hmac.New(sha256.New, secret)
 	h.Write([]byte(key))
-
 	return hex.EncodeToString(h.Sum(nil))
 }
diff --git a/services/llm-api/internal/utils/platformerrors/errors.go b/services/llm-api/internal/utils/platformerrors/errors.go
new file mode 100644
index 00000000..d47c0709
--- /dev/null
+++ b/services/llm-api/internal/utils/platformerrors/errors.go
@@ -0,0 +1,210 @@
+package platformerrors
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+)
+
+// getRequestIDFromContext returns the string stored under the "requestID" key of ctx, or "" if absent or not a string
+func getRequestIDFromContext(ctx context.Context) string {
+	requestID, isString := ctx.Value("requestID").(string)
+	if !isString {
+		return ""
+	}
+	return requestID
+}
+
+// ErrorType is the category of a PlatformError; trailing comments give the status ErrorTypeToHTTPStatus returns
+type ErrorType string
+
+const (
+	ErrorTypeNotFound       ErrorType = "NOT_FOUND"        // 404
+	ErrorTypeTooManyRecords ErrorType = "TOO_MANY_RECORDS" // 500
+	ErrorTypeValidation     ErrorType = "VALIDATION"       // 400
+	ErrorTypeConflict       ErrorType = "CONFLICT"         // 409
+	ErrorTypeUnauthorized   ErrorType = "UNAUTHORIZED"     // 401
+	ErrorTypeForbidden      ErrorType = "FORBIDDEN"        // 403
+	ErrorTypeInternal       ErrorType = "INTERNAL"         // 500
+	ErrorTypeExternal       ErrorType = "EXTERNAL"         // 502
+	ErrorTypeDatabaseError  ErrorType = "DATABASE_ERROR"   // 500
+	ErrorTypeNotImplemented ErrorType = "NOT_IMPLEMENTED"  // 501
+)
+
+// Layer names the application layer recorded on a PlatformError by NewError, NewErrorWithContext and AsError
+type Layer string
+
+const (
+	LayerRepository     Layer = "repository"
+	LayerDomain         Layer = "domain"
+	LayerHandler        Layer = "handler"
+	LayerRoute          Layer = "route"
+	LayerInfrastructure Layer = "infrastructure"
+	LayerCommon         Layer = "common"
+)
+
+// PlatformError is an error enriched with a category, an originating layer, correlation IDs and extra log fields
+type PlatformError struct {
+	UUID      string         // identifier of this error; AsError reuses it when re-wrapping a PlatformError
+	Type      ErrorType      // category; ErrorTypeToHTTPStatus maps it to an HTTP status code
+	Message   string         // human-readable description; LogError uses it as the log message
+	Err       error          // wrapped cause returned by Unwrap; may be nil
+	Context   map[string]any // extra key/value pairs; LogError emits each one as a log field
+	RequestID string         // "requestID" value read from the context at construction; "" if absent
+	Layer     Layer          // layer passed to the constructor that built this error
+	Timestamp time.Time      // creation time; NewErrorWithContext sets it in UTC
+}
+
+// Error implements the error interface as "[layer][type][uuid] message", followed by ": cause" when Err is set
+func (e *PlatformError) Error() string {
+	summary := fmt.Sprintf("[%s][%s][%s] %s", e.Layer, e.Type, e.UUID, e.Message)
+	if e.Err == nil {
+		return summary
+	}
+	return fmt.Sprintf("%s: %v", summary, e.Err)
+}
+
+// Unwrap returns the wrapped cause (nil when there is none) so errors.Is and errors.As can walk the chain
+func (e *PlatformError) Unwrap() error {
+	return e.Err
+}
+
+// GetErrorType returns the Type field, i.e. the category of this error
+func (e *PlatformError) GetErrorType() ErrorType {
+	return e.Type
+}
+
+// GetRequestID returns the RequestID field; NewErrorWithContext fills it from the "requestID" context value
+func (e *PlatformError) GetRequestID() string {
+	return e.RequestID
+}
+
+// GetUUID returns the UUID field that identifies this error
+func (e *PlatformError) GetUUID() string {
+	return e.UUID
+}
+
+// NewError creates a PlatformError without extra context fields by delegating to NewErrorWithContext with nil fields
+func NewError(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string) *PlatformError {
+	return NewErrorWithContext(ctx, layer, errorType, message, err, customUUID, nil)
+}
+
+// NewErrorWithContext builds a PlatformError carrying the request ID found in ctx and a private copy of contextFields.
+// An empty customUUID now yields a per-error ID derived from the creation time instead of one shared placeholder.
+func NewErrorWithContext(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string, contextFields map[string]any) *PlatformError {
+	now := time.Now().UTC()
+
+	errorUUID := customUUID
+	if errorUUID == "" {
+		// Not an RFC 4122 UUID: built from the nanosecond clock so that no new dependency is required.
+		errorUUID = fmt.Sprintf("err-%x", now.UnixNano())
+	}
+
+	// Copy the map so later changes by the caller cannot alter the error; nil input gives an empty, non-nil map.
+	errorContext := make(map[string]any, len(contextFields))
+	for k, v := range contextFields {
+		errorContext[k] = v
+	}
+
+	return &PlatformError{
+		UUID:      errorUUID,
+		Type:      errorType,
+		Message:   message,
+		Err:       err,
+		Context:   errorContext,
+		RequestID: getRequestIDFromContext(ctx),
+		Layer:     layer,
+		Timestamp: now,
+	}
+}
+
+// AsError re-wraps err for the given layer: nil stays nil, a PlatformError found in the chain keeps its type and UUID,
+// gorm.ErrRecordNotFound becomes ErrorTypeNotFound, and every other error becomes ErrorTypeInternal.
+func AsError(ctx context.Context, layer Layer, err error, message string) *PlatformError {
+	if err == nil {
+		return nil
+	}
+
+	var inner *PlatformError
+	switch {
+	case errors.As(err, &inner):
+		combined := fmt.Sprintf("%s: %s", message, inner.Message)
+		return NewError(ctx, layer, inner.Type, combined, inner, inner.UUID)
+	case errors.Is(err, gorm.ErrRecordNotFound):
+		return NewError(ctx, layer, ErrorTypeNotFound, message, err, "")
+	default:
+		return NewError(ctx, layer, ErrorTypeInternal, message, err, "")
+	}
+}
+
+// ErrorTypeToHTTPStatus translates an ErrorType into the HTTP status code to answer with.
+// Types without an explicit mapping fall back to 500 Internal Server Error.
+func ErrorTypeToHTTPStatus(errorType ErrorType) int {
+	switch errorType {
+	// Client-side problems (4xx).
+	case ErrorTypeValidation:
+		return http.StatusBadRequest
+	case ErrorTypeUnauthorized:
+		return http.StatusUnauthorized
+	case ErrorTypeForbidden:
+		return http.StatusForbidden
+	case ErrorTypeNotFound:
+		return http.StatusNotFound
+	case ErrorTypeConflict:
+		return http.StatusConflict
+	// Server-side problems (5xx).
+	case ErrorTypeNotImplemented:
+		return http.StatusNotImplemented
+	case ErrorTypeExternal:
+		return http.StatusBadGateway
+	case ErrorTypeTooManyRecords, ErrorTypeDatabaseError, ErrorTypeInternal:
+		return http.StatusInternalServerError
+	default:
+		// Unknown error types are reported as internal errors.
+		return http.StatusInternalServerError
+	}
+}
+
+// IsErrorType reports whether the chain of err contains a *PlatformError whose Type equals errorType.
+// A nil err, or a chain without any PlatformError, yields false.
+func IsErrorType(err error, errorType ErrorType) bool {
+	if err == nil {
+		return false
+	}
+
+	var found *PlatformError
+	if !errors.As(err, &found) {
+		return false
+	}
+	return found.Type == errorType
+}
+
+// LogError writes err at error level on logger with its UUID, type, layer and timestamp, the request ID when set,
+// every Context entry and the wrapped cause when present; err.Message is the log message. A nil err is ignored.
+func LogError(logger zerolog.Logger, err *PlatformError) {
+	if err == nil {
+		return
+	}
+
+	entry := logger.Error()
+	entry = entry.Str("error_uuid", err.UUID)
+	entry = entry.Str("error_type", string(err.Type))
+	entry = entry.Str("layer", string(err.Layer))
+	entry = entry.Time("timestamp_utc", err.Timestamp)
+
+	if requestID := err.RequestID; requestID != "" {
+		entry = entry.Str("request_id", requestID)
+	}
+	for field, value := range err.Context {
+		entry = entry.Interface(field, value)
+	}
+	if cause := err.Err; cause != nil {
+		entry = entry.Err(cause)
+	}
+
+	// Msg sends the assembled event.
+	entry.Msg(err.Message)
+}
diff --git a/apps/jan-api-gateway/application/app/utils/ptr/pointer.go b/services/llm-api/internal/utils/ptr/ptr.go
similarity index 65%
rename from apps/jan-api-gateway/application/app/utils/ptr/pointer.go
rename to services/llm-api/internal/utils/ptr/ptr.go
index 4fbc5fdb..d53cd0d6 100644
--- a/apps/jan-api-gateway/application/app/utils/ptr/pointer.go
+++ b/services/llm-api/internal/utils/ptr/ptr.go
@@ -26,10 +26,6 @@ func ToTime(b time.Time) *time.Time {
 	return &b
 }
 
-// FromString safely dereferences a string pointer, returning empty string if nil
-func FromString(s *string) string {
-	if s == nil {
-		return ""
-	}
-	return *s
+func ToFloat64(f float64) *float64 {
+	return &f
 }
diff --git a/services/llm-api/migrations/000001_init_schema.down.sql b/services/llm-api/migrations/000001_init_schema.down.sql
new file mode 100644
index 00000000..a7eb0fef
--- /dev/null
+++ b/services/llm-api/migrations/000001_init_schema.down.sql
@@ -0,0 +1,28 @@
+-- Drop triggers first
+DROP TRIGGER IF EXISTS api_keys_updated_at ON llm_api.api_keys;
+DROP TRIGGER IF EXISTS conversation_items_updated_at ON llm_api.conversation_items;
+DROP TRIGGER IF EXISTS conversation_branches_updated_at ON llm_api.conversation_branches;
+DROP TRIGGER IF EXISTS conversations_updated_at ON llm_api.conversations;
+DROP TRIGGER IF EXISTS models_updated_at ON llm_api.models;
+DROP TRIGGER IF EXISTS provider_models_updated_at ON llm_api.provider_models;
+DROP TRIGGER IF EXISTS model_catalogs_updated_at ON llm_api.model_catalogs;
+DROP TRIGGER IF EXISTS providers_updated_at ON llm_api.providers;
+DROP TRIGGER IF EXISTS users_updated_at ON llm_api.users;
+
+-- Drop trigger function (schema-qualified: the up migration creates it in llm_api, so do not rely on search_path)
+DROP FUNCTION IF EXISTS llm_api.update_updated_at_column();
+
+-- Drop tables in reverse order (respecting foreign keys)
+DROP TABLE IF EXISTS llm_api.api_keys;
+DROP TABLE IF EXISTS llm_api.conversation_items;
+DROP TABLE IF EXISTS llm_api.conversation_branches;
+DROP TABLE IF EXISTS llm_api.conversations;
+DROP TABLE IF EXISTS llm_api.models;
+DROP TABLE IF EXISTS llm_api.provider_models;
+DROP TABLE IF EXISTS llm_api.model_catalogs;
+DROP TABLE IF EXISTS llm_api.providers;
+DROP TABLE IF EXISTS llm_api.users;
+
+-- Drop schema
+DROP SCHEMA IF EXISTS llm_api CASCADE;
+
diff --git a/services/llm-api/migrations/000001_init_schema.up.sql b/services/llm-api/migrations/000001_init_schema.up.sql
new file mode 100644
index 00000000..69363438
--- /dev/null
+++ b/services/llm-api/migrations/000001_init_schema.up.sql
@@ -0,0 +1,291 @@
+CREATE SCHEMA IF NOT EXISTS llm_api;
+
+-- Session-level setting (not SET LOCAL): unqualified names later in this script resolve to llm_api
+SET search_path TO llm_api;
+
+-- ============================================================================
+-- USERS: one row per (issuer, subject) pair; username and email carry no uniqueness constraint
+-- ============================================================================
+CREATE TABLE llm_api.users (
+    id SERIAL PRIMARY KEY,
+    auth_provider VARCHAR(50) NOT NULL DEFAULT 'keycloak',
+    issuer VARCHAR(255) NOT NULL,
+    subject VARCHAR(255) NOT NULL,
+    username VARCHAR(150),
+    email VARCHAR(320),
+    name VARCHAR(255),
+    picture VARCHAR(512),
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the users_updated_at trigger
+    deleted_at TIMESTAMPTZ, -- NOTE(review): presumably a soft-delete marker; nothing in this schema enforces it
+    CONSTRAINT ux_users_issuer_subject UNIQUE (issuer, subject)
+);
+
+CREATE INDEX idx_users_deleted_at ON llm_api.users(deleted_at);
+
+-- ============================================================================
+-- PROVIDERS: one row per provider; public_id is unique
+-- ============================================================================
+CREATE TABLE llm_api.providers (
+    id SERIAL PRIMARY KEY,
+    public_id VARCHAR(64) NOT NULL,
+    display_name VARCHAR(255) NOT NULL,
+    kind VARCHAR(64) NOT NULL,
+    base_url VARCHAR(512),
+    encrypted_api_key TEXT, -- NOTE(review): presumably ciphertext written by the application; the schema does not encrypt
+    api_key_hint VARCHAR(128),
+    is_moderated BOOLEAN NOT NULL DEFAULT false,
+    active BOOLEAN NOT NULL DEFAULT true,
+    metadata JSONB,
+    last_synced_at TIMESTAMPTZ,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the providers_updated_at trigger
+    deleted_at TIMESTAMPTZ,
+    CONSTRAINT providers_public_id_unique UNIQUE (public_id)
+);
+
+CREATE INDEX idx_providers_kind ON llm_api.providers(kind);
+CREATE INDEX idx_providers_is_moderated ON llm_api.providers(is_moderated);
+CREATE INDEX idx_providers_active ON llm_api.providers(active); -- NOTE(review): likely redundant; idx_providers_active_kind leads with the same column
+CREATE INDEX idx_providers_active_kind ON llm_api.providers(active, kind);
+CREATE INDEX idx_providers_deleted_at ON llm_api.providers(deleted_at);
+
+-- ============================================================================
+-- MODEL CATALOG: one row per catalog entry; public_id is unique
+-- ============================================================================
+CREATE TABLE llm_api.model_catalogs (
+    id SERIAL PRIMARY KEY,
+    public_id VARCHAR(64) NOT NULL,
+    supported_parameters JSONB NOT NULL,
+    architecture JSONB NOT NULL,
+    tags JSONB,
+    notes TEXT,
+    is_moderated BOOLEAN, -- NOTE(review): nullable and without default, unlike providers.is_moderated; confirm this is intended
+    active BOOLEAN DEFAULT true, -- NOTE(review): nullable, unlike providers.active and provider_models.active
+    status VARCHAR(32) NOT NULL DEFAULT 'init',
+    extras JSONB,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the model_catalogs_updated_at trigger
+    deleted_at TIMESTAMPTZ,
+    CONSTRAINT model_catalogs_public_id_unique UNIQUE (public_id)
+);
+
+CREATE INDEX idx_model_catalogs_is_moderated ON llm_api.model_catalogs(is_moderated);
+CREATE INDEX idx_model_catalogs_active ON llm_api.model_catalogs(active);
+CREATE INDEX idx_model_catalogs_status ON llm_api.model_catalogs(status); -- NOTE(review): likely redundant; idx_model_catalogs_status_active leads with the same column
+CREATE INDEX idx_model_catalogs_status_active ON llm_api.model_catalogs(status, active);
+CREATE INDEX idx_model_catalogs_deleted_at ON llm_api.model_catalogs(deleted_at);
+
+-- ============================================================================
+-- PROVIDER MODELS: models offered by a provider, optionally linked to a model_catalogs row
+-- ============================================================================
+CREATE TABLE llm_api.provider_models (
+    id SERIAL PRIMARY KEY,
+    provider_id INTEGER NOT NULL,
+    public_id VARCHAR(64) NOT NULL,
+    kind VARCHAR(64) NOT NULL,
+    model_catalog_id INTEGER, -- nullable; reset to NULL when the referenced catalog row is deleted
+    model_public_id VARCHAR(128) NOT NULL,
+    provider_original_model_id VARCHAR(255) NOT NULL,
+    display_name VARCHAR(255) NOT NULL,
+    pricing JSONB NOT NULL,
+    token_limits JSONB,
+    family VARCHAR(128),
+    supports_images BOOLEAN NOT NULL DEFAULT false,
+    supports_embeddings BOOLEAN NOT NULL DEFAULT false,
+    supports_reasoning BOOLEAN NOT NULL DEFAULT false,
+    supports_audio BOOLEAN NOT NULL DEFAULT false,
+    supports_video BOOLEAN NOT NULL DEFAULT false,
+    active BOOLEAN NOT NULL DEFAULT true,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the provider_models_updated_at trigger
+    deleted_at TIMESTAMPTZ,
+    CONSTRAINT provider_models_public_id_unique UNIQUE (public_id),
+    CONSTRAINT ux_provider_model_public_id UNIQUE (provider_id, model_public_id), -- a model_public_id may repeat across providers, not within one
+    CONSTRAINT fk_provider_models_provider FOREIGN KEY (provider_id) REFERENCES llm_api.providers(id) ON DELETE CASCADE, -- rows are removed with their provider
+    CONSTRAINT fk_provider_models_model_catalog FOREIGN KEY (model_catalog_id) REFERENCES llm_api.model_catalogs(id) ON DELETE SET NULL
+);
+
+CREATE INDEX idx_provider_models_provider_id ON llm_api.provider_models(provider_id); -- NOTE(review): likely redundant; provider_id already leads two other indexes
+CREATE INDEX idx_provider_models_kind ON llm_api.provider_models(kind);
+CREATE INDEX idx_provider_models_model_catalog_id ON llm_api.provider_models(model_catalog_id);
+CREATE INDEX idx_provider_models_model_public_id ON llm_api.provider_models(model_public_id);
+CREATE INDEX idx_provider_models_active ON llm_api.provider_models(active);
+CREATE INDEX idx_provider_models_provider_active ON llm_api.provider_models(provider_id, active);
+CREATE INDEX idx_provider_models_catalog_active ON llm_api.provider_models(model_catalog_id, active);
+CREATE INDEX idx_provider_models_deleted_at ON llm_api.provider_models(deleted_at);
+
+-- ============================================================================
+-- MODELS (Legacy table): string primary key, no foreign keys and no deleted_at column
+-- ============================================================================
+CREATE TABLE llm_api.models (
+    id VARCHAR(255) PRIMARY KEY,
+    provider VARCHAR(255) NOT NULL, -- free text; not a reference to llm_api.providers
+    display_name VARCHAR(255) NOT NULL,
+    family VARCHAR(255),
+    capabilities JSONB,
+    active BOOLEAN NOT NULL DEFAULT true,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() -- overwritten on every UPDATE by the models_updated_at trigger
+);
+
+-- ============================================================================
+-- CONVERSATIONS: one row per conversation, owned by a user; public_id is unique
+-- ============================================================================
+CREATE TABLE llm_api.conversations (
+    id SERIAL PRIMARY KEY,
+    public_id VARCHAR(50) NOT NULL,
+    object VARCHAR(50) NOT NULL DEFAULT 'conversation',
+    title VARCHAR(256),
+    user_id INTEGER NOT NULL,
+    status VARCHAR(20) NOT NULL DEFAULT 'active',
+    active_branch VARCHAR(50) NOT NULL DEFAULT 'MAIN', -- NOTE(review): presumably a conversation_branches.name; no foreign key enforces it
+    referrer VARCHAR(100),
+    metadata JSONB,
+    is_private BOOLEAN DEFAULT false, -- NOTE(review): nullable; confirm whether NOT NULL was intended
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the conversations_updated_at trigger
+    deleted_at TIMESTAMPTZ,
+    CONSTRAINT conversations_public_id_unique UNIQUE (public_id),
+    CONSTRAINT fk_conversations_user FOREIGN KEY (user_id) REFERENCES llm_api.users(id) -- no ON DELETE clause: deleting a user who still has conversations is rejected
+);
+
+CREATE INDEX idx_conversations_user_id_referrer ON llm_api.conversations(user_id, referrer);
+CREATE INDEX idx_conversations_user_id_status ON llm_api.conversations(user_id, status);
+CREATE INDEX idx_conversations_deleted_at ON llm_api.conversations(deleted_at);
+
+-- ============================================================================
+-- CONVERSATION BRANCHES: named branches of a conversation; names are unique within a conversation
+-- ============================================================================
+CREATE TABLE llm_api.conversation_branches (
+    id SERIAL PRIMARY KEY,
+    conversation_id INTEGER NOT NULL,
+    name VARCHAR(50) NOT NULL,
+    description TEXT,
+    parent_branch VARCHAR(50),
+    forked_at TIMESTAMPTZ,
+    forked_from_item_id VARCHAR(50), -- NOTE(review): presumably a conversation_items.public_id; no foreign key enforces it
+    item_count INTEGER DEFAULT 0, -- NOTE(review): nullable; nothing in this schema keeps it in sync with conversation_items
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the conversation_branches_updated_at trigger
+    deleted_at TIMESTAMPTZ,
+    CONSTRAINT idx_conversation_branch_name UNIQUE (conversation_id, name), -- a unique constraint despite the idx_ prefix
+    CONSTRAINT fk_conversation_branches_conversation FOREIGN KEY (conversation_id) REFERENCES llm_api.conversations(id) ON DELETE CASCADE
+);
+
+CREATE INDEX idx_conversation_branches_deleted_at ON llm_api.conversation_branches(deleted_at);
+
+-- ============================================================================
+-- CONVERSATION ITEMS: items of a conversation, each tagged with a branch name and a sequence number
+-- ============================================================================
+CREATE TABLE llm_api.conversation_items (
+    id SERIAL PRIMARY KEY,
+    conversation_id INTEGER NOT NULL,
+    public_id VARCHAR(50) NOT NULL,
+    object VARCHAR(50) NOT NULL DEFAULT 'conversation.item',
+    branch VARCHAR(50) NOT NULL DEFAULT 'MAIN',
+    sequence_number INTEGER NOT NULL, -- NOTE(review): no unique constraint covers (conversation_id, branch, sequence_number)
+    type VARCHAR(50) NOT NULL,
+    role VARCHAR(20),
+    content JSONB,
+    status VARCHAR(20),
+    incomplete_at TIMESTAMPTZ,
+    incomplete_details JSONB,
+    completed_at TIMESTAMPTZ,
+    response_id INTEGER, -- indexed below, but declared without a foreign key
+    rating VARCHAR(10),
+    rated_at TIMESTAMPTZ,
+    rating_comment TEXT,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the conversation_items_updated_at trigger
+    deleted_at TIMESTAMPTZ,
+    CONSTRAINT conversation_items_public_id_unique UNIQUE (public_id),
+    CONSTRAINT fk_conversation_items_conversation FOREIGN KEY (conversation_id) REFERENCES llm_api.conversations(id) ON DELETE CASCADE -- items are removed with their conversation
+);
+
+CREATE INDEX idx_conversation_items_conversation_id_branch ON llm_api.conversation_items(conversation_id, branch);
+CREATE INDEX idx_conversation_items_conversation_id_sequence ON llm_api.conversation_items(conversation_id, sequence_number);
+CREATE INDEX idx_conversation_items_response_id ON llm_api.conversation_items(response_id);
+CREATE INDEX idx_conversation_items_deleted_at ON llm_api.conversation_items(deleted_at);
+
+-- ============================================================================
+-- API KEYS: per-user keys stored as prefix, suffix and hash columns; rows are removed with their user
+-- ============================================================================
+CREATE TABLE llm_api.api_keys (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- built in since PostgreSQL 13; older servers need the pgcrypto extension
+    user_id INTEGER NOT NULL,
+    name VARCHAR(128) NOT NULL,
+    prefix VARCHAR(32) NOT NULL,
+    suffix VARCHAR(8) NOT NULL,
+    hash VARCHAR(128) NOT NULL, -- not indexed here; migration 000003 adds idx_api_keys_hash
+    expires_at TIMESTAMPTZ NOT NULL,
+    revoked_at TIMESTAMPTZ,
+    last_used_at TIMESTAMPTZ,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- overwritten on every UPDATE by the api_keys_updated_at trigger
+    CONSTRAINT fk_api_keys_user FOREIGN KEY (user_id) REFERENCES llm_api.users(id) ON DELETE CASCADE
+);
+
+CREATE INDEX idx_api_keys_user_id ON llm_api.api_keys(user_id); -- NOTE(review): likely redundant; idx_api_keys_user_id_revoked_at leads with the same column
+CREATE INDEX idx_api_keys_expires_at ON llm_api.api_keys(expires_at);
+CREATE INDEX idx_api_keys_prefix ON llm_api.api_keys(prefix);
+CREATE INDEX idx_api_keys_user_id_revoked_at ON llm_api.api_keys(user_id, revoked_at);
+
+-- ============================================================================
+-- TRIGGERS
+-- ============================================================================
+
+-- Updated_at trigger function (schema-qualified so it does not depend on the session search_path)
+CREATE OR REPLACE FUNCTION llm_api.update_updated_at_column()
+RETURNS TRIGGER AS $$
+BEGIN
+    NEW.updated_at = NOW();
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Apply updated_at triggers: every row UPDATE overwrites updated_at with NOW()
+CREATE TRIGGER users_updated_at
+    BEFORE UPDATE ON llm_api.users
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER providers_updated_at
+    BEFORE UPDATE ON llm_api.providers
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER model_catalogs_updated_at
+    BEFORE UPDATE ON llm_api.model_catalogs
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER provider_models_updated_at
+    BEFORE UPDATE ON llm_api.provider_models
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER models_updated_at
+    BEFORE UPDATE ON llm_api.models
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER conversations_updated_at
+    BEFORE UPDATE ON llm_api.conversations
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER conversation_branches_updated_at
+    BEFORE UPDATE ON llm_api.conversation_branches
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER conversation_items_updated_at
+    BEFORE UPDATE ON llm_api.conversation_items
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
+
+CREATE TRIGGER api_keys_updated_at
+    BEFORE UPDATE ON llm_api.api_keys
+    FOR EACH ROW
+    EXECUTE FUNCTION llm_api.update_updated_at_column();
diff --git a/services/llm-api/migrations/000002_create_projects.down.sql b/services/llm-api/migrations/000002_create_projects.down.sql
new file mode 100644
index 00000000..1af82e7c
--- /dev/null
+++ b/services/llm-api/migrations/000002_create_projects.down.sql
@@ -0,0 +1,15 @@
+-- Remove indexes (schema-qualified, like migrations 000001/000004/000005, so search_path does not matter)
+DROP INDEX IF EXISTS llm_api.idx_conversations_project_updated_at;
+DROP INDEX IF EXISTS llm_api.idx_projects_archived_at;
+DROP INDEX IF EXISTS llm_api.idx_projects_deleted_at;
+DROP INDEX IF EXISTS llm_api.idx_projects_user_updated_at;
+DROP INDEX IF EXISTS llm_api.idx_projects_user_id;
+
+-- Remove columns from conversations
+ALTER TABLE llm_api.conversations
+    DROP COLUMN IF EXISTS effective_instruction_snapshot,
+    DROP COLUMN IF EXISTS instruction_version,
+    DROP COLUMN IF EXISTS project_id;
+
+-- Drop projects table
+DROP TABLE IF EXISTS llm_api.projects;
diff --git a/services/llm-api/migrations/000002_create_projects.up.sql b/services/llm-api/migrations/000002_create_projects.up.sql
new file mode 100644
index 00000000..3557ed1d
--- /dev/null
+++ b/services/llm-api/migrations/000002_create_projects.up.sql
@@ -0,0 +1,34 @@
+-- Create projects table (schema-qualified so the migration does not depend on the session search_path)
+CREATE TABLE IF NOT EXISTS llm_api.projects (
+    id BIGSERIAL PRIMARY KEY,
+    public_id VARCHAR(64) NOT NULL UNIQUE,
+    user_id BIGINT NOT NULL,
+    name VARCHAR(255) NOT NULL,
+    instruction TEXT,
+    favorite BOOLEAN NOT NULL DEFAULT false,
+    archived_at TIMESTAMPTZ,
+    deleted_at TIMESTAMPTZ,
+    last_used_at TIMESTAMPTZ,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    CONSTRAINT uq_projects_user_name UNIQUE (user_id, name)
+);
+
+CREATE INDEX idx_projects_user_id ON llm_api.projects(user_id);
+CREATE INDEX idx_projects_user_updated_at ON llm_api.projects(user_id, updated_at DESC);
+CREATE INDEX idx_projects_deleted_at ON llm_api.projects(deleted_at);
+CREATE INDEX idx_projects_archived_at ON llm_api.projects(archived_at);
+
+-- Update conversations table
+ALTER TABLE llm_api.conversations
+    ADD COLUMN IF NOT EXISTS project_id BIGINT REFERENCES llm_api.projects(id) ON DELETE SET NULL,
+    ADD COLUMN IF NOT EXISTS instruction_version INT NOT NULL DEFAULT 1,
+    ADD COLUMN IF NOT EXISTS effective_instruction_snapshot TEXT;
+
+CREATE INDEX IF NOT EXISTS idx_conversations_project_updated_at
+    ON llm_api.conversations(project_id, updated_at DESC);
+
+COMMENT ON TABLE llm_api.projects IS 'Projects for grouping conversations and inheriting instructions';
+COMMENT ON COLUMN llm_api.conversations.project_id IS 'Optional project grouping';
+COMMENT ON COLUMN llm_api.conversations.instruction_version IS 'Version of project instruction when conversation was created';
+COMMENT ON COLUMN llm_api.conversations.effective_instruction_snapshot IS 'Snapshot of merged instruction for reproducibility';
diff --git a/services/llm-api/migrations/000003_add_api_keys_hash_index.down.sql b/services/llm-api/migrations/000003_add_api_keys_hash_index.down.sql
new file mode 100644
index 00000000..023c72ec
--- /dev/null
+++ b/services/llm-api/migrations/000003_add_api_keys_hash_index.down.sql
@@ -0,0 +1,2 @@
+-- Remove index on llm_api.api_keys.hash (schema-qualified so search_path does not matter)
+DROP INDEX IF EXISTS llm_api.idx_api_keys_hash;
diff --git a/services/llm-api/migrations/000003_add_api_keys_hash_index.up.sql b/services/llm-api/migrations/000003_add_api_keys_hash_index.up.sql
new file mode 100644
index 00000000..c56519bd
--- /dev/null
+++ b/services/llm-api/migrations/000003_add_api_keys_hash_index.up.sql
@@ -0,0 +1,2 @@
+-- Add index on llm_api.api_keys.hash for fast API key lookups (schema-qualified so search_path does not matter)
+CREATE INDEX IF NOT EXISTS idx_api_keys_hash ON llm_api.api_keys(hash);
diff --git a/services/llm-api/migrations/000004_add_project_public_id_to_conversations.down.sql b/services/llm-api/migrations/000004_add_project_public_id_to_conversations.down.sql
new file mode 100644
index 00000000..d28d2c92
--- /dev/null
+++ b/services/llm-api/migrations/000004_add_project_public_id_to_conversations.down.sql
@@ -0,0 +1,5 @@
+-- Drop the project_public_id index, then the project_public_id column, from the conversations table
+DROP INDEX IF EXISTS llm_api.idx_conversations_project_public_id;
+
+ALTER TABLE llm_api.conversations
+    DROP COLUMN IF EXISTS project_public_id;
diff --git a/services/llm-api/migrations/000004_add_project_public_id_to_conversations.up.sql b/services/llm-api/migrations/000004_add_project_public_id_to_conversations.up.sql
new file mode 100644
index 00000000..a032cceb
--- /dev/null
+++ b/services/llm-api/migrations/000004_add_project_public_id_to_conversations.up.sql
@@ -0,0 +1,9 @@
+-- Add nullable project_public_id column to conversations table (no foreign key to projects.public_id is declared)
+ALTER TABLE llm_api.conversations
+    ADD COLUMN IF NOT EXISTS project_public_id VARCHAR(64);
+
+-- Add index for project_public_id (created in the table's schema, llm_api)
+CREATE INDEX IF NOT EXISTS idx_conversations_project_public_id 
+    ON llm_api.conversations(project_public_id);
+
+COMMENT ON COLUMN llm_api.conversations.project_public_id IS 'Public ID of the project this conversation belongs to';
diff --git a/services/llm-api/migrations/000005_create_user_settings.down.sql b/services/llm-api/migrations/000005_create_user_settings.down.sql
new file mode 100644
index 00000000..4896a13c
--- /dev/null
+++ b/services/llm-api/migrations/000005_create_user_settings.down.sql
@@ -0,0 +1,2 @@
+-- Drop user_settings table; its index, constraints and comments are dropped with it
+DROP TABLE IF EXISTS llm_api.user_settings;
diff --git a/services/llm-api/migrations/000005_create_user_settings.up.sql b/services/llm-api/migrations/000005_create_user_settings.up.sql
new file mode 100644
index 00000000..8e66621f
--- /dev/null
+++ b/services/llm-api/migrations/000005_create_user_settings.up.sql
@@ -0,0 +1,58 @@
+-- ============================================================================
+-- USER SETTINGS
+-- ============================================================================
+-- Stores user preferences and feature toggles for personalization and control
+
+CREATE TABLE llm_api.user_settings (
+    id SERIAL PRIMARY KEY,
+    user_id INTEGER NOT NULL,
+    
+    -- Memory Configuration stored as JSON for flexibility
+    memory_config JSONB NOT NULL DEFAULT '{
+        "enabled": true,
+        "observe_enabled": true,
+        "inject_user_core": true,
+        "inject_semantic": true,
+        "inject_episodic": false,
+        "max_user_items": 3,
+        "max_project_items": 5,
+        "max_episodic_items": 3,
+        "min_similarity": 0.75
+    }',
+    
+    -- Profile Settings
+    profile_settings JSONB NOT NULL DEFAULT '{
+        "base_style": "Friendly",
+        "custom_instructions": "",
+        "nick_name": "",
+        "occupation": "",
+        "more_about_you": ""
+    }',
+    
+    -- Advanced Settings
+    advanced_settings JSONB NOT NULL DEFAULT '{
+        "web_search": false,
+        "code_enabled": false
+    }',
+    
+    -- Other Feature Toggles
+    enable_trace BOOLEAN NOT NULL DEFAULT false,
+    enable_tools BOOLEAN NOT NULL DEFAULT true,
+    
+    -- Preferences stored as flexible JSON for future extensions
+    preferences JSONB NOT NULL DEFAULT '{}',
+    
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    
+    CONSTRAINT fk_user_settings_user FOREIGN KEY (user_id) REFERENCES llm_api.users(id) ON DELETE CASCADE,
+    CONSTRAINT ux_user_settings_user_id UNIQUE (user_id)
+);
+
+CREATE INDEX idx_user_settings_user_id ON llm_api.user_settings(user_id); -- NOTE(review): the UNIQUE (user_id) constraint already creates an index on user_id
+
+-- Add catalog comments describing the table and its JSONB columns
+COMMENT ON TABLE llm_api.user_settings IS 'User preferences and feature toggles with JSONB columns for flexible configuration';
+COMMENT ON COLUMN llm_api.user_settings.memory_config IS 'Memory configuration: enabled, auto_inject, observe_enabled, inject flags, retrieval limits, similarity threshold'; -- NOTE(review): the column default has no "auto_inject" key; confirm this list
+COMMENT ON COLUMN llm_api.user_settings.profile_settings IS 'User profile information: base_style, custom_instructions, nick_name (alias nickname), occupation, more_about_you';
+COMMENT ON COLUMN llm_api.user_settings.advanced_settings IS 'Advanced features: web_search, code_enabled';
diff --git a/services/llm-api/migrations/migrations.go b/services/llm-api/migrations/migrations.go
new file mode 100644
index 00000000..91cca1c3
--- /dev/null
+++ b/services/llm-api/migrations/migrations.go
@@ -0,0 +1,6 @@
+package migrations
+
+import "embed"
+
+//go:embed *.sql
+var FS embed.FS // FS exposes the *.sql files of this directory, embedded into the binary at build time
diff --git a/services/llm-api/opentelemetry.md b/services/llm-api/opentelemetry.md
new file mode 100644
index 00000000..6b8bcf57
--- /dev/null
+++ b/services/llm-api/opentelemetry.md
@@ -0,0 +1,350 @@
+# OpenTelemetry Implementation Guide
+
+This document describes how OpenTelemetry is implemented in the llm-api service and how to use it.
+
+## Overview
+
+The llm-api service uses OpenTelemetry for distributed tracing and metrics collection. Traces and metrics are exported to an OpenTelemetry Collector, which then forwards them to Prometheus (metrics) and Jaeger (traces).
+
+## Architecture
+
+```
+llm-api -> OpenTelemetry SDK -> OTLP Exporter -> OTel Collector -> Prometheus + Jaeger -> Grafana
+```
+
+## Configuration
+
+OpenTelemetry is configured via environment variables:
+
+```bash
+# Enable/disable by providing an endpoint
+OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318  # HTTP endpoint
+# OR
+OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector:4317  # gRPC endpoint (auto-detected)
+
+# Optional: Custom headers
+OTEL_EXPORTER_OTLP_HEADERS=key1=value1,key2=value2
+
+# Service identification
+SERVICE_NAME=llm-api
+SERVICE_NAMESPACE=jan
+ENVIRONMENT=development
+```
+
+## Automatic Instrumentation
+
+### HTTP Request Tracing
+
+All HTTP requests are automatically traced via the `TracingMiddleware`. This middleware:
+
+- Creates a span for each HTTP request
+- Extracts trace context from incoming headers (for distributed tracing)
+- Adds standard HTTP attributes (method, route, status, etc.)
+- Records errors for 4xx and 5xx responses
+- Injects trace context into the request context
+
+**Attributes captured:**
+- `http.method`
+- `http.route`
+- `http.url`
+- `http.target`
+- `http.scheme`
+- `net.host.name`
+- `http.user_agent`
+- `http.client_ip`
+- `http.status_code`
+- `request.id` (if present)
+
+### Logging Integration
+
+The `LoggingMiddleware` automatically correlates logs with traces by adding:
+
+- `trace_id`: OpenTelemetry trace ID
+- `span_id`: Current span ID
+- `request_id`: Request ID from middleware
+
+This allows you to find all logs related to a specific trace in your log aggregation system.
+
+## Manual Instrumentation
+
+### Adding Spans in Business Logic
+
+Use the helper functions from `internal/infrastructure/observability/tracing.go`:
+
+```go
+package mypackage
+
+import (
+    "context"
+    "jan-server/services/llm-api/internal/config"
+    "jan-server/services/llm-api/internal/infrastructure/observability"
+    "go.opentelemetry.io/otel/attribute"
+    "go.opentelemetry.io/otel/codes"
+)
+
+func MyBusinessLogic(ctx context.Context) error {
+    cfg := config.GetGlobal()
+    
+    // Start a new span
+    ctx, span := observability.StartSpan(ctx, cfg.ServiceName, "MyBusinessLogic.ProcessData")
+    defer span.End()
+    
+    // Add custom attributes
+    observability.AddSpanAttributes(ctx,
+        attribute.String("user.id", "12345"),
+        attribute.Int("batch.size", 100),
+    )
+    
+    // Add events
+    observability.AddSpanEvent(ctx, "validation_started")
+    
+    // Do some work...
+    if err := doWork(); err != nil {
+        // Record errors
+        observability.RecordError(ctx, err)
+        return err
+    }
+    
+    // Set success status
+    observability.SetSpanStatus(ctx, codes.Ok, "completed successfully")
+    
+    return nil
+}
+```
+
+### Nested Spans
+
+Create child spans for sub-operations:
+
+```go
+// ProcessBatch traces a batch as one parent span with one child span per item; item failures are recorded and skipped.
+func ProcessBatch(ctx context.Context, items []Item) error {
+    cfg := config.GetGlobal()
+    // Parent span
+    ctx, span := observability.StartSpan(ctx, cfg.ServiceName, "ProcessBatch")
+    defer span.End()
+    
+    observability.AddSpanAttributes(ctx, attribute.Int("batch.size", len(items)))
+    
+    for i, item := range items {
+        // Child span for each item; keep its context (itemCtx) so attributes and errors land on the child, not the parent
+        itemCtx, itemSpan := observability.StartSpan(ctx, cfg.ServiceName, "ProcessItem")
+        observability.AddSpanAttributes(itemCtx,
+            attribute.Int("item.index", i),
+            attribute.String("item.id", item.ID),
+        )
+        
+        if err := processItem(itemCtx, item); err != nil {
+            observability.RecordError(itemCtx, err)
+            itemSpan.End()
+            continue
+        }
+        
+        itemSpan.End()
+    }
+    
+    return nil
+}
+```
+
+### Adding Trace Context to Logs
+
+Get trace IDs for manual logging:
+
+```go
+import (
+    "jan-server/services/llm-api/internal/infrastructure/logger"
+    "jan-server/services/llm-api/internal/infrastructure/observability"
+)
+
+func MyFunction(ctx context.Context) {
+    log := logger.GetLogger()
+    
+    // Get trace context
+    traceID := observability.GetTraceID(ctx)
+    spanID := observability.GetSpanID(ctx)
+    
+    log.Info().
+        Str("trace_id", traceID).
+        Str("span_id", spanID).
+        Msg("Processing request")
+}
+```
+
+## Example: Tracing a Use Case
+
+```go
+package usecases
+
+import (
+    "context"
+    "jan-server/services/llm-api/internal/config"
+    "jan-server/services/llm-api/internal/domain"
+    "jan-server/services/llm-api/internal/infrastructure/observability"
+    "go.opentelemetry.io/otel/attribute"
+    "go.opentelemetry.io/otel/codes"
+)
+
+type ChatCompletionUseCase struct {
+    repo domain.ConversationRepository
+    cfg  *config.Config
+}
+
+// Execute traces the use case as one root span with sibling child spans for the DB fetch, the LLM call and the save.
+func (uc *ChatCompletionUseCase) Execute(ctx context.Context, req domain.ChatRequest) (*domain.ChatResponse, error) {
+    // Start span for the entire use case; ctx keeps referring to this span for the rest of the function
+    ctx, span := observability.StartSpan(ctx, uc.cfg.ServiceName, "ChatCompletionUseCase.Execute")
+    defer span.End()
+    
+    observability.AddSpanAttributes(ctx,
+        attribute.String("model", req.Model),
+        attribute.Int("message.count", len(req.Messages)),
+    )
+    
+    // Database operation (child span with its own context, so ctx is not overwritten by an ended span)
+    observability.AddSpanEvent(ctx, "fetching_conversation")
+    dbCtx, dbSpan := observability.StartSpan(ctx, uc.cfg.ServiceName, "FetchConversation")
+    conversation, err := uc.repo.FindByID(dbCtx, req.ConversationID)
+    dbSpan.End()
+    
+    if err != nil {
+        observability.RecordError(ctx, err)
+        return nil, err
+    }
+    
+    // LLM call (sibling child span; its attributes go on llmCtx, not on the use-case span)
+    observability.AddSpanEvent(ctx, "calling_llm")
+    llmCtx, llmSpan := observability.StartSpan(ctx, uc.cfg.ServiceName, "LLMInference")
+    observability.AddSpanAttributes(llmCtx,
+        attribute.String("llm.provider", "vllm"),
+        attribute.String("llm.model", req.Model),
+    )
+    response, err := uc.callLLM(llmCtx, req)
+    llmSpan.End()
+    
+    if err != nil {
+        observability.RecordError(ctx, err)
+        return nil, err
+    }
+    
+    // Save result (errors are recorded on the save span before it ends)
+    saveCtx, saveSpan := observability.StartSpan(ctx, uc.cfg.ServiceName, "SaveResponse")
+    if err := uc.repo.Save(saveCtx, response); err != nil {
+        observability.RecordError(saveCtx, err)
+        saveSpan.End()
+        return nil, err
+    }
+    saveSpan.End()
+    
+    observability.SetSpanStatus(ctx, codes.Ok, "completed")
+    return response, nil
+}
+```
+
+## Viewing Traces
+
+### In Jaeger UI
+
+1. Start the monitoring stack: `make monitor-up`
+2. Navigate to http://localhost:16686
+3. Select service: `llm-api`
+4. Search for traces
+5. Click on a trace to see:
+   - Span timeline
+   - Parent-child relationships
+   - Attributes and events
+   - Errors
+
+### In Grafana
+
+1. Navigate to http://localhost:3331 (admin/admin)
+2. Go to Explore
+3. Select Jaeger datasource
+4. Query traces
+5. Correlate with metrics from Prometheus
+
+## Best Practices
+
+1. **Always defer span.End()**
+   ```go
+   ctx, span := observability.StartSpan(ctx, serviceName, "operation")
+   defer span.End()  // Ensures span is closed even if panic occurs
+   ```
+
+2. **Use meaningful span names**
+   ```go
+   // Good
+   "ChatCompletion.Execute"
+   "UserRepository.FindByID"
+   "LLMProvider.GenerateResponse"
+   
+   // Bad
+   "process"
+   "doStuff"
+   "handler"
+   ```
+
+3. **Add relevant attributes**
+   ```go
+   observability.AddSpanAttributes(ctx,
+       attribute.String("user.id", userID),
+       attribute.String("model.name", model),
+       attribute.Int("batch.size", len(items)),
+       attribute.Bool("cache.hit", cacheHit),
+   )
+   ```
+
+4. **Record errors properly**
+   ```go
+   if err != nil {
+       observability.RecordError(ctx, err)
+       return err
+   }
+   ```
+
+5. **Use events for significant milestones**
+   ```go
+   observability.AddSpanEvent(ctx, "validation_completed")
+   observability.AddSpanEvent(ctx, "cache_miss")
+   observability.AddSpanEvent(ctx, "model_loaded")
+   ```
+
+6. **Don't create spans for trivial operations**
+   - Avoid spans for simple getters/setters
+   - Focus on I/O operations, external calls, and business logic
+
+## Troubleshooting
+
+### No traces appearing in Jaeger
+
+1. Check OTEL_EXPORTER_OTLP_ENDPOINT is set:
+   ```bash
+   docker logs jan-server-llm-api-1 | grep -i otel
+   ```
+
+2. Verify OTel Collector is receiving data:
+   ```bash
+   docker logs jan-server-otel-collector-1
+   ```
+
+3. Check llm-api is creating spans:
+   - Look for trace_id in logs
+   - Verify TracingMiddleware is registered
+
+### Traces not correlating with logs
+
+1. Ensure LoggingMiddleware is registered after TracingMiddleware, so the request span already exists when the log fields are added
+2. Check that context is properly passed through the call chain
+3. Verify trace_id appears in log output
+
+### High overhead
+
+1. Reduce sampling rate (if needed, add sampler to otel.go)
+2. Avoid creating too many child spans
+3. Use span events instead of separate spans for lightweight operations
+
+## References
+
+- [OpenTelemetry Go Documentation](https://opentelemetry.io/docs/instrumentation/go/)
+- [OpenTelemetry Semantic Conventions](https://opentelemetry.io/docs/specs/semconv/)
+- [Jaeger Documentation](https://www.jaegertracing.io/docs/)
diff --git a/services/llm-api/scripts/tools.go b/services/llm-api/scripts/tools.go
new file mode 100644
index 00000000..d7cfd756
--- /dev/null
+++ b/services/llm-api/scripts/tools.go
@@ -0,0 +1,15 @@
+//go:build tools
+// +build tools
+
+// Package tools tracks tool dependencies for the project
+package tools
+
+import (
+	_ "github.com/google/wire/cmd/wire"
+	_ "github.com/swaggo/swag/cmd/swag"
+	_ "gorm.io/gen"
+)
+
+//go:generate go install github.com/google/wire/cmd/wire
+//go:generate go install github.com/swaggo/swag/cmd/swag
+//go:generate go install gorm.io/gen/tools/gentool@latest
diff --git a/services/llm-api/swagger/swagger.go b/services/llm-api/swagger/swagger.go
new file mode 100644
index 00000000..bb9ef8b7
--- /dev/null
+++ b/services/llm-api/swagger/swagger.go
@@ -0,0 +1,17 @@
+package swagger
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/gin-gonic/gin"
+)
+
+// Register serves the swagger asset files at /v1/swagger, reading them from SWAGGER_ASSETS_DIR or ./docs/openapi when unset.
+func Register(router *gin.Engine) {
+	swaggerRoot := filepath.Join(".", "docs", "openapi")
+	if override := os.Getenv("SWAGGER_ASSETS_DIR"); override != "" {
+		swaggerRoot = override
+	}
+	router.StaticFS("/v1/swagger", gin.Dir(swaggerRoot, false))
+}
diff --git a/services/mcp-tools/Dockerfile b/services/mcp-tools/Dockerfile
new file mode 100644
index 00000000..4eb4afb3
--- /dev/null
+++ b/services/mcp-tools/Dockerfile
@@ -0,0 +1,29 @@
+ARG GO_VERSION=1.25
+
+FROM golang:${GO_VERSION}-alpine AS builder
+
+WORKDIR /app
+
+# Copy go.mod before the sources so the download layer is reused until it changes. NOTE(review): go.sum is not copied — confirm intentional
+COPY go.mod ./
+RUN go mod download
+
+# Copy the rest of the build context into /app
+COPY . .
+
+# Build with CGO disabled for Linux. NOTE(review): this builds the package at /app, while a main package also exists under cmd/server — confirm the entry point
+RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o mcp-tools .
+
+# Runtime stage: plain Alpine plus CA certificates (presumably for outbound HTTPS calls — confirm)
+FROM alpine:latest
+
+RUN apk --no-cache add ca-certificates
+
+WORKDIR /app
+
+# Copy only the compiled binary from the builder stage
+COPY --from=builder /app/mcp-tools .
+
+EXPOSE 8091
+
+CMD ["./mcp-tools"]
diff --git a/services/mcp-tools/INTEGRATION.md b/services/mcp-tools/INTEGRATION.md
new file mode 100644
index 00000000..f1f5822a
--- /dev/null
+++ b/services/mcp-tools/INTEGRATION.md
@@ -0,0 +1,71 @@
+# MCP Tools Service - Integration Notes
+
+## Current State
+
+The MCP Tools service has been scaffolded with Clean Architecture following platform patterns. However, the `mcp-go` library version 0.7.0 has a different API than expected.
+
+## API Changes in mcp-go v0.7.0
+
+The current version of `github.com/mark3labs/mcp-go` (v0.7.0) has the following differences from platform's reference:
+
+1. **No `WithToolCapabilities` option** - Tools are registered directly, no capability declaration needed
+2. **No `WithRecovery` option** - Recovery middleware doesn't exist
+3. **`NewStreamableHTTPServer` has a different signature** - Call it as `NewStreamableHTTPServer(server, ...options)`
+4. **SSEServer doesn't implement http.Handler** - Has `.Start(addr)` method instead
+5. **CallToolRequest API changed** - No `RequireString`, `GetString` helper methods
+
+## Recommended Approach
+
+Based on mcp-go examples, there are two approaches:
+
+### Option 1: Standalone MCP Server (Recommended)
+
+Don't try to integrate MCP into Gin. Run a separate MCP server:
+
+```go
+func main() {
+    // Create MCP server
+    mcpServer := server.NewMCPServer("mcp-tools", "1.0.0")
+    
+    // Register tools
+    serperMCP.RegisterTools(mcpServer)
+    
+    // Start HTTP server
+    httpServer := server.NewStreamableHTTPServer(mcpServer)
+    httpServer.Start(":8091")
+}
+```
+
+### Option 2: Use stdio transport
+
+For simpler integration with llm-api:
+
+```go
+func main() {
+    mcpServer := server.NewMCPServer("mcp-tools", "1.0.0")
+    serperMCP.RegisterTools(mcpServer)
+    server.ServeStdio(mcpServer)
+}
+```
+
+## Files to Fix
+
+1. **`interfaces/httpserver/routes/serper_mcp.go`**
+   - Fix `CallToolRequest` parameter extraction
+   - Use `request.Params.Arguments` map directly
+   - No helper methods like `RequireString` available
+
+2. **`interfaces/httpserver/routes/mcp_route.go`**
+   - Simplify to just create and start MCP server
+   - Don't try to integrate into Gin router
+
+3. **`main.go`**
+   - Choose between HTTP or stdio transport
+   - Don't mix Gin and MCP servers
+
+## Next Steps
+
+1. Decide on transport: HTTP or stdio
+2. Rewrite tool handlers to use `Params.Arguments` map
+3. Simplify main.go to use one of the recommended approaches
+4. Test with MCP client (stdio or HTTP)
diff --git a/services/mcp-tools/README.md b/services/mcp-tools/README.md
new file mode 100644
index 00000000..31114e0a
--- /dev/null
+++ b/services/mcp-tools/README.md
@@ -0,0 +1,249 @@
+# MCP Tools Service
+
+A standalone **Model Context Protocol (MCP)** service that provides AI models with access to external tools and capabilities.
+
+## Features
+
+- **MCP Protocol Support** - Full implementation of the Model Context Protocol
+- **Web Search** - Pluggable engines (Serper, SearXNG, DuckDuckGo fallback) with offline and domain filters
+- **Web Scraping** - Extract content from any webpage with structured metadata
+- **File Search Tools** - Lightweight vector store (index + query) for MCP automations
+- **Code Interpreter** - SandboxFusion-backed python_exec tool
+- **Standalone Service** - Can run independently or with jan-server
+- **Clean Architecture** - Domain/Infrastructure/Interfaces layers
+
+## Architecture
+
+```
+services/mcp-tools/
++-- domain/            # Business logic (transport-agnostic)
+|   +-- search/        # Search service interfaces and types
++-- infrastructure/    # External systems integration
+|   +-- config/        # Configuration management
+|   +-- logger/        # Logging setup
+|   +-- search/        # Serper, SearXNG, fallback clients
++-- interfaces/        # Delivery mechanisms
+|   +-- httpserver/
+|       +-- middlewares/
+|       +-- routes/    # MCP route handlers
++-- utils/
+    +-- mcp/          # MCP helper functions
+
+services/mcp-tools/
++-- domain/           # Business logic (transport-agnostic)
+|   +-- serper/       # Serper service interfaces and types
++-- infrastructure/   # External systems integration
+|   +-- config/       # Configuration management
+|   +-- logger/       # Logging setup
+|   +-- serper/       # Serper API client implementation
++-- interfaces/       # Delivery mechanisms
+|   +-- httpserver/   # HTTP/MCP server
+|       +-- middlewares/
+|       +-- routes/   # MCP route handlers
++-- utils/            # Utilities
+    +-- mcp/          # MCP helper functions
+
+```
+
+## Available Tools
+
+### 1. google_search
+Perform web searches via the configured engine (Serper, SearXNG, or DuckDuckGo fallback) and emit structured citations.
+
+**Arguments:**
+- `q` (required): Search query string
+- `gl` (optional): Region code (ISO 3166-1 alpha-2, e.g., 'us')
+- `hl` (optional): Language code (ISO 639-1, e.g., 'en')
+- `location` (optional): Location for results
+- `num` (optional): Number of results (default: 10)
+- `tbs` (optional): Time-based filter ('qdr:h', 'qdr:d', 'qdr:w', 'qdr:m', 'qdr:y')
+- `page` (optional): Page number (default: 1)
+- `autocorrect` (optional): Enable autocorrect (default: true)
+- `domain_allow_list` (optional): Array of domains to scope the query to (`["example.com","wikipedia.org"]`)
+- `location_hint` (optional): Soft hint when upstream engines support region-aware ranking
+- `offline_mode` (optional): Force cached/offline behaviour even if live engines are available
+
+**Output:**
+- JSON payload containing `results` blocks with `{ source_url, snippet, fetched_at, cache_status }`, plus a `citations` array and the raw upstream response for backward compatibility.
+
+### 2. scrape
+Scrape webpage content with metadata describing cache/fallback state.
+
+**Arguments:**
+- `url` (required): The URL to scrape
+- `includeMarkdown` (optional): Return markdown format (default: false)
+
+**Output:**
+- JSON payload containing raw text, a `text_preview`, `cache_status`, and metadata describing whether the fallback fetcher was used.
+
+### 3. file_search_index
+Index arbitrary text into the lightweight vector store so that automations can cite custom documents.
+
+**Arguments:**
+- `document_id` (required): Stable identifier for the document
+- `text` (required): Raw text body
+- `metadata` (optional): Object that will be echoed back with search results
+- `tags` (optional): Array of simple tags (e.g., `["support","guide"]`)
+
+### 4. file_search_query
+Query the vector store for the closest documents and receive citation-ready payloads.
+
+**Arguments:**
+- `query` (required): Natural language query
+- `top_k` (optional): Number of hits to return (default 5, max 20)
+- `document_ids` (optional): Restrict search to a subset of documents
+
+### 5. python_exec
+Execute trusted code inside SandboxFusion when a containerized interpreter is required.
+
+**Arguments:**
+- `code` (required): Script to execute
+- `language` (optional): Defaults to python
+- `session_id` (optional): Continue an existing SandboxFusion session
+- `approved` (optional): Must be `true` when `SANDBOX_FUSION_REQUIRE_APPROVAL` is enabled
+
+**Output:**
+- JSON payload containing `stdout`, `stderr`, `duration_ms`, `session_id`, and any downloadable artifacts surfaced by SandboxFusion.
+
+## Environment Variables
+
+```env
+HTTP_PORT=8091                    # HTTP server port
+LOG_LEVEL=info                    # Log level (debug, info, warn, error)
+LOG_FORMAT=json                   # Log format (json, console)
+SERPER_API_KEY=your_api_key_here  # Serper API key (required)
+SEARCH_ENGINE=serper              # serper or searxng
+SEARXNG_URL=http://localhost:8086 # SearXNG base URL when SEARCH_ENGINE=searxng
+SERPER_DOMAIN_FILTER=             # Optional CSV of domains to pin (e.g., example.com,wikipedia.org)
+SERPER_LOCATION_HINT=             # Optional default location hint (e.g., California, United States)
+SERPER_OFFLINE_MODE=false         # Force cached/offline search mode
+VECTOR_STORE_URL=http://localhost:3015 # Base URL for the internal vector store service
+SANDBOX_FUSION_URL=http://localhost:3010 # SandboxFusion container service
+SANDBOX_FUSION_REQUIRE_APPROVAL=false   # Gate python_exec until manually approved
+MCP_ENABLE_PYTHON_EXEC=true       # Set false to remove python_exec from tool list
+MCP_ENABLE_MEMORY_RETRIEVE=true   # Set false to remove memory_retrieve from tool list
+MEMORY_TOOLS_URL=http://localhost:8090  # Memory tools service URL for memory_retrieve
+```
+
+## Quick Start
+
+### Local Development
+
+```bash
+cd services/mcp-tools
+
+# Install dependencies
+go mod tidy
+
+# Run the service
+go run .
+```
+
+### Docker
+
+```bash
+# Build
+docker build -t mcp-tools:latest .
+
+# Run
+docker run -p 8091:8091 \
+  -e SERPER_API_KEY=your_api_key \
+  mcp-tools:latest
+```
+
+## Usage
+
+### Health Check
+
+```bash
+curl http://localhost:8091/healthz
+```
+
+### MCP Request
+
+```bash
+curl -X POST http://localhost:8091/v1/mcp \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "id": 1,
+    "method": "tools/call",
+    "params": {
+      "name": "google_search",
+      "arguments": {
+        "q": "Model Context Protocol",
+        "num": 5
+      }
+    }
+  }'
+```
+
+## Integration with LLM API
+
+The MCP Tools service can be integrated with the llm-api service to provide tool-calling capabilities to LLM conversations.
+
+### Add to docker-compose.yml
+
+All MCP infrastructure services are defined in `docker/services-mcp.yml` and automatically included in the main `docker-compose.yml`.
+
+```yaml
+mcp-tools:
+  build: ./services/mcp-tools
+  restart: unless-stopped
+  environment:
+    HTTP_PORT: 8091
+    SERPER_API_KEY: ${SERPER_API_KEY}
+    LOG_LEVEL: info
+    LOG_FORMAT: json
+  ports:
+    - "8091:8091"
+  healthcheck:
+    test: ["CMD", "wget", "--spider", "-q", "http://localhost:8091/healthz"]
+    interval: 10s
+    timeout: 5s
+    retries: 5
+```
+
+## MCP Protocol
+
+This service implements the [Model Context Protocol](https://modelcontextprotocol.io/), allowing AI models to:
+
+1. **Discover Tools** - List available capabilities
+2. **Call Tools** - Execute external functions
+3. **Stream Results** - Receive real-time responses
+
+Supported MCP methods:
+- `initialize` - Handshake
+- `tools/list` - List available tools
+- `tools/call` - Execute a tool
+- `ping` - Health check
+
+## Development
+
+### Project Structure Follows Platform Conventions
+
+- **Clean Architecture** - Domain -> Infrastructure -> Interfaces
+- **No HTTP in Domain** - Business logic is transport-agnostic
+- **Dependency Injection** - All dependencies injected
+- **Error Handling** - Structured error responses
+
+### Adding New Tools
+
+1. Define tool arguments in `interfaces/httpserver/routes/serper_mcp.go`
+2. Add domain method in `domain/search/service.go`
+3. Implement infrastructure in `infrastructure/search/client.go`
+4. Register tool in `RegisterTools()` method
+
+## Testing
+
+```bash
+# Run tests
+go test ./...
+
+# Run with coverage
+go test -cover ./...
+```
+
+## License
+
+Part of the jan-server project.
diff --git a/services/mcp-tools/cmd/server/server.go b/services/mcp-tools/cmd/server/server.go
new file mode 100644
index 00000000..694dfcdc
--- /dev/null
+++ b/services/mcp-tools/cmd/server/server.go
@@ -0,0 +1,68 @@
+package main
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/rs/zerolog/log"
+
+	"jan-server/services/mcp-tools/internal/infrastructure/config"
+	"jan-server/services/mcp-tools/internal/infrastructure/logger"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/routes/mcp"
+)
+
+type Application struct { // Application bundles the components Start uses; CreateApplication populates it
+	httpServer  *httpserver.HTTPServer // HTTP server that Start runs
+	providerMCP *mcp.ProviderMCP       // MCP providers that Start initializes before running the server
+}
+
+func init() {
+	// Bootstrap the logger with "info" level and "json" format; main calls logger.Init again with the loaded config values
+	logger.Init("info", "json")
+}
+
+// @title Jan Server MCP Tools Service
+// @version 1.0
+// @description Model Context Protocol (MCP) tools service providing search and scraping capabilities.
+// @contact.name Jan Server Team
+// @contact.url https://github.com/janhq/jan-server
+// @BasePath /
+func (app *Application) Start(ctx context.Context) error {
+	// Initialize MCP providers; a failure here is only logged and does not stop the server from starting
+	if err := app.providerMCP.Initialize(ctx); err != nil {
+		log.Error().Err(err).Msg("Failed to initialize MCP providers")
+	}
+
+	// NOTE(review): the logged address uses the hard-coded literal "3014", not the configured port (main logs cfg.HTTPPort) — confirm what Run binds
+	log.Info().Str("address", fmt.Sprintf(":%s", "3014")).Msg("Server listening")
+	return app.httpServer.Run()
+}
+
+func main() {
+	rootCtx := context.Background()
+
+	// Configuration comes first; without it the process cannot continue.
+	cfg, cfgErr := config.LoadConfig()
+	if cfgErr != nil {
+		log.Fatal().Err(cfgErr).Msg("Failed to load config")
+	}
+
+	// Swap the bootstrap logger from init() for one using the configured level and format.
+	logger.Init(cfg.LogLevel, cfg.LogFormat)
+	startupEvent := log.Info()
+	startupEvent = startupEvent.Str("http_port", cfg.HTTPPort)
+	startupEvent = startupEvent.Str("log_level", cfg.LogLevel)
+	startupEvent.Msg("Starting MCP Tools service")
+
+	// Build the application through the Wire-generated injector.
+	app, buildErr := CreateApplication(rootCtx)
+	if buildErr != nil {
+		log.Fatal().Err(buildErr).Msg("Failed to create application")
+	}
+
+	// Run the application; an error returned by Start is fatal.
+	if runErr := app.Start(rootCtx); runErr != nil {
+		log.Fatal().Err(runErr).Msg("Failed to start server")
+	}
+}
diff --git a/services/mcp-tools/cmd/server/wire.go b/services/mcp-tools/cmd/server/wire.go
new file mode 100644
index 00000000..7f5c25de
--- /dev/null
+++ b/services/mcp-tools/cmd/server/wire.go
@@ -0,0 +1,25 @@
+//go:build wireinject
+
+package main
+
+import (
+	"context"
+
+	"github.com/google/wire"
+
+	"jan-server/services/mcp-tools/internal/domain"
+	"jan-server/services/mcp-tools/internal/infrastructure"
+	"jan-server/services/mcp-tools/internal/interfaces"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/routes"
+)
+
+func CreateApplication(ctx context.Context) (*Application, error) { // Wire injector template; compiled only under the wireinject build tag
+	wire.Build(
+		domain.DomainProvider,
+		infrastructure.InfrastructureProvider,
+		routes.RoutesProvider,
+		interfaces.InterfacesProvider,
+		wire.Struct(new(Application), "*"), // "*" tells Wire to inject every field of Application
+	)
+	return nil, nil // placeholder; normal builds use the generated CreateApplication in wire_gen.go
+}
diff --git a/services/mcp-tools/cmd/server/wire_gen.go b/services/mcp-tools/cmd/server/wire_gen.go
new file mode 100644
index 00000000..768ccf90
--- /dev/null
+++ b/services/mcp-tools/cmd/server/wire_gen.go
@@ -0,0 +1,45 @@
+// Code generated by Wire. DO NOT EDIT.
+
+//go:generate go run -mod=mod github.com/google/wire/cmd/wire
+//go:build !wireinject
+// +build !wireinject
+
+package main
+
+import (
+	"context"
+	"jan-server/services/mcp-tools/internal/domain/search"
+	"jan-server/services/mcp-tools/internal/infrastructure"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/routes"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/routes/mcp"
+)
+
+// Injectors from wire.go:
+
+func CreateApplication(ctx context.Context) (*Application, error) {
+	config, err := infrastructure.ProvideConfig()
+	if err != nil {
+		return nil, err
+	}
+	searchClient := infrastructure.ProvideSearchClient(config)
+	searchService := search.NewSearchService(searchClient)
+	client := infrastructure.ProvideVectorStoreClient(config)
+	serperMCP := mcp.NewSerperMCP(searchService, client)
+	mcpproviderConfig := infrastructure.ProvideMCPProviderConfig()
+	providerMCP := mcp.NewProviderMCP(mcpproviderConfig)
+	sandboxfusionClient := infrastructure.ProvideSandboxFusionClient(config)
+	sandboxFusionMCP := routes.ProvideSandboxFusionMCP(sandboxfusionClient, config)
+	memoryMCP := routes.ProvideMemoryMCP(config)
+	mcpRoute := mcp.NewMCPRoute(serperMCP, providerMCP, sandboxFusionMCP, memoryMCP)
+	validator, err := infrastructure.ProvideAuthValidator(ctx, config)
+	if err != nil {
+		return nil, err
+	}
+	httpServer := httpserver.NewHTTPServer(config, mcpRoute, validator)
+	application := &Application{
+		httpServer:  httpServer,
+		providerMCP: providerMCP,
+	}
+	return application, nil
+}
diff --git a/services/mcp-tools/configs/mcp-providers.yml b/services/mcp-tools/configs/mcp-providers.yml
new file mode 100644
index 00000000..9b02b0b5
--- /dev/null
+++ b/services/mcp-tools/configs/mcp-providers.yml
@@ -0,0 +1,43 @@
+# MCP Provider Configuration
+# This file defines external MCP servers that mcp-tools will bridge to
+# Each provider exposes tools that will be registered in the unified MCP server
+
+providers:
+  # SearXNG - Meta Search Engine
+  # - name: searxng
+  #   description: Meta search engine for privacy-focused web searches
+  #   enabled: true
+  #   endpoint: http://searxng:8080
+  #   type: http
+  #   # SearXNG doesn't directly expose MCP protocol but we can create wrapper tools
+  #   # This is a placeholder - actual implementation would need SearXNG API integration
+  #   tools:
+  #     - name: searxng_search
+  #       description: Search the web using SearXNG meta search engine
+  #       enabled: false  # Disabled until we implement SearXNG API wrapper
+
+  # Vector Store Service - used by MCP file_search tools
+  - name: vector-store
+    description: Lightweight in-memory vector store backing MCP file_search tools
+    enabled: false
+    endpoint: ${VECTOR_STORE_URL}
+    type: http
+
+  # SandboxFusion MCP placeholder (direct integration handled via python_exec)
+  # - name: sandbox-fusion
+  #   description: SandboxFusion python interpreter service
+  #   enabled: false
+  #   endpoint: ${SANDBOX_FUSION_URL}
+  #   type: http
+
+# Global settings
+settings:
+  # Maximum timeout for any MCP provider call
+  max_timeout: 120s
+  
+  # Enable detailed logging of provider communications
+  debug_logging: ${MCP_PROVIDER_DEBUG:-false}
+  
+  # Retry configuration
+  retry_attempts: 2
+  retry_delay: 1s
diff --git a/services/mcp-tools/docs/swagger/docs.go b/services/mcp-tools/docs/swagger/docs.go
new file mode 100644
index 00000000..f23dc5fe
--- /dev/null
+++ b/services/mcp-tools/docs/swagger/docs.go
@@ -0,0 +1,102 @@
+// Code generated by swaggo/swag. DO NOT EDIT.
+
+package swagger
+
+import "github.com/swaggo/swag"
+
+const docTemplate = `{
+    "schemes": {{ marshal .Schemes }},
+    "swagger": "2.0",
+    "info": {
+        "description": "{{escape .Description}}",
+        "title": "{{.Title}}",
+        "contact": {
+            "name": "Jan Server Team",
+            "url": "https://github.com/janhq/jan-server"
+        },
+        "version": "{{.Version}}"
+    },
+    "host": "{{.Host}}",
+    "basePath": "{{.BasePath}}",
+    "paths": {
+        "/v1/mcp": {
+            "post": {
+                "description": "Handles Model Context Protocol (MCP) requests over HTTP. Supports MCP methods: initialize, ping, tools/list, tools/call, prompts/list, prompts/call, resources/list, resources/read.\n\n**Available Tools:**\n- ` + "`" + `google_search` + "`" + `: Web search via pluggable engines (Serper/SearXNG/duckduckgo) with params: q, gl, hl, location, num, tbs, page, autocorrect, domain_allow_list, location_hint, offline_mode. Returns structured citations.\n- ` + "`" + `scrape` + "`" + `: Web page scraping (params: url, includeMarkdown) returning text, preview, cache_status, and metadata.\n- ` + "`" + `file_search_index` + "`" + ` / ` + "`" + `file_search_query` + "`" + `: Index arbitrary text and run similarity queries against the lightweight vector store.\n- ` + "`" + `python_exec` + "`" + `: Execute trusted code through SandboxFusion (params: code, language, session_id, approved) to retrieve stdout/stderr/artifacts.\n\n**MCP Protocol:**\n- Request format: JSON-RPC 2.0 with method and params\n- Response format: Server-Sent Events (SSE) stream\n- Stateless mode (no session management)",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "text/event-stream"
+                ],
+                "tags": [
+                    "MCP API"
+                ],
+                "summary": "MCP endpoint for tool execution",
+                "parameters": [
+                    {
+                        "description": "MCP JSON-RPC 2.0 request payload with method and params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "type": "object"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Streamed MCP response in SSE format",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid MCP request payload or unsupported method",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "responses.ErrorResponse": {
+            "type": "object",
+            "properties": {
+                "code": {
+                    "description": "UUID from PlatformError",
+                    "type": "string"
+                },
+                "error": {
+                    "type": "string"
+                },
+                "request_id": {
+                    "type": "string"
+                }
+            }
+        }
+    }
+}`
+
+// SwaggerInfo is the exported swag.Spec for the MCP Tools service; it is a package-level pointer so clients can modify its fields.
+var SwaggerInfo = &swag.Spec{
+	Version:          "1.0",
+	Host:             "",
+	BasePath:         "/",
+	Schemes:          []string{},
+	Title:            "Jan Server MCP Tools Service",
+	Description:      "Model Context Protocol (MCP) tools service providing search and scraping capabilities.",
+	InfoInstanceName: "swagger",
+	SwaggerTemplate:  docTemplate,
+}
+
+// init registers SwaggerInfo with the swag registry under its instance name
+// when the package is loaded.
+func init() { swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo) }
diff --git a/services/mcp-tools/docs/swagger/swagger.json b/services/mcp-tools/docs/swagger/swagger.json
new file mode 100644
index 00000000..7129ed69
--- /dev/null
+++ b/services/mcp-tools/docs/swagger/swagger.json
@@ -0,0 +1,78 @@
+{
+    "swagger": "2.0",
+    "info": {
+        "description": "Model Context Protocol (MCP) tools service providing search and scraping capabilities.",
+        "title": "Jan Server MCP Tools Service",
+        "contact": {
+            "name": "Jan Server Team",
+            "url": "https://github.com/janhq/jan-server"
+        },
+        "version": "1.0"
+    },
+    "basePath": "/",
+    "paths": {
+        "/v1/mcp": {
+            "post": {
+                "description": "Handles Model Context Protocol (MCP) requests over HTTP. Supports MCP methods: initialize, ping, tools/list, tools/call, prompts/list, prompts/call, resources/list, resources/read.\n\n**Available Tools:**\n- `google_search`: Web search via pluggable engines (Serper/SearXNG/duckduckgo) with params: q, gl, hl, location, num, tbs, page, autocorrect, domain_allow_list, location_hint, offline_mode. Returns structured citations.\n- `scrape`: Web page scraping (params: url, includeMarkdown) returning text, preview, cache_status, and metadata.\n- `file_search_index` / `file_search_query`: Index arbitrary text and run similarity queries against the lightweight vector store.\n- `python_exec`: Execute trusted code through SandboxFusion (params: code, language, session_id, approved) to retrieve stdout/stderr/artifacts.\n\n**MCP Protocol:**\n- Request format: JSON-RPC 2.0 with method and params\n- Response format: Server-Sent Events (SSE) stream\n- Stateless mode (no session management)",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "text/event-stream"
+                ],
+                "tags": [
+                    "MCP API"
+                ],
+                "summary": "MCP endpoint for tool execution",
+                "parameters": [
+                    {
+                        "description": "MCP JSON-RPC 2.0 request payload with method and params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "type": "object"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Streamed MCP response in SSE format",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid MCP request payload or unsupported method",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal server error",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ErrorResponse"
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "responses.ErrorResponse": {
+            "type": "object",
+            "properties": {
+                "code": {
+                    "description": "UUID from PlatformError",
+                    "type": "string"
+                },
+                "error": {
+                    "type": "string"
+                },
+                "request_id": {
+                    "type": "string"
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/services/mcp-tools/docs/swagger/swagger.yaml b/services/mcp-tools/docs/swagger/swagger.yaml
new file mode 100644
index 00000000..b1c0c975
--- /dev/null
+++ b/services/mcp-tools/docs/swagger/swagger.yaml
@@ -0,0 +1,64 @@
+basePath: /
+definitions:
+  responses.ErrorResponse:
+    properties:
+      code:
+        description: UUID from PlatformError
+        type: string
+      error:
+        type: string
+      request_id:
+        type: string
+    type: object
+info:
+  contact:
+    name: Jan Server Team
+    url: https://github.com/janhq/jan-server
+  description: Model Context Protocol (MCP) tools service providing search and scraping
+    capabilities.
+  title: Jan Server MCP Tools Service
+  version: "1.0"
+paths:
+  /v1/mcp:
+    post:
+      consumes:
+      - application/json
+      description: |-
+        Handles Model Context Protocol (MCP) requests over HTTP. Supports MCP methods: initialize, ping, tools/list, tools/call, prompts/list, prompts/call, resources/list, resources/read.
+
+        **Available Tools:**
+        - `google_search`: Web search via pluggable engines (Serper/SearXNG/duckduckgo) with params: q, gl, hl, location, num, tbs, page, autocorrect, domain_allow_list, location_hint, offline_mode. Returns structured citations.
+        - `scrape`: Web page scraping (params: url, includeMarkdown) returning text, preview, cache_status, and metadata.
+        - `file_search_index` / `file_search_query`: Index arbitrary text and run similarity queries against the lightweight vector store.
+        - `python_exec`: Execute trusted code through SandboxFusion (params: code, language, session_id, approved) to retrieve stdout/stderr/artifacts.
+
+        **MCP Protocol:**
+        - Request format: JSON-RPC 2.0 with method and params
+        - Response format: Server-Sent Events (SSE) stream
+        - Stateless mode (no session management)
+      parameters:
+      - description: MCP JSON-RPC 2.0 request payload with method and params
+        in: body
+        name: request
+        required: true
+        schema:
+          type: object
+      produces:
+      - text/event-stream
+      responses:
+        "200":
+          description: Streamed MCP response in SSE format
+          schema:
+            type: string
+        "400":
+          description: Invalid MCP request payload or unsupported method
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+        "500":
+          description: Internal server error
+          schema:
+            $ref: '#/definitions/responses.ErrorResponse'
+      summary: MCP endpoint for tool execution
+      tags:
+      - MCP API
+swagger: "2.0"
diff --git a/services/mcp-tools/go.mod b/services/mcp-tools/go.mod
new file mode 100644
index 00000000..4357cdb6
--- /dev/null
+++ b/services/mcp-tools/go.mod
@@ -0,0 +1,72 @@
+module jan-server/services/mcp-tools
+
+go 1.25.0
+
+require (
+	github.com/MicahParks/keyfunc/v2 v2.1.0
+	github.com/caarlos0/env/v11 v11.0.0
+	github.com/gin-gonic/gin v1.10.0
+	github.com/go-resty/resty/v2 v2.11.0
+	github.com/golang-jwt/jwt/v5 v5.3.0
+	github.com/google/wire v0.7.0
+	github.com/mark3labs/mcp-go v0.43.0
+	github.com/rs/zerolog v1.33.0
+	github.com/swaggo/swag v1.16.6
+	golang.org/x/net v0.47.0
+	gopkg.in/yaml.v3 v3.0.1
+)
+
+require (
+	github.com/KyleBanks/depth v1.2.1 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
+	github.com/bahlo/generic-list-go v0.2.0 // indirect
+	github.com/buger/jsonparser v1.1.1 // indirect
+	github.com/bytedance/sonic v1.11.6 // indirect
+	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/cloudwego/base64x v0.1.4 // indirect
+	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d // indirect
+	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
+	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/go-openapi/jsonpointer v0.19.5 // indirect
+	github.com/go-openapi/jsonreference v0.19.6 // indirect
+	github.com/go-openapi/spec v0.20.4 // indirect
+	github.com/go-openapi/swag v0.19.15 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.20.0 // indirect
+	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/google/subcommands v1.2.0 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/invopop/jsonschema v0.13.0 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
+	github.com/russross/blackfriday/v2 v2.0.1 // indirect
+	github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
+	github.com/spf13/cast v1.7.1 // indirect
+	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
+	github.com/ugorji/go/codec v1.2.12 // indirect
+	github.com/urfave/cli/v2 v2.3.0 // indirect
+	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
+	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
+	golang.org/x/arch v0.8.0 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/mod v0.29.0 // indirect
+	golang.org/x/sync v0.18.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	golang.org/x/tools v0.38.0 // indirect
+	google.golang.org/protobuf v1.34.1 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	sigs.k8s.io/yaml v1.3.0 // indirect
+)
diff --git a/services/mcp-tools/go.sum b/services/mcp-tools/go.sum
new file mode 100644
index 00000000..7433bc71
--- /dev/null
+++ b/services/mcp-tools/go.sum
@@ -0,0 +1,234 @@
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/MicahParks/keyfunc/v2 v2.1.0 h1:6ZXKb9Rp6qp1bDbJefnG7cTH8yMN1IC/4nf+GVjO99k=
+github.com/MicahParks/keyfunc/v2 v2.1.0/go.mod h1:rW42fi+xgLJ2FRRXAfNx9ZA8WpD4OeE/yHVMteCkw9k=
+github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
+github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
+github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
+github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
+github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
+github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
+github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
+github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
+github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
+github.com/caarlos0/env/v11 v11.0.0 h1:ZIlkOjuL3xoZS0kmUJlF74j2Qj8GMOq3CDLX/Viak8Q=
+github.com/caarlos0/env/v11 v11.0.0/go.mod h1:2RC3HQu8BQqtEK3V4iHPxj0jOdWdbPpWJ6pOueeU1xM=
+github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
+github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
+github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
+github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
+github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
+github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
+github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
+github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
+github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
+github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
+github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
+github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
+github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
+github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
+github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
+github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
+github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
+github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
+github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
+github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/go-resty/resty/v2 v2.11.0 h1:i7jMfNOJYMp69lq7qozJP+bjgzfAzeOhuGlyDrqxT/8=
+github.com/go-resty/resty/v2 v2.11.0/go.mod h1:iiP/OpA0CkcL3IGt1O0+/SIItFUbkkyw5BGXiVdTu+A=
+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
+github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
+github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/subcommands v1.2.0 h1:vWQspBTo2nEqTUFita5/KeEWlUL8kQObDFbub/EN9oE=
+github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
+github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
+github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
+github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mark3labs/mcp-go v0.43.0 h1:lgiKcWMddh4sngbU+hoWOZ9iAe/qp/m851RQpj3Y7jA=
+github.com/mark3labs/mcp-go v0.43.0/go.mod h1:YnJfOL382MIWDx1kMY+2zsRHU/q78dBg9aFb8W6Thdw=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
+github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
+github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
+github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
+github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
+github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
+github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
+github.com/spf13/cast v1.7.1 h1:cuNEagBQEHWN1FnbGEjCXL2szYEXqfJPbP2HNUaca9Y=
+github.com/spf13/cast v1.7.1/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
+github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
+github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/urfave/cli/v2 v2.3.0 h1:qph92Y649prgesehzOrQjdWyxFOp/QVM+6imKHad91M=
+github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
+github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
+github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
+github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
+github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
+golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
+golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
+google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
+sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
+sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8=
diff --git a/services/mcp-tools/internal/domain/provider.go b/services/mcp-tools/internal/domain/provider.go
new file mode 100644
index 00000000..ce487885
--- /dev/null
+++ b/services/mcp-tools/internal/domain/provider.go
@@ -0,0 +1,12 @@
+package domain
+
+import (
+	"github.com/google/wire"
+
+	domainsearch "jan-server/services/mcp-tools/internal/domain/search"
+)
+
+// DomainProvider is the wire provider set for the domain layer; it currently registers only the search service constructor.
+var DomainProvider = wire.NewSet(
+	domainsearch.NewSearchService, // builds *SearchService from an injected SearchClient
+)
diff --git a/services/mcp-tools/internal/domain/search/service.go b/services/mcp-tools/internal/domain/search/service.go
new file mode 100644
index 00000000..1cb4f6b5
--- /dev/null
+++ b/services/mcp-tools/internal/domain/search/service.go
@@ -0,0 +1,31 @@
+package search
+
+import "context"
+
+// SearchClient is the backend contract required by the domain layer; SearchService forwards each call to it unchanged.
+type SearchClient interface {
+	Search(ctx context.Context, query SearchRequest) (*SearchResponse, error)
+	FetchWebpage(ctx context.Context, query FetchWebpageRequest) (*FetchWebpageResponse, error)
+}
+
+// SearchService is a transport-agnostic facade that delegates search and page-fetch operations to a pluggable SearchClient.
+type SearchService struct {
+	client SearchClient // backend that executes every request
+}
+
+// NewSearchService returns a SearchService that delegates to client (stored as-is, no nil check).
+func NewSearchService(client SearchClient) *SearchService {
+	service := new(SearchService)
+	service.client = client
+	return service
+}
+
+// Search runs a web search by delegating to the configured SearchClient; the backend is whatever engine that client wraps.
+func (s *SearchService) Search(ctx context.Context, query SearchRequest) (*SearchResponse, error) {
+	return s.client.Search(ctx, query)
+}
+
+// FetchWebpage retrieves the content of a webpage by delegating to the configured SearchClient; result and error pass through unchanged.
+func (s *SearchService) FetchWebpage(ctx context.Context, query FetchWebpageRequest) (*FetchWebpageResponse, error) {
+	return s.client.FetchWebpage(ctx, query)
+}
diff --git a/services/mcp-tools/internal/domain/search/types.go b/services/mcp-tools/internal/domain/search/types.go
new file mode 100644
index 00000000..029fdf5c
--- /dev/null
+++ b/services/mcp-tools/internal/domain/search/types.go
@@ -0,0 +1,50 @@
+package search
+
+// TBSTimeRange is the value of the "tbs" (time-based search) filter carried in SearchRequest.TBS.
+type TBSTimeRange string
+
+const (
+	TBSAny       TBSTimeRange = ""      // empty value: no time filter
+	TBSPastHour  TBSTimeRange = "qdr:h" // results from the past hour
+	TBSPastDay   TBSTimeRange = "qdr:d" // results from the past day
+	TBSPastWeek  TBSTimeRange = "qdr:w" // results from the past week
+	TBSPastMonth TBSTimeRange = "qdr:m" // results from the past month
+	TBSPastYear  TBSTimeRange = "qdr:y" // results from the past year
+)
+
+// SearchRequest represents a search query. Apart from "q", every field is optional and is left out of the JSON when unset.
+type SearchRequest struct {
+	Q               string        `json:"q"`                           // Search query text
+	GL              *string       `json:"gl,omitempty"`                // Region code (ISO 3166-1 alpha-2)
+	HL              *string       `json:"hl,omitempty"`                // Language code (ISO 639-1)
+	Location        *string       `json:"location,omitempty"`          // Location for search results
+	LocationHint    *string       `json:"location_hint,omitempty"`     // Soft location preference (country/region/timezone)
+	Num             *int          `json:"num,omitempty"`               // Number of results (default: 10)
+	Page            *int          `json:"page,omitempty"`              // Page number (default: 1)
+	Autocorrect     *bool         `json:"autocorrect,omitempty"`       // Enable autocorrect
+	TBS             *TBSTimeRange `json:"tbs,omitempty"`               // Time-based search filter (see the TBS* constants)
+	DomainAllowList []string      `json:"domain_allow_list,omitempty"` // Restrict results to these domains
+	OfflineMode     *bool         `json:"offline_mode,omitempty"`      // Force cached/offline behaviour
+}
+
+// SearchResponse contains search results. Entries are kept as generic maps, so no per-result schema is enforced by this type.
+type SearchResponse struct {
+	SearchParameters map[string]any   `json:"searchParameters"`
+	Organic          []map[string]any `json:"organic"`
+	KnowledgeGraph   map[string]any   `json:"knowledgeGraph,omitempty"`
+	Images           []map[string]any `json:"images,omitempty"`
+	News             []map[string]any `json:"news,omitempty"`
+	AnswerBox        map[string]any   `json:"answerBox,omitempty"`
+}
+
+// FetchWebpageRequest represents a webpage scraping request
+type FetchWebpageRequest struct {
+	Url             string `json:"url"`                       // address of the page to fetch
+	IncludeMarkdown *bool  `json:"includeMarkdown,omitempty"` // optional; left out of the JSON when nil
+}
+
+// FetchWebpageResponse contains scraped webpage content
+type FetchWebpageResponse struct {
+	Text     string         `json:"text"`     // page content as text
+	Metadata map[string]any `json:"metadata"` // free-form metadata; keys are not fixed by this type
+}
diff --git a/services/mcp-tools/internal/infrastructure/auth/validator.go b/services/mcp-tools/internal/infrastructure/auth/validator.go
new file mode 100644
index 00000000..0faffdf5
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/auth/validator.go
@@ -0,0 +1,104 @@
+package auth
+
+import (
+	"context"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/MicahParks/keyfunc/v2"
+	"github.com/gin-gonic/gin"
+	"github.com/golang-jwt/jwt/v5"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/mcp-tools/internal/infrastructure/config"
+)
+
+type Validator struct {
+	cfg  *config.Config // auth settings: AuthEnabled, AuthIssuer, Account (audience), AuthJWKSURL
+	log  zerolog.Logger // logger supplied to NewValidator
+	jwks *keyfunc.JWKS  // key set used to verify signatures; nil when auth is disabled
+}
+
+// NewValidator builds a Validator. With auth disabled no key set is loaded; otherwise the JWKS is
+// fetched from cfg.AuthJWKSURL (refreshed hourly and on unknown key IDs) and fetch errors are returned.
+func NewValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*Validator, error) {
+	validator := &Validator{cfg: cfg, log: log}
+	if !cfg.AuthEnabled {
+		return validator, nil
+	}
+	onRefreshError := func(err error) {
+		log.Error().Err(err).Msg("jwks refresh error")
+	}
+	keySet, err := keyfunc.Get(cfg.AuthJWKSURL, keyfunc.Options{
+		Ctx:                 ctx,
+		RefreshInterval:     time.Hour,
+		RefreshUnknownKID:   true,
+		RefreshErrorHandler: onRefreshError,
+	})
+	if err != nil {
+		return nil, err
+	}
+	validator.jwks = keySet
+	return validator, nil
+}
+
+// Middleware returns a gin handler that enforces bearer-JWT authentication. It is a pass-through when
+// the validator is nil or auth is disabled; the health endpoints are always exempt.
+func (v *Validator) Middleware() gin.HandlerFunc {
+	if v == nil || !v.cfg.AuthEnabled {
+		return func(c *gin.Context) { c.Next() }
+	}
+	return func(c *gin.Context) {
+		// Health check endpoints must stay reachable without credentials.
+		switch c.Request.URL.Path {
+		case "/healthz", "/readyz", "/health/auth":
+			c.Next()
+			return
+		}
+
+		tokenString := bearerToken(c.GetHeader("Authorization"))
+		if tokenString == "" {
+			abortUnauthorized(c, "missing bearer token")
+			return
+		}
+
+		token, err := jwt.Parse(tokenString, v.jwks.Keyfunc,
+			jwt.WithAudience(v.cfg.Account),
+			jwt.WithIssuer(v.cfg.AuthIssuer),
+			jwt.WithValidMethods([]string{"RS256", "RS384", "RS512"}),
+		)
+		if err != nil || !token.Valid {
+			// Record the reason server-side; the client only ever sees a generic message.
+			v.log.Warn().Err(err).Str("path", c.Request.URL.Path).Msg("rejected bearer token")
+			abortUnauthorized(c, "invalid token")
+			return
+		}
+		c.Set("auth_token", token)
+		c.Next()
+	}
+}
+
+// Ready reports whether the validator can serve requests: always true when it is nil or auth is
+// disabled, otherwise true only when a JWKS has been loaded.
+func (v *Validator) Ready() bool {
+	authActive := v != nil && v.cfg.AuthEnabled
+	return !authActive || v.jwks != nil
+}
+
+// bearerToken extracts the credentials from an Authorization header of the form
+// "Bearer <token>". The scheme is matched case-insensitively; any other shape,
+// including an empty header, yields "".
+func bearerToken(header string) string {
+	scheme, credentials, found := strings.Cut(header, " ")
+	if !found || !strings.EqualFold(scheme, "Bearer") {
+		return ""
+	}
+	return strings.TrimSpace(credentials)
+}
+
+// abortUnauthorized stops the handler chain and replies 401 with a JSON body {"error": message}.
+func abortUnauthorized(c *gin.Context, message string) {
+	body := gin.H{"error": message}
+	c.AbortWithStatusJSON(http.StatusUnauthorized, body)
+}
diff --git a/services/mcp-tools/internal/infrastructure/config/config.go b/services/mcp-tools/internal/infrastructure/config/config.go
new file mode 100644
index 00000000..ae298e28
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/config/config.go
@@ -0,0 +1,60 @@
+package config
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/caarlos0/env/v11"
+)
+
+// Config holds all configuration for the MCP Tools service; it is populated from environment variables by LoadConfig.
+type Config struct {
+	// HTTP Server - using MCP_TOOLS_ prefix to avoid collisions
+	HTTPPort  string `env:"MCP_TOOLS_HTTP_PORT" envDefault:"8091"`
+	LogLevel  string `env:"MCP_TOOLS_LOG_LEVEL" envDefault:"info"`
+	LogFormat string `env:"MCP_TOOLS_LOG_FORMAT" envDefault:"json"` // json or console
+
+	// Search Configuration (handed to the search client by ProvideSearchClient)
+	SerperAPIKey       string   `env:"SERPER_API_KEY"`
+	SearchEngine       string   `env:"MCP_SEARCH_ENGINE" envDefault:"serper"`
+	SearxngURL         string   `env:"SEARXNG_URL" envDefault:"http://searxng:8080"`
+	SerperDomainFilter []string `env:"SERPER_DOMAIN_FILTER" envSeparator:","`
+	SerperLocationHint string   `env:"SERPER_LOCATION_HINT"`
+	SerperOfflineMode  bool     `env:"SERPER_OFFLINE_MODE" envDefault:"false"`
+
+	// External Services (base URLs)
+	VectorStoreURL   string `env:"VECTOR_STORE_URL" envDefault:"http://vector-store-mcp:3015"`
+	SandboxFusionURL string `env:"SANDBOXFUSION_URL" envDefault:"http://sandbox-fusion:8080"`
+	MemoryToolsURL   string `env:"MEMORY_TOOLS_URL" envDefault:"http://memory-tools:8090"`
+
+	// Sandbox Configuration
+	SandboxFusionRequireApproval bool `env:"MCP_SANDBOX_REQUIRE_APPROVAL" envDefault:"false"`
+	EnablePythonExec             bool `env:"MCP_ENABLE_PYTHON_EXEC" envDefault:"true"`
+	EnableMemoryRetrieve         bool `env:"MCP_ENABLE_MEMORY_RETRIEVE" envDefault:"true"`
+
+	// Authentication - the three string settings are mandatory when AuthEnabled is true (enforced by LoadConfig)
+	AuthEnabled bool   `env:"AUTH_ENABLED" envDefault:"false"`
+	AuthIssuer  string `env:"AUTH_ISSUER"`   // expected token issuer
+	Account     string `env:"ACCOUNT"`       // expected token audience
+	AuthJWKSURL string `env:"AUTH_JWKS_URL"` // JWKS endpoint used to verify signatures
+}
+
+// LoadConfig parses the environment into a Config; with AUTH_ENABLED it rejects a blank issuer, account or JWKS URL.
+func LoadConfig() (*Config, error) {
+	cfg := new(Config)
+	if err := env.Parse(cfg); err != nil {
+		return nil, err
+	}
+	if !cfg.AuthEnabled {
+		return cfg, nil
+	}
+	switch {
+	case strings.TrimSpace(cfg.AuthIssuer) == "":
+		return nil, fmt.Errorf("AUTH_ISSUER is required when AUTH_ENABLED is true")
+	case strings.TrimSpace(cfg.Account) == "":
+		return nil, fmt.Errorf("ACCOUNT is required when AUTH_ENABLED is true")
+	case strings.TrimSpace(cfg.AuthJWKSURL) == "":
+		return nil, fmt.Errorf("AUTH_JWKS_URL is required when AUTH_ENABLED is true")
+	}
+	return cfg, nil
+}
diff --git a/services/mcp-tools/internal/infrastructure/infrastructure_provider.go b/services/mcp-tools/internal/infrastructure/infrastructure_provider.go
new file mode 100644
index 00000000..51d03399
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/infrastructure_provider.go
@@ -0,0 +1,87 @@
+package infrastructure
+
+import (
+	"context"
+
+	"github.com/google/wire"
+	"github.com/rs/zerolog/log"
+
+	"jan-server/services/mcp-tools/internal/domain/search"
+	"jan-server/services/mcp-tools/internal/infrastructure/auth"
+	"jan-server/services/mcp-tools/internal/infrastructure/config"
+	"jan-server/services/mcp-tools/internal/infrastructure/mcpprovider"
+	sandboxfusionclient "jan-server/services/mcp-tools/internal/infrastructure/sandboxfusion"
+	searchclient "jan-server/services/mcp-tools/internal/infrastructure/search"
+	vectorstoreclient "jan-server/services/mcp-tools/internal/infrastructure/vectorstore"
+)
+
+// InfrastructureProvider is the wire provider set that exposes every infrastructure dependency of the service.
+var InfrastructureProvider = wire.NewSet(
+	// Configuration parsed from environment variables
+	ProvideConfig,
+
+	// Search client (returned as the domain's search.SearchClient interface)
+	ProvideSearchClient,
+
+	// Vector store client (nil when no URL is configured)
+	ProvideVectorStoreClient,
+
+	// Sandbox Fusion client (nil when no URL is configured)
+	ProvideSandboxFusionClient,
+
+	// MCP Provider config read from configs/mcp-providers.yml
+	ProvideMCPProviderConfig,
+
+	// Auth validator (JWT/JWKS)
+	ProvideAuthValidator,
+)
+
+// ProvideConfig loads the application configuration via config.LoadConfig and passes its result and error through unchanged.
+func ProvideConfig() (*config.Config, error) {
+	return config.LoadConfig()
+}
+
+// ProvideSearchClient builds the search client from the search-related Config fields and exposes it as search.SearchClient.
+func ProvideSearchClient(cfg *config.Config) search.SearchClient {
+	return searchclient.NewSearchClient(searchclient.ClientConfig{
+		Engine:        searchclient.Engine(cfg.SearchEngine),
+		SerperAPIKey:  cfg.SerperAPIKey,
+		SearxngURL:    cfg.SearxngURL,
+		DomainFilters: cfg.SerperDomainFilter,
+		LocationHint:  cfg.SerperLocationHint,
+		OfflineMode:   cfg.SerperOfflineMode,
+	})
+}
+
+// ProvideVectorStoreClient returns a vector store client for cfg.VectorStoreURL, or nil when that URL is empty.
+func ProvideVectorStoreClient(cfg *config.Config) *vectorstoreclient.Client {
+	if endpoint := cfg.VectorStoreURL; endpoint != "" {
+		return vectorstoreclient.NewClient(endpoint)
+	}
+	return nil
+}
+
+// ProvideSandboxFusionClient returns a Sandbox Fusion client for cfg.SandboxFusionURL, or nil when that URL is empty.
+func ProvideSandboxFusionClient(cfg *config.Config) *sandboxfusionclient.Client {
+	if endpoint := cfg.SandboxFusionURL; endpoint != "" {
+		return sandboxfusionclient.NewClient(endpoint)
+	}
+	return nil
+}
+
+// ProvideMCPProviderConfig loads configs/mcp-providers.yml, falling back to an empty config (no providers) on any error.
+func ProvideMCPProviderConfig() *mcpprovider.Config {
+	providerConfig, err := mcpprovider.LoadConfig("configs/mcp-providers.yml")
+	if err != nil {
+		// Missing file and malformed YAML both land here; log it so a broken config is not silent.
+		log.Warn().Err(err).Msg("MCP provider config not loaded; continuing with no providers")
+		return &mcpprovider.Config{}
+	}
+	return providerConfig
+}
+
+// ProvideAuthValidator constructs the auth validator for dependency injection.
+// It passes zerolog's global logger by value, so a later reassignment of log.Logger
+// is not seen by the validator.
+func ProvideAuthValidator(ctx context.Context, cfg *config.Config) (*auth.Validator, error) {
+	return auth.NewValidator(ctx, cfg, log.Logger)
+}
diff --git a/services/mcp-tools/internal/infrastructure/logger/logger.go b/services/mcp-tools/internal/infrastructure/logger/logger.go
new file mode 100644
index 00000000..e3bd30ee
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/logger/logger.go
@@ -0,0 +1,42 @@
+package logger
+
+import (
+	"os"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+// Init configures the global zerolog logger. level selects the global level (debug, info, warn or
+// error, case-insensitive; anything else falls back to info). format "console" selects
+// human-readable output; any other value selects JSON with timestamps. Both write to stdout.
+func Init(level string, format string) {
+	levels := map[string]zerolog.Level{
+		"debug": zerolog.DebugLevel,
+		"info":  zerolog.InfoLevel,
+		"warn":  zerolog.WarnLevel,
+		"error": zerolog.ErrorLevel,
+	}
+	selected, known := levels[strings.ToLower(level)]
+	if !known {
+		selected = zerolog.InfoLevel
+	}
+	zerolog.SetGlobalLevel(selected)
+
+	if strings.ToLower(format) != "console" {
+		log.Logger = zerolog.New(os.Stdout).With().Timestamp().Logger()
+		return
+	}
+	// Console output is derived from the current global logger via log.Output.
+	log.Logger = log.Output(zerolog.ConsoleWriter{
+		Out:        os.Stdout,
+		TimeFormat: time.RFC3339,
+	})
+}
+
+// Get returns the address of the package-level log.Logger, so the pointer reflects a later reassignment made by Init.
+func Get() *zerolog.Logger {
+	return &log.Logger
+}
diff --git a/services/mcp-tools/internal/infrastructure/mcpprovider/bridge.go b/services/mcp-tools/internal/infrastructure/mcpprovider/bridge.go
new file mode 100644
index 00000000..d92dcebc
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/mcpprovider/bridge.go
@@ -0,0 +1,284 @@
+package mcpprovider
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog/log"
+)
+
+// MCPRequest is the JSON-RPC request envelope sent to MCP providers.
+type MCPRequest struct {
+	JSONRPC string          `json:"jsonrpc"` // protocol version; this package always sends "2.0"
+	Method  string          `json:"method"`
+	Params  json.RawMessage `json:"params,omitempty"` // pre-encoded parameters; omitted when empty
+	ID      interface{}     `json:"id"`
+}
+
+// MCPResponse is the JSON-RPC response envelope decoded from an MCP provider's reply.
+type MCPResponse struct {
+	JSONRPC string          `json:"jsonrpc"`
+	Result  json.RawMessage `json:"result,omitempty"` // raw result, handed back to callers undecoded
+	Error   *MCPError       `json:"error,omitempty"`  // set when the provider reports a failure
+	ID      interface{}     `json:"id"`
+}
+
+// MCPError is the error object carried in MCPResponse.Error; the Bridge methods only surface its Message to callers.
+type MCPError struct {
+	Code    int         `json:"code"`
+	Message string      `json:"message"`
+	Data    interface{} `json:"data,omitempty"`
+}
+
+// Bridge handles communication with a single external MCP provider over HTTP.
+type Bridge struct {
+	provider   Provider     // provider definition (name, endpoint, timeout)
+	httpClient *http.Client // client whose timeout is provider.TimeoutDuration()
+	sessionID  string       // MCP session ID for stateful connections
+}
+
+// NewBridge creates a bridge for provider whose HTTP client uses the provider's timeout
+// (see Provider.TimeoutDuration). The session ID starts empty; Initialize may set it.
+func NewBridge(provider Provider) *Bridge {
+	client := new(http.Client)
+	client.Timeout = provider.TimeoutDuration()
+
+	bridge := new(Bridge)
+	bridge.provider = provider
+	bridge.httpClient = client
+	return bridge
+}
+
+// ListTools retrieves the list of tools from an MCP provider
+func (b *Bridge) ListTools(ctx context.Context) (json.RawMessage, error) {
+	req := MCPRequest{
+		JSONRPC: "2.0",
+		Method:  "tools/list",
+		ID:      1,
+	}
+
+	resp, err := b.sendRequest(ctx, req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to list tools from %s: %w", b.provider.Name, err)
+	}
+
+	if resp.Error != nil {
+		return nil, fmt.Errorf("MCP error from %s: %s", b.provider.Name, resp.Error.Message)
+	}
+
+	return resp.Result, nil
+}
+
+// CallTool invokes toolName on the provider through the JSON-RPC "tools/call" method and returns
+// the raw result. If the first attempt fails with what looks like a lost session (see
+// shouldReinitialize), the bridge re-runs Initialize and retries exactly once. Every error
+// returned, including one from the retry, is wrapped with the tool and provider names.
+func (b *Bridge) CallTool(ctx context.Context, toolName string, arguments map[string]interface{}) (json.RawMessage, error) {
+	call := func() (json.RawMessage, error) {
+		paramsJSON, err := json.Marshal(map[string]interface{}{
+			"name":      toolName,
+			"arguments": arguments,
+		})
+		if err != nil {
+			return nil, fmt.Errorf("failed to marshal tool call params: %w", err)
+		}
+
+		resp, err := b.sendRequest(ctx, MCPRequest{
+			JSONRPC: "2.0",
+			Method:  "tools/call",
+			Params:  paramsJSON,
+			ID:      time.Now().UnixNano(), // Use timestamp as unique ID
+		})
+		if err != nil {
+			return nil, err
+		}
+		if resp.Error != nil {
+			return nil, fmt.Errorf("MCP error calling %s on %s: %s", toolName, b.provider.Name, resp.Error.Message)
+		}
+		return resp.Result, nil
+	}
+
+	result, err := call()
+	if err == nil {
+		return result, nil
+	}
+
+	if b.shouldReinitialize(err) {
+		log.Warn().
+			Err(err).
+			Str("provider", b.provider.Name).
+			Msg("Provider session invalid, reinitializing MCP bridge")
+		if initErr := b.Initialize(ctx); initErr != nil {
+			// Keep reporting the original failure, but do not lose why recovery failed.
+			log.Error().Err(initErr).Str("provider", b.provider.Name).Msg("MCP bridge reinitialization failed")
+		} else if result, err = call(); err == nil {
+			return result, nil
+		}
+	}
+
+	return nil, fmt.Errorf("failed to call tool %s on %s: %w", toolName, b.provider.Name, err)
+}
+
+// shouldReinitialize reports whether err looks like a lost provider session, judged by its text:
+// a case-insensitive "session not found" or a literal "HTTP 404". A nil error never qualifies.
+func (b *Bridge) shouldReinitialize(err error) bool {
+	if err == nil {
+		return false
+	}
+
+	text := err.Error()
+	sessionMissing := strings.Contains(strings.ToLower(text), "session not found")
+	endpointMissing := strings.Contains(text, "HTTP 404")
+	return sessionMissing || endpointMissing
+}
+
+// Initialize performs the MCP "initialize" handshake (protocol version 2024-11-05) with the
+// provider. When the reply carries an mcp-session-id header, that ID is stored on the bridge
+// and attached to every later request.
+func (b *Bridge) Initialize(ctx context.Context) error {
+	// A handshake starts a fresh session: drop any stale ID so it is not sent with this request.
+	b.sessionID = ""
+
+	req := MCPRequest{
+		JSONRPC: "2.0",
+		Method:  "initialize",
+		Params:  json.RawMessage(`{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"mcp-tools","version":"1.0.0"}}`),
+		ID:      0,
+	}
+	resp, sessionID, err := b.sendRequestWithSession(ctx, req)
+	if err != nil {
+		return fmt.Errorf("failed to initialize %s: %w", b.provider.Name, err)
+	}
+	if resp.Error != nil {
+		return fmt.Errorf("MCP initialization error from %s: %s", b.provider.Name, resp.Error.Message)
+	}
+
+	// Store session ID for subsequent requests
+	if sessionID != "" {
+		b.sessionID = sessionID
+		log.Debug().
+			Str("provider", b.provider.Name).
+			Str("sessionID", sessionID).
+			Msg("Stored MCP session ID")
+	}
+
+	log.Info().
+		Str("provider", b.provider.Name).
+		Str("endpoint", b.provider.Endpoint).
+		Msg("MCP provider initialized successfully")
+	return nil
+}
+
+// sendRequest sends an MCP JSON-RPC request to the provider and returns only the decoded
+// response; the session ID reported by sendRequestWithSession is discarded.
+func (b *Bridge) sendRequest(ctx context.Context, mcpReq MCPRequest) (*MCPResponse, error) {
+	resp, _, err := b.sendRequestWithSession(ctx, mcpReq)
+	return resp, err
+}
+
+// sendRequestWithSession POSTs mcpReq to the provider endpoint as JSON and decodes the reply, which
+// may be plain JSON or a Server-Sent Events stream. It returns the decoded response together with
+// the mcp-session-id response header (empty when absent). Any status other than 200 is an error
+// of the form "HTTP <code>: <body>".
+func (b *Bridge) sendRequestWithSession(ctx context.Context, mcpReq MCPRequest) (*MCPResponse, string, error) {
+	bodyBytes, err := json.Marshal(mcpReq)
+	if err != nil {
+		return nil, "", fmt.Errorf("failed to marshal MCP request: %w", err)
+	}
+
+	httpReq, err := http.NewRequestWithContext(ctx, "POST", b.provider.Endpoint, bytes.NewReader(bodyBytes))
+	if err != nil {
+		return nil, "", fmt.Errorf("failed to create HTTP request: %w", err)
+	}
+
+	httpReq.Header.Set("Content-Type", "application/json")
+	// Support both JSON and SSE (Server-Sent Events) for MCP protocol
+	httpReq.Header.Set("Accept", "application/json, text/event-stream")
+	// Set Host header to localhost while preserving provider port for services with host restrictions
+	hostHeader := "localhost:3000"
+	if parsed, err := url.Parse(b.provider.Endpoint); err == nil {
+		if port := parsed.Port(); port != "" {
+			hostHeader = fmt.Sprintf("localhost:%s", port)
+		}
+	}
+	httpReq.Host = hostHeader
+
+	// Include session ID if we have one (for stateful MCP servers)
+	if b.sessionID != "" {
+		httpReq.Header.Set("mcp-session-id", b.sessionID)
+	}
+
+	log.Debug().
+		Str("provider", b.provider.Name).
+		Str("method", mcpReq.Method).
+		Str("endpoint", b.provider.Endpoint).
+		Str("sessionID", b.sessionID).
+		Msg("Sending MCP request to provider")
+
+	httpResp, err := b.httpClient.Do(httpReq)
+	if err != nil {
+		return nil, "", fmt.Errorf("HTTP request failed: %w", err)
+	}
+	defer httpResp.Body.Close()
+
+	// Extract session ID from response headers (for new sessions)
+	sessionID := httpResp.Header.Get("mcp-session-id")
+
+	if httpResp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(httpResp.Body) // best effort: the body only enriches the error text
+		return nil, sessionID, fmt.Errorf("HTTP %d: %s", httpResp.StatusCode, string(body))
+	}
+
+	respBodyBytes, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		return nil, sessionID, fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	// An SSE reply is recognised by its Content-Type or, failing that, by a leading SSE field name.
+	jsonData := respBodyBytes
+	trimmed := bytes.TrimSpace(respBodyBytes)
+	isSSE := strings.Contains(strings.ToLower(httpResp.Header.Get("Content-Type")), "text/event-stream") ||
+		bytes.HasPrefix(trimmed, []byte("event:")) || bytes.HasPrefix(trimmed, []byte("data:"))
+	if isSSE {
+		jsonData = nil
+		// Take the first non-empty "data" field. The SSE format makes the space after the colon
+		// optional, so both "data: {...}" and "data:{...}" must be accepted.
+		for _, line := range bytes.Split(respBodyBytes, []byte("\n")) {
+			line = bytes.TrimSpace(line)
+			if bytes.HasPrefix(line, []byte("data:")) && len(line) > len("data:") {
+				jsonData = bytes.TrimSpace(line[len("data:"):])
+				break
+			}
+		}
+		if len(jsonData) == 0 {
+			return nil, sessionID, fmt.Errorf("no data field found in SSE response: %s", string(respBodyBytes))
+		}
+	}
+
+	var mcpResp MCPResponse
+	if err := json.Unmarshal(jsonData, &mcpResp); err != nil {
+		return nil, sessionID, fmt.Errorf("failed to unmarshal MCP response: %w (data: %s)", err, string(jsonData))
+	}
+
+	return &mcpResp, sessionID, nil
+}
+
+// Ping checks provider health with a JSON-RPC "ping"; transport failures and JSON-RPC error replies both count as unhealthy.
+func (b *Bridge) Ping(ctx context.Context) error {
+	resp, err := b.sendRequest(ctx, MCPRequest{JSONRPC: "2.0", Method: "ping", ID: time.Now().UnixNano()})
+	if err != nil {
+		return err
+	}
+	if resp.Error != nil {
+		return fmt.Errorf("MCP error from %s: %s", b.provider.Name, resp.Error.Message)
+	}
+	return nil
+}
diff --git a/services/mcp-tools/internal/infrastructure/mcpprovider/config.go b/services/mcp-tools/internal/infrastructure/mcpprovider/config.go
new file mode 100644
index 00000000..15fa9db7
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/mcpprovider/config.go
@@ -0,0 +1,103 @@
+package mcpprovider
+
+import (
+	"os"
+	"time"
+
+	"gopkg.in/yaml.v3"
+)
+
+// ProviderType is the string found under a provider's "type" key in the YAML configuration.
+type ProviderType string
+
+const (
+	ProviderTypeHTTP    ProviderType = "http"     // Regular HTTP API
+	ProviderTypeMCPHTTP ProviderType = "mcp-http" // MCP protocol over HTTP
+)
+
+// ProviderTool represents a tool exposed by a provider, as listed under the provider's "tools" key.
+type ProviderTool struct {
+	Name        string `yaml:"name"`        // tool identifier
+	Description string `yaml:"description"` // human-readable summary
+	Enabled     bool   `yaml:"enabled"`     // presumably toggles the tool; not consulted in this file — confirm
+}
+
+// Provider represents an external MCP service provider, decoded from one entry of the "providers" YAML list.
+type Provider struct {
+	Name        string         `yaml:"name"`
+	Description string         `yaml:"description"`
+	Enabled     bool           `yaml:"enabled"`  // filtered on by Config.GetEnabledProviders
+	Endpoint    string         `yaml:"endpoint"` // URL the Bridge POSTs JSON-RPC requests to
+	Type        ProviderType   `yaml:"type"`
+	ProxyMode   bool           `yaml:"proxy_mode"`
+	Timeout     string         `yaml:"timeout"` // Go duration string such as "30s"; parsed by TimeoutDuration
+	Tools       []ProviderTool `yaml:"tools,omitempty"`
+}
+
+// TimeoutDuration returns the timeout as a time.Duration
+func (p *Provider) TimeoutDuration() time.Duration {
+	if p.Timeout == "" {
+		return 30 * time.Second
+	}
+	d, err := time.ParseDuration(p.Timeout)
+	if err != nil {
+		return 30 * time.Second
+	}
+	return d
+}
+
+// Settings holds the "settings" YAML section. NOTE(review): no code visible in this file reads these values — confirm where they are applied.
+type Settings struct {
+	MaxTimeout    string `yaml:"max_timeout"`
+	DebugLogging  bool   `yaml:"debug_logging"`
+	RetryAttempts int    `yaml:"retry_attempts"`
+	RetryDelay    string `yaml:"retry_delay"`
+}
+
+// Config is the root of the MCP provider YAML document, as decoded by LoadConfig.
+type Config struct {
+	Providers []Provider `yaml:"providers"` // all declared providers, enabled or not
+	Settings  Settings   `yaml:"settings"`
+}
+
+// LoadConfig loads the MCP provider configuration from a YAML file
+func LoadConfig(configPath string) (*Config, error) {
+	// Expand environment variables in config path
+	configPath = os.ExpandEnv(configPath)
+
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		return nil, err
+	}
+
+	// Expand environment variables in the YAML content
+	expanded := os.ExpandEnv(string(data))
+
+	var config Config
+	if err := yaml.Unmarshal([]byte(expanded), &config); err != nil {
+		return nil, err
+	}
+
+	return &config, nil
+}
+
+// GetEnabledProviders returns copies of the providers whose Enabled flag is set, in configuration order (nil if none).
+func (c *Config) GetEnabledProviders() []Provider {
+	var active []Provider
+	for i := range c.Providers {
+		if candidate := c.Providers[i]; candidate.Enabled {
+			active = append(active, candidate)
+		}
+	}
+	return active
+}
+
+// GetProvider returns a pointer to a copy of the first provider called name, or nil when none matches.
+// Changes made through the returned pointer do not alter the Config.
+func (c *Config) GetProvider(name string) *Provider {
+	for i := range c.Providers {
+		if match := c.Providers[i]; match.Name == name {
+			return &match
+		}
+	}
+	return nil
+}
diff --git a/services/mcp-tools/internal/infrastructure/sandboxfusion/client.go b/services/mcp-tools/internal/infrastructure/sandboxfusion/client.go
new file mode 100644
index 00000000..39b2461b
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/sandboxfusion/client.go
@@ -0,0 +1,118 @@
+package sandboxfusion
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/go-resty/resty/v2"
+)
+
+type Client struct {
+	baseURL    string        // service root with trailing slashes removed
+	httpClient *resty.Client // resty client preconfigured with base URL, User-Agent and timeout
+}
+
+type RunCodeRequest struct { // request body POSTed to /run_code by RunCode
+	Code      string `json:"code"`
+	Language  string `json:"language,omitempty"`
+	SessionID string `json:"session_id,omitempty"` // echoed back in RunCodeResponse.SessionID
+}
+
+type Artifact struct { // one entry of SandboxFusionAPIResponse.Files, as reported by RunCode
+	Name string `json:"name"` // key of the files map
+	URL  string `json:"url"`  // value of the files map
+}
+
+type RunResult struct { // "run_result" section of the sandbox reply
+	Status        string  `json:"status"`
+	ExecutionTime float64 `json:"execution_time"` // multiplied by 1000 to get milliseconds in RunCode, so presumably seconds
+	ReturnCode    int     `json:"return_code"`
+	Stdout        string  `json:"stdout"`
+	Stderr        string  `json:"stderr"`
+}
+
+type SandboxFusionAPIResponse struct { // raw reply of the /run_code endpoint, decoded by RunCode
+	Status        string            `json:"status"` // RunCode treats anything other than "Success" as a failure
+	Message       string            `json:"message"`
+	CompileResult interface{}       `json:"compile_result"`
+	RunResult     *RunResult        `json:"run_result"`
+	ExecutorPod   *string           `json:"executor_pod_name"`
+	Files         map[string]string `json:"files"` // name-to-URL pairs; converted into Artifacts by RunCode
+}
+
+type RunCodeResponse struct { // result returned by RunCode
+	Stdout    string     `json:"stdout"`
+	Stderr    string     `json:"stderr"`
+	Duration  int        `json:"duration_ms"` // run time in milliseconds
+	SessionID string     `json:"session_id"`
+	Artifacts []Artifact `json:"artifacts"`
+	Error     string     `json:"error,omitempty"` // API message when the reported status is not "Success"
+}
+
+// NewClient returns a client for the SandboxFusion service rooted at baseURL (trailing
+// slashes stripped), or nil when nothing remains after trimming. Requests use a 20-second
+// timeout and a fixed User-Agent.
+func NewClient(baseURL string) *Client {
+	trimmed := strings.TrimRight(baseURL, "/")
+	if trimmed == "" {
+		return nil
+	}
+	httpClient := resty.New()
+	httpClient.SetBaseURL(trimmed)
+	httpClient.SetHeader("User-Agent", "Jan-MCP-SandboxFusion/1.0")
+	httpClient.SetTimeout(20 * time.Second)
+	return &Client{baseURL: trimmed, httpClient: httpClient}
+}
+
+// IsEnabled reports whether the client is usable, i.e. non-nil with a non-empty base URL.
+// It is safe to call on a nil *Client.
+func (c *Client) IsEnabled() bool { return c != nil && c.baseURL != "" }
+
+// RunCode posts req to /run_code and maps the reply: stdout, stderr and duration (ms) come from
+// run_result, files become artifacts, SessionID is echoed from req. Transport failures and HTTP
+// error statuses are returned as errors; a non-"Success" API status is reported in Error.
+func (c *Client) RunCode(ctx context.Context, req RunCodeRequest) (*RunCodeResponse, error) {
+	if !c.IsEnabled() {
+		return nil, fmt.Errorf("sandboxfusion client is not configured")
+	}
+	var apiResp SandboxFusionAPIResponse
+	httpResp, err := c.httpClient.R().
+		SetContext(ctx).
+		SetHeader("Content-Type", "application/json").
+		SetBody(req).
+		SetResult(&apiResp).
+		Post("/run_code")
+	if err != nil {
+		return nil, fmt.Errorf("sandboxfusion request failed: %w", err)
+	}
+	if httpResp.IsError() {
+		return nil, fmt.Errorf("sandboxfusion error (%d): %s", httpResp.StatusCode(), httpResp.String())
+	}
+
+	// Map the API response to our expected format
+	resp := &RunCodeResponse{SessionID: req.SessionID}
+	if run := apiResp.RunResult; run != nil {
+		resp.Stdout = run.Stdout
+		resp.Stderr = run.Stderr
+		resp.Duration = int(run.ExecutionTime * 1000) // Convert to milliseconds
+	}
+
+	if apiResp.Status != "Success" {
+		resp.Error = apiResp.Message
+		if resp.Error == "" {
+			// Never leave Error empty on failure, or callers would read the run as successful.
+			resp.Error = fmt.Sprintf("sandboxfusion returned status %q", apiResp.Status)
+		}
+	}
+
+	// Convert files map to artifacts
+	if len(apiResp.Files) > 0 {
+		resp.Artifacts = make([]Artifact, 0, len(apiResp.Files))
+		for name, url := range apiResp.Files {
+			resp.Artifacts = append(resp.Artifacts, Artifact{Name: name, URL: url})
+		}
+	}
+	return resp, nil
+}
diff --git a/services/mcp-tools/internal/infrastructure/search/client.go b/services/mcp-tools/internal/infrastructure/search/client.go
new file mode 100644
index 00000000..71fddd5d
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/search/client.go
@@ -0,0 +1,584 @@
+package search
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	domainsearch "jan-server/services/mcp-tools/internal/domain/search"
+
+	"github.com/go-resty/resty/v2"
+	"github.com/rs/zerolog/log"
+	"golang.org/x/net/html"
+)
+
+// Remote endpoints and paths used by the search backends in this file.
+const (
+	// serperSearchEndpoint is the absolute URL searchViaSerper POSTs queries to.
+	serperSearchEndpoint = "https://google.serper.dev/search"
+	// serperScrapeEndpoint is the absolute URL fetchViaSerper POSTs scrape requests to.
+	serperScrapeEndpoint = "https://scrape.serper.dev"
+	// searxngSearchPath is requested relative to the SearXNG client's base URL.
+	searxngSearchPath    = "/search"
+)
+
+// Engine represents the configured backend for search operations.
+// NewSearchClient lower-cases the configured value and treats an empty value
+// as EngineSerper; Search routes any value other than EngineSearxng to Serper.
+type Engine string
+
+const (
+	// EngineSerper routes search requests to the hosted Serper API.
+	EngineSerper Engine = "serper"
+	// EngineSearxng routes search requests to a local SearXNG instance.
+	EngineSearxng Engine = "searxng"
+)
+
+// ClientConfig captures the knobs exposed to operators for the search client.
+type ClientConfig struct {
+	// Engine selects the backend used by Search.
+	Engine        Engine
+	// SerperAPIKey is sent as the X-API-KEY header on Serper requests; when it
+	// is blank, Serper is skipped entirely.
+	SerperAPIKey  string
+	// SearxngURL is the base URL applied to the SearXNG HTTP client.
+	SearxngURL    string
+	// DomainFilters are merged with each request's own domain allow-list.
+	DomainFilters []string
+	// LocationHint is used when a request carries no location hint of its own.
+	LocationHint  string
+	// OfflineMode is the default used when a request does not override it.
+	OfflineMode   bool
+}
+
+// SearchClient implements domainsearch.SearchClient with pluggable backends.
+type SearchClient struct {
+	// cfg is the configuration as normalised by NewSearchClient.
+	cfg            ClientConfig
+	// serperClient issues the Serper search and scrape requests.
+	serperClient   *resty.Client
+	// fallbackClient issues the DuckDuckGo and direct page-fetch requests.
+	fallbackClient *resty.Client
+	// searxClient issues SearXNG requests relative to its base URL.
+	searxClient    *resty.Client
+}
+
+// Compile-time check that *SearchClient satisfies domainsearch.SearchClient.
+var _ domainsearch.SearchClient = (*SearchClient)(nil)
+
+// NewSearchClient wires HTTP clients for each supported backend.
+//
+// The engine name is trimmed and lower-cased, and an empty engine defaults to
+// EngineSerper. The SearXNG base URL is stripped of surrounding whitespace and
+// trailing slashes before use, so a whitespace-only value counts as "not
+// configured" both here and in Search's emptiness check.
+func NewSearchClient(cfg ClientConfig) *SearchClient {
+	engine := Engine(strings.ToLower(strings.TrimSpace(string(cfg.Engine))))
+	if engine == "" {
+		engine = EngineSerper
+	}
+	cfg.Engine = engine
+	cfg.SearxngURL = strings.TrimRight(strings.TrimSpace(cfg.SearxngURL), "/")
+
+	serperHTTP := resty.New().
+		SetHeader("User-Agent", "Jan-MCP-Tools/1.0").
+		SetTimeout(30 * time.Second)
+
+	// The fallback client uses a shorter timeout than the primary backends.
+	fallbackHTTP := resty.New().
+		SetHeader("User-Agent", "Jan-MCP-Tools-Fallback/1.0").
+		SetTimeout(15 * time.Second)
+
+	searxHTTP := resty.New().
+		SetHeader("User-Agent", "Jan-MCP-Tools/1.0").
+		SetTimeout(30 * time.Second)
+
+	// Only set a base URL when one is configured; Search checks the stored URL
+	// before using this client.
+	if cfg.SearxngURL != "" {
+		searxHTTP.SetBaseURL(cfg.SearxngURL)
+	}
+
+	return &SearchClient{
+		cfg:            cfg,
+		serperClient:   serperHTTP,
+		fallbackClient: fallbackHTTP,
+		searxClient:    searxHTTP,
+	}
+}
+
+// Search runs a query against the configured backend.
+//
+// The query is first enriched with configured domain filters and location
+// hint. Offline mode, a missing backend configuration, or a backend error all
+// end in a DuckDuckGo request tagged with the reason for the fallback.
+func (c *SearchClient) Search(ctx context.Context, query domainsearch.SearchRequest) (*domainsearch.SearchResponse, error) {
+	query = c.enrichQuery(query)
+
+	if c.resolveOfflineMode(query.OfflineMode) {
+		log.Info().Msg("search running in offline mode, returning cached duckduckgo results")
+		return c.searchViaDuckDuckGo(ctx, query, "offline_mode")
+	}
+
+	if c.cfg.Engine == EngineSearxng {
+		configured := c.searxClient != nil && strings.TrimSpace(c.cfg.SearxngURL) != ""
+		if !configured {
+			log.Warn().Msg("searxng search requested but SEARXNG_URL not configured; falling back to DuckDuckGo")
+			return c.searchViaDuckDuckGo(ctx, query, "searxng_unconfigured")
+		}
+		found, searxErr := c.searchViaSearxng(ctx, query)
+		if searxErr == nil {
+			return found, nil
+		}
+		log.Warn().Err(searxErr).Msg("searxng search failed, falling back to DuckDuckGo")
+		return c.searchViaDuckDuckGo(ctx, query, "searxng_error")
+	}
+
+	// Every other engine value is served by Serper.
+	if !c.hasAPIKey() {
+		log.Info().Msg("serper api key missing, falling back to DuckDuckGo")
+		return c.searchViaDuckDuckGo(ctx, query, "serper_unavailable")
+	}
+	found, serperErr := c.searchViaSerper(ctx, query)
+	if serperErr != nil {
+		log.Warn().Err(serperErr).Msg("serper search failed, falling back to DuckDuckGo")
+		return c.searchViaDuckDuckGo(ctx, query, "serper_error")
+	}
+	return found, nil
+}
+
+// FetchWebpage scrapes a webpage either via Serper's scrape API or a fallback HTTP fetcher.
+//
+// Serper is tried only when an API key is configured. If the Serper call
+// fails, the failure is logged and the page is fetched directly instead; the
+// returned error, if any, comes from the fallback fetch.
+func (c *SearchClient) FetchWebpage(ctx context.Context, query domainsearch.FetchWebpageRequest) (*domainsearch.FetchWebpageResponse, error) {
+	if c.hasAPIKey() {
+		res, err := c.fetchViaSerper(ctx, query)
+		if err == nil {
+			return res, nil
+		}
+		// Mirror Search: record why the primary backend was abandoned rather
+		// than dropping the error silently.
+		log.Warn().Err(err).Str("url", query.Url).Msg("serper scrape failed, falling back to direct fetch")
+	}
+	return c.fetchFallback(ctx, query)
+}
+
+// enrichQuery returns a copy of query with the configured defaults applied:
+// the merged domain allow-list (also folded into the query text as site:
+// filters) and, when the request has none, the configured location hint.
+func (c *SearchClient) enrichQuery(query domainsearch.SearchRequest) domainsearch.SearchRequest {
+	if allowed := c.mergeDomains(query.DomainAllowList); len(allowed) > 0 {
+		query.DomainAllowList = allowed
+		query.Q = applyDomainFilter(query.Q, allowed)
+	}
+
+	needsHint := query.LocationHint == nil
+	if needsHint && strings.TrimSpace(c.cfg.LocationHint) != "" {
+		// Copy into a local so the request does not alias the config field.
+		defaultHint := c.cfg.LocationHint
+		query.LocationHint = &defaultHint
+	}
+
+	return query
+}
+
+// mergeDomains combines the configured domain filters with the per-request
+// list. Entries are sanitised, blanks and duplicates are dropped, and order
+// is preserved with configured filters first. Returns nil when nothing
+// survives.
+func (c *SearchClient) mergeDomains(custom []string) []string {
+	known := make(map[string]struct{})
+	var merged []string
+
+	for _, source := range [][]string{c.cfg.DomainFilters, custom} {
+		for _, raw := range source {
+			cleaned := sanitizeDomain(raw)
+			if cleaned == "" {
+				continue
+			}
+			if _, dup := known[cleaned]; dup {
+				continue
+			}
+			known[cleaned] = struct{}{}
+			merged = append(merged, cleaned)
+		}
+	}
+
+	return merged
+}
+
+// resolveOfflineMode returns the per-request override when one is given and
+// the configured default otherwise.
+func (c *SearchClient) resolveOfflineMode(override *bool) bool {
+	if override == nil {
+		return c.cfg.OfflineMode
+	}
+	return *override
+}
+
+// searchViaSerper POSTs the query to the Serper search endpoint and returns
+// the decoded response, annotated with engine/live/domain metadata.
+//
+// Optional request fields are forwarded only when set. An explicit Location
+// takes precedence over LocationHint for the "location" field. Returns an
+// error when the request fails or Serper answers with an HTTP error status.
+func (c *SearchClient) searchViaSerper(ctx context.Context, query domainsearch.SearchRequest) (*domainsearch.SearchResponse, error) {
+	payload := map[string]any{"q": query.Q}
+
+	if query.GL != nil {
+		payload["gl"] = *query.GL
+	}
+	if query.HL != nil {
+		payload["hl"] = *query.HL
+	}
+	switch {
+	case query.Location != nil:
+		payload["location"] = *query.Location
+	case query.LocationHint != nil:
+		payload["location"] = *query.LocationHint
+	}
+	if query.Num != nil {
+		payload["num"] = *query.Num
+	}
+	if query.Page != nil {
+		payload["page"] = *query.Page
+	}
+	if query.Autocorrect != nil {
+		payload["autocorrect"] = *query.Autocorrect
+	}
+	if query.TBS != nil {
+		payload["tbs"] = string(*query.TBS)
+	}
+
+	var parsed domainsearch.SearchResponse
+	httpReq := c.serperClient.R()
+	httpReq.SetContext(ctx)
+	httpReq.SetHeader("X-API-KEY", c.cfg.SerperAPIKey)
+	httpReq.SetHeader("Content-Type", "application/json")
+	httpReq.SetBody(payload)
+	httpReq.SetResult(&parsed)
+
+	httpResp, err := httpReq.Post(serperSearchEndpoint)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query Serper search API: %w", err)
+	}
+	if httpResp.IsError() {
+		return nil, fmt.Errorf("Serper search API error (status %d): %s", httpResp.StatusCode(), httpResp.String())
+	}
+
+	// Serper may omit searchParameters; make sure the map exists before
+	// adding our own metadata to it.
+	if parsed.SearchParameters == nil {
+		parsed.SearchParameters = map[string]any{}
+	}
+	params := parsed.SearchParameters
+	params["engine"] = "serper"
+	params["live"] = true
+	params["domain_allow_list"] = query.DomainAllowList
+	if hint := query.LocationHint; hint != nil {
+		params["location_hint"] = *hint
+	}
+
+	return &parsed, nil
+}
+
+// searchViaSearxng queries the configured SearXNG instance's JSON search
+// endpoint and maps its results into the shared SearchResponse shape.
+//
+// q, format=json and safesearch=1 are always sent; language, pageno, num and
+// time_range are added only when the matching query field is set. At most
+// query.Num results (default 10) are returned. An error is returned when the
+// client is missing, the request fails, or SearXNG answers with an HTTP error
+// status.
+func (c *SearchClient) searchViaSearxng(ctx context.Context, query domainsearch.SearchRequest) (*domainsearch.SearchResponse, error) {
+	if c.searxClient == nil {
+		return nil, fmt.Errorf("searxng client not configured")
+	}
+
+	req := c.searxClient.R().
+		SetContext(ctx).
+		SetQueryParam("q", query.Q).
+		SetQueryParam("format", "json").
+		SetQueryParam("safesearch", "1")
+
+	if query.HL != nil {
+		req.SetQueryParam("language", *query.HL)
+	}
+	if query.Page != nil && *query.Page > 1 {
+		// SearXNG's search API names the page parameter "pageno"; page 1 is
+		// its default, so it is only sent for later pages.
+		req.SetQueryParam("pageno", strconv.Itoa(*query.Page))
+	}
+	if query.Num != nil && *query.Num > 0 {
+		// NOTE(review): "num" is not among SearXNG's documented search
+		// parameters; the result count is enforced client-side below regardless.
+		req.SetQueryParam("num", strconv.Itoa(*query.Num))
+	}
+	if query.TBS != nil {
+		if mapped := mapTBSToSearxng(*query.TBS); mapped != "" {
+			req.SetQueryParam("time_range", mapped)
+		}
+	}
+
+	var result searxngResponse
+	resp, err := req.SetResult(&result).Get(searxngSearchPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query SearXNG API: %w", err)
+	}
+	if resp.IsError() {
+		return nil, fmt.Errorf("SearXNG API error (status %d): %s", resp.StatusCode(), resp.String())
+	}
+
+	// Cap the number of returned results at the requested count (default 10).
+	limit := 10
+	if query.Num != nil && *query.Num > 0 {
+		limit = *query.Num
+	}
+
+	results := make([]map[string]any, 0, len(result.Results))
+	for idx, item := range result.Results {
+		if idx >= limit {
+			break
+		}
+		results = append(results, map[string]any{
+			"title":       item.Title,
+			"link":        item.URL,
+			"description": strings.TrimSpace(item.Content),
+			"source":      "searxng",
+			"engine":      item.Engine,
+		})
+	}
+
+	searchMetadata := map[string]any{
+		"engine":            "searxng",
+		"q":                 query.Q,
+		"live":              true,
+		"domain_allow_list": query.DomainAllowList,
+	}
+	if query.LocationHint != nil {
+		searchMetadata["location_hint"] = *query.LocationHint
+	}
+
+	return &domainsearch.SearchResponse{
+		SearchParameters: searchMetadata,
+		Organic:          results,
+	}, nil
+}
+
+// mapTBSToSearxng translates a TBS time range into the value sent as
+// SearXNG's time_range parameter. Unrecognised ranges map to "", which the
+// caller treats as "send no time_range". Past-hour maps to "day", the same
+// value as past-day.
+func mapTBSToSearxng(t domainsearch.TBSTimeRange) string {
+	switch t {
+	case domainsearch.TBSPastHour, domainsearch.TBSPastDay:
+		return "day"
+	case domainsearch.TBSPastWeek:
+		return "week"
+	case domainsearch.TBSPastMonth:
+		return "month"
+	case domainsearch.TBSPastYear:
+		return "year"
+	}
+	return ""
+}
+
+// fetchViaSerper asks the Serper scrape endpoint to fetch query.Url and
+// returns the decoded response. includeMarkdown is forwarded only when the
+// request sets it. Returns an error when the request fails or Serper answers
+// with an HTTP error status.
+func (c *SearchClient) fetchViaSerper(ctx context.Context, query domainsearch.FetchWebpageRequest) (*domainsearch.FetchWebpageResponse, error) {
+	payload := map[string]any{"url": query.Url}
+	if wantMarkdown := query.IncludeMarkdown; wantMarkdown != nil {
+		payload["includeMarkdown"] = *wantMarkdown
+	}
+
+	var scraped domainsearch.FetchWebpageResponse
+	httpReq := c.serperClient.R()
+	httpReq.SetContext(ctx)
+	httpReq.SetHeader("X-API-KEY", c.cfg.SerperAPIKey)
+	httpReq.SetHeader("Content-Type", "application/json")
+	httpReq.SetBody(payload)
+	httpReq.SetResult(&scraped)
+
+	httpResp, err := httpReq.Post(serperScrapeEndpoint)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query Serper scrape API: %w", err)
+	}
+	if httpResp.IsError() {
+		return nil, fmt.Errorf("Serper scrape API error (status %d): %s", httpResp.StatusCode(), httpResp.String())
+	}
+
+	return &scraped, nil
+}
+
+// fetchFallback downloads query.Url directly with the fallback HTTP client
+// and returns its visible text. When no visible text can be extracted, the
+// raw response body is returned instead. The metadata records the source URL,
+// the response Content-Type, and that fallback mode was used.
+//
+// NOTE(review): the URL is requested as given; no validation of the target
+// host is visible in this function.
+func (c *SearchClient) fetchFallback(ctx context.Context, query domainsearch.FetchWebpageRequest) (*domainsearch.FetchWebpageResponse, error) {
+	httpReq := c.fallbackClient.R()
+	httpReq.SetContext(ctx)
+	httpReq.SetHeader("User-Agent", "Jan-MCP-Tools-Fallback/1.0")
+
+	page, err := httpReq.Get(query.Url)
+	if err != nil {
+		return nil, fmt.Errorf("fallback fetch failed: %w", err)
+	}
+	if page.IsError() {
+		return nil, fmt.Errorf("fallback fetch HTTP %d: %s", page.StatusCode(), page.Status())
+	}
+
+	raw := page.Body()
+	content := extractVisibleText(raw)
+	if content == "" {
+		content = string(raw)
+	}
+
+	return &domainsearch.FetchWebpageResponse{
+		Text: content,
+		Metadata: map[string]any{
+			"source":        query.Url,
+			"contentType":   page.Header().Get("Content-Type"),
+			"fallback_mode": true,
+		},
+	}, nil
+}
+
+// searchViaDuckDuckGo is the fallback search path. It queries the DuckDuckGo
+// API at api.duckduckgo.com and converts direct results and (flattened)
+// related topics into the shared SearchResponse shape, returning at most
+// maxResults entries. reason is recorded in the response metadata to explain
+// why the fallback was used.
+//
+// When DuckDuckGo yields nothing, a single placeholder entry is returned that
+// links to a DuckDuckGo search for the (URL-escaped) query. Returns an error
+// when the request fails or DuckDuckGo answers with an HTTP error status.
+func (c *SearchClient) searchViaDuckDuckGo(ctx context.Context, query domainsearch.SearchRequest, reason string) (*domainsearch.SearchResponse, error) {
+	// Upper bound on the number of fallback results, applied to direct
+	// results and related topics alike.
+	const maxResults = 10
+
+	req := c.fallbackClient.R().
+		SetContext(ctx).
+		SetHeader("User-Agent", "Jan-MCP-Tools-Fallback/1.0").
+		SetQueryParam("q", query.Q).
+		SetQueryParam("format", "json").
+		SetQueryParam("no_redirect", "1").
+		SetQueryParam("no_html", "1")
+
+	var ddg duckDuckResponse
+	resp, err := req.SetResult(&ddg).Get("https://api.duckduckgo.com/")
+	if err != nil {
+		return nil, fmt.Errorf("duckduckgo fallback search failed: %w", err)
+	}
+	if resp.IsError() {
+		return nil, fmt.Errorf("duckduckgo fallback search HTTP %d: %s", resp.StatusCode(), resp.Status())
+	}
+
+	results := make([]map[string]any, 0, len(ddg.Results)+len(ddg.RelatedTopics))
+	for _, r := range ddg.Results {
+		if len(results) >= maxResults {
+			break
+		}
+		results = append(results, map[string]any{
+			"title":       fallbackTitle(r.Text, query.Q),
+			"link":        orSelect(r.FirstURL, r.Result),
+			"description": r.Text,
+			"source":      "duckduckgo",
+		})
+	}
+	for _, topic := range flattenDuckTopics(ddg.RelatedTopics) {
+		if len(results) >= maxResults {
+			break
+		}
+		// Topics without any link carry nothing a caller can cite.
+		if topic.FirstURL == "" && topic.Result == "" {
+			continue
+		}
+		results = append(results, map[string]any{
+			"title":       fallbackTitle(topic.Text, query.Q),
+			"link":        orSelect(topic.FirstURL, topic.Result),
+			"description": topic.Text,
+			"source":      "duckduckgo_related",
+		})
+	}
+	if len(results) == 0 {
+		results = append(results, map[string]any{
+			"title": fmt.Sprintf("No live results for \"%s\"", query.Q),
+			// Escape the query so spaces, '&', '#', etc. cannot break the link.
+			"link":        fmt.Sprintf("https://duckduckgo.com/?q=%s", url.QueryEscape(query.Q)),
+			"description": "Configure SERPER_API_KEY or switch SEARCH_ENGINE to searxng for live results.",
+			"source":      "fallback",
+		})
+	}
+
+	return &domainsearch.SearchResponse{
+		SearchParameters: map[string]any{
+			"engine":            "duckduckgo",
+			"q":                 query.Q,
+			"live":              false,
+			"reason":            reason,
+			"domain_allow_list": query.DomainAllowList,
+		},
+		Organic: results,
+	}, nil
+}
+
+// hasAPIKey reports whether a non-blank Serper API key is configured.
+func (c *SearchClient) hasAPIKey() bool {
+	key := strings.TrimSpace(c.cfg.SerperAPIKey)
+	return len(key) > 0
+}
+
+// --- Helper types + functions reused from the legacy client ---
+
+// duckDuckResponse is the decode target for the JSON returned by the
+// DuckDuckGo API request in searchViaDuckDuckGo. Only Results and
+// RelatedTopics are read there.
+type duckDuckResponse struct {
+	Heading       string            `json:"Heading"`
+	Results       []duckDuckResult  `json:"Results"`
+	RelatedTopics []duckDuckTopics  `json:"RelatedTopics"`
+	AbstractURL   string            `json:"AbstractURL"`
+	AbstractText  string            `json:"AbstractText"`
+	Type          string            `json:"Type"`
+	Redirect      string            `json:"Redirect"`
+	Meta          map[string]string `json:"meta"`
+}
+
+// duckDuckResult is one entry of duckDuckResponse.Results. FirstURL is
+// preferred over Result as the link, and Text serves as both title and
+// description.
+type duckDuckResult struct {
+	FirstURL string `json:"FirstURL"`
+	Result   string `json:"Result"`
+	Text     string `json:"Text"`
+}
+
+// duckDuckTopics is one entry of duckDuckResponse.RelatedTopics. An entry is
+// either a leaf (FirstURL/Result/Text) or a group whose nested entries live
+// in Topics or Children; flattenDuckTopics expands groups into leaves.
+type duckDuckTopics struct {
+	Name     string           `json:"Name"`
+	FirstURL string           `json:"FirstURL"`
+	Result   string           `json:"Result"`
+	Text     string           `json:"Text"`
+	Topics   []duckDuckTopics `json:"Topics"`
+	Children []duckDuckTopics `json:"children"`
+}
+
+// searxngResponse is the decode target for the SearXNG JSON search response.
+// searchViaSearxng reads only Results.
+type searxngResponse struct {
+	Query            string           `json:"query"`
+	NumberOfResults  int              `json:"number_of_results"`
+	Results          []searxngResult  `json:"results"`
+	Corrections      []string         `json:"corrections"`
+	UnresponsiveList []string         `json:"unresponsive_engines"`
+	Answers          []string         `json:"answers"`
+	Info             []map[string]any `json:"infoboxes"`
+}
+
+// searxngResult is one entry of searxngResponse.Results. searchViaSearxng
+// maps it to title/link/description/engine in the outgoing result.
+type searxngResult struct {
+	Title   string `json:"title"`
+	URL     string `json:"url"`
+	Content string `json:"content"`
+	Engine  string `json:"engine"`
+}
+
+// flattenDuckTopics expands nested topic groups into a flat list of leaf
+// topics, depth-first and in order. For a group, Topics takes precedence over
+// Children, and the group entry itself is not emitted.
+func flattenDuckTopics(topics []duckDuckTopics) []duckDuckTopics {
+	var flat []duckDuckTopics
+	for i := range topics {
+		entry := topics[i]
+		switch {
+		case len(entry.Topics) > 0:
+			flat = append(flat, flattenDuckTopics(entry.Topics)...)
+		case len(entry.Children) > 0:
+			flat = append(flat, flattenDuckTopics(entry.Children)...)
+		default:
+			flat = append(flat, entry)
+		}
+	}
+	return flat
+}
+
+// fallbackTitle returns the whitespace-trimmed title, or a generic title
+// built from the query when the title is blank.
+func fallbackTitle(title, query string) string {
+	if trimmed := strings.TrimSpace(title); trimmed != "" {
+		return trimmed
+	}
+	return fmt.Sprintf("Result for \"%s\"", query)
+}
+
+// orSelect returns the first argument that is not blank after trimming
+// whitespace. The value is returned untrimmed. Returns "" when every
+// argument is blank.
+func orSelect(values ...string) string {
+	for i := 0; i < len(values); i++ {
+		if candidate := values[i]; strings.TrimSpace(candidate) != "" {
+			return candidate
+		}
+	}
+	return ""
+}
+
+// sanitizeDomain normalises a domain filter: lower-case, trim whitespace,
+// drop a leading "https://", "http://" and "www." (in that order), then strip
+// leading and trailing slashes.
+func sanitizeDomain(value string) string {
+	cleaned := strings.ToLower(value)
+	cleaned = strings.TrimSpace(cleaned)
+	for _, prefix := range []string{"https://", "http://", "www."} {
+		cleaned = strings.TrimPrefix(cleaned, prefix)
+	}
+	return strings.Trim(cleaned, "/")
+}
+
+func applyDomainFilter(query string, domains []string) string {
+	if len(domains) == 0 {
+		return query
+	}
+
+	var filters []string
+	for _, domain := range domains {
+		if domain == "" {
+			continue
+		}
+		filters = append(filters, fmt.Sprintf("site:%s", domain))
+	}
+	if len(filters) == 0 {
+		return query
+	}
+
+	filterExpr := strings.Join(filters, " OR ")
+	query = strings.TrimSpace(query)
+	if query == "" {
+		return filterExpr
+	}
+	return fmt.Sprintf("(%s) (%s)", query, filterExpr)
+}
+
+// extractVisibleText parses htmlBytes as HTML and returns the text nodes,
+// each whitespace-trimmed and joined by single spaces. The contents of
+// <script> and <style> elements are skipped. Returns "" when parsing fails
+// or no text is found.
+func extractVisibleText(htmlBytes []byte) string {
+	root, err := html.Parse(strings.NewReader(string(htmlBytes)))
+	if err != nil {
+		return ""
+	}
+
+	var out strings.Builder
+	var visit func(node *html.Node)
+	visit = func(node *html.Node) {
+		switch node.Type {
+		case html.TextNode:
+			if text := strings.TrimSpace(node.Data); text != "" {
+				if out.Len() > 0 {
+					out.WriteString(" ")
+				}
+				out.WriteString(text)
+			}
+		case html.ElementNode:
+			// Do not descend into elements whose text is never rendered.
+			if node.Data == "script" || node.Data == "style" {
+				return
+			}
+		}
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			visit(child)
+		}
+	}
+	visit(root)
+
+	return out.String()
+}
diff --git a/services/mcp-tools/internal/infrastructure/vectorstore/client.go b/services/mcp-tools/internal/infrastructure/vectorstore/client.go
new file mode 100644
index 00000000..1e2d0cec
--- /dev/null
+++ b/services/mcp-tools/internal/infrastructure/vectorstore/client.go
@@ -0,0 +1,112 @@
+package vectorstore
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/go-resty/resty/v2"
+)
+
+// Client is an HTTP client for the vector store service. A nil *Client is
+// valid and reports itself as disabled via IsEnabled.
+type Client struct {
+	// baseURL is the service root with trailing slashes removed.
+	baseURL    string
+	// httpClient is the resty client pre-configured with baseURL.
+	httpClient *resty.Client
+}
+
+// IndexRequest is the JSON body IndexDocument POSTs to /documents.
+// Metadata and Tags are omitted from the JSON when empty.
+type IndexRequest struct {
+	DocumentID string         `json:"document_id"`
+	Text       string         `json:"text"`
+	Metadata   map[string]any `json:"metadata,omitempty"`
+	Tags       []string       `json:"tags,omitempty"`
+}
+
+// IndexResponse is the JSON reply decoded by IndexDocument.
+type IndexResponse struct {
+	Status     string `json:"status"`
+	DocumentID string `json:"document_id"`
+	TokenCount int    `json:"token_count"`
+	// IndexedAt is kept as the raw string sent by the service; it is not parsed here.
+	IndexedAt  string `json:"indexed_at"`
+}
+
+// QueryRequest is the JSON body Query POSTs to /query.
+// TopK and DocumentIDs are omitted from the JSON when zero/empty.
+type QueryRequest struct {
+	Text        string   `json:"text"`
+	TopK        int      `json:"top_k,omitempty"`
+	DocumentIDs []string `json:"document_ids,omitempty"`
+}
+
+// QueryResult is one entry of QueryResponse.Results.
+type QueryResult struct {
+	DocumentID  string         `json:"document_id"`
+	Score       float64        `json:"score"`
+	TextPreview string         `json:"text_preview"`
+	Metadata    map[string]any `json:"metadata"`
+	Tags        []string       `json:"tags,omitempty"`
+}
+
+// QueryResponse is the JSON reply decoded by Query.
+type QueryResponse struct {
+	Query   string        `json:"query"`
+	TopK    int           `json:"top_k"`
+	Count   int           `json:"count"`
+	Results []QueryResult `json:"results"`
+}
+
+// NewClient builds a vector store client rooted at baseURL.
+//
+// Trailing slashes are stripped from baseURL first. If nothing remains, nil
+// is returned; IsEnabled, IndexDocument and Query all tolerate a nil *Client.
+func NewClient(baseURL string) *Client {
+	trimmed := strings.TrimRight(baseURL, "/")
+	if trimmed == "" {
+		return nil
+	}
+
+	rest := resty.New()
+	rest.SetBaseURL(trimmed)
+	rest.SetHeader("User-Agent", "Jan-MCP-VectorStore/1.0")
+	rest.SetTimeout(10 * time.Second)
+
+	return &Client{baseURL: trimmed, httpClient: rest}
+}
+
+// IsEnabled reports whether the client exists and has a base URL.
+// It is safe to call on a nil receiver.
+func (c *Client) IsEnabled() bool {
+	if c == nil {
+		return false
+	}
+	return c.baseURL != ""
+}
+
+func (c *Client) IndexDocument(ctx context.Context, req IndexRequest) (*IndexResponse, error) {
+	if !c.IsEnabled() {
+		return nil, fmt.Errorf("vector store client is not configured")
+	}
+
+	var resp IndexResponse
+	httpResp, err := c.httpClient.R().
+		SetContext(ctx).
+		SetHeader("Content-Type", "application/json").
+		SetBody(req).
+		SetResult(&resp).
+		Post("/documents")
+	if err != nil {
+		return nil, fmt.Errorf("vector store index request failed: %w", err)
+	}
+	if httpResp.IsError() {
+		return nil, fmt.Errorf("vector store index error (%d): %s", httpResp.StatusCode(), httpResp.String())
+	}
+	return &resp, nil
+}
+
+func (c *Client) Query(ctx context.Context, req QueryRequest) (*QueryResponse, error) {
+	if !c.IsEnabled() {
+		return nil, fmt.Errorf("vector store client is not configured")
+	}
+
+	var resp QueryResponse
+	httpResp, err := c.httpClient.R().
+		SetContext(ctx).
+		SetHeader("Content-Type", "application/json").
+		SetBody(req).
+		SetResult(&resp).
+		Post("/query")
+	if err != nil {
+		return nil, fmt.Errorf("vector store query request failed: %w", err)
+	}
+	if httpResp.IsError() {
+		return nil, fmt.Errorf("vector store query error (%d): %s", httpResp.StatusCode(), httpResp.String())
+	}
+	return &resp, nil
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/httpserver.go b/services/mcp-tools/internal/interfaces/httpserver/httpserver.go
new file mode 100644
index 00000000..5d13e6ca
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/httpserver.go
@@ -0,0 +1,71 @@
+package httpserver
+
+import (
+	"fmt"
+
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/mcp-tools/internal/infrastructure/auth"
+	"jan-server/services/mcp-tools/internal/infrastructure/config"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/middlewares"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/routes/mcp"
+)
+
+// HTTPServer bundles the gin engine with the dependencies needed to register
+// routes and start listening.
+type HTTPServer struct {
+	// router is the gin engine with the global middleware already attached.
+	router        *gin.Engine
+	// config supplies the HTTP port used by Run.
+	config        *config.Config
+	// mcpRoute registers the MCP endpoints under /v1.
+	mcpRoute      *mcp.MCPRoute
+	// authValidator may be nil, in which case no auth middleware is installed
+	// and /health/auth always reports ready.
+	authValidator *auth.Validator
+}
+
+// NewHTTPServer creates the gin engine and attaches global middleware in
+// this order: panic recovery, request logging, CORS, then auth (only when
+// authValidator is non-nil). Routes are registered later, by Run.
+func NewHTTPServer(
+	cfg *config.Config,
+	mcpRoute *mcp.MCPRoute,
+	authValidator *auth.Validator,
+) *HTTPServer {
+	engine := gin.New()
+	engine.Use(
+		gin.Recovery(),
+		middlewares.RequestLogger(),
+		middlewares.CORS(),
+	)
+
+	// NOTE(review): the auth middleware is expected to let health checks
+	// through on its own; that behaviour lives in auth.Validator and is not
+	// visible here.
+	if authValidator != nil {
+		engine.Use(authValidator.Middleware())
+	}
+
+	srv := &HTTPServer{
+		router:        engine,
+		config:        cfg,
+		mcpRoute:      mcpRoute,
+		authValidator: authValidator,
+	}
+	return srv
+}
+
+// setupRoutes registers the health endpoints (/healthz, /readyz,
+// /health/auth) on the root router and the MCP routes under /v1.
+func (s *HTTPServer) setupRoutes() {
+	liveness := func(c *gin.Context) {
+		c.JSON(200, gin.H{"status": "ok", "service": "mcp-tools"})
+	}
+	readiness := func(c *gin.Context) {
+		c.JSON(200, gin.H{"status": "ready", "service": "mcp-tools"})
+	}
+	// Reports 503 only while a configured validator is not yet ready; with no
+	// validator there is nothing to wait for.
+	authHealth := func(c *gin.Context) {
+		if s.authValidator != nil && !s.authValidator.Ready() {
+			c.JSON(503, gin.H{"status": "initializing"})
+			return
+		}
+		c.JSON(200, gin.H{"status": "ready"})
+	}
+
+	s.router.GET("/healthz", liveness)
+	s.router.GET("/readyz", readiness)
+	s.router.GET("/health/auth", authHealth)
+
+	apiV1 := s.router.Group("/v1")
+	s.mcpRoute.RegisterRouter(apiV1)
+}
+
+// Run registers all routes and starts the gin engine on ":<HTTPPort>",
+// returning whatever error the engine's Run reports.
+func (s *HTTPServer) Run() error {
+	s.setupRoutes()
+	return s.router.Run(fmt.Sprintf(":%s", s.config.HTTPPort))
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/middlewares/middlewares.go b/services/mcp-tools/internal/interfaces/httpserver/middlewares/middlewares.go
new file mode 100644
index 00000000..ae68025a
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/middlewares/middlewares.go
@@ -0,0 +1,63 @@
+package middlewares
+
+import (
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog/log"
+)
+
+// RequestLogger returns middleware that logs each request on arrival, logs
+// every error attached to the gin context once the handlers have run, and
+// logs completion at warn level for status codes >= 400 and info level
+// otherwise.
+func RequestLogger() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		log.Info().
+			Str("method", c.Request.Method).
+			Str("path", c.Request.URL.Path).
+			Str("client_ip", c.ClientIP()).
+			Msg("incoming request")
+
+		c.Next()
+
+		// Read request and status after the handlers ran, as they may have
+		// replaced or rewritten them.
+		req := c.Request
+		status := c.Writer.Status()
+
+		for _, ginErr := range c.Errors {
+			log.Error().
+				Str("method", req.Method).
+				Str("path", req.URL.Path).
+				Int("status", status).
+				Err(ginErr.Err).
+				Msg("request error")
+		}
+
+		// Choose the log level first so only one event is ever created.
+		newEvent := log.Info
+		if status >= 400 {
+			newEvent = log.Warn
+		}
+		newEvent().
+			Str("method", req.Method).
+			Str("path", req.URL.Path).
+			Int("status", status).
+			Msg("request completed")
+	}
+}
+
+// CORS returns middleware that sets permissive CORS headers (any origin) on
+// every response and answers OPTIONS requests with 204 without invoking later
+// handlers.
+func CORS() gin.HandlerFunc {
+	corsHeaders := [][2]string{
+		{"Access-Control-Allow-Origin", "*"},
+		{"Access-Control-Allow-Methods", "GET, POST, PUT, PATCH, DELETE, OPTIONS"},
+		{"Access-Control-Allow-Headers", "Authorization, Content-Type, X-API-Key, Idempotency-Key, X-Request-Id, Mcp-Session-Id, mcp-protocol-version"},
+		{"Access-Control-Expose-Headers", "X-Request-Id"},
+		{"Access-Control-Max-Age", "3600"},
+	}
+
+	return func(c *gin.Context) {
+		responseHeader := c.Writer.Header()
+		for _, pair := range corsHeaders {
+			responseHeader.Set(pair[0], pair[1])
+		}
+
+		if c.Request.Method != "OPTIONS" {
+			c.Next()
+			return
+		}
+		c.AbortWithStatus(204)
+	}
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/requests/requests.go b/services/mcp-tools/internal/interfaces/httpserver/requests/requests.go
new file mode 100644
index 00000000..a018aa10
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/requests/requests.go
@@ -0,0 +1,4 @@
+package requests
+
+// Placeholder for future MCP request DTOs
+// MCP requests are currently handled via MCP protocol structures
diff --git a/services/mcp-tools/internal/interfaces/httpserver/responses/response.go b/services/mcp-tools/internal/interfaces/httpserver/responses/response.go
new file mode 100644
index 00000000..13781b97
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/responses/response.go
@@ -0,0 +1,155 @@
+package responses
+
+import (
+	"errors"
+	"net/http"
+	"time"
+
+	"jan-server/services/mcp-tools/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+)
+
+// ErrorResponse is the JSON body written for failed requests.
+type ErrorResponse struct {
+	Code          string `json:"code"` // UUID from PlatformError
+	// Error is the message shown to the client.
+	Error         string `json:"error"`
+	// ErrorInstance carries the underlying error for server-side use; it is
+	// never serialized (json:"-").
+	ErrorInstance error  `json:"-"`
+	// RequestID is omitted from the JSON when empty.
+	RequestID     string `json:"request_id,omitempty"`
+}
+
+// NewInternalServerError aborts the request with HTTP 500 and errResp as the
+// JSON body. When errResp carries an underlying error, it is first attached
+// to the gin context.
+func NewInternalServerError(reqCtx *gin.Context, errResp ErrorResponse) {
+	if cause := errResp.ErrorInstance; cause != nil {
+		_ = reqCtx.Error(cause)
+	}
+	reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, errResp)
+}
+
+// HandleError handles domain errors and returns appropriate HTTP responses.
+// The message parameter is used directly as the error message in the response.
+//
+// For a *platformerrors.PlatformError (anywhere in err's chain) the status
+// code is derived from the error type and the response carries the error's
+// UUID and request ID. Any other error produces HTTP 500 with only the
+// message. In both cases a non-nil err is attached to the gin context so that
+// logging middleware can report the underlying cause, which is never sent to
+// the client.
+func HandleError(reqCtx *gin.Context, err error, message string) {
+	// gin's Context.Error panics on a nil error, hence the guard.
+	if err != nil {
+		_ = reqCtx.Error(err)
+	}
+
+	var domainErr *platformerrors.PlatformError
+	if errors.As(err, &domainErr) {
+		statusCode := platformerrors.ErrorTypeToHTTPStatus(domainErr.GetErrorType())
+
+		errResp := ErrorResponse{
+			Code:          domainErr.GetUUID(),
+			Error:         message,
+			ErrorInstance: domainErr,
+			RequestID:     domainErr.GetRequestID(),
+		}
+
+		reqCtx.AbortWithStatusJSON(statusCode, errResp)
+		return
+	}
+
+	// Generic error response for non-domain errors.
+	errResp := ErrorResponse{
+		Error:         message,
+		ErrorInstance: err,
+	}
+	reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, errResp)
+}
+
+// HandleErrorWithStatus handles domain errors with a custom status code.
+// Use this when you need to override the default status code mapping.
+//
+// The response always uses statusCode and message. For a
+// *platformerrors.PlatformError (anywhere in err's chain) it also carries the
+// error's UUID and request ID. A non-nil err is attached to the gin context
+// so that logging middleware can report the underlying cause, which is never
+// sent to the client.
+func HandleErrorWithStatus(reqCtx *gin.Context, statusCode int, err error, message string) {
+	// gin's Context.Error panics on a nil error, hence the guard.
+	if err != nil {
+		_ = reqCtx.Error(err)
+	}
+
+	var domainErr *platformerrors.PlatformError
+	if errors.As(err, &domainErr) {
+		errResp := ErrorResponse{
+			Code:          domainErr.GetUUID(),
+			Error:         message,
+			ErrorInstance: domainErr,
+			RequestID:     domainErr.GetRequestID(),
+		}
+
+		reqCtx.AbortWithStatusJSON(statusCode, errResp)
+		return
+	}
+
+	// Generic error response for non-domain errors.
+	errResp := ErrorResponse{
+		Error:         message,
+		ErrorInstance: err,
+	}
+	reqCtx.AbortWithStatusJSON(statusCode, errResp)
+}
+
+// HandleNewError builds a route-layer platform error of the given type and
+// aborts the request with it. It is a convenience for validation failures
+// raised directly in route handlers. The caller-supplied uuid is passed to
+// the new error for tracking, and the HTTP status is derived from errorType.
+func HandleNewError(reqCtx *gin.Context, errorType platformerrors.ErrorType, message string, uuid string) {
+	platformErr := platformerrors.NewError(
+		reqCtx.Request.Context(),
+		platformerrors.LayerRoute,
+		errorType,
+		message,
+		nil,
+		uuid,
+	)
+
+	reqCtx.AbortWithStatusJSON(
+		platformerrors.ErrorTypeToHTTPStatus(platformErr.GetErrorType()),
+		ErrorResponse{
+			Code:          platformErr.GetUUID(),
+			Error:         message,
+			ErrorInstance: platformErr,
+			RequestID:     platformErr.GetRequestID(),
+		},
+	)
+}
+
+// GeneralResponse is a generic JSON envelope pairing a status string with a
+// result payload of type T.
+type GeneralResponse[T any] struct {
+	Status string `json:"status"`
+	Result T      `json:"result"`
+}
+
+// ListResponse is a generic JSON envelope for a page of results of type T.
+// Its cursor fields (first_id, last_id, has_more, total) have the same names
+// and types as those of PageCursor.
+type ListResponse[T any] struct {
+	Total   int64   `json:"total"`
+	Results []T     `json:"results"`
+	FirstID *string `json:"first_id"`
+	LastID  *string `json:"last_id"`
+	HasMore bool    `json:"has_more"`
+}
+
+// PageCursor describes the position of one page within a larger result set.
+// It is produced by BuildCursorPage.
+type PageCursor struct {
+	// FirstID and LastID are the IDs of the first and last items on the page;
+	// both stay nil for an empty page.
+	FirstID *string
+	LastID  *string
+	// HasMore is true when the look-ahead query returned at least one record.
+	HasMore bool
+	// Total is the value reported by the count callback.
+	Total   int64
+}
+
+// BuildCursorPage derives cursor metadata for a page of items.
+//
+// For a non-empty page, getID is applied to the first and last item and
+// hasMoreFunc is called to look for further records; HasMore is set when it
+// returns any. hasMoreFunc is not called for an empty page. CountFunc is
+// always called to fill Total.
+//
+// A hasMoreFunc error yields (nil, err). A CountFunc error yields the
+// partially filled cursor together with the error.
+func BuildCursorPage[T any](
+	items []*T,
+	getID func(*T) *string,
+	hasMoreFunc func() ([]*T, error),
+	CountFunc func() (int64, error),
+) (*PageCursor, error) {
+	page := &PageCursor{}
+
+	if n := len(items); n > 0 {
+		page.FirstID = getID(items[0])
+		page.LastID = getID(items[n-1])
+
+		extra, err := hasMoreFunc()
+		if err != nil {
+			return nil, err
+		}
+		page.HasMore = len(extra) > 0
+	}
+
+	total, err := CountFunc()
+	if err != nil {
+		return page, err
+	}
+	page.Total = total
+
+	return page, nil
+}
+
+// NewCookieWithSecurity builds a site-wide (Path "/") cookie that is
+// HttpOnly, Secure and SameSite=None. SameSite=None combined with Secure is
+// what lets the cookie travel on cross-origin requests, e.g. from a frontend
+// served on a different domain.
+func NewCookieWithSecurity(name string, value string, expires time.Time) *http.Cookie {
+	cookie := new(http.Cookie)
+	cookie.Name = name
+	cookie.Value = value
+	cookie.Expires = expires
+	cookie.Path = "/"
+	cookie.HttpOnly = true
+	cookie.Secure = true
+	cookie.SameSite = http.SameSiteNoneMode
+	return cookie
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/mcp_route.go b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/mcp_route.go
new file mode 100644
index 00000000..72d47583
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/mcp_route.go
@@ -0,0 +1,189 @@
+package mcp
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/golang-jwt/jwt/v5"
+	mcpserver "github.com/mark3labs/mcp-go/server"
+
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/responses"
+	"jan-server/services/mcp-tools/utils/platformerrors"
+)
+
+// allowedMCPMethods is the allow-list of JSON-RPC method names passed to
+// MCPMethodGuard for the /mcp route.
+var allowedMCPMethods = map[string]bool{
+	// Initialization / handshake
+	"initialize":                true,
+	"notifications/initialized": true,
+	"ping":                      true,
+
+	// Tools
+	"tools/list": true,
+	"tools/call": true,
+
+	// Prompts
+	// NOTE(review): the MCP specification names the prompt retrieval method
+	// "prompts/get"; "prompts/call" does not appear to be a spec method — confirm.
+	"prompts/list": true,
+	"prompts/call": true,
+
+	// Resources
+	"resources/list":           true,
+	"resources/templates/list": true,
+	"resources/read":           true,
+	"resources/subscribe":      true,
+}
+
+// MCPRoute owns the MCP server, the tool providers registered on it, and the
+// HTTP handler that serves it.
+type MCPRoute struct {
+	// Tool providers registered on mcpServer by NewMCPRoute; all except
+	// serperMCP are nil-checked there before use.
+	serperMCP   *SerperMCP
+	providerMCP *ProviderMCP
+	sandboxMCP  *SandboxFusionMCP
+	memoryMCP   *MemoryMCP
+	// mcpServer is the underlying MCP server instance.
+	mcpServer   *mcpserver.MCPServer
+	// httpHandler is the streamable HTTP handler that serveMCP delegates to.
+	httpHandler http.Handler
+}
+
+// NewMCPRoute creates the MCP server, registers the tools of every supplied
+// provider on it, and wraps it in a stateless streamable HTTP handler.
+//
+// serperMCP is used unconditionally. sandboxMCP, memoryMCP and providerMCP
+// are optional and skipped when nil.
+func NewMCPRoute(
+	serperMCP *SerperMCP,
+	providerMCP *ProviderMCP,
+	sandboxMCP *SandboxFusionMCP,
+	memoryMCP *MemoryMCP,
+) *MCPRoute {
+	server := mcpserver.NewMCPServer(
+		"menlo-platform",
+		"1.0.0",
+		mcpserver.WithToolCapabilities(true),
+		mcpserver.WithRecovery(),
+	)
+
+	serperMCP.RegisterTools(server)
+	if sandboxMCP != nil {
+		sandboxMCP.RegisterTools(server)
+	}
+	if memoryMCP != nil {
+		memoryMCP.RegisterTools(server)
+	}
+	if providerMCP != nil {
+		// A failure to register external provider tools is deliberately
+		// non-fatal: the route is still built with the remaining tools.
+		// NOTE(review): the error is presumably logged inside RegisterTools —
+		// confirm, since it is discarded here.
+		_ = providerMCP.RegisterTools(server)
+	}
+
+	handler := mcpserver.NewStreamableHTTPServer(server, mcpserver.WithStateLess(true))
+
+	return &MCPRoute{
+		serperMCP:   serperMCP,
+		providerMCP: providerMCP,
+		sandboxMCP:  sandboxMCP,
+		memoryMCP:   memoryMCP,
+		mcpServer:   server,
+		httpHandler: handler,
+	}
+}
+
+// RegisterRouter mounts POST /mcp on router. Requests pass through the
+// method allow-list guard, then user-context injection, then the MCP handler.
+func (route *MCPRoute) RegisterRouter(router *gin.RouterGroup) {
+	chain := []gin.HandlerFunc{
+		MCPMethodGuard(allowedMCPMethods),
+		InjectUserContext(),
+		route.serveMCP,
+	}
+	router.POST("/mcp", chain...)
+}
+
+// serveMCP streams Model Context Protocol responses using the underlying MCP server.
+// @Summary MCP endpoint for tool execution
+// @Description Handles Model Context Protocol (MCP) requests over HTTP. Supports MCP methods: initialize, ping, tools/list, tools/call, prompts/list, prompts/call, resources/list, resources/read.
+// @Description
+// @Description **Available Tools:**
+// @Description - `google_search`: Web search via pluggable engines (Serper/SearXNG/duckduckgo) with params: q, gl, hl, location, num, tbs, page, autocorrect, domain_allow_list, location_hint, offline_mode. Returns structured citations.
+// @Description - `scrape`: Web page scraping (params: url, includeMarkdown) returning text, preview, cache_status, and metadata.
+// @Description - `file_search_index` / `file_search_query`: Index arbitrary text and run similarity queries against the lightweight vector store.
+// @Description - `python_exec`: Execute trusted code through SandboxFusion (params: code, language, session_id, approved) to retrieve stdout/stderr/artifacts.
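+// @Description - `memory_retrieve`: Retrieve relevant user preferences, project context, or conversation history (params: query, user_id, project_id, conversation_id, max_user_items, max_project_items, max_episodic_items, min_similarity). The user_id from the JWT, when present, takes precedence over the user_id parameter. Returns personalized context.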
+// @Description
+// @Description **MCP Protocol:**
+// @Description - Request format: JSON-RPC 2.0 with method and params
+// @Description - Response format: Server-Sent Events (SSE) stream
+// @Description - Stateless mode (no session management)
+// @Tags MCP API
+// @Accept json
+// @Produce text/event-stream
+// @Param request body object true "MCP JSON-RPC request payload (e.g., {\"jsonrpc\":\"2.0\",\"method\":\"tools/list\",\"id\":1})"
+// @Success 200 {string} string "Streamed MCP response in SSE format"
+// @Failure 400 {object} responses.ErrorResponse "Invalid MCP request payload or unsupported method"
+// @Failure 500 {object} responses.ErrorResponse "Internal server error"
+// @Router /v1/mcp [post]
+func (route *MCPRoute) serveMCP(reqCtx *gin.Context) {
+	route.httpHandler.ServeHTTP(reqCtx.Writer, reqCtx.Request) // delegate to the streamable-HTTP MCP handler; the request carries any context value set by earlier middleware
+}
+
+// InjectUserContext extracts user_id from JWT token and injects it into request context
+func InjectUserContext() gin.HandlerFunc {
+	return func(reqCtx *gin.Context) {
+		// Try to get auth token from gin context (set by auth middleware)
+		if tokenVal, exists := reqCtx.Get("auth_token"); exists {
+			if token, ok := tokenVal.(*jwt.Token); ok && token.Valid {
+				if claims, ok := token.Claims.(jwt.MapClaims); ok {
+					// Try to extract user_id from various claim fields
+					var userID string
+					if sub, ok := claims["sub"].(string); ok && sub != "" {
+						userID = sub
+					} else if uid, ok := claims["user_id"].(string); ok && uid != "" {
+						userID = uid
+					} else if uid, ok := claims["uid"].(string); ok && uid != "" {
+						userID = uid
+					}
+
+					if userID != "" {
+						// Inject user_id into request context
+						ctx := context.WithValue(reqCtx.Request.Context(), "user_id", userID)
+						reqCtx.Request = reqCtx.Request.WithContext(ctx)
+					}
+				}
+			}
+		}
+		reqCtx.Next()
+	}
+}
+
+// MCPMethodGuard rejects MCP requests whose JSON-RPC "method" is empty or not
+// enabled in allowedMethods. The body is read in full and re-attached to the
+// request so the handlers that follow can read it again.
+// NOTE(review): every rejection path returns without reqCtx.Abort(); this
+// presumably relies on responses.HandleNewError aborting the chain — confirm.
+func MCPMethodGuard(allowedMethods map[string]bool) gin.HandlerFunc {
+	return func(reqCtx *gin.Context) {
+		raw, readErr := io.ReadAll(reqCtx.Request.Body)
+		if readErr != nil {
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeInternal, "failed to read MCP request body", "f10df80f-1651-4faa-8a75-3d91814d7990")
+			return
+		}
+		_ = reqCtx.Request.Body.Close()
+
+		if len(raw) == 0 {
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "empty MCP request body", "abf862e2-f2a8-4bd7-b1b7-56fc16647759")
+			return
+		}
+		reqCtx.Request.Body = io.NopCloser(bytes.NewBuffer(raw))
+
+		// Only the method name matters here; every other field is ignored.
+		var envelope struct {
+			Method string `json:"method"`
+		}
+
+		decodeErr := json.Unmarshal(raw, &envelope)
+		switch {
+		case decodeErr != nil:
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "invalid MCP request payload", "81f2eaae-8aa1-4569-95ec-c7a611fda0d0")
+		case envelope.Method == "":
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "missing method field in MCP request", "7b3c9e5a-2f4d-4a1e-9c8b-1d5f3e7a9b2c")
+		case !allowedMethods[envelope.Method]:
+			responses.HandleNewError(reqCtx, platformerrors.ErrorTypeValidation, "unsupported MCP method: "+envelope.Method, "6e5f62bb-a0fb-4146-969b-7d6dd1bbe8d6")
+		default:
+			reqCtx.Next()
+		}
+	}
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/memory_mcp.go b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/memory_mcp.go
new file mode 100644
index 00000000..370b0254
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/memory_mcp.go
@@ -0,0 +1,363 @@
+package mcp
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"jan-server/services/mcp-tools/utils/mcp"
+
+	mcpgo "github.com/mark3labs/mcp-go/mcp"
+	mcpserver "github.com/mark3labs/mcp-go/server"
+	"github.com/rs/zerolog/log"
+)
+
+// MemoryRetrieveArgs defines the arguments for the memory_retrieve tool; RegisterTools reflects it into the tool's option list.
+type MemoryRetrieveArgs struct {
+	Query            string   `json:"query" jsonschema:"required,description=What to search for in memory (e.g., 'user programming preferences', 'project tech stack decisions')"`
+	UserID           *string  `json:"user_id,omitempty" jsonschema:"description=Optional user ID to retrieve memories for. If not provided, will be extracted from JWT authentication."` // used only when the request context carries no JWT-derived user_id
+	ProjectID        *string  `json:"project_id,omitempty" jsonschema:"description=Optional project ID to filter project-specific memories"`
+	ConversationID   *string  `json:"conversation_id,omitempty" jsonschema:"description=Optional conversation ID for episodic memory context"`
+	Scopes           []string `json:"scopes,omitempty" jsonschema:"description=Memory scopes to search (e.g., ['preference', 'decision', 'fact'])"` // NOTE(review): the memory_retrieve handler in this file never reads "scopes" — confirm whether it should be forwarded
+	MaxUserItems     *int     `json:"max_user_items,omitempty" jsonschema:"description=Maximum number of user memory items to return (default: 3, max: 10)"`
+	MaxProjectItems  *int     `json:"max_project_items,omitempty" jsonschema:"description=Maximum number of project memory items to return (default: 5, max: 10)"`
+	MaxEpisodicItems *int     `json:"max_episodic_items,omitempty" jsonschema:"description=Maximum number of episodic memory items to return (default: 3, max: 10)"`
+	MinSimilarity    *float32 `json:"min_similarity,omitempty" jsonschema:"description=Minimum similarity score threshold (0.0-1.0, default: 0.75)"` // values outside [0, 1] are ignored by the handler
+}
+
+// memoryLoadRequest is the JSON body that callMemoryLoad POSTs to the memory-tools /v1/memory/load endpoint.
+type memoryLoadRequest struct {
+	UserID         string            `json:"user_id"`                   // always set: JWT-derived ID, else the user_id argument
+	ProjectID      string            `json:"project_id,omitempty"`      // omitted from the JSON when empty
+	ConversationID string            `json:"conversation_id,omitempty"` // omitted from the JSON when empty
+	Query          string            `json:"query"`
+	Options        memoryLoadOptions `json:"options"`
+}
+
+type memoryLoadOptions struct { // memoryLoadOptions carries the retrieval limits sent with every memoryLoadRequest.
+	AugmentWithMemory bool    `json:"augment_with_memory"` // the memory_retrieve handler always sends true
+	MaxUserItems      int     `json:"max_user_items"`      // handler default 3, capped at 10
+	MaxProjectItems   int     `json:"max_project_items"`   // handler default 5, capped at 10
+	MaxEpisodicItems  int     `json:"max_episodic_items"`  // handler default 3, capped at 10
+	MinSimilarity     float32 `json:"min_similarity"`      // handler default 0.75; overridden only by values in [0, 1]
+}
+
+// memoryLoadResponse is the JSON reply decoded from the memory-tools /v1/memory/load endpoint.
+type memoryLoadResponse struct {
+	CoreMemory     []memoryItem `json:"core_memory"`     // surfaced to the tool caller as user_memories
+	EpisodicMemory []memoryItem `json:"episodic_memory"` // surfaced to the tool caller as episodic_memories
+	SemanticMemory []memoryItem `json:"semantic_memory"` // surfaced to the tool caller as project_memories
+}
+
+type memoryItem struct { // memoryItem is one memory entry; it is decoded from the service reply and re-encoded with this same struct in the tool result.
+	ID             string                 `json:"id"`
+	Scope          string                 `json:"scope,omitempty"`
+	Key            string                 `json:"key,omitempty"`
+	Text           string                 `json:"text"` // the only field estimateTokens looks at
+	Importance     string                 `json:"importance,omitempty"`
+	CreatedAt      time.Time              `json:"created_at"`
+	UpdatedAt      time.Time              `json:"updated_at"`
+	RelevanceScore float64                `json:"relevance_score,omitempty"` // omitted from the JSON when zero
+	Metadata       map[string]interface{} `json:"metadata,omitempty"`
+}
+
+// memoryToolResult is the JSON document returned as the text result of a successful memory_retrieve call.
+type memoryToolResult struct {
+	Query            string       `json:"query"`             // the query argument, echoed back
+	TotalItems       int          `json:"total_items"`       // sum of the three list lengths below
+	UserMemories     []memoryItem `json:"user_memories"`     // taken from CoreMemory; a nil slice encodes as JSON null
+	ProjectMemories  []memoryItem `json:"project_memories"`  // taken from SemanticMemory
+	EpisodicMemories []memoryItem `json:"episodic_memories"` // taken from EpisodicMemory
+	QueryTimeMS      int64        `json:"query_time_ms"`     // milliseconds since the handler started
+	EstimatedTokens  int          `json:"estimated_tokens"`  // result of estimateTokens (total Text length / 4)
+}
+
+// MemoryMCP registers the memory_retrieve MCP tool and forwards its calls to the memory-tools HTTP service.
+type MemoryMCP struct {
+	memoryToolsURL string       // base URL; "/v1/memory/load" is appended per call, and an empty value disables registration
+	httpClient     *http.Client // created by NewMemoryMCP with a 10-second timeout
+	enabled        bool         // when false, RegisterTools registers nothing
+}
+
+// NewMemoryMCP returns a handler that talks to the memory-tools service at memoryToolsURL
+// through an HTTP client with a 10-second timeout; enabled gates tool registration.
+func NewMemoryMCP(memoryToolsURL string, enabled bool) *MemoryMCP {
+	client := &http.Client{Timeout: 10 * time.Second}
+	handler := new(MemoryMCP)
+	handler.memoryToolsURL = memoryToolsURL
+	handler.httpClient = client
+	handler.enabled = enabled
+	return handler
+}
+
+// RegisterTools adds the read-only memory_retrieve tool to server; nothing is
+// registered when the tool is disabled or no memory-tools URL is configured.
+//
+// The handler takes the user from the request context key "user_id" (set from
+// the JWT) and falls back to the user_id argument only when the context has
+// none. Item limits are capped at 10 and min_similarity must lie in [0, 1].
+// If the memory service call fails, an empty JSON result with an "error" field
+// is returned instead of a Go error so the agent flow is not interrupted.
+func (m *MemoryMCP) RegisterTools(server *mcpserver.MCPServer) {
+	if !m.enabled {
+		log.Warn().Msg("memory_retrieve MCP tool disabled via config")
+		return
+	}
+	if m.memoryToolsURL == "" {
+		log.Warn().Msg("Memory tools URL not configured, skipping memory_retrieve tool registration")
+		return
+	}
+
+	server.AddTool(
+		mcpgo.NewTool("memory_retrieve",
+			mcp.ReflectToMCPOptions(
+				"READ-ONLY: Search and retrieve relevant user preferences, project facts, or conversation history from memory storage. This tool ONLY reads existing memories - it does NOT create, update, or sync memories. Use this to recall what you already know about the user or project context.",
+				MemoryRetrieveArgs{},
+			)...,
+		),
+		func(ctx context.Context, req mcpgo.CallToolRequest) (*mcpgo.CallToolResult, error) {
+			startTime := time.Now()
+
+			query, err := req.RequireString("query")
+			if err != nil {
+				return nil, fmt.Errorf("query is required: %w", err)
+			}
+
+			// The JWT-derived identity wins over the caller-supplied argument.
+			var userID string
+			if ctxUserID, ok := ctx.Value("user_id").(string); ok && ctxUserID != "" {
+				userID = ctxUserID
+				log.Info().Str("user_id", userID).Str("query", query).Msg("[Memory MCP] Using user_id from JWT authentication")
+			} else {
+				userID = req.GetString("user_id", "")
+				if userID == "" {
+					log.Error().Str("query", query).Msg("[Memory MCP] user_id is required but not provided")
+					return nil, fmt.Errorf("user_id is required: provide it as a parameter or authenticate with JWT")
+				}
+				log.Info().Str("user_id", userID).Str("query", query).Msg("[Memory MCP] Using user_id from parameter (no JWT)")
+			}
+
+			// Start from the defaults; optional arguments override them below.
+			memReq := memoryLoadRequest{
+				UserID: userID,
+				Query:  query,
+				Options: memoryLoadOptions{
+					AugmentWithMemory: true,
+					MaxUserItems:      3,
+					MaxProjectItems:   5,
+					MaxEpisodicItems:  3,
+					MinSimilarity:     0.75,
+				},
+			}
+
+			// Optional filters are forwarded only when non-empty.
+			if projectID := req.GetString("project_id", ""); projectID != "" {
+				memReq.ProjectID = projectID
+				log.Info().Str("user_id", userID).Str("project_id", projectID).Msg("[Memory MCP] Including project_id filter")
+			}
+			if conversationID := req.GetString("conversation_id", ""); conversationID != "" {
+				memReq.ConversationID = conversationID
+				log.Info().Str("user_id", userID).Str("conversation_id", conversationID).Msg("[Memory MCP] Including conversation_id filter")
+			}
+
+			// applyLimit overrides *dst with a positive argument value, capped at 10.
+			applyLimit := func(key string, dst *int) {
+				limit := req.GetInt(key, 0)
+				if limit <= 0 {
+					return
+				}
+				if limit > 10 {
+					limit = 10
+				}
+				*dst = limit
+			}
+			applyLimit("max_user_items", &memReq.Options.MaxUserItems)
+			applyLimit("max_project_items", &memReq.Options.MaxProjectItems)
+			applyLimit("max_episodic_items", &memReq.Options.MaxEpisodicItems)
+
+			// Out-of-range or non-numeric min_similarity values keep the default.
+			if args := req.GetArguments(); args != nil {
+				if minSimRaw, ok := args["min_similarity"]; ok {
+					switch v := minSimRaw.(type) {
+					case float64:
+						if v >= 0.0 && v <= 1.0 {
+							memReq.Options.MinSimilarity = float32(v)
+						}
+					case float32:
+						if v >= 0.0 && v <= 1.0 {
+							memReq.Options.MinSimilarity = v
+						}
+					}
+				}
+			}
+
+			// Log the effective request parameters.
+			log.Info().
+				Str("user_id", userID).
+				Str("query", query).
+				Str("project_id", memReq.ProjectID).
+				Str("conversation_id", memReq.ConversationID).
+				Int("max_user_items", memReq.Options.MaxUserItems).
+				Int("max_project_items", memReq.Options.MaxProjectItems).
+				Int("max_episodic_items", memReq.Options.MaxEpisodicItems).
+				Float32("min_similarity", memReq.Options.MinSimilarity).
+				Str("memory_url", m.memoryToolsURL).
+				Msg("[Memory MCP] Calling memory-tools service")
+
+			// Fetch memories from the memory-tools service.
+			response, err := m.callMemoryLoad(ctx, memReq)
+			if err != nil {
+				log.Error().
+					Err(err).
+					Str("user_id", userID).
+					Str("query", query).
+					Str("memory_url", m.memoryToolsURL).
+					Msg("[Memory MCP] Failed to retrieve memories")
+				// Degrade to an empty result so the agent flow is not broken. The query is
+				// JSON-encoded first so quotes or backslashes in it cannot corrupt the payload.
+				queryJSON, marshalErr := json.Marshal(query)
+				if marshalErr != nil {
+					return nil, fmt.Errorf("failed to marshal query: %w", marshalErr)
+				}
+				return mcpgo.NewToolResultText(fmt.Sprintf(`{"query":%s,"total_items":0,"user_memories":[],"project_memories":[],"episodic_memories":[],"error":"memory service unavailable"}`, queryJSON)), nil
+			}
+
+			// Elapsed time covers argument handling plus the memory service round trip.
+			elapsed := time.Since(startTime).Milliseconds()
+
+			// Service naming to tool naming: core = user, semantic = project.
+			result := memoryToolResult{
+				Query:            query,
+				UserMemories:     response.CoreMemory,
+				ProjectMemories:  response.SemanticMemory,
+				EpisodicMemories: response.EpisodicMemory,
+				TotalItems:       len(response.CoreMemory) + len(response.SemanticMemory) + len(response.EpisodicMemory),
+				QueryTimeMS:      elapsed,
+				EstimatedTokens:  m.estimateTokens(response),
+			}
+
+			log.Info().
+				Str("user_id", userID).
+				Str("query", query).
+				Int("user_memories", len(response.CoreMemory)).
+				Int("project_memories", len(response.SemanticMemory)).
+				Int("episodic_memories", len(response.EpisodicMemory)).
+				Int("total_items", result.TotalItems).
+				Int64("query_time_ms", elapsed).
+				Int("estimated_tokens", result.EstimatedTokens).
+				Msg("[Memory MCP] Successfully retrieved memories")
+
+			resultJSON, err := json.Marshal(result)
+			if err != nil {
+				return nil, fmt.Errorf("failed to marshal result: %w", err)
+			}
+
+			return mcpgo.NewToolResultText(string(resultJSON)), nil
+		},
+	)
+
+	log.Info().Str("url", m.memoryToolsURL).Msg("Registered memory_retrieve MCP tool")
+}
+
+// callMemoryLoad POSTs req as JSON to <memoryToolsURL>/v1/memory/load and
+// decodes the reply. Transport failures, non-200 statuses and undecodable
+// bodies are logged and returned as errors. Response reads are bounded so a
+// misbehaving upstream cannot exhaust memory or flood the logs and error text.
+func (m *MemoryMCP) callMemoryLoad(ctx context.Context, req memoryLoadRequest) (*memoryLoadResponse, error) {
+	const (
+		maxResponseBytes  = 10 << 20 // cap for a successful payload
+		maxErrorBodyBytes = 4 << 10  // cap for an error body echoed into logs and errors
+	)
+	endpoint := m.memoryToolsURL + "/v1/memory/load"
+
+	reqBody, err := json.Marshal(req)
+	if err != nil {
+		log.Error().Err(err).Str("user_id", req.UserID).Msg("[Memory MCP] Failed to marshal request")
+		return nil, fmt.Errorf("failed to marshal request: %w", err)
+	}
+
+	log.Debug().
+		Str("user_id", req.UserID).Str("url", endpoint).Str("request_body", string(reqBody)).
+		Msg("[Memory MCP] Sending HTTP request to memory service")
+
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(reqBody))
+	if err != nil {
+		log.Error().Err(err).Str("user_id", req.UserID).Str("url", m.memoryToolsURL).Msg("[Memory MCP] Failed to create HTTP request")
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+
+	httpResp, err := m.httpClient.Do(httpReq)
+	if err != nil {
+		log.Error().Err(err).Str("user_id", req.UserID).Str("url", endpoint).Msg("[Memory MCP] HTTP request failed - connection error")
+		return nil, fmt.Errorf("failed to call memory service: %w", err)
+	}
+	defer httpResp.Body.Close()
+
+	if httpResp.StatusCode != http.StatusOK {
+		// Best effort: a read failure here only shortens the echoed body.
+		body, _ := io.ReadAll(io.LimitReader(httpResp.Body, maxErrorBodyBytes))
+		log.Error().
+			Str("user_id", req.UserID).
+			Int("status_code", httpResp.StatusCode).
+			Str("response_body", string(body)).
+			Str("url", endpoint).
+			Msg("[Memory MCP] Memory service returned non-OK status")
+		return nil, fmt.Errorf("memory service returned status %d: %s", httpResp.StatusCode, string(body))
+	}
+
+	// Read one byte past the cap so an oversized payload is reported
+	// explicitly instead of failing later as a confusing JSON error.
+	respBody, err := io.ReadAll(io.LimitReader(httpResp.Body, maxResponseBytes+1))
+	if err != nil {
+		log.Error().Err(err).Str("user_id", req.UserID).Msg("[Memory MCP] Failed to read response body")
+		return nil, fmt.Errorf("failed to read response: %w", err)
+	}
+	if len(respBody) > maxResponseBytes {
+		log.Error().Str("user_id", req.UserID).Int("limit_bytes", maxResponseBytes).Msg("[Memory MCP] Response body exceeds size limit")
+		return nil, fmt.Errorf("memory service response exceeds %d bytes", maxResponseBytes)
+	}
+
+	log.Debug().Str("user_id", req.UserID).Str("response_body", string(respBody)).Msg("[Memory MCP] Received response from memory service")
+
+	var response memoryLoadResponse
+	if err := json.Unmarshal(respBody, &response); err != nil {
+		log.Error().
+			Err(err).
+			Str("user_id", req.UserID).
+			Str("response_body", string(respBody)).
+			Msg("[Memory MCP] Failed to decode response JSON")
+		return nil, fmt.Errorf("failed to decode response: %w", err)
+	}
+
+	log.Info().
+		Str("user_id", req.UserID).
+		Int("core_memory_count", len(response.CoreMemory)).
+		Int("semantic_memory_count", len(response.SemanticMemory)).
+		Int("episodic_memory_count", len(response.EpisodicMemory)).
+		Msg("[Memory MCP] Successfully parsed memory service response")
+
+	return &response, nil
+}
+
+// estimateTokens approximates the token cost of all returned memories by
+// dividing the summed byte length of every item's Text by four.
+func (m *MemoryMCP) estimateTokens(response *memoryLoadResponse) int {
+	groups := [][]memoryItem{
+		response.CoreMemory,
+		response.SemanticMemory,
+		response.EpisodicMemory,
+	}
+
+	textBytes := 0
+	for _, group := range groups {
+		for i := range group {
+			textBytes += len(group[i].Text)
+		}
+	}
+	return textBytes / 4
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/provider_mcp.go b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/provider_mcp.go
new file mode 100644
index 00000000..b88f4ee7
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/provider_mcp.go
@@ -0,0 +1,203 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"jan-server/services/mcp-tools/internal/infrastructure/mcpprovider"
+
+	mcpgo "github.com/mark3labs/mcp-go/mcp"
+	mcpserver "github.com/mark3labs/mcp-go/server"
+	"github.com/rs/zerolog/log"
+)
+
+// ProviderMCP proxies the tools exposed by external MCP providers onto the local MCP server.
+type ProviderMCP struct {
+	bridges map[string]*mcpprovider.Bridge // provider name -> bridge; filled by Initialize, read by RegisterTools and GetBridge
+	config  *mcpprovider.Config            // supplies the enabled provider list that Initialize iterates
+}
+
+// NewProviderMCP returns a ProviderMCP for config with an empty bridge table.
+// No provider is contacted until Initialize is called.
+func NewProviderMCP(config *mcpprovider.Config) *ProviderMCP {
+	handler := &ProviderMCP{config: config}
+	handler.bridges = map[string]*mcpprovider.Bridge{}
+	return handler
+}
+
+// Initialize creates one bridge per enabled MCP-over-HTTP provider in the
+// config and keeps those whose Initialize call succeeds. Providers of any
+// other type and providers that fail to initialize are logged and skipped, so
+// the method always returns nil.
+func (p *ProviderMCP) Initialize(ctx context.Context) error {
+	providers := p.config.GetEnabledProviders()
+	log.Info().Int("count", len(providers)).Msg("Initializing MCP provider bridges")
+
+	for _, provider := range providers {
+		// Only MCP-over-HTTP providers are bridged.
+		if provider.Type != mcpprovider.ProviderTypeMCPHTTP {
+			log.Warn().
+				Str("provider", provider.Name).
+				Str("type", string(provider.Type)).
+				Msg("Skipping non-MCP provider (not yet implemented)")
+			continue
+		}
+
+		bridge := mcpprovider.NewBridge(provider)
+		err := bridge.Initialize(ctx)
+		if err == nil {
+			p.bridges[provider.Name] = bridge
+			log.Info().
+				Str("provider", provider.Name).
+				Str("endpoint", provider.Endpoint).
+				Msg("MCP provider bridge initialized")
+			continue
+		}
+
+		// A provider that fails to initialize is skipped rather than treated as fatal.
+		log.Error().
+			Err(err).
+			Str("provider", provider.Name).
+			Str("endpoint", provider.Endpoint).
+			Msg("Failed to initialize MCP provider, skipping")
+	}
+
+	return nil
+}
+
+// RegisterTools proxies every tool advertised by the initialized provider
+// bridges onto server. Each tool is exposed as "<provider>_<tool>" and its
+// handler forwards the call arguments to the provider unchanged. Providers
+// whose tool list cannot be fetched or parsed are logged and skipped, so the
+// returned error is always nil.
+func (p *ProviderMCP) RegisterTools(server *mcpserver.MCPServer) error {
+	ctx := context.Background()
+
+	for providerName, bridge := range p.bridges {
+		// Per-iteration copies for the handler closures below. Before Go 1.22 the
+		// range variables are shared by all iterations, so a closure that read
+		// providerName directly would report whichever provider was iterated last.
+		currentProvider := providerName
+		currentBridge := bridge
+
+		log.Info().
+			Str("provider", currentProvider).
+			Msg("Fetching tool list from MCP provider")
+
+		toolsResult, err := currentBridge.ListTools(ctx)
+		if err != nil {
+			log.Error().
+				Err(err).
+				Str("provider", currentProvider).
+				Msg("Failed to list tools from provider")
+			continue
+		}
+
+		// Only name and description are used; inputSchema is decoded but not yet applied.
+		var toolsResponse struct {
+			Tools []struct {
+				Name        string                 `json:"name"`
+				Description string                 `json:"description"`
+				InputSchema map[string]interface{} `json:"inputSchema"`
+			} `json:"tools"`
+		}
+
+		if err := json.Unmarshal(toolsResult, &toolsResponse); err != nil {
+			log.Error().
+				Err(err).
+				Str("provider", currentProvider).
+				Msg("Failed to parse tools response")
+			continue
+		}
+
+		for _, tool := range toolsResponse.Tools {
+			toolName := fmt.Sprintf("%s_%s", currentProvider, tool.Name)
+			toolDesc := fmt.Sprintf("[%s] %s", currentProvider, tool.Description)
+
+			log.Info().
+				Str("provider", currentProvider).
+				Str("original_tool", tool.Name).
+				Str("registered_as", toolName).
+				Msg("Registering proxied MCP tool")
+
+			// Same capture rule for the inner loop variable.
+			currentToolName := tool.Name
+
+			server.AddTool(
+				mcpgo.NewTool(toolName,
+					mcpgo.WithDescription(toolDesc),
+					// TODO: Parse inputSchema and convert to mcp-go options
+					// For now, we'll accept any arguments and forward them
+				),
+				func(ctx context.Context, req mcpgo.CallToolRequest) (*mcpgo.CallToolResult, error) {
+					// Forward the arguments as-is; anything that is not a JSON object becomes an empty map.
+					arguments := make(map[string]interface{})
+					if argsMap, ok := req.Params.Arguments.(map[string]interface{}); ok {
+						arguments = argsMap
+					}
+
+					log.Debug().
+						Str("tool", currentToolName).
+						Str("provider", currentProvider).
+						Interface("arguments", arguments).
+						Msg("Forwarding tool call to MCP provider")
+
+					// Call the external MCP provider
+					result, err := currentBridge.CallTool(ctx, currentToolName, arguments)
+					if err != nil {
+						log.Error().
+							Err(err).
+							Str("tool", currentToolName).
+							Str("provider", currentProvider).
+							Msg("Failed to call tool on MCP provider")
+						return nil, fmt.Errorf("provider %s tool call failed: %w", currentProvider, err)
+					}
+
+					var toolResult struct {
+						Content []struct {
+							Type string `json:"type"`
+							Text string `json:"text"`
+						} `json:"content"`
+					}
+
+					if err := json.Unmarshal(result, &toolResult); err != nil {
+						log.Error().
+							Err(err).
+							Str("tool", currentToolName).
+							Msg("Failed to parse tool result")
+						// Return raw result as text
+						return mcpgo.NewToolResultText(string(result)), nil
+					}
+
+					// Join all text parts, one per line; non-text parts are ignored.
+					var combinedText string
+					for _, content := range toolResult.Content {
+						if content.Type == "text" {
+							combinedText += content.Text + "\n"
+						}
+					}
+
+					if combinedText != "" {
+						return mcpgo.NewToolResultText(combinedText), nil
+					}
+
+					// Fallback: return raw JSON
+					return mcpgo.NewToolResultText(string(result)), nil
+				},
+			)
+		}
+
+		log.Info().
+			Str("provider", currentProvider).
+			Int("tools_count", len(toolsResponse.Tools)).
+			Msg("Successfully registered tools from MCP provider")
+	}
+
+	return nil
+}
+
+// GetBridge returns the bridge stored under providerName, or nil when the bridge table has no entry for that name.
+func (p *ProviderMCP) GetBridge(providerName string) *mcpprovider.Bridge {
+	return p.bridges[providerName]
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/sandboxfusion_mcp.go b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/sandboxfusion_mcp.go
new file mode 100644
index 00000000..2d83eaf9
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/sandboxfusion_mcp.go
@@ -0,0 +1,104 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"jan-server/services/mcp-tools/internal/infrastructure/sandboxfusion"
+	"jan-server/services/mcp-tools/utils/mcp"
+
+	mcpgo "github.com/mark3labs/mcp-go/mcp"
+	mcpserver "github.com/mark3labs/mcp-go/server"
+	"github.com/rs/zerolog/log"
+)
+
+type SandboxFusionArgs struct { // SandboxFusionArgs is the argument struct reflected into the python_exec tool definition.
+	Code      string  `json:"code" jsonschema:"required,description=Python snippet to execute"` // read with RequireString; the call fails without it
+	Language  *string `json:"language,omitempty" jsonschema:"description=Execution language (default: python)"` // forwarded only when non-empty
+	SessionID *string `json:"session_id,omitempty" jsonschema:"description=Existing SandboxFusion session to reuse"` // forwarded only when non-empty
+	Approved  *bool   `json:"approved,omitempty" jsonschema:"description=Set true when approval is required to run code"` // checked only when the handler was built with requireApproval
+}
+
+type SandboxFusionMCP struct { // SandboxFusionMCP registers the python_exec tool backed by a SandboxFusion client.
+	client          *sandboxfusion.Client // runs the submitted code via RunCode
+	requireApproval bool                  // when true, calls are rejected unless the approved argument is set
+	enabled         bool                  // when false, RegisterTools registers nothing
+}
+
+// NewSandboxFusionMCP wraps client in a python_exec tool provider.
+// A nil client yields a nil provider, which RegisterTools treats as a no-op.
+func NewSandboxFusionMCP(client *sandboxfusion.Client, requireApproval bool, enabled bool) *SandboxFusionMCP {
+	if client == nil {
+		return nil
+	}
+	provider := new(SandboxFusionMCP)
+	provider.client, provider.requireApproval, provider.enabled = client, requireApproval, enabled
+	return provider
+}
+
+// RegisterTools adds the python_exec tool to server. It does nothing for a
+// nil receiver, a nil client or a disabled tool. When approval is required, a
+// call is rejected unless its "approved" argument is present, non-nil and
+// evaluates to true through req.GetBool.
+func (s *SandboxFusionMCP) RegisterTools(server *mcpserver.MCPServer) {
+	if s == nil || s.client == nil {
+		return
+	}
+	if !s.enabled {
+		log.Warn().Msg("python_exec MCP tool disabled via config")
+		return
+	}
+
+	tool := mcpgo.NewTool("python_exec",
+		mcp.ReflectToMCPOptions(
+			"Execute trusted code inside SandboxFusion and return stdout/stderr/artifacts.",
+			SandboxFusionArgs{},
+		)...,
+	)
+
+	handler := func(ctx context.Context, req mcpgo.CallToolRequest) (*mcpgo.CallToolResult, error) {
+		if s.requireApproval {
+			// Indexing a nil argument map is safe and reports the key as absent.
+			args := req.GetArguments()
+			approvedRaw, present := args["approved"]
+			if args == nil || !present || approvedRaw == nil || !req.GetBool("approved", false) {
+				return nil, fmt.Errorf("sandboxfusion execution requires approval; set the `approved` argument to true")
+			}
+		}
+
+		code, err := req.RequireString("code")
+		if err != nil {
+			return nil, err
+		}
+
+		// Missing or empty language / session_id leave the request fields empty.
+		runReq := sandboxfusion.RunCodeRequest{
+			Code:      code,
+			Language:  req.GetString("language", ""),
+			SessionID: req.GetString("session_id", ""),
+		}
+
+		resp, err := s.client.RunCode(ctx, runReq)
+		if err != nil {
+			return nil, err
+		}
+
+		// The execution outcome is returned to the caller as one JSON text result.
+		encoded, err := json.Marshal(map[string]any{
+			"stdout":      resp.Stdout,
+			"stderr":      resp.Stderr,
+			"duration_ms": resp.Duration,
+			"session_id":  resp.SessionID,
+			"artifacts":   resp.Artifacts,
+			"error":       resp.Error,
+		})
+		if err != nil {
+			return nil, err
+		}
+
+		return mcpgo.NewToolResultText(string(encoded)), nil
+	}
+
+	server.AddTool(tool, handler)
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/serper_mcp.go b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/serper_mcp.go
new file mode 100644
index 00000000..00485b70
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/routes/mcp/serper_mcp.go
@@ -0,0 +1,444 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	domainsearch "jan-server/services/mcp-tools/internal/domain/search"
+	"jan-server/services/mcp-tools/internal/infrastructure/vectorstore"
+	"jan-server/services/mcp-tools/utils/mcp"
+
+	mcpgo "github.com/mark3labs/mcp-go/mcp"
+	mcpserver "github.com/mark3labs/mcp-go/server"
+)
+
+// SerperSearchArgs is the argument struct passed to mcp.ReflectToMCPOptions when registering google_search; only Q is marked required.
+type SerperSearchArgs struct {
+	Q               string   `json:"q" jsonschema:"required,description=Search query string"`
+	GL              *string  `json:"gl,omitempty" jsonschema:"description=Optional region code for search results in ISO 3166-1 alpha-2 format (e.g., 'us')"`
+	HL              *string  `json:"hl,omitempty" jsonschema:"description=Optional language code for search results in ISO 639-1 format (e.g., 'en')"`
+	Location        *string  `json:"location,omitempty" jsonschema:"description=Optional location for search results (e.g., 'SoHo, New York, United States', 'California, United States')"`
+	Num             *int     `json:"num,omitempty" jsonschema:"description=Number of results to return (default: 10)"`
+	Tbs             *string  `json:"tbs,omitempty" jsonschema:"description=Time-based search filter ('qdr:h' for past hour, 'qdr:d' for past day, 'qdr:w' for past week, 'qdr:m' for past month, 'qdr:y' for past year)"`
+	Page            *int     `json:"page,omitempty" jsonschema:"description=Page number of results to return (default: 1)"`
+	Autocorrect     *bool    `json:"autocorrect,omitempty" jsonschema:"description=Whether to autocorrect spelling in query"`
+	DomainAllowList []string `json:"domain_allow_list,omitempty" jsonschema:"description=Restrict results to the provided domains, e.g., ['example.com','wikipedia.org']"`
+	LocationHint    *string  `json:"location_hint,omitempty" jsonschema:"description=Soft location hint (region or timezone) applied when the upstream engine supports it"`
+	OfflineMode     *bool    `json:"offline_mode,omitempty" jsonschema:"description=Force cached/offline search mode even when live engines are available"`
+}
+
+// SerperScrapeArgs is the argument struct for the scrape tool: a required URL plus an optional markdown toggle.
+type SerperScrapeArgs struct {
+	Url             string `json:"url" jsonschema:"required,description=The URL of webpage to scrape"`
+	IncludeMarkdown *bool  `json:"includeMarkdown,omitempty" jsonschema:"description=Whether to include markdown content"`
+}
+
+type FileSearchIndexArgs struct { // argument struct for the file_search_index tool; document_id and text are required
+	DocumentID string         `json:"document_id" jsonschema:"required,description=Stable identifier for the document"`
+	Text       string         `json:"text" jsonschema:"required,description=Raw text to index"`
+	Metadata   map[string]any `json:"metadata,omitempty" jsonschema:"description=Optional metadata object stored with the document"`
+	Tags       []string       `json:"tags,omitempty" jsonschema:"description=Optional list of tags used to filter search results"`
+}
+
+type FileSearchQueryArgs struct { // argument struct for the file_search_query tool; only query is required
+	Query       string   `json:"query" jsonschema:"required,description=Natural language query to search for"`
+	TopK        *int     `json:"top_k,omitempty" jsonschema:"description=Maximum number of results to return (default: 5, max: 20)"`
+	DocumentIDs []string `json:"document_ids,omitempty" jsonschema:"description=Optional whitelist of document IDs to search within"`
+}
+
+type searchToolResult struct { // one normalized hit inside searchToolPayload.Results
+	Position    int    `json:"position"` // 1-based rank: index in the upstream organic list plus one
+	Title       string `json:"title"`
+	SourceURL   string `json:"source_url"` // upstream "link" value; may be empty
+	Snippet     string `json:"snippet"` // upstream snippet or description, or a placeholder text when both are blank
+	CacheStatus string `json:"cache_status"` // copy of the payload-level cache status
+	FetchedAt   string `json:"fetched_at"` // RFC 3339 UTC time at which the payload was built
+}
+
+type searchToolPayload struct { // JSON body returned by the google_search tool
+	Query       string                       `json:"query"`
+	Engine      string                       `json:"engine"` // metadata "engine" value, "serper" when absent
+	Live        bool                         `json:"live"` // true unless the upstream metadata carries live=false
+	CacheStatus string                       `json:"cache_status"` // "live", the upstream "reason", or "fallback"
+	Metadata    map[string]any               `json:"metadata"` // upstream search parameters, or an empty object
+	Results     []searchToolResult           `json:"results"`
+	Citations   []string                     `json:"citations"` // non-empty source URLs in result order
+	Raw         *domainsearch.SearchResponse `json:"raw,omitempty"` // the upstream response the payload was built from
+}
+
+type scrapeToolPayload struct { // JSON body returned by the scrape tool
+	SourceURL   string         `json:"source_url"` // the URL that was requested
+	Text        string         `json:"text"`
+	TextPreview string         `json:"text_preview"` // Text passed through truncateSnippet with a 600 limit
+	Metadata    map[string]any `json:"metadata"` // upstream metadata, or an empty object
+	CacheStatus string         `json:"cache_status"` // "live", or "fallback" when metadata fallback_mode is true
+	FetchedAt   string         `json:"fetched_at"` // RFC 3339 UTC time at which the payload was built
+}
+
+// SerperMCP registers the search, scrape and (optionally) file-search MCP tools.
+type SerperMCP struct {
+	searchService *domainsearch.SearchService // backs google_search and scrape
+	vectorStore   *vectorstore.Client // nil means the file_search_* tools are not registered
+}
+
+// NewSerperMCP bundles the search service and the vector store client into a
+// SerperMCP. Both pointers are stored as given; a nil vectorStore is allowed
+// and simply keeps the file_search_* tools from being registered.
+func NewSerperMCP(searchService *domainsearch.SearchService, vectorStore *vectorstore.Client) *SerperMCP {
+	handler := &SerperMCP{searchService: searchService, vectorStore: vectorStore}
+	return handler
+}
+
+// RegisterTools adds google_search and scrape to server, plus file_search_index and file_search_query when a vector store client is set.
+func (s *SerperMCP) RegisterTools(server *mcpserver.MCPServer) {
+	// google_search: copy the optional arguments onto a SearchRequest and return the normalized payload as JSON text
+	server.AddTool(
+		mcpgo.NewTool("google_search",
+			mcp.ReflectToMCPOptions(
+				"Perform web searches via the configured engines (Serper, SearXNG, or cached fallback) and fetch structured citations.",
+				SerperSearchArgs{},
+			)...,
+		),
+		func(ctx context.Context, req mcpgo.CallToolRequest) (*mcpgo.CallToolResult, error) {
+			q, err := req.RequireString("q")
+			if err != nil {
+				return nil, err
+			}
+
+			searchReq := domainsearch.SearchRequest{
+				Q: q,
+			}
+
+			if gl := req.GetString("gl", ""); gl != "" {
+				searchReq.GL = &gl
+			}
+			if hl := req.GetString("hl", ""); hl != "" {
+				searchReq.HL = &hl
+			}
+			if location := req.GetString("location", ""); location != "" {
+				searchReq.Location = &location
+			}
+			if num := req.GetInt("num", 0); num > 0 { // zero and negative values are not forwarded
+				searchReq.Num = &num
+			}
+			if page := req.GetInt("page", 0); page > 0 { // zero and negative values are not forwarded
+				searchReq.Page = &page
+			}
+			if tbs := req.GetString("tbs", ""); tbs != "" {
+				val := domainsearch.TBSTimeRange(tbs)
+				searchReq.TBS = &val
+			}
+			autocorrect := req.GetBool("autocorrect", true) // always forwarded; true when the caller omits it
+			searchReq.Autocorrect = &autocorrect
+
+			if domains := req.GetStringSlice("domain_allow_list", nil); len(domains) > 0 {
+				searchReq.DomainAllowList = domains
+			}
+			if locationHint := req.GetString("location_hint", ""); locationHint != "" {
+				searchReq.LocationHint = &locationHint
+			}
+			if args := req.GetArguments(); args != nil {
+				if _, ok := args["offline_mode"]; ok { // override only when the key was sent explicitly
+					override := req.GetBool("offline_mode", false)
+					searchReq.OfflineMode = &override
+				}
+			}
+
+			searchResp, err := s.searchService.Search(ctx, searchReq)
+			if err != nil {
+				return nil, err
+			}
+
+			payload := buildSearchPayload(searchReq.Q, searchReq, searchResp)
+			jsonBytes, err := json.Marshal(payload)
+			if err != nil {
+				return nil, err
+			}
+
+			return mcpgo.NewToolResultText(string(jsonBytes)), nil
+		},
+	)
+
+	// scrape: fetch one page through the search service and return the normalized payload as JSON text
+	server.AddTool(
+		mcpgo.NewTool("scrape",
+			mcp.ReflectToMCPOptions(
+				"Scrape a webpage and retrieve the text with optional markdown formatting.",
+				SerperScrapeArgs{},
+			)...,
+		),
+		func(ctx context.Context, req mcpgo.CallToolRequest) (*mcpgo.CallToolResult, error) {
+			url, err := req.RequireString("url")
+			if err != nil {
+				return nil, err
+			}
+
+			scrapeReq := domainsearch.FetchWebpageRequest{
+				Url: url,
+			}
+
+			if includeMarkdown := req.GetBool("includeMarkdown", false); includeMarkdown { // forwarded only when true
+				scrapeReq.IncludeMarkdown = &includeMarkdown
+			}
+
+			scrapeResp, err := s.searchService.FetchWebpage(ctx, scrapeReq)
+			if err != nil {
+				return nil, err
+			}
+
+			payload := buildScrapePayload(scrapeReq.Url, scrapeResp)
+			jsonBytes, err := json.Marshal(payload)
+			if err != nil {
+				return nil, err
+			}
+
+			return mcpgo.NewToolResultText(string(jsonBytes)), nil
+		},
+	)
+
+	if s.vectorStore != nil { // the file_search_* tools are registered only with a vector store client
+		server.AddTool(
+			mcpgo.NewTool("file_search_index",
+				mcp.ReflectToMCPOptions(
+					"Index arbitrary text into the lightweight vector store used for MCP automations.",
+					FileSearchIndexArgs{},
+				)...,
+			),
+			func(ctx context.Context, req mcpgo.CallToolRequest) (*mcpgo.CallToolResult, error) {
+				if s.vectorStore == nil { // defensive re-check at call time
+					return nil, fmt.Errorf("vector store client is not configured")
+				}
+
+				docID, err := req.RequireString("document_id")
+				if err != nil {
+					return nil, err
+				}
+				text, err := req.RequireString("text")
+				if err != nil {
+					return nil, err
+				}
+
+				metadata := extractMapArgument(req.GetArguments(), "metadata") // nil unless a JSON object was sent
+				tags := req.GetStringSlice("tags", nil)
+
+				resp, err := s.vectorStore.IndexDocument(ctx, vectorstore.IndexRequest{
+					DocumentID: docID,
+					Text:       text,
+					Metadata:   metadata,
+					Tags:       tags,
+				})
+				if err != nil {
+					return nil, err
+				}
+
+				payload := map[string]any{
+					"document_id": resp.DocumentID,
+					"status":      resp.Status,
+					"indexed_at":  resp.IndexedAt,
+					"token_count": resp.TokenCount,
+				}
+				jsonBytes, err := json.Marshal(payload)
+				if err != nil {
+					return nil, err
+				}
+
+				return mcpgo.NewToolResultText(string(jsonBytes)), nil
+			},
+		)
+
+		server.AddTool(
+			mcpgo.NewTool("file_search_query",
+				mcp.ReflectToMCPOptions(
+					"Run a semantic query against documents indexed via file_search_index.",
+					FileSearchQueryArgs{},
+				)...,
+			),
+			func(ctx context.Context, req mcpgo.CallToolRequest) (*mcpgo.CallToolResult, error) {
+				if s.vectorStore == nil { // defensive re-check at call time
+					return nil, fmt.Errorf("vector store client is not configured")
+				}
+
+				query, err := req.RequireString("query")
+				if err != nil {
+					return nil, err
+				}
+
+				topK := req.GetInt("top_k", 5) // default 5; the checks below reset non-positive values to 5 and cap at 20
+				if topK <= 0 {
+					topK = 5
+				}
+				if topK > 20 {
+					topK = 20
+				}
+				docIDs := req.GetStringSlice("document_ids", nil)
+
+				resp, err := s.vectorStore.Query(ctx, vectorstore.QueryRequest{
+					Text:        query,
+					TopK:        topK,
+					DocumentIDs: docIDs,
+				})
+				if err != nil {
+					return nil, err
+				}
+
+				if resp.TopK == 0 { // report the requested value when the response leaves TopK at zero
+					resp.TopK = topK
+				}
+
+				jsonBytes, err := json.Marshal(resp)
+				if err != nil {
+					return nil, err
+				}
+
+				return mcpgo.NewToolResultText(string(jsonBytes)), nil
+			},
+		)
+	}
+}
+
+// buildSearchPayload converts an upstream search response into the JSON shape
+// returned by the google_search tool.
+//
+// query is echoed back as-is; req is accepted for signature compatibility and
+// is not read. A nil resp yields an empty, "live" payload attributed to the
+// default "serper" engine.
+func buildSearchPayload(query string, req domainsearch.SearchRequest, resp *domainsearch.SearchResponse) searchToolPayload {
+	fetchedAt := time.Now().UTC().Format(time.RFC3339)
+
+	// Reuse the upstream search parameters as metadata when they exist.
+	meta := map[string]any{}
+	if resp != nil && resp.SearchParameters != nil {
+		meta = resp.SearchParameters
+	}
+
+	engineName := stringFromMap(meta, "engine")
+	if engineName == "" {
+		engineName = "serper"
+	}
+
+	// Results count as live unless the upstream explicitly reports live=false.
+	isLive := true
+	if flag, isBool := meta["live"].(bool); isBool {
+		isLive = flag
+	}
+
+	// Non-live results report the upstream reason, or "fallback" without one.
+	status := "live"
+	if !isLive {
+		status = stringFromMap(meta, "reason")
+		if status == "" {
+			status = "fallback"
+		}
+	}
+
+	// Both slices start non-nil so they marshal as [] rather than null.
+	hits := []searchToolResult{}
+	links := []string{}
+
+	if resp != nil {
+		for i, entry := range resp.Organic {
+			link := stringFromMap(entry, "link")
+			summary := firstNonEmpty(stringFromMap(entry, "snippet"), stringFromMap(entry, "description"))
+			if summary == "" {
+				summary = "No snippet returned by upstream engine."
+			}
+			hits = append(hits, searchToolResult{
+				Position:    i + 1,
+				Title:       stringFromMap(entry, "title"),
+				SourceURL:   link,
+				Snippet:     truncateSnippet(summary, 420),
+				CacheStatus: status,
+				FetchedAt:   fetchedAt,
+			})
+			if link != "" {
+				links = append(links, link) // only non-empty URLs become citations
+			}
+		}
+	}
+
+	return searchToolPayload{
+		Query:       query,
+		Engine:      engineName,
+		Live:        isLive,
+		CacheStatus: status,
+		Metadata:    meta,
+		Results:     hits,
+		Citations:   links,
+		Raw:         resp,
+	}
+}
+
+// buildScrapePayload wraps a fetched page in the JSON shape returned by the
+// scrape tool. A nil resp produces empty text and empty metadata; the cache
+// status is "fallback" only when metadata["fallback_mode"] is the bool true.
+func buildScrapePayload(url string, resp *domainsearch.FetchWebpageResponse) scrapeToolPayload {
+	out := scrapeToolPayload{
+		SourceURL:   url,
+		Metadata:    map[string]any{},
+		CacheStatus: "live",
+	}
+
+	if resp != nil {
+		out.Text = resp.Text
+		if resp.Metadata != nil {
+			out.Metadata = resp.Metadata
+		}
+	}
+
+	// A failed type assertion leaves usedFallback false.
+	if usedFallback, _ := out.Metadata["fallback_mode"].(bool); usedFallback {
+		out.CacheStatus = "fallback"
+	}
+
+	// Preview and timestamp are derived last, once Text is final.
+	out.TextPreview = truncateSnippet(out.Text, 600)
+	out.FetchedAt = time.Now().UTC().Format(time.RFC3339)
+	return out
+}
+
+// stringFromMap returns data[key] when it holds a string and "" otherwise.
+//
+// A nil map, a missing key and a non-string value all yield "": indexing a nil
+// map is legal in Go and the failed type assertion leaves the zero value.
+func stringFromMap(data map[string]any, key string) string {
+	text, isString := data[key].(string)
+	if !isString {
+		return ""
+	}
+	return text
+}
+
+func firstNonEmpty(values ...string) string { // first value that is non-blank after trimming, returned untrimmed
+	for idx := range values {
+		if candidate := values[idx]; strings.TrimSpace(candidate) != "" {
+			return candidate
+		}
+	}
+	return "" // every value was empty or whitespace-only
+}
+
+// truncateSnippet trims text and caps it at maxLen runes plus "…"; a negative maxLen is clamped to 0 instead of panicking on the slice.
+func truncateSnippet(text string, maxLen int) string {
+	trimmed := strings.TrimSpace(text)
+	if runes := []rune(trimmed); len(runes) > maxLen { // count runes, not bytes, so multi-byte text is never split
+		return string(runes[:max(maxLen, 0)]) + "…"
+	}
+	return trimmed
+}
+
+// extractMapArgument returns args[key] when it is a JSON object
+// (map[string]any) and nil in every other case.
+//
+// Nil args, an absent key and a value of another type all return nil: a nil
+// map may be indexed safely, and a failed assertion yields a nil map.
+// The returned map is the caller's own value, not a copy.
+func extractMapArgument(args map[string]any, key string) map[string]any {
+	object, isObject := args[key].(map[string]any)
+	if isObject {
+		return object
+	}
+	return nil
+}
diff --git a/services/mcp-tools/internal/interfaces/httpserver/routes/routes_provider.go b/services/mcp-tools/internal/interfaces/httpserver/routes/routes_provider.go
new file mode 100644
index 00000000..9ee17d3d
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/httpserver/routes/routes_provider.go
@@ -0,0 +1,46 @@
+package routes
+
+import (
+	"github.com/google/wire"
+	"github.com/rs/zerolog/log"
+
+	"jan-server/services/mcp-tools/internal/infrastructure/config"
+	sandboxfusionclient "jan-server/services/mcp-tools/internal/infrastructure/sandboxfusion"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/routes/mcp"
+)
+
+// RoutesProvider is the Wire provider set for the route layer: the MCP tool handlers and the MCP route constructor.
+var RoutesProvider = wire.NewSet(
+	mcp.NewSerperMCP,
+	mcp.NewProviderMCP,
+	ProvideSandboxFusionMCP, // returns nil when python_exec is disabled or no client exists
+	ProvideMemoryMCP, // returns nil when memory_retrieve is disabled or no URL is set
+	mcp.NewMCPRoute,
+)
+
+// ProvideSandboxFusionMCP returns the python_exec handler, or nil when the tool is disabled in config or no client was supplied.
+func ProvideSandboxFusionMCP(
+	client *sandboxfusionclient.Client,
+	cfg *config.Config,
+) *mcp.SandboxFusionMCP {
+	switch {
+	case !cfg.EnablePythonExec:
+		log.Warn().Msg("SandboxFusion python_exec tool disabled via config")
+		return nil
+	case client == nil:
+		return nil
+	}
+	return mcp.NewSandboxFusionMCP(client, cfg.SandboxFusionRequireApproval, cfg.EnablePythonExec)
+}
+
+// ProvideMemoryMCP returns the memory_retrieve handler, or nil when the tool is disabled in config or no URL is set.
+func ProvideMemoryMCP(cfg *config.Config) *mcp.MemoryMCP {
+	switch {
+	case !cfg.EnableMemoryRetrieve:
+		log.Warn().Msg("memory_retrieve MCP tool disabled via config")
+		return nil
+	case cfg.MemoryToolsURL == "":
+		return nil
+	}
+	return mcp.NewMemoryMCP(cfg.MemoryToolsURL, cfg.EnableMemoryRetrieve)
+}
diff --git a/services/mcp-tools/internal/interfaces/interface_provider.go b/services/mcp-tools/internal/interfaces/interface_provider.go
new file mode 100644
index 00000000..b9a9558a
--- /dev/null
+++ b/services/mcp-tools/internal/interfaces/interface_provider.go
@@ -0,0 +1,12 @@
+package interfaces
+
+import (
+	"github.com/google/wire"
+
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver"
+)
+
+// InterfacesProvider is the Wire provider set for the interface layer; it currently holds only the HTTP server constructor.
+var InterfacesProvider = wire.NewSet(
+	httpserver.NewHTTPServer,
+)
diff --git a/services/mcp-tools/main.go b/services/mcp-tools/main.go
new file mode 100644
index 00000000..a3439f8a
--- /dev/null
+++ b/services/mcp-tools/main.go
@@ -0,0 +1,143 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog/log"
+
+	domainsearch "jan-server/services/mcp-tools/internal/domain/search"
+	"jan-server/services/mcp-tools/internal/infrastructure/auth"
+	"jan-server/services/mcp-tools/internal/infrastructure/config"
+	"jan-server/services/mcp-tools/internal/infrastructure/logger"
+	"jan-server/services/mcp-tools/internal/infrastructure/mcpprovider"
+	sandboxfusionclient "jan-server/services/mcp-tools/internal/infrastructure/sandboxfusion"
+	searchclient "jan-server/services/mcp-tools/internal/infrastructure/search"
+	vectorstoreclient "jan-server/services/mcp-tools/internal/infrastructure/vectorstore"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/middlewares"
+	"jan-server/services/mcp-tools/internal/interfaces/httpserver/routes/mcp"
+)
+
+// @title Jan Server MCP Tools Service
+// @version 1.0
+// @description Model Context Protocol (MCP) tools service providing search and scraping capabilities.
+// @contact.name Jan Server Team
+// @contact.url https://github.com/janhq/jan-server
+// @BasePath /
+
+func main() {
+	// Load configuration; nothing can run without it, so a failure panics
+	cfg, err := config.LoadConfig()
+	if err != nil {
+		panic(fmt.Sprintf("Failed to load config: %v", err))
+	}
+
+	// Initialize the logger before anything else writes log output
+	logger.Init(cfg.LogLevel, cfg.LogFormat)
+	log.Info().
+		Str("http_port", cfg.HTTPPort).
+		Str("log_level", cfg.LogLevel).
+		Msg("Starting MCP Tools service")
+
+	// Initialize infrastructure: the search client and the service built on it
+	searchClient := searchclient.NewSearchClient(searchclient.ClientConfig{
+		Engine:        searchclient.Engine(cfg.SearchEngine),
+		SerperAPIKey:  cfg.SerperAPIKey,
+		SearxngURL:    cfg.SearxngURL,
+		DomainFilters: cfg.SerperDomainFilter,
+		LocationHint:  cfg.SerperLocationHint,
+		OfflineMode:   cfg.SerperOfflineMode,
+	})
+	searchService := domainsearch.NewSearchService(searchClient)
+
+	var vectorClient *vectorstoreclient.Client // stays nil when no vector store URL is configured
+	if cfg.VectorStoreURL != "" {
+		vectorClient = vectorstoreclient.NewClient(cfg.VectorStoreURL)
+	}
+	var sandboxMCP *mcp.SandboxFusionMCP // stays nil unless python_exec is enabled and a URL is configured
+	switch {
+	case !cfg.EnablePythonExec:
+		log.Warn().Msg("SandboxFusion python_exec tool disabled via config")
+	case cfg.SandboxFusionURL != "":
+		sandboxClient := sandboxfusionclient.NewClient(cfg.SandboxFusionURL)
+		sandboxMCP = mcp.NewSandboxFusionMCP(sandboxClient, cfg.SandboxFusionRequireApproval, cfg.EnablePythonExec)
+	default:
+		log.Warn().Msg("SandboxFusion URL not configured, python_exec tool will not be available")
+	}
+
+	// Load MCP provider configuration; a load error is not fatal
+	providerConfig, err := mcpprovider.LoadConfig("configs/mcp-providers.yml")
+	if err != nil {
+		log.Warn().Err(err).Msg("Failed to load MCP provider config, continuing without external providers")
+		providerConfig = &mcpprovider.Config{} // Empty config
+	}
+
+	// Initialize the built-in search tool handler
+	serperMCP := mcp.NewSerperMCP(searchService, vectorClient)
+
+	// Initialize memory MCP; stays nil unless enabled and a URL is configured
+	var memoryMCP *mcp.MemoryMCP
+	switch {
+	case !cfg.EnableMemoryRetrieve:
+		log.Warn().Msg("memory_retrieve MCP tool disabled via config")
+	case cfg.MemoryToolsURL != "":
+		memoryMCP = mcp.NewMemoryMCP(cfg.MemoryToolsURL, cfg.EnableMemoryRetrieve)
+		log.Info().Str("url", cfg.MemoryToolsURL).Msg("Memory tools integration enabled")
+	default:
+		log.Warn().Msg("Memory tools URL not configured, memory_retrieve tool will not be available")
+	}
+
+	// Initialize external MCP providers
+	ctx := context.Background()
+	providerMCP := mcp.NewProviderMCP(providerConfig)
+	if err := providerMCP.Initialize(ctx); err != nil {
+		log.Error().Err(err).Msg("Failed to initialize MCP providers") // logged only; start-up continues
+	}
+
+	mcpRoute := mcp.NewMCPRoute(serperMCP, providerMCP, sandboxMCP, memoryMCP)
+
+	authValidator, err := auth.NewValidator(ctx, cfg, log.Logger)
+	if err != nil {
+		log.Fatal().Err(err).Msg("Failed to initialize auth validator")
+	}
+
+	// Setup HTTP server
+	router := gin.New()
+	router.Use(gin.Recovery())
+	router.Use(middlewares.RequestLogger())
+	router.Use(middlewares.CORS())
+
+	// Apply auth middleware when a validator exists. NOTE(review): it is registered before the health routes, so it must skip them itself — confirm in the auth package
+	if authValidator != nil {
+		router.Use(authValidator.Middleware())
+	}
+
+	// Health check endpoints
+	router.GET("/healthz", func(c *gin.Context) {
+		c.JSON(200, gin.H{"status": "ok", "service": "mcp-tools"})
+	})
+
+	router.GET("/readyz", func(c *gin.Context) {
+		c.JSON(200, gin.H{"status": "ready", "service": "mcp-tools"})
+	})
+
+	router.GET("/health/auth", func(c *gin.Context) {
+		if authValidator == nil || authValidator.Ready() { // a nil validator is reported as ready
+			c.JSON(200, gin.H{"status": "ready"})
+			return
+		}
+		c.JSON(http.StatusServiceUnavailable, gin.H{"status": "initializing"})
+	})
+
+	// Register MCP routes
+	v1 := router.Group("/v1")
+	mcpRoute.RegisterRouter(v1) // mounts the MCP endpoints on the /v1 group
+	addr := fmt.Sprintf(":%s", cfg.HTTPPort) // Start server: host-less address on the configured port
+	log.Info().Str("address", addr).Msg("Server listening")
+
+	if err := router.Run(addr); err != nil {
+		log.Fatal().Err(err).Msg("Failed to start server")
+	}
+}
diff --git a/services/mcp-tools/mcp-providers.md b/services/mcp-tools/mcp-providers.md
new file mode 100644
index 00000000..3f11295a
--- /dev/null
+++ b/services/mcp-tools/mcp-providers.md
@@ -0,0 +1,353 @@
+# MCP Provider Integration
+
+The `mcp-tools` service now supports bridging to external MCP (Model Context Protocol) servers, allowing you to aggregate tools from multiple MCP providers into a single unified endpoint.
+
+## Architecture
+
+```
++-----------------+
+|   AI Model/     |
+|   LLM Client    |
++--------+--------+
+         | MCP Request
+         v
++-------------------------------------+
+|      mcp-tools (Bridge Service)     |
+|  +-------------------------------+  |
+|  |  Internal Tools (Serper)      |  |
+|  +-------------------------------+  |
+|  +-------------------------------+  |
+|  |  External Provider Bridges    |  |
+|  |  - Code Sandbox MCP           |  |
+|  |  - Playwright MCP             |  |
+|  |  - SearXNG (future)           |  |
+|  +-------------------------------+  |
++-------------------------------------+
+         | Proxied MCP Calls
+         v
++--------------------------------------+
+|   External MCP Servers (separate)    |
+|  +--------------------------------+  |
+|  |  code-sandbox-mcp:3000/mcp     |  |
+|  |  (Execute code in sandboxes)   |  |
+|  +--------------------------------+  |
+|  +--------------------------------+  |
+|  |  playwright-mcp:3000/mcp       |  |
+|  |  (Browser automation)          |  |
+|  +--------------------------------+  |
++--------------------------------------+
+```
+
+## Quick Start
+
+### 1. Start the Full MCP Stack
+
+```bash
+# Start all MCP services + mcp-tools bridge
+make mcp-with-tools
+
+# Or start MCP services only (without bridge)
+make mcp-full
+```
+
+This will start:
+- **SearXNG** (http://localhost:8086) - Meta search engine
+- **Vector Store MCP** (http://localhost:3015) - Lightweight embedding service for file search
+- **SandboxFusion** (http://localhost:3010) - Python code interpreter
+- **Code Sandbox MCP** (http://localhost:3002) - Code execution in sandboxes
+- **Playwright MCP** (http://localhost:3003) - Browser automation
+- **mcp-tools Bridge** (http://localhost:8091/v1/mcp) - Unified MCP endpoint
+
+### 2. Query Available Tools
+
+```bash
+# List all tools (internal + external)
+curl -X POST http://localhost:8091/v1/mcp \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "tools/list",
+    "id": 1
+  }'
+```
+
+Expected response includes:
+- `google_search` - Internal Serper tool
+- `scrape` - Internal Serper tool
+- `code-sandbox_*` - Tools from Code Sandbox MCP
+- `playwright_*` - Tools from Playwright MCP
+
+### 3. Call an External Tool
+
+Example: Call a Playwright tool through the bridge
+
+```bash
+curl -X POST http://localhost:8091/v1/mcp \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc": "2.0",
+    "method": "tools/call",
+    "params": {
+      "name": "playwright_screenshot",
+      "arguments": {
+        "url": "https://example.com",
+        "fullPage": true
+      }
+    },
+    "id": 2
+  }'
+```
+
+## Configuration
+
+### MCP Provider Config File
+
+Location: `services/mcp-tools/configs/mcp-providers.yml`
+
+### Environment Variables
+
+Add to your `.env` file:
+
+```env
+# Endpoint of the SearXNG meta search engine
+SEARXNG_URL=http://searxng:8080
+
+# Debug logging for MCP providers
+MCP_PROVIDER_DEBUG=false
+```
+
+## Docker Compose Configuration
+
+### Shared Network
+
+Both `docker-compose.yml` and `docker/services-mcp.yml` use the `mcp-network` bridge to enable communication:
+
+```yaml
+# docker/services-mcp.yml
+networks:
+  mcp-network:
+    driver: bridge
+
+# docker-compose.yml
+networks:
+  mcp-network:
+    external: true
+    name: jan-server_mcp-network
+```
+
+### mcp-tools Service
+
+The `mcp-tools` service is configured to:
+1. Mount the config directory: `./services/mcp-tools/configs:/app/configs:ro`
+2. Connect to both the `default` network and `mcp-network`
+3. Reference MCP providers by Docker service names
+
+```yaml
+mcp-tools:
+  build: ./services/mcp-tools
+  environment:
+    CODE_SANDBOX_URL: http://code-sandbox-mcp:3000/mcp
+    PLAYWRIGHT_URL: http://playwright-mcp:3000/mcp
+  networks:
+    - default
+    - mcp-network
+  volumes:
+    - ./services/mcp-tools/configs:/app/configs:ro
+```
+
+## How It Works
+
+### Initialization Flow
+
+1. **mcp-tools starts** -> Loads `configs/mcp-providers.yml`
+2. **For each enabled provider** -> Creates a Bridge instance
+3. **Bridge initialization** -> Sends `initialize` MCP request to provider
+4. **Fetch tool list** -> Calls `tools/list` on each provider
+5. **Register proxy tools** -> Adds prefixed tools (e.g., `playwright_screenshot`) to main MCP server
+
+### Tool Call Flow
+
+1. **Client calls mcp-tools** -> `POST /v1/mcp` with `tools/call` method
+2. **mcp-tools identifies provider** -> Based on tool name prefix
+3. **Forward to provider** -> Bridge sends MCP `tools/call` request to external server
+4. **Provider executes** -> Code Sandbox runs code, Playwright automates browser, etc.
+5. **Return result** -> Bridge forwards response back to client
+
+### Tool Naming Convention
+
+External tools are prefixed with their provider name:
+- `code-sandbox_write_file_sandbox` - Write files into a sandbox workspace
+- `code-sandbox_sandbox_exec` - Execute Python/shell commands inside the sandbox
+- `playwright_navigate` - Navigate browser via Playwright MCP
+- `playwright_screenshot` - Take screenshot via Playwright MCP
+
+Internal tools keep their original names:
+- `google_search` - Serper search (internal)
+- `scrape` - Serper scraper (internal)
+
+## Adding New MCP Providers
+
+### 1. Add Provider to docker/services-mcp.yml
+
+```yaml
+services:
+  my-new-mcp:
+    image: my-org/my-mcp-server:latest
+    ports:
+      - "3004:3000"
+    networks:
+      - mcp-network
+    profiles: ["mcp", "mcp-full"]
+```
+
+### 2. Add Provider Config
+
+Edit `services/mcp-tools/configs/mcp-providers.yml`:
+
+```yaml
+providers:
+  - name: my-new-provider
+    description: Description of what this provider does
+    enabled: ${MY_PROVIDER_ENABLED:-true}
+    endpoint: ${MY_PROVIDER_URL:-http://my-new-mcp:3000}
+    type: mcp-http
+    proxy_mode: true
+    timeout: 30s
+```
+
+### 3. Add Environment Variables
+
+In `docker-compose.yml` under `mcp-tools` service:
+
+```yaml
+environment:
+  MY_PROVIDER_ENABLED: ${MY_PROVIDER_ENABLED:-true}
+  MY_PROVIDER_URL: ${MY_PROVIDER_URL:-http://my-new-mcp:3000}
+```
+
+### 4. Restart Services
+
+```bash
+make mcp-down-all
+make mcp-with-tools
+```
+
+## Makefile Commands
+
+| Command | Description |
+|---------|-------------|
+| `make mcp-full` | Start MCP services only (SearXNG, Code Sandbox, Playwright) |
+| `make mcp-down` | Stop MCP services |
+| `make mcp-with-tools` | Start MCP services + mcp-tools bridge |
+| `make mcp-down-all` | Stop all MCP-related services |
+
+## Troubleshooting
+
+### Check Provider Health
+
+```bash
+# Check if mcp-tools can reach providers
+docker compose logs mcp-tools
+
+# Expected output:
+# "MCP provider initialized successfully" for each provider
+```
+
+### Test Individual Provider
+
+```bash
+# Test Code Sandbox MCP directly
+curl -X POST http://localhost:3002 \
+  -H "Content-Type: application/json" \
+  -d '{"jsonrpc":"2.0","method":"tools/list","id":1}'
+
+# Test Playwright MCP directly
+curl -X POST http://localhost:3003 \
+  -H "Content-Type: application/json" \
+  -d '{"jsonrpc":"2.0","method":"tools/list","id":1}'
+```
+
+### Network Issues
+
+```bash
+# Verify mcp-tools can resolve provider hostnames
+docker exec -it jan-server-mcp-tools-1 ping -c 2 code-sandbox-mcp
+docker exec -it jan-server-mcp-tools-1 ping -c 2 playwright-mcp
+```
+
+### Provider Not Showing Tools
+
+1. Check if provider is enabled in `mcp-providers.yml`
+2. Verify endpoint URL is correct
+3. Check logs: `docker compose logs code-sandbox-mcp`
+4. Ensure network connectivity between services
+
+## MCP Protocol Support
+
+The bridge supports these MCP protocol methods:
+
+| Method | Description | Status |
+|--------|-------------|--------|
+| `initialize` | Initialize MCP session | Supported |
+| `tools/list` | List available tools | Supported |
+| `tools/call` | Execute a tool | Supported |
+| `ping` | Health check | Supported |
+| `prompts/list` | List prompts | Not yet supported |
+| `prompts/get` | Retrieve a prompt | Not yet supported |
+| `resources/list` | List resources | Not yet supported |
+| `resources/read` | Read resource | Not yet supported |
+
+## References
+
+- **MCP Protocol Spec**: https://modelcontextprotocol.io/
+- **Code Sandbox MCP**: https://github.com/philschmid/code-sandbox-mcp
+- **Playwright MCP**: https://github.com/microsoft/playwright-mcp
+- **SearXNG**: https://github.com/searxng/searxng
+- **mcp-go SDK**: https://github.com/mark3labs/mcp-go
+
+## Example: Full Workflow
+
+```bash
+# 1. Start everything
+make mcp-with-tools
+
+# 2. Wait for services to initialize (30-60 seconds)
+sleep 60
+
+# 3. List all available tools
+curl -X POST http://localhost:8091/v1/mcp \
+  -H "Content-Type: application/json" \
+  -d '{"jsonrpc":"2.0","method":"tools/list","id":1}' | jq
+
+# 4. Call a Playwright tool (example)
+curl -X POST http://localhost:8091/v1/mcp \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc":"2.0",
+    "method":"tools/call",
+    "params":{
+      "name":"playwright_navigate",
+      "arguments":{"url":"https://github.com"}
+    },
+    "id":2
+  }' | jq
+
+# 5. Call a Code Sandbox tool (example)
+curl -X POST http://localhost:8091/v1/mcp \
+  -H "Content-Type: application/json" \
+  -d '{
+    "jsonrpc":"2.0",
+    "method":"tools/call",
+    "params":{
+      "name":"code-sandbox_sandbox_exec",
+      "arguments":{
+        "container_id":"<container id from code-sandbox_sandbox_initialize>",
+        "commands":["python -c \"print('Hello from Code Sandbox!')\""]
+      }
+    },
+    "id":3
+  }' | jq
+
+# 6. Cleanup
+make mcp-down-all
+```
diff --git a/services/mcp-tools/tools.go b/services/mcp-tools/tools.go
new file mode 100644
index 00000000..9e49619b
--- /dev/null
+++ b/services/mcp-tools/tools.go
@@ -0,0 +1,11 @@
+//go:build tools
+// +build tools
+
+package tools
+
+import (
+	_ "github.com/swaggo/swag/cmd/swag"
+)
+
+// This file declares dependencies on build tools.
+// Go modules will download these tools, but they won't be included in the binary.
diff --git a/services/mcp-tools/tools/vector-store-service/Dockerfile b/services/mcp-tools/tools/vector-store-service/Dockerfile
new file mode 100644
index 00000000..dcdc9e1d
--- /dev/null
+++ b/services/mcp-tools/tools/vector-store-service/Dockerfile
@@ -0,0 +1,16 @@
+ARG GO_VERSION=1.25
+# Build stage. BuildKit sets TARGETARCH for the requested platform; amd64 remains the fallback for builders that do not.
+FROM golang:${GO_VERSION} AS build
+ARG TARGETARCH
+WORKDIR /app
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+RUN CGO_ENABLED=0 GOOS=linux GOARCH=${TARGETARCH:-amd64} go build -o vector-store-service .
+# Runtime stage: ships only the statically linked binary.
+FROM gcr.io/distroless/base-debian12
+WORKDIR /app
+COPY --from=build /app/vector-store-service /app/vector-store-service
+EXPOSE 3015
+ENV VECTOR_STORE_PORT=3015
+ENTRYPOINT ["/app/vector-store-service"]
diff --git a/services/mcp-tools/tools/vector-store-service/README.md b/services/mcp-tools/tools/vector-store-service/README.md
new file mode 100644
index 00000000..28ac715f
--- /dev/null
+++ b/services/mcp-tools/tools/vector-store-service/README.md
@@ -0,0 +1,24 @@
+# Vector Store Service
+
+A lightweight HTTP service that stores document embeddings locally and exposes two main endpoints (plus `GET /healthz` and `GET /readyz` probes):
+
+- `POST /documents` - index a document with `{ "document_id": "doc-1", "text": "..." }`
+- `POST /query` - run a semantic search with `{ "text": "foo", "top_k": 3 }`
+
+The service keeps the documents in memory, builds a simple normalized bag-of-words embedding, and returns cosine-similarity scores to keep the stack self-contained for MCP automation testing.
+
+## Run locally
+
+```bash
+cd services/mcp-tools/tools/vector-store-service
+go run .
+# Service listens on :3015 by default (override with VECTOR_STORE_PORT)
+```
+
+## Docker build
+
+```bash
+docker build -t vector-store-service .
+docker run -p 3015:3015 vector-store-service
+
+```
diff --git a/services/mcp-tools/tools/vector-store-service/go.mod b/services/mcp-tools/tools/vector-store-service/go.mod
new file mode 100644
index 00000000..b62f1198
--- /dev/null
+++ b/services/mcp-tools/tools/vector-store-service/go.mod
@@ -0,0 +1,35 @@
+module jan-server/services/mcp-tools/tools/vector-store-service
+
+go 1.25.0
+
+require github.com/gin-gonic/gin v1.11.0
+
+require (
+	github.com/bytedance/sonic v1.14.0 // indirect
+	github.com/bytedance/sonic/loader v0.3.0 // indirect
+	github.com/cloudwego/base64x v0.1.6 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.8 // indirect
+	github.com/gin-contrib/sse v1.1.0 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.27.0 // indirect
+	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/goccy/go-yaml v1.18.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.4 // indirect
+	github.com/quic-go/qpack v0.6.0 // indirect
+	github.com/quic-go/quic-go v0.57.0 // indirect
+	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
+	github.com/ugorji/go/codec v1.3.0 // indirect
+	golang.org/x/arch v0.20.0 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	google.golang.org/protobuf v1.36.9 // indirect
+)
diff --git a/services/mcp-tools/tools/vector-store-service/go.sum b/services/mcp-tools/tools/vector-store-service/go.sum
new file mode 100644
index 00000000..b5ac9971
--- /dev/null
+++ b/services/mcp-tools/tools/vector-store-service/go.sum
@@ -0,0 +1,84 @@
+github.com/bytedance/sonic v1.14.0 h1:/OfKt8HFw0kh2rj8N0F6C/qPGRESq0BbaNZgcNXXzQQ=
+github.com/bytedance/sonic v1.14.0/go.mod h1:WoEbx8WTcFJfzCe0hbmyTGrfjt8PzNEBdxlNUO24NhA=
+github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA=
+github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
+github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
+github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
+github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
+github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
+github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM=
+github.com/gin-gonic/gin v1.11.0 h1:OW/6PLjyusp2PPXtyxKHU0RbX6I/l28FTdDlae5ueWk=
+github.com/gin-gonic/gin v1.11.0/go.mod h1:+iq/FyxlGzII0KHiBGjuNn4UNENUlKbGlNmc+W50Dls=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.27.0 h1:w8+XrWVMhGkxOaaowyKH35gFydVHOvC0/uWoy2Fzwn4=
+github.com/go-playground/validator/v10 v10.27.0/go.mod h1:I5QpIEbmr8On7W0TktmJAumgzX4CA1XNl4ZmDuVHKKo=
+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
+github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
+github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
+github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
+github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
+github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
+github.com/quic-go/quic-go v0.57.0 h1:AsSSrrMs4qI/hLrKlTH/TGQeTMY0ib1pAOX7vA3AdqE=
+github.com/quic-go/quic-go v0.57.0/go.mod h1:ly4QBAjHA2VhdnxhojRsCUOeJwKYg+taDlos92xb1+s=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA=
+github.com/ugorji/go/codec v1.3.0/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
+go.uber.org/mock v0.5.2 h1:LbtPTcP8A5k9WPXj54PPPbjcI4Y6lhyOZXn+VS7wNko=
+go.uber.org/mock v0.5.2/go.mod h1:wLlUxC2vVTPTaE3UD51E0BGOAElKrILxhVSDYQLld5o=
+golang.org/x/arch v0.20.0 h1:dx1zTU0MAE98U+TQ8BLl7XsJbgze2WnNKF/8tGp/Q6c=
+golang.org/x/arch v0.20.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
+golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
+google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
+google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/services/mcp-tools/tools/vector-store-service/main.go b/services/mcp-tools/tools/vector-store-service/main.go
new file mode 100644
index 00000000..931e5953
--- /dev/null
+++ b/services/mcp-tools/tools/vector-store-service/main.go
@@ -0,0 +1,125 @@
+package main
+
+import (
+	"net/http"
+	"os"
+	"time"
+
+	"jan-server/services/mcp-tools/tools/vector-store-service/store"
+
+	"github.com/gin-gonic/gin"
+)
+
+type config struct { // runtime settings for the service
+	Port string // TCP port the HTTP server listens on; filled in by loadConfig
+}
+
+// loadConfig reads VECTOR_STORE_PORT, falling back to "3015" when it is unset or empty.
+func loadConfig() config {
+	if port := os.Getenv("VECTOR_STORE_PORT"); port != "" {
+		return config{Port: port}
+	}
+	return config{Port: "3015"}
+}
+
+type indexRequest struct { // JSON body accepted by POST /documents
+	DocumentID string         `json:"document_id" binding:"required"` // key the document is stored under
+	Text       string         `json:"text" binding:"required"`        // raw text; embedded with store.BuildEmbedding
+	Metadata   map[string]any `json:"metadata"`                       // optional; returned with query results
+	Tags       []string       `json:"tags"`                           // optional; returned with query results
+}
+
+type queryRequest struct { // JSON body accepted by POST /query
+	Text   string   `json:"text" binding:"required"` // query text; embedded the same way as documents
+	TopK   int      `json:"top_k"`                   // max results; the handler defaults to 5 and caps at 20
+	Filter []string `json:"document_ids"`            // optional list of document IDs to restrict the search to
+}
+
+func main() {
+	cfg := loadConfig()
+	memStore := store.NewMemoryStore()
+	// Routes: GET /healthz and /readyz (static probes), POST /documents (index), POST /query (search).
+	router := gin.Default()
+
+	router.GET("/healthz", func(c *gin.Context) {
+		c.JSON(http.StatusOK, gin.H{"status": "ok"})
+	})
+
+	router.GET("/readyz", func(c *gin.Context) {
+		c.JSON(http.StatusOK, gin.H{"status": "ready"})
+	})
+
+	router.POST("/documents", func(c *gin.Context) {
+		var req indexRequest
+		if err := c.ShouldBindJSON(&req); err != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+			return
+		}
+		// CreatedAt is deliberately left zero: Upsert then keeps the creation time of an
+		// existing document instead of resetting it every time the document is re-indexed.
+		doc := store.Document{
+			ID:        req.DocumentID,
+			Text:      req.Text,
+			Tags:      req.Tags,
+			Metadata:  req.Metadata,
+			Embedding: store.BuildEmbedding(req.Text),
+			UpdatedAt: time.Now().UTC(),
+		}
+		memStore.Upsert(doc)
+
+		c.JSON(http.StatusCreated, gin.H{
+			"status":      "indexed",
+			"document_id": doc.ID,
+			"token_count": len(doc.Embedding), // number of distinct tokens in the embedding
+			"indexed_at":  doc.UpdatedAt.Format(time.RFC3339),
+		})
+	})
+
+	router.POST("/query", func(c *gin.Context) {
+		var req queryRequest
+		if err := c.ShouldBindJSON(&req); err != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+			return
+		}
+		// Default to 5 results and never return more than 20.
+		topK := req.TopK
+		if topK <= 0 {
+			topK = 5
+		}
+		if topK > 20 {
+			topK = 20
+		}
+
+		results := memStore.Query(store.BuildEmbedding(req.Text), topK, req.Filter)
+		response := make([]map[string]any, 0, len(results))
+		for _, result := range results {
+			response = append(response, map[string]any{
+				"document_id":  result.Document.ID,
+				"score":        result.Score,
+				"text_preview": previewText(result.Document.Text),
+				"metadata":     result.Document.Metadata,
+				"tags":         result.Document.Tags,
+			})
+		}
+
+		c.JSON(http.StatusOK, gin.H{
+			"query":   req.Text,
+			"top_k":   topK,
+			"count":   len(response),
+			"results": response,
+		})
+	})
+
+	addr := ":" + cfg.Port
+	if err := router.Run(addr); err != nil {
+		panic(err)
+	}
+}
+
+// previewText returns text unchanged up to 240 runes; longer text is cut to 240 runes plus "…".
+func previewText(text string) string {
+	if runes := []rune(text); len(runes) > 240 {
+		return string(runes[:240]) + "…"
+	}
+	return text
+}
diff --git a/services/mcp-tools/tools/vector-store-service/store/store.go b/services/mcp-tools/tools/vector-store-service/store/store.go
new file mode 100644
index 00000000..f8e16f69
--- /dev/null
+++ b/services/mcp-tools/tools/vector-store-service/store/store.go
@@ -0,0 +1,156 @@
+package store
+
+import (
+	"math"
+	"regexp"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+)
+
+type Document struct { // one indexed text together with its embedding
+	ID        string             // key in MemoryStore; Upsert replaces an entry with the same ID
+	Text      string             // text as handed to the store
+	Metadata  map[string]any     // caller-supplied; the store does not interpret it
+	Tags      []string           // caller-supplied; the store does not interpret it
+	Embedding map[string]float64 // token -> weight, compared by cosineSimilarity in Query
+	CreatedAt time.Time          // filled in by Upsert when zero
+	UpdatedAt time.Time          // overwritten by Upsert on every call
+}
+
+type Result struct { // one hit returned by Query
+	Document Document // copy of the stored document struct
+	Score    float64  // cosine similarity to the query; Query drops scores <= 0
+}
+
+type MemoryStore struct { // in-process document index
+	mu   sync.RWMutex        // Upsert takes the write lock, Query the read lock
+	docs map[string]Document // keyed by Document.ID
+}
+
+// NewMemoryStore returns an empty MemoryStore that is ready for Upsert and Query.
+func NewMemoryStore() *MemoryStore {
+	docs := make(map[string]Document)
+	return &MemoryStore{docs: docs}
+}
+
+// Upsert stores doc under doc.ID, replacing any previous entry. A zero CreatedAt is taken
+// from the existing entry, or set to the current time; UpdatedAt is always refreshed.
+func (s *MemoryStore) Upsert(doc Document) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	stamp := time.Now().UTC()
+	if doc.CreatedAt.IsZero() {
+		doc.CreatedAt = stamp
+		if prev, found := s.docs[doc.ID]; found {
+			doc.CreatedAt = prev.CreatedAt
+		}
+	}
+	doc.UpdatedAt = stamp
+	s.docs[doc.ID] = doc
+}
+
+// Query scores every stored document against queryEmbedding with cosine similarity and
+// returns at most topK results (5 when topK <= 0), highest score first, ties ordered by
+// document ID. A non-empty filter limits the search to the listed document IDs.
+// Documents with a non-positive score are left out.
+func (s *MemoryStore) Query(queryEmbedding map[string]float64, topK int, filter []string) []Result {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	limit := topK
+	if limit <= 0 {
+		limit = 5
+	}
+
+	// An empty allow-list means every document is a candidate.
+	allowed := make(map[string]struct{}, len(filter))
+	for _, id := range filter {
+		allowed[id] = struct{}{}
+	}
+
+	matches := make([]Result, 0, len(s.docs))
+	for _, candidate := range s.docs {
+		if _, listed := allowed[candidate.ID]; len(allowed) > 0 && !listed {
+			continue
+		}
+		score := cosineSimilarity(queryEmbedding, candidate.Embedding)
+		if score <= 0 {
+			continue
+		}
+		matches = append(matches, Result{Document: candidate, Score: score})
+	}
+
+	// Highest score first; equal scores fall back to ID order.
+	sort.Slice(matches, func(i, j int) bool {
+		left, right := matches[i], matches[j]
+		if left.Score != right.Score {
+			return left.Score > right.Score
+		}
+		return left.Document.ID < right.Document.ID
+	})
+
+	if len(matches) > limit {
+		matches = matches[:limit]
+	}
+	return matches
+}
+
+// tokenRegex matches maximal runs of ASCII letters and digits.
+var tokenRegex = regexp.MustCompile(`[a-zA-Z0-9]+`)
+
+// BuildEmbedding turns text into a unit-length bag-of-words vector. The text is
+// lower-cased and split into ASCII alphanumeric tokens; tokens shorter than two bytes
+// are dropped and every remaining token maps to its count divided by the Euclidean
+// norm of all counts. Text without usable tokens yields an empty, non-nil map.
+func BuildEmbedding(text string) map[string]float64 {
+	weights := map[string]float64{}
+	for _, word := range tokenRegex.FindAllString(strings.ToLower(text), -1) {
+		if len(word) >= 2 {
+			weights[word]++
+		}
+	}
+
+	sumSquares := 0.0
+	for _, count := range weights {
+		sumSquares += count * count
+	}
+	if sumSquares == 0 {
+		return weights
+	}
+
+	// Scale the counts so the vector has unit length.
+	length := math.Sqrt(sumSquares)
+	for word, count := range weights {
+		weights[word] = count / length
+	}
+	return weights
+}
+
+func cosineSimilarity(a, b map[string]float64) float64 {
+	if len(a) == 0 || len(b) == 0 {
+		return 0
+	}
+	var dot float64
+	for token, aval := range a {
+		if bval, ok := b[token]; ok {
+			dot += aval * bval
+		}
+	}
+	if dot == 0 {
+		return 0
+	}
+
+	var normA, normB float64
+	for _, val := range a {
+		normA += val * val
+	}
+	for _, val := range b {
+		normB += val * val
+	}
+	if normA == 0 || normB == 0 {
+		return 0
+	}
+	return dot / (math.Sqrt(normA) * math.Sqrt(normB))
+}
diff --git a/services/mcp-tools/utils/mcp/options.go b/services/mcp-tools/utils/mcp/options.go
new file mode 100644
index 00000000..249cb614
--- /dev/null
+++ b/services/mcp-tools/utils/mcp/options.go
@@ -0,0 +1,90 @@
+package mcp
+
+import (
+	"reflect"
+	"strings"
+
+	mcpgo "github.com/mark3labs/mcp-go/mcp"
+)
+
+// ReflectToMCPOptions converts a struct definition into MCP tool options using
+// reflection metadata. It parses json and jsonschema tags to construct the
+// appropriate argument definitions for the mark3labs MCP server SDK.
+//
+// structValue may be a struct or a pointer to one; any other value (including nil)
+// yields only the description option. Fields with no json tag or `json:"-"` are
+// skipped, as are fields whose kind is not string, bool, numeric or []string.
+// A field is required only when "required" is a whole comma-separated keyword of
+// its jsonschema tag, so the word appearing inside a description does not count.
+func ReflectToMCPOptions(description string, structValue interface{}) []mcpgo.ToolOption {
+	opts := []mcpgo.ToolOption{mcpgo.WithDescription(description)}
+
+	structType := reflect.TypeOf(structValue)
+	if structType != nil && structType.Kind() == reflect.Ptr {
+		structType = structType.Elem()
+	}
+	if structType == nil || structType.Kind() != reflect.Struct {
+		return opts
+	}
+
+	for i := 0; i < structType.NumField(); i++ {
+		field := structType.Field(i)
+		jsonTag := field.Tag.Get("json")
+		if jsonTag == "" || jsonTag == "-" {
+			continue
+		}
+		name := strings.Split(jsonTag, ",")[0]
+
+		// "required" has to be a tag keyword of its own; a substring match would also
+		// fire on descriptions such as "description=not required".
+		jsSchema := field.Tag.Get("jsonschema")
+		desc := extractDescription(jsSchema)
+		var propOpts []mcpgo.PropertyOption
+		for _, part := range strings.Split(jsSchema, ",") {
+			if strings.TrimSpace(part) == "required" {
+				propOpts = append(propOpts, mcpgo.Required())
+				break
+			}
+		}
+
+		// Pointer fields are described by the type they point to.
+		baseType := field.Type
+		if baseType.Kind() == reflect.Ptr {
+			baseType = baseType.Elem()
+		}
+
+		switch baseType.Kind() {
+		case reflect.String:
+			opts = append(opts, mcpgo.WithString(name, append(propOpts, mcpgo.Description(desc))...))
+		case reflect.Bool:
+			opts = append(opts, mcpgo.WithBoolean(name, append(propOpts, mcpgo.Description(desc))...))
+		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
+			reflect.Float32, reflect.Float64:
+			// Every numeric kind is declared as a number (previously only int was handled).
+			opts = append(opts, mcpgo.WithNumber(name, append(propOpts, mcpgo.Description(desc))...))
+		case reflect.Slice:
+			// Only []string is supported. Other element types are skipped; they used to
+			// fall through and append a nil option to the result.
+			if baseType.Elem().Kind() != reflect.String {
+				continue
+			}
+			propOpts = append(propOpts, mcpgo.WithStringItems())
+			if desc != "" {
+				propOpts = append(propOpts, mcpgo.Description(desc))
+			}
+			opts = append(opts, mcpgo.WithArray(name, propOpts...))
+		}
+	}
+
+	return opts
+}
+
+func extractDescription(tag string) string { // value of the first "description=" part of tag, or ""
+	for _, part := range strings.Split(tag, ",") { // parts are comma-separated, so the value ends at the next ","
+		if strings.HasPrefix(part, "description=") {
+			return strings.TrimPrefix(part, "description=")
+		}
+	}
+	return "" // no part starts with "description="
+}
diff --git a/services/mcp-tools/utils/platformerrors/errors.go b/services/mcp-tools/utils/platformerrors/errors.go
new file mode 100644
index 00000000..3b15d9aa
--- /dev/null
+++ b/services/mcp-tools/utils/platformerrors/errors.go
@@ -0,0 +1,207 @@
+package platformerrors
+
+import (
+	"context"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog"
+)
+
+// getRequestIDFromContext returns the string stored under "requestID" in ctx, or "" when ctx is nil or has no such string.
+func getRequestIDFromContext(ctx context.Context) string {
+	if ctx == nil {
+		return ""
+	}
+	requestID, _ := ctx.Value("requestID").(string)
+	return requestID
+}
+
+// ErrorType represents the category of error
+type ErrorType string
+
+const (
+	ErrorTypeNotFound       ErrorType = "NOT_FOUND"        // 404 in ErrorTypeToHTTPStatus
+	ErrorTypeTooManyRecords ErrorType = "TOO_MANY_RECORDS" // 500 in ErrorTypeToHTTPStatus
+	ErrorTypeValidation     ErrorType = "VALIDATION"       // 400 in ErrorTypeToHTTPStatus
+	ErrorTypeConflict       ErrorType = "CONFLICT"         // 409 in ErrorTypeToHTTPStatus
+	ErrorTypeUnauthorized   ErrorType = "UNAUTHORIZED"     // 401 in ErrorTypeToHTTPStatus
+	ErrorTypeForbidden      ErrorType = "FORBIDDEN"        // 403 in ErrorTypeToHTTPStatus
+	ErrorTypeInternal       ErrorType = "INTERNAL"         // 500; also the type AsError gives to plain errors
+	ErrorTypeExternal       ErrorType = "EXTERNAL"         // 502 in ErrorTypeToHTTPStatus
+	ErrorTypeDatabaseError  ErrorType = "DATABASE_ERROR"   // 500 in ErrorTypeToHTTPStatus
+	ErrorTypeNotImplemented ErrorType = "NOT_IMPLEMENTED"  // 501 in ErrorTypeToHTTPStatus
+)
+
+// Layer names the application layer where an error occurred; it leads Error() output and is logged as "layer".
+type Layer string
+
+const (
+	LayerRepository     Layer = "repository"
+	LayerDomain         Layer = "domain"
+	LayerHandler        Layer = "handler"
+	LayerRoute          Layer = "route"
+	LayerInfrastructure Layer = "infrastructure"
+	LayerCommon         Layer = "common"
+)
+
+// PlatformError represents an error with context and metadata
+type PlatformError struct {
+	UUID      string         // identifier shown in Error() output and logged as "error_uuid"
+	Type      ErrorType      // category; ErrorTypeToHTTPStatus maps it to an HTTP status
+	Message   string         // human-readable summary; used as the log message by LogError
+	Err       error          // wrapped cause, may be nil; returned by Unwrap
+	Context   map[string]any // extra fields; LogError logs each entry as its own field
+	RequestID string         // taken from the context's "requestID" value; "" when absent
+	Layer     Layer          // layer that created the error
+	Timestamp time.Time      // creation time in UTC, set by NewErrorWithContext
+}
+
+// Error implements the error interface: "[layer][type][uuid] message", plus ": <cause>" when Err is set.
+func (e *PlatformError) Error() string {
+	if e.Err != nil {
+		return fmt.Sprintf("[%s][%s][%s] %s: %v", e.Layer, e.Type, e.UUID, e.Message, e.Err)
+	}
+	return fmt.Sprintf("[%s][%s][%s] %s", e.Layer, e.Type, e.UUID, e.Message)
+}
+
+// Unwrap returns the wrapped error (nil if none), letting errors.Is and errors.As look through a PlatformError.
+func (e *PlatformError) Unwrap() error {
+	return e.Err
+}
+
+// GetErrorType returns the error's category (the Type field).
+func (e *PlatformError) GetErrorType() ErrorType {
+	return e.Type
+}
+
+// GetRequestID returns the request ID captured when the error was created; "" if there was none.
+func (e *PlatformError) GetRequestID() string {
+	return e.RequestID
+}
+
+// GetUUID returns the identifier assigned to this error (the UUID field).
+func (e *PlatformError) GetUUID() string {
+	return e.UUID
+}
+
+// NewError creates a PlatformError without extra context fields by calling NewErrorWithContext with a nil field map.
+func NewError(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string) *PlatformError {
+	return NewErrorWithContext(ctx, layer, errorType, message, err, customUUID, nil)
+}
+
+// NewErrorWithContext creates a new PlatformError with additional context fields
+func NewErrorWithContext(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string, contextFields map[string]any) *PlatformError {
+	requestID := getRequestIDFromContext(ctx)
+
+	errorUUID := customUUID
+	if errorUUID == "" {
+		errorUUID = "auto-generated-uuid"
+	}
+
+	errorContext := make(map[string]any)
+	for k, v := range contextFields {
+		errorContext[k] = v
+	}
+
+	platformError := &PlatformError{
+		UUID:      errorUUID,
+		Type:      errorType,
+		Message:   message,
+		Err:       err,
+		RequestID: requestID,
+		Layer:     layer,
+		Timestamp: time.Now().UTC(),
+		Context:   errorContext,
+	}
+
+	return platformError
+}
+
+// AsError wraps err as a PlatformError raised at layer. A nil err yields nil. If err's
+// chain already contains a PlatformError, that error is wrapped: its type and UUID are
+// kept and message is prefixed to its message. Any other error becomes
+// ErrorTypeInternal. The nil result is a typed nil if stored in an error interface.
+func AsError(ctx context.Context, layer Layer, err error, message string) *PlatformError {
+	if err == nil {
+		return nil
+	}
+	var inner *PlatformError
+	if !errors.As(err, &inner) {
+		return NewError(ctx, layer, ErrorTypeInternal, message, err, "")
+	}
+	combined := fmt.Sprintf("%s: %s", message, inner.Message)
+	return NewError(ctx, layer, inner.Type, combined, inner, inner.UUID)
+}
+
+// ErrorTypeToHTTPStatus maps error types to HTTP status codes. Validation,
+// authentication, lookup, conflict, not-implemented and external failures each
+// have their own status; internal, database and too-many-records errors, and
+// any unrecognised type, map to 500 Internal Server Error.
+func ErrorTypeToHTTPStatus(errorType ErrorType) int {
+	switch errorType {
+	case ErrorTypeValidation:
+		return http.StatusBadRequest
+	case ErrorTypeUnauthorized:
+		return http.StatusUnauthorized
+	case ErrorTypeForbidden:
+		return http.StatusForbidden
+	case ErrorTypeNotFound:
+		return http.StatusNotFound
+	case ErrorTypeConflict:
+		return http.StatusConflict
+	case ErrorTypeNotImplemented:
+		return http.StatusNotImplemented
+	case ErrorTypeExternal:
+		return http.StatusBadGateway
+	case ErrorTypeInternal, ErrorTypeDatabaseError, ErrorTypeTooManyRecords:
+		return http.StatusInternalServerError
+	default:
+		// Unknown types are reported as internal errors.
+		return http.StatusInternalServerError
+	}
+}
+
+// IsErrorType reports whether err is, or wraps, a PlatformError whose Type equals errorType; nil never matches.
+func IsErrorType(err error, errorType ErrorType) bool {
+	if err == nil {
+		return false
+	}
+
+	// errors.As stops at the first *PlatformError in err's chain; that one decides.
+	var target *PlatformError
+	if !errors.As(err, &target) {
+		return false
+	}
+	return target.Type == errorType
+}
+
+// LogError writes err to logger at error level as a structured event carrying the
+// error's UUID, type, layer and timestamp, plus the request ID (when set), every
+// Context entry as its own field, and the wrapped error (when present). The event
+// message is err.Message. A nil err is ignored.
+func LogError(logger zerolog.Logger, err *PlatformError) {
+	if err == nil {
+		return
+	}
+
+	entry := logger.Error()
+	entry = entry.Str("error_uuid", err.UUID)
+	entry = entry.Str("error_type", string(err.Type))
+	entry = entry.Str("layer", string(err.Layer))
+	entry = entry.Time("timestamp_utc", err.Timestamp)
+	if id := err.RequestID; id != "" {
+		entry = entry.Str("request_id", id)
+	}
+	for field, value := range err.Context {
+		entry = entry.Interface(field, value)
+	}
+	if cause := err.Err; cause != nil {
+		entry = entry.Err(cause)
+	}
+
+	entry.Msg(err.Message)
+}
diff --git a/services/media-api/Dockerfile b/services/media-api/Dockerfile
new file mode 100644
index 00000000..2c64685a
--- /dev/null
+++ b/services/media-api/Dockerfile
@@ -0,0 +1,19 @@
+ARG GO_VERSION=1.25
+# Build stage: compile a CGO-free linux/amd64 binary.
+FROM golang:${GO_VERSION} AS build
+WORKDIR /src
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/media-api ./cmd/server
+# Runtime stage: slim Debian with CA certificates and curl; runs as the unprivileged "media" user.
+FROM debian:bookworm-slim
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ca-certificates curl && \
+    rm -rf /var/lib/apt/lists/*
+RUN useradd --system --home /app --no-create-home --uid 10002 media
+WORKDIR /app
+COPY --from=build /out/media-api /app/media-api
+EXPOSE 8285
+USER media
+ENTRYPOINT ["/app/media-api"]
diff --git a/services/media-api/Makefile b/services/media-api/Makefile
new file mode 100644
index 00000000..5134add2
--- /dev/null
+++ b/services/media-api/Makefile
@@ -0,0 +1,28 @@
+SERVICE := media-api
+BIN := bin/$(SERVICE)
+
+.PHONY: run build test tidy wire swagger clean
+
+run: # run the server straight from source
+	@echo "Starting $(SERVICE)..."
+	go run ./cmd/server
+
+build: # compile ./cmd/server into the path held in BIN
+	go build -o $(BIN) ./cmd/server
+
+test: # run the tests of every package in this module
+	go test ./...
+
+tidy: # sync go.mod and go.sum with the imports in use
+	go mod tidy
+
+wire: # run wire on ./cmd/server; fails with an install hint if wire is missing
+	@if ! command -v wire >/dev/null 2>&1; then echo "wire not installed (go install github.com/google/wire/cmd/wire@latest)"; exit 1; fi
+	wire ./cmd/server
+
+swagger: # regenerate docs/swagger from cmd/server/server.go; fails with an install hint if swag is missing
+	@if ! command -v swag >/dev/null 2>&1; then echo "swag CLI not installed (go install github.com/swaggo/swag/cmd/swag@latest)"; exit 1; fi
+	swag init -g cmd/server/server.go -o docs/swagger
+
+clean: # delete the built binary
+	rm -rf $(BIN)
diff --git a/services/media-api/README.md b/services/media-api/README.md
new file mode 100644
index 00000000..df3a6f24
--- /dev/null
+++ b/services/media-api/README.md
@@ -0,0 +1,184 @@
+# media-api
+
+`media-api` is the dedicated ingestion and resolution service for binary assets used by Jan Server. It accepts data URLs or remote URLs, pushes bytes to private S3-compatible storage, records metadata in Postgres, and returns short `jan_*` identifiers with presigned URLs for immediate access.
+
+## Highlights
+
+- Environment-driven config (`internal/config`) tailored for Menlo's S3 endpoint (`https://s3.menlo.ai`) and `platform-dev` bucket.
+- PostgreSQL metadata store with schema managed by GORM.
+- Automatic creation of the target database when using `postgres://` URLs.
+- Keycloak JWT-protected routes enforced at the edge with optional observability hooks.
+- Shared `utils/mediaid` package for consistent `jan_*` identifiers across services.
+- Returns presigned URLs immediately upon upload for instant client access.
+
+> All HTTP requests must include an `Authorization: Bearer <token>` header issued by Keycloak (guest tokens work for read flows; service workloads should use client credentials).
+
+## Usage Flow
+
+### Method 1: Direct Upload via API (Server-Proxied)
+
+Client uploads an image directly through the media-api (via data URL or remote URL) and receives:
+
+- `jan_id` - Persistent identifier for the media
+- `presigned_url` - Short-lived URL for immediate access (default 5 min TTL)
+
+```bash
+curl -X POST http://localhost:8285/v1/media \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{"source":{"type":"remote_url","url":"https://placekitten.com/512/512"},"user_id":"user123"}'
+
+# Response:
+# {
+#   "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+#   "mime": "image/jpeg",
+#   "bytes": 45678,
+#   "deduped": false,
+#   "presigned_url": "https://s3.menlo.ai/platform-dev/images/jan_...?signature=..."
+# }
+```
+
+**Use Case**: Simple uploads, remote URLs, or when client doesn't want to handle S3 directly.
+
+---
+
+### Method 2: Client-Side Direct Upload (Presigned URL)
+
+Client requests a presigned upload URL, uploads directly to S3, then uses the `jan_id`:
+
+#### Step 1: Request Presigned Upload URL
+
+```bash
+curl -X POST http://localhost:8285/v1/media/prepare-upload \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{"mime_type":"image/jpeg","user_id":"user123"}'
+
+# Response:
+# {
+#   "id": "jan_01hqr8v9k2x3f4g5h6j7k8m9n0",
+#   "upload_url": "https://s3.menlo.ai/platform-dev/images/jan_...?X-Amz-Signature=...",
+#   "mime_type": "image/jpeg",
+#   "expires_in": 300
+# }
+```
+
+#### Step 2: Client Uploads Directly to S3
+
+```bash
+curl -X PUT "https://s3.menlo.ai/platform-dev/images/jan_...?X-Amz-Signature=..." \
+  -H "Content-Type: image/jpeg" \
+  --data-binary @my-image.jpg
+```
+
+#### Step 3: Use jan_id in Completions
+
+Client immediately uses the `jan_id` without waiting for server confirmation.
+
+**Use Case**: Large files, faster uploads (bypass API), better for mobile/web apps.
+
+---
+
+### Using jan_id in LLM Completion Payload
+
+Client injects the `jan_id` into the completion request using the format `data:<mime>;jan_<id>` (for example `data:image/jpeg;jan_01hqr8v9k2x3f4g5h6j7k8m9n0`):
+
+```json
+{
+  "model": "gpt-4-vision",
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {"type": "text", "text": "What's in this image?"},
+        {
+          "type": "image_url",
+          "image_url": {
+            "url": "data:image/jpeg;jan_01hqr8v9k2x3f4g5h6j7k8m9n0"
+          }
+        }
+      ]
+    }
+  ]
+}
+```
+
+---
+
+### Backend Resolves jan_id to Fresh Presigned URL
+
+Before forwarding to the LLM provider, the backend calls `/v1/media/resolve` to replace `jan_*` placeholders with fresh presigned URLs:
+
+```bash
+curl -X POST http://localhost:8285/v1/media/resolve \
+  -H "Authorization: Bearer <token>" \
+  -H "Content-Type: application/json" \
+  -d '{"payload":{"messages":[{"content":[{"type":"image_url","image_url":{"url":"data:image/jpeg;jan_01hqr8v9k2x3f4g5h6j7k8m9n0"}}]}]}}'
+
+# Response:
+# {
+#   "payload": {
+#     "messages": [{
+#       "content": [{
+#         "type": "image_url",
+#         "image_url": {
+#           "url": "https://s3.menlo.ai/platform-dev/images/jan_...?signature=NEW_FRESH_SIG"
+#         }
+#       }]
+#     }]
+#   }
+# }
+```
+
+## Environment variables
+
+Populate the repo-level `.env` (via `make env-create`) and tweak the following keys:
+
+| Variable | Description |
+| --- | --- |
+| `MEDIA_API_PORT` | HTTP listen port (default `8285`). |
+| `MEDIA_DATABASE_URL` | Postgres DSN for metadata. |
+| `MEDIA_S3_ENDPOINT` | S3-compatible endpoint (`https://s3.menlo.ai`). |
+| `MEDIA_S3_PUBLIC_ENDPOINT` | Optional public endpoint used when returning presigned URLs (e.g., `http://localhost:9000`). |
+| `MEDIA_S3_ACCESS_KEY_ID` / `MEDIA_S3_SECRET_ACCESS_KEY` | Credentials (`XXXXX` / `YYYY`). |
+| `MEDIA_S3_BUCKET` | Target bucket (`platform-dev`). |
+| `MEDIA_MAX_BYTES` | Max upload size (default 20 MB). |
+| `MEDIA_S3_PRESIGN_TTL` | Lifespan of presigned URLs (default 5 min). |
+| `MEDIA_RETENTION_DAYS` | Metadata retention window. |
+| `AUTH_ENABLED` | Set to `true` to enforce Keycloak-issued JWTs (required in shared environments). |
+| `AUTH_ISSUER` | Expected Keycloak issuer claim (e.g., `http://localhost:8085/realms/jan`). |
+| `ACCOUNT` | Audience or client ID the token is minted for (e.g., `account`). |
+| `AUTH_JWKS_URL` | JWKS endpoint used to validate signatures (e.g., `http://keycloak:8085/realms/jan/protocol/openid-connect/certs`). |
+
+> If the S3 bucket or credentials are omitted the service still starts, but media upload/resolve endpoints will respond with `media storage backend is not configured` until valid `MEDIA_S3_*` values are provided.
+
+All env samples already contain the provided Menlo dev bucket configuration.
+
+## Quick start
+
+```bash
+cd services/media-api
+make run
+curl -H "Authorization: Bearer <token>" \
+  http://localhost:8285/healthz
+```
+
+## API surface
+
+| Method & Path | Description |
+| --- | --- |
+| `POST /v1/media` | **Method 1**: Ingests data URL or remote URL, stores bytes privately, returns `{id, mime, bytes, deduped, presigned_url}`. |
+| `POST /v1/media/prepare-upload` | **Method 2**: Generates presigned upload URL and reserves `jan_id`. Client uploads directly to S3. |
+| `POST /v1/media/resolve` | Replaces `data:<mime>;jan_<id>` placeholders in arbitrary JSON with fresh presigned URLs. |
+| `GET /v1/media/{id}` | Streams media bytes through the API or returns presigned URL (see `PROXY_DOWNLOAD` config). |
+
+See `docs/swagger/swagger.yaml` for the full OpenAPI schema (regenerate with `make swagger`).
+
+## Development scripts
+
+- `make run` - start the service locally (loads `.env`).
+- `make wire` - regenerate dependency injection graph after wiring changes.
+- `make swagger` - refresh OpenAPI docs after editing handler annotations.
+- `make tidy` - clean up go.mod / go.sum.
+
+Need to integrate with `llm-api`? Wire the media client, dual-write uploads, resolve before calling the LLM, then enforce `jan_*` identifiers everywhere (see `docs/services.md` for the full flow).
diff --git a/services/media-api/cmd/server/server.go b/services/media-api/cmd/server/server.go
new file mode 100644
index 00000000..4435ad81
--- /dev/null
+++ b/services/media-api/cmd/server/server.go
@@ -0,0 +1,120 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/joho/godotenv"
+	"github.com/rs/zerolog"
+	gormlogger "gorm.io/gorm/logger"
+
+	"jan-server/services/media-api/internal/config"
+	domain "jan-server/services/media-api/internal/domain/media"
+	"jan-server/services/media-api/internal/infrastructure/auth"
+	"jan-server/services/media-api/internal/infrastructure/database"
+	"jan-server/services/media-api/internal/infrastructure/logger"
+	"jan-server/services/media-api/internal/infrastructure/observability"
+	repo "jan-server/services/media-api/internal/infrastructure/repository/media"
+	"jan-server/services/media-api/internal/infrastructure/storage"
+	"jan-server/services/media-api/internal/interfaces/httpserver"
+)
+
+// @title Media API
+// @version 1.0
+// @description Secure media ingestion and resolution service
+// @BasePath /
+// @securityDefinitions.apikey ApiKeyAuth
+// @in header
+// @name X-Media-Service-Key
+type Application struct {
+	// httpServer is the HTTP server that Start runs.
+	httpServer *httpserver.HTTPServer
+	// log is the logger supplied to NewApplication; no method in this file reads it.
+	log        zerolog.Logger
+}
+
+// NewApplication bundles the given HTTP server and logger into an Application.
+func NewApplication(httpServer *httpserver.HTTPServer, log zerolog.Logger) *Application {
+	app := new(Application)
+	app.httpServer = httpServer
+	app.log = log
+	return app
+}
+
+// Start runs the HTTP server with ctx and returns whatever error Run reports.
+func (a *Application) Start(ctx context.Context) error {
+	runErr := a.httpServer.Run(ctx)
+	return runErr
+}
+
+func main() {
+	loadEnvFiles()
+
+	cfg, err := config.Load()
+	if err != nil {
+		panic(err)
+	}
+
+	log := logger.New(cfg)
+
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer stop()
+
+	shutdownTelemetry, err := observability.Setup(ctx, cfg, log)
+	if err != nil {
+		log.Fatal().Err(err).Msg("initialize observability")
+	}
+	defer func() {
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
+		defer cancel()
+		if err := shutdownTelemetry(shutdownCtx); err != nil {
+			log.Error().Err(err).Msg("shutdown telemetry")
+		}
+	}()
+
+	db, err := database.Connect(database.Config{
+		DSN:             cfg.GetDatabaseWriteDSN(),
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        gormlogger.Warn,
+	})
+	if err != nil {
+		log.Fatal().Err(err).Msg("connect database")
+	}
+
+	if err := database.AutoMigrate(ctx, db, log); err != nil {
+		log.Fatal().Err(err).Msg("migrate database")
+	}
+
+	storageClient, err := storage.NewS3Storage(ctx, cfg, log)
+	if err != nil {
+		log.Fatal().Err(err).Msg("initialize storage")
+	}
+
+	mediaRepository := repo.NewRepository(db)
+	mediaService := domain.NewService(cfg, mediaRepository, storageClient, log)
+
+	authValidator, err := auth.NewValidator(ctx, cfg, log)
+	if err != nil {
+		log.Fatal().Err(err).Msg("failed to initialize auth validator")
+	}
+
+	httpServer := httpserver.New(cfg, log, mediaService, authValidator)
+	app := NewApplication(httpServer, log)
+
+	if err := app.Start(ctx); err != nil {
+		log.Fatal().Err(err).Msg("application stopped with error")
+	}
+
+	log.Info().Msg("application exited cleanly")
+}
+
+// loadEnvFiles applies ".env" and then "../.env" with godotenv.Overload.
+// A path that cannot be stat'ed is skipped silently; a file that fails to
+// load only produces a warning on stderr.
+func loadEnvFiles() {
+	for _, candidate := range [...]string{".env", "../.env"} {
+		if _, statErr := os.Stat(candidate); statErr != nil {
+			continue
+		}
+		loadErr := godotenv.Overload(candidate)
+		if loadErr == nil {
+			continue
+		}
+		fmt.Fprintf(os.Stderr, "warning: failed to load %s: %v\n", candidate, loadErr)
+	}
+}
diff --git a/services/media-api/cmd/server/wire.go b/services/media-api/cmd/server/wire.go
new file mode 100644
index 00000000..11d2d06c
--- /dev/null
+++ b/services/media-api/cmd/server/wire.go
@@ -0,0 +1,81 @@
+//go:build wireinject
+
+package main
+
+import (
+	"context"
+
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+	gormlogger "gorm.io/gorm/logger"
+
+	"jan-server/services/media-api/internal/config"
+	domain "jan-server/services/media-api/internal/domain/media"
+	"jan-server/services/media-api/internal/infrastructure/auth"
+	"jan-server/services/media-api/internal/infrastructure/database"
+	"jan-server/services/media-api/internal/infrastructure/logger"
+	repo "jan-server/services/media-api/internal/infrastructure/repository/media"
+	"jan-server/services/media-api/internal/infrastructure/storage"
+	"jan-server/services/media-api/internal/interfaces/httpserver"
+)
+
+// mediaSet groups the providers for the media domain: the repository (bound
+// to the domain.Repository interface), the storage backend returned by
+// provideStorage, and the domain service.
+var mediaSet = wire.NewSet(
+	repo.NewRepository,
+	wire.Bind(new(domain.Repository), new(*repo.Repository)),
+	provideStorage,
+	domain.NewService,
+)
+
+// BuildApplication assembles the media API with Wire.
+//
+// This is an injector template compiled only under the wireinject build tag:
+// wire.Build lists the providers to connect, and the return statement is a
+// placeholder that the generated implementation replaces.
+func BuildApplication(ctx context.Context) (*Application, error) {
+	wire.Build(
+		config.Load,
+		logger.New,
+		auth.NewValidator,
+		newDatabaseConfig,
+		newGormDB,
+		mediaSet,
+		httpserver.New,
+		NewApplication,
+	)
+	return nil, nil
+}
+
+func newDatabaseConfig(cfg *config.Config) database.Config {
+	return database.Config{
+		DSN:             cfg.GetDatabaseWriteDSN(),
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        gormlogger.Warn,
+	}
+}
+
+// newGormDB connects to the database described by cfg and runs AutoMigrate
+// on the new connection. If either step fails, its error is returned with a
+// nil *gorm.DB.
+func newGormDB(ctx context.Context, cfg database.Config, log zerolog.Logger) (*gorm.DB, error) {
+	conn, connectErr := database.Connect(cfg)
+	if connectErr != nil {
+		return nil, connectErr
+	}
+
+	migrateErr := database.AutoMigrate(ctx, conn, log)
+	if migrateErr != nil {
+		return nil, migrateErr
+	}
+
+	return conn, nil
+}
+
+// provideStorage creates the storage backend selected by configuration.
+//
+// When cfg.IsLocalStorage() reports true it builds the local backend with
+// storage.NewLocalStorage; otherwise it falls back to S3 via
+// storage.NewS3Storage. On failure the constructor's error is returned
+// together with an untyped nil, so the domain.Storage result is a true nil
+// interface rather than a nil pointer wrapped in one.
+//
+// This function depends on the infrastructure/storage package, which must be
+// present in this file's import block for the wireinject build to type-check.
+func provideStorage(ctx context.Context, cfg *config.Config, log zerolog.Logger) (domain.Storage, error) {
+	if cfg.IsLocalStorage() {
+		localStorage, err := storage.NewLocalStorage(cfg, log)
+		if err != nil {
+			return nil, err
+		}
+		return localStorage, nil
+	}
+
+	// Default to S3 storage.
+	s3Storage, err := storage.NewS3Storage(ctx, cfg, log)
+	if err != nil {
+		return nil, err
+	}
+	return s3Storage, nil
+}
diff --git a/services/media-api/cmd/server/wire_gen.go b/services/media-api/cmd/server/wire_gen.go
new file mode 100644
index 00000000..d15d55a3
--- /dev/null
+++ b/services/media-api/cmd/server/wire_gen.go
@@ -0,0 +1,78 @@
+// Code generated by Wire. DO NOT EDIT.
+
+//go:generate go run -mod=mod github.com/google/wire/cmd/wire
+//go:build !wireinject
+// +build !wireinject
+
+package main
+
+import (
+	"context"
+	"jan-server/services/media-api/internal/config"
+	media2 "jan-server/services/media-api/internal/domain/media"
+	"jan-server/services/media-api/internal/infrastructure/auth"
+	"jan-server/services/media-api/internal/infrastructure/database"
+	"jan-server/services/media-api/internal/infrastructure/logger"
+	"jan-server/services/media-api/internal/infrastructure/repository/media"
+	"jan-server/services/media-api/internal/infrastructure/storage"
+	"jan-server/services/media-api/internal/interfaces/httpserver"
+
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+	logger2 "gorm.io/gorm/logger"
+)
+
+// Injectors from wire.go:
+
+// BuildApplication assembles the media API with Wire.
+//
+// NOTE(review): this generated injector constructs storage directly with
+// storage.NewS3Storage, while the mediaSet in wire.go lists provideStorage.
+// The file looks stale relative to wire.go; regenerate it with Wire rather
+// than editing it by hand.
+func BuildApplication(ctx context.Context) (*Application, error) {
+	configConfig, err := config.Load()
+	if err != nil {
+		return nil, err
+	}
+	zerologLogger := logger.New(configConfig)
+	databaseConfig := newDatabaseConfig(configConfig)
+	db, err := newGormDB(ctx, databaseConfig, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	repository := media.NewRepository(db)
+	s3Storage, err := storage.NewS3Storage(ctx, configConfig, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	service := media2.NewService(configConfig, repository, s3Storage, zerologLogger)
+	validator, err := auth.NewValidator(ctx, configConfig, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	httpServer := httpserver.New(configConfig, zerologLogger, service, validator)
+	application := NewApplication(httpServer, zerologLogger)
+	return application, nil
+}
+
+// wire.go:
+
+// mediaSet is the generated copy of the media provider set.
+// NOTE(review): it differs from the mediaSet in wire.go, which lists
+// provideStorage instead of storage.NewS3Storage plus a *storage.S3Storage
+// binding; this copy appears to predate that change.
+var mediaSet = wire.NewSet(media.NewRepository, wire.Bind(new(media2.Repository), new(*media.Repository)), storage.NewS3Storage, wire.Bind(new(media2.Storage), new(*storage.S3Storage)), media2.NewService)
+
+// newDatabaseConfig copies the write DSN and connection-pool settings from
+// the service configuration into a database.Config, with GORM logging fixed
+// at the Warn level.
+func newDatabaseConfig(cfg *config.Config) database.Config {
+	return database.Config{
+		DSN:             cfg.GetDatabaseWriteDSN(),
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        logger2.Warn,
+	}
+}
+
+// newGormDB connects to the database described by cfg and runs AutoMigrate
+// on the new connection. If either step fails, its error is returned with a
+// nil *gorm.DB.
+func newGormDB(ctx context.Context, cfg database.Config, log zerolog.Logger) (*gorm.DB, error) {
+	db, err := database.Connect(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := database.AutoMigrate(ctx, db, log); err != nil {
+		return nil, err
+	}
+	return db, nil
+}
diff --git a/services/media-api/doc.go b/services/media-api/doc.go
new file mode 100644
index 00000000..a63b7e83
--- /dev/null
+++ b/services/media-api/doc.go
@@ -0,0 +1,2 @@
+// Package templateapi provides module-level docs to satisfy tooling.
+//
+// NOTE(review): this file lives under services/media-api but the package is
+// still named templateapi - presumably carried over from a service template;
+// confirm nothing imports it before renaming.
+package templateapi
diff --git a/services/media-api/docs/swagger/docs.go b/services/media-api/docs/swagger/docs.go
new file mode 100644
index 00000000..b21fcb16
--- /dev/null
+++ b/services/media-api/docs/swagger/docs.go
@@ -0,0 +1,445 @@
+// Code generated by swaggo/swag. DO NOT EDIT.
+
+package swagger
+
+import "github.com/swaggo/swag"
+
+const docTemplate = `{
+    "schemes": {{ marshal .Schemes }},
+    "swagger": "2.0",
+    "info": {
+        "description": "{{escape .Description}}",
+        "title": "{{.Title}}",
+        "contact": {},
+        "version": "{{.Version}}"
+    },
+    "host": "{{.Host}}",
+    "basePath": "{{.BasePath}}",
+    "paths": {
+        "/v1/media": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Accepts data URLs or remote URLs and stores content privately.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Upload media",
+                "parameters": [
+                    {
+                        "description": "Media request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/media.IngestRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/handlers.ingestResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/prepare-upload": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Generates a presigned upload URL and reserves a jan_id. Client uploads directly to S3 using the URL. Not available for local storage.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Request presigned upload URL",
+                "parameters": [
+                    {
+                        "description": "Upload preparation request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/media.PrepareUploadRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/media.UploadPreparation"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    "501": {
+                        "description": "Not Implemented",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/resolve": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Replaces pseudo data URLs with short-lived signed URLs.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Resolve jan_* placeholders",
+                "parameters": [
+                    {
+                        "description": "Payload to resolve",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/handlers.resolveRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/handlers.resolveResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/upload": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Accepts multipart file upload for local storage. Alternative to presigned uploads.",
+                "consumes": [
+                    "multipart/form-data"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Direct file upload",
+                "parameters": [
+                    {
+                        "type": "file",
+                        "description": "File to upload",
+                        "name": "file",
+                        "in": "formData",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "User ID",
+                        "name": "user_id",
+                        "in": "formData"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/handlers.ingestResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/{id}": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Streams the object through the media API without exposing storage URLs.",
+                "produces": [
+                    "application/octet-stream"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Stream media bytes",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Media ID",
+                        "name": "id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "binary data"
+                    },
+                    "404": {
+                        "description": "Not Found",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/{id}/presign": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns a temporary signed URL for downloading media by jan_id.",
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Get presigned download URL",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Media ID (jan_xxx)",
+                        "name": "id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": true
+                        }
+                    },
+                    "404": {
+                        "description": "Not Found",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "handlers.ingestResponse": {
+            "type": "object",
+            "properties": {
+                "bytes": {
+                    "type": "integer"
+                },
+                "deduped": {
+                    "type": "boolean"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "mime": {
+                    "type": "string"
+                },
+                "presigned_url": {
+                    "type": "string"
+                }
+            }
+        },
+        "handlers.resolveRequest": {
+            "type": "object",
+            "required": [
+                "payload"
+            ],
+            "properties": {
+                "payload": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer"
+                    }
+                }
+            }
+        },
+        "handlers.resolveResponse": {
+            "type": "object",
+            "properties": {
+                "payload": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer"
+                    }
+                }
+            }
+        },
+        "media.IngestRequest": {
+            "type": "object",
+            "required": [
+                "source"
+            ],
+            "properties": {
+                "filename": {
+                    "type": "string"
+                },
+                "source": {
+                    "$ref": "#/definitions/media.Source"
+                },
+                "user_id": {
+                    "type": "string"
+                }
+            }
+        },
+        "media.PrepareUploadRequest": {
+            "type": "object",
+            "required": [
+                "mime_type"
+            ],
+            "properties": {
+                "mime_type": {
+                    "type": "string"
+                },
+                "user_id": {
+                    "type": "string"
+                }
+            }
+        },
+        "media.Source": {
+            "type": "object",
+            "required": [
+                "type"
+            ],
+            "properties": {
+                "data_url": {
+                    "type": "string"
+                },
+                "type": {
+                    "type": "string"
+                },
+                "url": {
+                    "type": "string"
+                }
+            }
+        },
+        "media.UploadPreparation": {
+            "type": "object",
+            "properties": {
+                "expires_in": {
+                    "description": "seconds",
+                    "type": "integer"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "mime_type": {
+                    "type": "string"
+                },
+                "upload_url": {
+                    "type": "string"
+                }
+            }
+        }
+    },
+    "securityDefinitions": {
+        "ApiKeyAuth": {
+            "type": "apiKey",
+            "name": "X-Media-Service-Key",
+            "in": "header"
+        }
+    }
+}`
+
+// SwaggerInfo holds exported Swagger Info so clients can modify it
+//
+// Version, BasePath, Title and Description carry the same values as the
+// @version, @BasePath, @title and @description annotations in
+// cmd/server/server.go; Host and Schemes are left empty here.
+var SwaggerInfo = &swag.Spec{
+	Version:          "1.0",
+	Host:             "",
+	BasePath:         "/",
+	Schemes:          []string{},
+	Title:            "Media API",
+	Description:      "Secure media ingestion and resolution service",
+	InfoInstanceName: "swagger",
+	SwaggerTemplate:  docTemplate,
+}
+
+// init registers SwaggerInfo with the swag package under its instance name
+// when this package is imported.
+func init() {
+	swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo)
+}
diff --git a/services/media-api/docs/swagger/swagger.json b/services/media-api/docs/swagger/swagger.json
new file mode 100644
index 00000000..108299ff
--- /dev/null
+++ b/services/media-api/docs/swagger/swagger.json
@@ -0,0 +1,421 @@
+{
+    "swagger": "2.0",
+    "info": {
+        "description": "Secure media ingestion and resolution service",
+        "title": "Media API",
+        "contact": {},
+        "version": "1.0"
+    },
+    "basePath": "/",
+    "paths": {
+        "/v1/media": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Accepts data URLs or remote URLs and stores content privately.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Upload media",
+                "parameters": [
+                    {
+                        "description": "Media request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/media.IngestRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/handlers.ingestResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/prepare-upload": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Generates a presigned upload URL and reserves a jan_id. Client uploads directly to S3 using the URL. Not available for local storage.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Request presigned upload URL",
+                "parameters": [
+                    {
+                        "description": "Upload preparation request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/media.PrepareUploadRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/media.UploadPreparation"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    },
+                    "501": {
+                        "description": "Not Implemented",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/resolve": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Replaces pseudo data URLs with short-lived signed URLs.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Resolve jan_* placeholders",
+                "parameters": [
+                    {
+                        "description": "Payload to resolve",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/handlers.resolveRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/handlers.resolveResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/upload": {
+            "post": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Accepts multipart file upload for local storage. Alternative to presigned uploads.",
+                "consumes": [
+                    "multipart/form-data"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Direct file upload",
+                "parameters": [
+                    {
+                        "type": "file",
+                        "description": "File to upload",
+                        "name": "file",
+                        "in": "formData",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "User ID",
+                        "name": "user_id",
+                        "in": "formData"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/handlers.ingestResponse"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/{id}": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Streams the object through the media API without exposing storage URLs.",
+                "produces": [
+                    "application/octet-stream"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Stream media bytes",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Media ID",
+                        "name": "id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "binary data"
+                    },
+                    "404": {
+                        "description": "Not Found",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/media/{id}/presign": {
+            "get": {
+                "security": [
+                    {
+                        "ApiKeyAuth": []
+                    }
+                ],
+                "description": "Returns a temporary signed URL for downloading media by jan_id.",
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "media"
+                ],
+                "summary": "Get presigned download URL",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Media ID (jan_xxx)",
+                        "name": "id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": true
+                        }
+                    },
+                    "404": {
+                        "description": "Not Found",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "handlers.ingestResponse": {
+            "type": "object",
+            "properties": {
+                "bytes": {
+                    "type": "integer"
+                },
+                "deduped": {
+                    "type": "boolean"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "mime": {
+                    "type": "string"
+                },
+                "presigned_url": {
+                    "type": "string"
+                }
+            }
+        },
+        "handlers.resolveRequest": {
+            "type": "object",
+            "required": [
+                "payload"
+            ],
+            "properties": {
+                "payload": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer"
+                    }
+                }
+            }
+        },
+        "handlers.resolveResponse": {
+            "type": "object",
+            "properties": {
+                "payload": {
+                    "type": "array",
+                    "items": {
+                        "type": "integer"
+                    }
+                }
+            }
+        },
+        "media.IngestRequest": {
+            "type": "object",
+            "required": [
+                "source"
+            ],
+            "properties": {
+                "filename": {
+                    "type": "string"
+                },
+                "source": {
+                    "$ref": "#/definitions/media.Source"
+                },
+                "user_id": {
+                    "type": "string"
+                }
+            }
+        },
+        "media.PrepareUploadRequest": {
+            "type": "object",
+            "required": [
+                "mime_type"
+            ],
+            "properties": {
+                "mime_type": {
+                    "type": "string"
+                },
+                "user_id": {
+                    "type": "string"
+                }
+            }
+        },
+        "media.Source": {
+            "type": "object",
+            "required": [
+                "type"
+            ],
+            "properties": {
+                "data_url": {
+                    "type": "string"
+                },
+                "type": {
+                    "type": "string"
+                },
+                "url": {
+                    "type": "string"
+                }
+            }
+        },
+        "media.UploadPreparation": {
+            "type": "object",
+            "properties": {
+                "expires_in": {
+                    "description": "seconds",
+                    "type": "integer"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "mime_type": {
+                    "type": "string"
+                },
+                "upload_url": {
+                    "type": "string"
+                }
+            }
+        }
+    },
+    "securityDefinitions": {
+        "ApiKeyAuth": {
+            "type": "apiKey",
+            "name": "X-Media-Service-Key",
+            "in": "header"
+        }
+    }
+}
\ No newline at end of file
diff --git a/services/media-api/docs/swagger/swagger.yaml b/services/media-api/docs/swagger/swagger.yaml
new file mode 100644
index 00000000..ffcffe85
--- /dev/null
+++ b/services/media-api/docs/swagger/swagger.yaml
@@ -0,0 +1,271 @@
+basePath: /
+definitions:
+  handlers.ingestResponse:
+    properties:
+      bytes:
+        type: integer
+      deduped:
+        type: boolean
+      id:
+        type: string
+      mime:
+        type: string
+      presigned_url:
+        type: string
+    type: object
+  handlers.resolveRequest:
+    properties:
+      payload:
+        items:
+          type: integer
+        type: array
+    required:
+    - payload
+    type: object
+  handlers.resolveResponse:
+    properties:
+      payload:
+        items:
+          type: integer
+        type: array
+    type: object
+  media.IngestRequest:
+    properties:
+      filename:
+        type: string
+      source:
+        $ref: '#/definitions/media.Source'
+      user_id:
+        type: string
+    required:
+    - source
+    type: object
+  media.PrepareUploadRequest:
+    properties:
+      mime_type:
+        type: string
+      user_id:
+        type: string
+    required:
+    - mime_type
+    type: object
+  media.Source:
+    properties:
+      data_url:
+        type: string
+      type:
+        type: string
+      url:
+        type: string
+    required:
+    - type
+    type: object
+  media.UploadPreparation:
+    properties:
+      expires_in:
+        description: seconds
+        type: integer
+      id:
+        type: string
+      mime_type:
+        type: string
+      upload_url:
+        type: string
+    type: object
+info:
+  contact: {}
+  description: Secure media ingestion and resolution service
+  title: Media API
+  version: "1.0"
+paths:
+  /v1/media:
+    post:
+      consumes:
+      - application/json
+      description: Accepts data URLs or remote URLs and stores content privately.
+      parameters:
+      - description: Media request
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/media.IngestRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/handlers.ingestResponse'
+        "400":
+          description: Bad Request
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Upload media
+      tags:
+      - media
+  /v1/media/{id}:
+    get:
+      description: Streams the object through the media API without exposing storage
+        URLs.
+      parameters:
+      - description: Media ID
+        in: path
+        name: id
+        required: true
+        type: string
+      produces:
+      - application/octet-stream
+      responses:
+        "200":
+          description: binary data
+        "404":
+          description: Not Found
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Stream media bytes
+      tags:
+      - media
+  /v1/media/{id}/presign:
+    get:
+      description: Returns a temporary signed URL for downloading media by jan_id.
+      parameters:
+      - description: Media ID (jan_xxx)
+        in: path
+        name: id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            additionalProperties: true
+            type: object
+        "404":
+          description: Not Found
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Get presigned download URL
+      tags:
+      - media
+  /v1/media/prepare-upload:
+    post:
+      consumes:
+      - application/json
+      description: Generates a presigned upload URL and reserves a jan_id. Client
+        uploads directly to S3 using the URL. Not available for local storage.
+      parameters:
+      - description: Upload preparation request
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/media.PrepareUploadRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/media.UploadPreparation'
+        "400":
+          description: Bad Request
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+        "501":
+          description: Not Implemented
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Request presigned upload URL
+      tags:
+      - media
+  /v1/media/resolve:
+    post:
+      consumes:
+      - application/json
+      description: Replaces pseudo data URLs with short-lived signed URLs.
+      parameters:
+      - description: Payload to resolve
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/handlers.resolveRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/handlers.resolveResponse'
+        "400":
+          description: Bad Request
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Resolve jan_* placeholders
+      tags:
+      - media
+  /v1/media/upload:
+    post:
+      consumes:
+      - multipart/form-data
+      description: Accepts multipart file upload for local storage. Alternative to
+        presigned uploads.
+      parameters:
+      - description: File to upload
+        in: formData
+        name: file
+        required: true
+        type: file
+      - description: User ID
+        in: formData
+        name: user_id
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/handlers.ingestResponse'
+        "400":
+          description: Bad Request
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      security:
+      - ApiKeyAuth: []
+      summary: Direct file upload
+      tags:
+      - media
+securityDefinitions:
+  ApiKeyAuth:
+    in: header
+    name: X-Media-Service-Key
+    type: apiKey
+swagger: "2.0"
diff --git a/services/media-api/go.mod b/services/media-api/go.mod
new file mode 100644
index 00000000..49218487
--- /dev/null
+++ b/services/media-api/go.mod
@@ -0,0 +1,105 @@
+module jan-server/services/media-api
+
+go 1.25.0
+
+require (
+	github.com/MicahParks/keyfunc/v2 v2.1.0
+	github.com/aws/aws-sdk-go-v2 v1.33.0
+	github.com/aws/aws-sdk-go-v2/config v1.27.13
+	github.com/aws/aws-sdk-go-v2/credentials v1.17.13
+	github.com/aws/aws-sdk-go-v2/service/s3 v1.54.2
+	github.com/caarlos0/env/v10 v10.0.0
+	github.com/gabriel-vasile/mimetype v1.4.3
+	github.com/gin-gonic/gin v1.10.0
+	github.com/golang-jwt/jwt/v5 v5.3.0
+	github.com/golang-migrate/migrate/v4 v4.19.0
+	github.com/google/wire v0.7.0
+	github.com/joho/godotenv v1.5.1
+	github.com/lib/pq v1.10.9
+	github.com/oklog/ulid/v2 v2.1.0
+	github.com/rs/zerolog v1.31.0
+	github.com/swaggo/files v1.0.1
+	github.com/swaggo/gin-swagger v1.6.0
+	github.com/swaggo/swag v1.16.4
+	go.opentelemetry.io/otel v1.37.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0
+	go.opentelemetry.io/otel/sdk v1.29.0
+	gorm.io/driver/postgres v1.5.7
+	gorm.io/gorm v1.26.0
+)
+
+require (
+	github.com/KyleBanks/depth v1.2.1 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
+	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.2 // indirect
+	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.7 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.7 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 // indirect
+	github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.7 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.2 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9 // indirect
+	github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.2 // indirect
+	github.com/aws/aws-sdk-go-v2/service/sso v1.20.6 // indirect
+	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.24.0 // indirect
+	github.com/aws/aws-sdk-go-v2/service/sts v1.28.7 // indirect
+	github.com/aws/smithy-go v1.22.1 // indirect
+	github.com/bytedance/sonic v1.11.6 // indirect
+	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
+	github.com/cloudwego/base64x v0.1.4 // indirect
+	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-openapi/jsonpointer v0.19.5 // indirect
+	github.com/go-openapi/jsonreference v0.19.6 // indirect
+	github.com/go-openapi/spec v0.20.4 // indirect
+	github.com/go-openapi/swag v0.19.15 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.20.0 // indirect
+	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
+	github.com/hashicorp/errwrap v1.1.0 // indirect
+	github.com/hashicorp/go-multierror v1.1.1 // indirect
+	github.com/jackc/pgpassfile v1.0.0 // indirect
+	github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
+	github.com/jackc/pgx/v5 v5.5.4 // indirect
+	github.com/jackc/puddle/v2 v2.2.1 // indirect
+	github.com/jinzhu/inflection v1.0.0 // indirect
+	github.com/jinzhu/now v1.1.5 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/mailru/easyjson v0.7.6 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
+	github.com/ugorji/go/codec v1.2.12 // indirect
+	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
+	go.opentelemetry.io/otel/metric v1.37.0 // indirect
+	go.opentelemetry.io/otel/trace v1.37.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
+	golang.org/x/arch v0.8.0 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/sync v0.18.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	golang.org/x/tools v0.38.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
+	google.golang.org/grpc v1.67.0 // indirect
+	google.golang.org/protobuf v1.34.2 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
diff --git a/services/media-api/go.sum b/services/media-api/go.sum
new file mode 100644
index 00000000..0be46593
--- /dev/null
+++ b/services/media-api/go.sum
@@ -0,0 +1,326 @@
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/MicahParks/keyfunc/v2 v2.1.0 h1:6ZXKb9Rp6qp1bDbJefnG7cTH8yMN1IC/4nf+GVjO99k=
+github.com/MicahParks/keyfunc/v2 v2.1.0/go.mod h1:rW42fi+xgLJ2FRRXAfNx9ZA8WpD4OeE/yHVMteCkw9k=
+github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
+github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
+github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
+github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+github.com/aws/aws-sdk-go-v2 v1.33.0 h1:Evgm4DI9imD81V0WwD+TN4DCwjUMdc94TrduMLbgZJs=
+github.com/aws/aws-sdk-go-v2 v1.33.0/go.mod h1:P5WJBrYqqbWVaOxgH0X/FYYD47/nooaPOZPlQdmiN2U=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.2 h1:x6xsQXGSmW6frevwDA+vi/wqhp1ct18mVXYN08/93to=
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.2/go.mod h1:lPprDr1e6cJdyYeGXnRaJoP4Md+cDBvi2eOj00BlGmg=
+github.com/aws/aws-sdk-go-v2/config v1.27.13 h1:WbKW8hOzrWoOA/+35S5okqO/2Ap8hkkFUzoW8Hzq24A=
+github.com/aws/aws-sdk-go-v2/config v1.27.13/go.mod h1:XLiyiTMnguytjRER7u5RIkhIqS8Nyz41SwAWb4xEjxs=
+github.com/aws/aws-sdk-go-v2/credentials v1.17.13 h1:XDCJDzk/u5cN7Aple7D/MiAhx1Rjo/0nueJ0La8mRuE=
+github.com/aws/aws-sdk-go-v2/credentials v1.17.13/go.mod h1:FMNcjQrmuBYvOTZDtOLCIu0esmxjF7RuA/89iSXWzQI=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1 h1:FVJ0r5XTHSmIHJV6KuDmdYhEpvlHpiSd38RQWhut5J4=
+github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.1/go.mod h1:zusuAeqezXzAB24LGuzuekqMAEgWkVYukBec3kr3jUg=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.7 h1:lf/8VTF2cM+N4SLzaYJERKEWAXq8MOMpZfU6wEPWsPk=
+github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.7/go.mod h1:4SjkU7QiqK2M9oozyMzfZ/23LmUY+h3oFqhdeP5OMiI=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.7 h1:4OYVp0705xu8yjdyoWix0r9wPIRXnIzzOoUpQVHIJ/g=
+github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.7/go.mod h1:vd7ESTEvI76T2Na050gODNmNU7+OyKrIKroYTu4ABiI=
+github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0 h1:hT8rVHwugYE2lEfdFE0QWVo81lF7jMrYJVDWI+f+VxU=
+github.com/aws/aws-sdk-go-v2/internal/ini v1.8.0/go.mod h1:8tu/lYfQfFe6IGnaOdrpVgEL2IrrDOf6/m9RQum4NkY=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.7 h1:/FUtT3xsoHO3cfh+I/kCbcMCN98QZRsiFet/V8QkWSs=
+github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.7/go.mod h1:MaCAgWpGooQoCWZnMur97rGn5dp350w2+CeiV5406wE=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 h1:Ji0DY1xUsUr3I8cHps0G+XM3WWU16lP6yG8qu1GAZAs=
+github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2/go.mod h1:5CsjAbs3NlGQyZNFACh+zztPDI7fU6eW9QsxjfnuBKg=
+github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.2 h1:e6um6+DWYQP1XCa+E9YVtG/9v1qk5lyAOelMOVwSyO8=
+github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.5.2/go.mod h1:dIW8puxSbYLSPv/ju0d9A3CpwXdtqvJtYKDMVmPLOWE=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9 h1:TQmKDyETFGiXVhZfQ/I0cCFziqqX58pi4tKJGYGFSz0=
+github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.9/go.mod h1:HVLPK2iHQBUx7HfZeOQSEu3v2ubZaAY2YPbAm5/WUyY=
+github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.2 h1:t7iUP9+4wdc5lt3E41huP+GvQZJD38WLsgVp4iOtAjg=
+github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.2/go.mod h1:/niFCtmuQNxqx9v8WAPq5qh7EH25U4BF6tjoyq9bObM=
+github.com/aws/aws-sdk-go-v2/service/s3 v1.54.2 h1:gYSJhNiOF6J9xaYxu2NFNstoiNELwt0T9w29FxSfN+Y=
+github.com/aws/aws-sdk-go-v2/service/s3 v1.54.2/go.mod h1:739CllldowZiPPsDFcJHNF4FXrVxaSGVnZ9Ez9Iz9hc=
+github.com/aws/aws-sdk-go-v2/service/sso v1.20.6 h1:o5cTaeunSpfXiLTIBx5xo2enQmiChtu1IBbzXnfU9Hs=
+github.com/aws/aws-sdk-go-v2/service/sso v1.20.6/go.mod h1:qGzynb/msuZIE8I75DVRCUXw3o3ZyBmUvMwQ2t/BrGM=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.24.0 h1:Qe0r0lVURDDeBQJ4yP+BOrJkvkiCo/3FH/t+wY11dmw=
+github.com/aws/aws-sdk-go-v2/service/ssooidc v1.24.0/go.mod h1:mUYPBhaF2lGiukDEjJX2BLRRKTmoUSitGDUgM4tRxak=
+github.com/aws/aws-sdk-go-v2/service/sts v1.28.7 h1:et3Ta53gotFR4ERLXXHIHl/Uuk1qYpP5uU7cvNql8ns=
+github.com/aws/aws-sdk-go-v2/service/sts v1.28.7/go.mod h1:FZf1/nKNEkHdGGJP/cI2MoIMquumuRK6ol3QQJNDxmw=
+github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro=
+github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
+github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
+github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
+github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
+github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
+github.com/caarlos0/env/v10 v10.0.0 h1:yIHUBZGsyqCnpTkbjk8asUlx6RFhhEs+h7TOBdgdzXA=
+github.com/caarlos0/env/v10 v10.0.0/go.mod h1:ZfulV76NvVPw3tm591U4SwL3Xx9ldzBP9aGxzeN7G18=
+github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
+github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
+github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
+github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
+github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
+github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
+github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
+github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
+github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4=
+github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU=
+github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
+github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
+github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI=
+github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
+github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
+github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
+github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
+github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
+github.com/gin-contrib/gzip v0.0.6 h1:NjcunTcGAj5CO1gn4N8jHOSIeRFHIbn51z6K+xaN4d4=
+github.com/gin-contrib/gzip v0.0.6/go.mod h1:QOJlmV2xmayAjkNS2Y8NQsMneuRShOU/kjovCXNuzzk=
+github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
+github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
+github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
+github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
+github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
+github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
+github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
+github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
+github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
+github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
+github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
+github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
+github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
+github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
+github.com/golang-migrate/migrate/v4 v4.19.0 h1:RcjOnCGz3Or6HQYEJ/EEVLfWnmw9KnoigPSjzhCuaSE=
+github.com/golang-migrate/migrate/v4 v4.19.0/go.mod h1:9dyEcu+hO+G9hPSw8AIg50yg622pXJsoHItQnDGZkI0=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
+github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
+github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
+github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
+github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.5.4 h1:Xp2aQS8uXButQdnCMWNmvx6UysWQQC+u1EoizjguY+8=
+github.com/jackc/pgx/v5 v5.5.4/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
+github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
+github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
+github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
+github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
+github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
+github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
+github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
+github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
+github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
+github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
+github.com/oklog/ulid/v2 v2.1.0 h1:+9lhoxAP56we25tyYETBBY1YLA2SaoLvUFgrP2miPJU=
+github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ=
+github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
+github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
+github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
+github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o=
+github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
+github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
+github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
+github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg=
+github.com/swaggo/gin-swagger v1.6.0 h1:y8sxvQ3E20/RCyrXeFfg60r6H0Z+SwpTjMYsMm+zy8M=
+github.com/swaggo/gin-swagger v1.6.0/go.mod h1:BG00cCEy294xtVpyIAHG6+e2Qzj/xKlRdOqDkvq0uzo=
+github.com/swaggo/swag v1.16.4 h1:clWJtd9LStiG3VeijiCfOVODP6VpHtKdQy9ELFG3s1A=
+github.com/swaggo/swag v1.16.4/go.mod h1:VBsHJRsDvfYvqoiMKnsdwhNV9LEMHgEDZcyVYX0sxPg=
+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
+github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
+go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
+go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
+go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0 h1:Xw8U6u2f8DK2XAkGRFV7BBLENgnTGX9i4rQRxJf+/vs=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0/go.mod h1:6KW1Fm6R/s6Z3PGXwSJN2K4eT6wQB3vXX6CVnYX9NmM=
+go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
+go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
+go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
+go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
+go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
+go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
+go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
+go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
+golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
+golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8=
+google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
+google.golang.org/grpc v1.67.0 h1:IdH9y6PF5MPSdAntIcpjQ+tXO41pcQsfZV2RxtQgVcw=
+google.golang.org/grpc v1.67.0/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorm.io/driver/postgres v1.5.7 h1:8ptbNJTDbEmhdr62uReG5BGkdQyeasu/FZHxI0IMGnM=
+gorm.io/driver/postgres v1.5.7/go.mod h1:3e019WlBaYI5o5LIdNV+LyxCMNtLOQETBXL2h4chKpA=
+gorm.io/gorm v1.26.0 h1:9lqQVPG5aNNS6AyHdRiwScAVnXHg/L/Srzx55G5fOgs=
+gorm.io/gorm v1.26.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE=
+nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/services/media-api/internal/config/config.go b/services/media-api/internal/config/config.go
new file mode 100644
index 00000000..f5fd3df7
--- /dev/null
+++ b/services/media-api/internal/config/config.go
@@ -0,0 +1,122 @@
+package config
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/caarlos0/env/v10"
+)
+
+// Config holds the environment-driven configuration for the media service; Load fills it via env.Parse.
+type Config struct {
+	// Service Configuration
+	ServiceName     string        `env:"SERVICE_NAME" envDefault:"media-api"`
+	Environment     string        `env:"ENVIRONMENT" envDefault:"development"`
+	HTTPPort        int           `env:"MEDIA_API_PORT" envDefault:"8285"`
+	LogLevel        string        `env:"MEDIA_LOG_LEVEL" envDefault:"info"`
+	EnableTracing   bool          `env:"ENABLE_TRACING" envDefault:"false"`
+	OTLPEndpoint    string        `env:"OTEL_EXPORTER_OTLP_ENDPOINT" envDefault:""`
+	ShutdownTimeout time.Duration `env:"SHUTDOWN_TIMEOUT" envDefault:"10s"`
+
+	// Database - Read/Write Split (only the write DSN is required; neither has a default)
+	DBPostgresqlWriteDSN string `env:"DB_POSTGRESQL_WRITE_DSN,notEmpty"`
+	DBPostgresqlRead1DSN string `env:"DB_POSTGRESQL_READ1_DSN"` // Optional read replica
+
+	// Database Connection Pool
+	DBMaxIdleConns int           `env:"DB_MAX_IDLE_CONNS" envDefault:"5"`
+	DBMaxOpenConns int           `env:"DB_MAX_OPEN_CONNS" envDefault:"15"`
+	DBConnLifetime time.Duration `env:"DB_CONN_MAX_LIFETIME" envDefault:"30m"`
+
+	// API Configuration
+	APIURL string `env:"MEDIA_API_URL"`
+
+	// Storage Backend Selection
+	StorageBackend string `env:"MEDIA_STORAGE_BACKEND" envDefault:"s3"` // Options: "s3" or "local"; IsS3Storage also treats a blank value as "s3"
+
+	// Local Storage Configuration
+	LocalStoragePath    string `env:"MEDIA_LOCAL_STORAGE_PATH"`     // Path to store files (e.g., "/var/media" or "./media-data")
+	LocalStorageBaseURL string `env:"MEDIA_LOCAL_STORAGE_BASE_URL"` // Base URL for serving files (e.g., "http://localhost:8285/v1/files")
+
+	// S3 Storage Configuration (Load trims surrounding whitespace from the string settings except the region)
+	S3Endpoint       string        `env:"MEDIA_S3_ENDPOINT" envDefault:"https://s3.menlo.ai"`
+	S3PublicEndpoint string        `env:"MEDIA_S3_PUBLIC_ENDPOINT"` // When set, presigned URLs are rewritten to this scheme/host (and path prefix)
+	S3Region         string        `env:"MEDIA_S3_REGION" envDefault:"us-west-2"`
+	S3Bucket         string        `env:"MEDIA_S3_BUCKET"`
+	S3AccessKeyID    string        `env:"MEDIA_S3_ACCESS_KEY_ID"`     // AWS standard naming
+	S3SecretKey      string        `env:"MEDIA_S3_SECRET_ACCESS_KEY"` // AWS standard naming
+	S3UsePathStyle   bool          `env:"MEDIA_S3_USE_PATH_STYLE" envDefault:"true"`
+	S3PresignTTL     time.Duration `env:"MEDIA_S3_PRESIGN_TTL" envDefault:"5m"`
+
+	// Media Configuration
+	MaxMediaBytes      int64         `env:"MEDIA_MAX_BYTES" envDefault:"20971520"` // 20 MiB; Load replaces non-positive values with this default
+	ProxyDownload      bool          `env:"MEDIA_PROXY_DOWNLOAD" envDefault:"true"`
+	RetentionDays      int           `env:"MEDIA_RETENTION_DAYS" envDefault:"30"`
+	RemoteFetchTimeout time.Duration `env:"MEDIA_REMOTE_FETCH_TIMEOUT" envDefault:"15s"`
+
+	// GCS bucket name. NOTE(review): MEDIA_STORAGE_BACKEND documents only "s3" and "local" — confirm GCS is wired up.
+	GCSBucket string `env:"MEDIA_GCS_BUCKET"`
+
+	// Authentication
+	AuthEnabled bool   `env:"AUTH_ENABLED" envDefault:"false"`
+	AuthIssuer  string `env:"AUTH_ISSUER"` // Required by Load when AuthEnabled is true
+	Account     string `env:"ACCOUNT"` // Expected JWT "aud" value; the auth middleware skips the audience check when blank
+	AuthJWKSURL string `env:"AUTH_JWKS_URL"` // Required by Load when AuthEnabled is true
+}
+
+// Load fills a Config from the environment, trims the S3 settings and validates the auth settings.
+func Load() (*Config, error) {
+	var parsed Config
+	if err := env.Parse(&parsed); err != nil {
+		return nil, fmt.Errorf("parse env config: %w", err)
+	}
+	cfg := &parsed
+	s3Fields := []*string{&cfg.S3Bucket, &cfg.S3AccessKeyID, &cfg.S3SecretKey, &cfg.S3Endpoint, &cfg.S3PublicEndpoint}
+	for _, field := range s3Fields {
+		*field = strings.TrimSpace(*field)
+	}
+	if cfg.MaxMediaBytes <= 0 {
+		cfg.MaxMediaBytes = 20 * 1024 * 1024
+	}
+	if !cfg.AuthEnabled {
+		return cfg, nil
+	}
+	if strings.TrimSpace(cfg.AuthIssuer) == "" {
+		return nil, fmt.Errorf("AUTH_ISSUER is required when AUTH_ENABLED is true")
+	}
+	if strings.TrimSpace(cfg.AuthJWKSURL) == "" {
+		return nil, fmt.Errorf("AUTH_JWKS_URL is required when AUTH_ENABLED is true")
+	}
+	return cfg, nil
+}
+
+// GetDatabaseWriteDSN returns the write connection string read from DB_POSTGRESQL_WRITE_DSN.
+func (c *Config) GetDatabaseWriteDSN() string {
+	return c.DBPostgresqlWriteDSN
+}
+
+// GetDatabaseReadDSN returns the connection string to use for reads:
+// DB_POSTGRESQL_READ1_DSN when it is set, otherwise the write DSN
+// (meaning no replica is configured).
+func (c *Config) GetDatabaseReadDSN() string {
+	if replica := c.DBPostgresqlRead1DSN; replica != "" {
+		return replica
+	}
+	return c.GetDatabaseWriteDSN()
+}
+
+// Addr returns the HTTP listen address in ":<port>" form (all interfaces, port from MEDIA_API_PORT).
+func (c *Config) Addr() string {
+	return fmt.Sprintf(":%d", c.HTTPPort)
+}
+
+// IsLocalStorage returns true if StorageBackend is "local", ignoring case and surrounding whitespace.
+func (c *Config) IsLocalStorage() bool {
+	return strings.ToLower(strings.TrimSpace(c.StorageBackend)) == "local"
+}
+
+// IsS3Storage returns true if StorageBackend is "s3" (ignoring case and surrounding whitespace) or blank.
+func (c *Config) IsS3Storage() bool {
+	backend := strings.ToLower(strings.TrimSpace(c.StorageBackend))
+	return backend == "" || backend == "s3"
+}
diff --git a/services/media-api/internal/domain/media/model.go b/services/media-api/internal/domain/media/model.go
new file mode 100644
index 00000000..bea1c360
--- /dev/null
+++ b/services/media-api/internal/domain/media/model.go
@@ -0,0 +1,50 @@
+package media
+
+import "time"
+
+// MediaObject represents stored media metadata as exposed by the service layer.
+type MediaObject struct {
+	ID              string    `json:"id"` // Generated by mediaid.New()
+	StorageProvider string    `json:"storage_provider"` // The service currently always writes "s3"
+	StorageKey      string    `json:"storage_key"` // "images/<id>.<ext>" as built by Ingest and PrepareUpload
+	MimeType        string    `json:"mime"`
+	Bytes           int64     `json:"bytes"` // Content length; 0 for a prepared upload placeholder
+	Sha256          string    `json:"sha256"` // Hex SHA-256 of the content, or "pending_<id>" for a prepared upload
+	CreatedBy       string    `json:"created_by"` // User id supplied by the caller
+	RetentionUntil  time.Time `json:"retention_until"` // Creation time plus the configured retention days
+	CreatedAt       time.Time `json:"created_at"`
+	UpdatedAt       time.Time `json:"updated_at"`
+}
+
+// IngestRequest defines the payload for ingesting new media; Source is mandatory.
+type IngestRequest struct {
+	Source   Source `json:"source" binding:"required"`
+	Filename string `json:"filename"`
+	UserID   string `json:"user_id"` // Recorded as the object's CreatedBy
+}
+
+// Source describes the media source; Type selects which of DataURL or URL is read.
+type Source struct {
+	Type    string `json:"type" binding:"required"` // Case-insensitive: data_url/datauri/dataurl or remote_url/remoteuri/remote
+	DataURL string `json:"data_url"` // Base64 data URL, used for the data_url types
+	URL     string `json:"url"` // Remote location, used for the remote types
+}
+
+// ResolveResult carries a resolved payload. NOTE(review): no use is visible in the service code — confirm it is needed.
+type ResolveResult struct {
+	Payload []byte
+}
+
+// UploadPreparation contains the presigned upload URL and the reserved jan_id returned by PrepareUpload.
+type UploadPreparation struct {
+	ID        string `json:"id"`
+	UploadURL string `json:"upload_url"` // Presigned PUT URL, rewritten to the public endpoint when one is configured
+	MimeType  string `json:"mime_type"`
+	ExpiresIn int    `json:"expires_in"` // seconds, taken from the presign TTL
+}
+
+// PrepareUploadRequest defines the payload for requesting a presigned upload URL; MimeType is mandatory.
+type PrepareUploadRequest struct {
+	MimeType string `json:"mime_type" binding:"required"`
+	UserID   string `json:"user_id"`
+}
diff --git a/services/media-api/internal/domain/media/service.go b/services/media-api/internal/domain/media/service.go
new file mode 100644
index 00000000..f9768ed9
--- /dev/null
+++ b/services/media-api/internal/domain/media/service.go
@@ -0,0 +1,357 @@
+package media
+
+import (
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"encoding/base64"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/gabriel-vasile/mimetype"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/media-api/internal/config"
+	"jan-server/services/media-api/utils/mediaid"
+)
+
+var allowedMIMEs = map[string]string{ // accepted MIME type -> file extension used in the storage key
+	"image/jpeg": "jpg",
+	"image/png":  "png",
+	"image/webp": "webp",
+	"image/gif":  "gif",
+	"image/bmp":  "bmp",
+	"image/tiff": "tiff",
+}
+
+var placeholderPattern = regexp.MustCompile(`data:(image/[a-z0-9.+-]+);(jan_[A-Za-z0-9]+)`) // group 1: MIME type, group 2: media id
+
+// Repository defines persistence operations needed by the service.
+type Repository interface {
+	FindByHash(ctx context.Context, hash string) (*MediaObject, error) // Ingest treats (nil, nil) as "no such content yet"
+	Create(ctx context.Context, obj *MediaObject) error
+	GetByID(ctx context.Context, id string) (*MediaObject, error) // callers treat a nil object as "not found"
+}
+
+// Storage defines media storage operations; keys are the StorageKey values kept in MediaObject.
+type Storage interface {
+	Upload(ctx context.Context, key string, body io.Reader, size int64, contentType string) error
+	PresignGet(ctx context.Context, key string, ttl time.Duration) (string, error)
+	PresignPut(ctx context.Context, key string, contentType string, ttl time.Duration) (string, error)
+	Download(ctx context.Context, key string) (io.ReadCloser, string, error) // string result is the content type; may be empty
+	SupportsPresignedUploads() bool
+}
+
+// Service orchestrates media ingestion and retrieval on top of a Repository and a Storage.
+type Service struct {
+	cfg        *config.Config
+	repo       Repository
+	storage    Storage
+	log        zerolog.Logger // tagged with component=media-service by NewService
+	httpClient *http.Client // used for remote fetches; timeout comes from cfg.RemoteFetchTimeout
+}
+
+// NewService wires a media Service from its collaborators.
+//
+// The logger is tagged with component=media-service, and the HTTP client
+// used for remote fetches takes its overall timeout from
+// cfg.RemoteFetchTimeout.
+func NewService(cfg *config.Config, repo Repository, storage Storage, log zerolog.Logger) *Service {
+	client := &http.Client{Timeout: cfg.RemoteFetchTimeout}
+	svc := &Service{cfg: cfg, repo: repo, storage: storage, httpClient: client}
+	svc.log = log.With().Str("component", "media-service").Logger()
+	return svc
+}
+
+// Ingest stores media and returns its metadata; the bool is true when identical content already existed.
+func (s *Service) Ingest(ctx context.Context, req IngestRequest) (*MediaObject, bool, error) {
+	data, err := s.loadBytes(ctx, req.Source)
+	if err != nil {
+		return nil, false, err
+	}
+
+	size := int64(len(data))
+	switch {
+	case size == 0:
+		return nil, false, errors.New("file is empty")
+	case size > s.cfg.MaxMediaBytes:
+		return nil, false, fmt.Errorf("file exceeds max size of %d bytes", s.cfg.MaxMediaBytes)
+	}
+
+	mimeType := mimetype.Detect(data).String()
+	ext, ok := allowedMIMEs[mimeType]
+	if !ok {
+		return nil, false, fmt.Errorf("unsupported mime type %s", mimeType)
+	}
+
+	digest := sha256.Sum256(data)
+	hash := fmt.Sprintf("%x", digest[:])
+
+	existing, err := s.repo.FindByHash(ctx, hash)
+	if err != nil {
+		return nil, false, err
+	}
+	if existing != nil {
+		return existing, true, nil
+	}
+
+	id := mediaid.New()
+	key := fmt.Sprintf("images/%s.%s", id, ext)
+	if err := s.storage.Upload(ctx, key, bytes.NewReader(data), size, mimeType); err != nil {
+		return nil, false, err
+	}
+
+	obj := &MediaObject{
+		ID:              id,
+		StorageProvider: "s3",
+		StorageKey:      key,
+		MimeType:        mimeType,
+		Bytes:           size,
+		Sha256:          hash,
+		CreatedBy:       req.UserID,
+		RetentionUntil:  time.Now().Add(time.Duration(s.cfg.RetentionDays) * 24 * time.Hour),
+	}
+	if err := s.repo.Create(ctx, obj); err != nil {
+		return nil, false, err
+	}
+	return obj, false, nil
+}
+
+// ResolvePayload replaces every data:<mime>;jan_<id> placeholder in payload
+// with a presigned GET URL (rewritten to the public endpoint when one is
+// configured). Payloads without placeholders are returned as-is; an unknown
+// id, a repository error or a presign error aborts the whole call.
+func (s *Service) ResolvePayload(ctx context.Context, payload json.RawMessage) (json.RawMessage, error) {
+	text := string(payload)
+	// Each entry is [matchStart, matchEnd, mimeStart, mimeEnd, idStart, idEnd].
+	locs := placeholderPattern.FindAllStringSubmatchIndex(text, -1)
+	if len(locs) == 0 {
+		return payload, nil
+	}
+
+	// Keyed by media id so a placeholder that occurs several times is looked
+	// up and presigned once. The URL depends only on the object's storage key,
+	// so the MIME part of the placeholder does not need to be in the key.
+	resolved := make(map[string]string, len(locs))
+	var out strings.Builder
+	out.Grow(len(text))
+	last := 0
+	for _, loc := range locs {
+		id := text[loc[4]:loc[5]]
+		link, ok := resolved[id]
+		if !ok {
+			obj, err := s.repo.GetByID(ctx, id)
+			if err != nil {
+				return nil, err
+			}
+			if obj == nil {
+				return nil, fmt.Errorf("unknown media id %s", id)
+			}
+			signed, err := s.storage.PresignGet(ctx, obj.StorageKey, s.cfg.S3PresignTTL)
+			if err != nil {
+				return nil, err
+			}
+			link = s.externalizeURL(signed)
+			resolved[id] = link
+		}
+		out.WriteString(text[last:loc[0]])
+		out.WriteString(link)
+		last = loc[1]
+	}
+	out.WriteString(text[last:])
+
+	return json.RawMessage(out.String()), nil
+}
+
+// Download looks up the media record and opens its stored content for proxying.
+func (s *Service) Download(ctx context.Context, id string) (io.ReadCloser, string, error) {
+	obj, err := s.repo.GetByID(ctx, id)
+	switch {
+	case err != nil:
+		return nil, "", err
+	case obj == nil:
+		return nil, "", fmt.Errorf("media %s not found", id)
+	}
+	body, contentType, err := s.storage.Download(ctx, obj.StorageKey)
+	if err != nil {
+		return nil, "", err
+	}
+	if contentType == "" {
+		contentType = obj.MimeType
+	}
+	return body, contentType, nil
+}
+
+// Presign returns a short-lived (S3PresignTTL) download URL for the media object.
+func (s *Service) Presign(ctx context.Context, id string) (string, error) {
+	obj, err := s.repo.GetByID(ctx, id)
+	switch {
+	case err != nil:
+		return "", err
+	case obj == nil:
+		return "", fmt.Errorf("media %s not found", id)
+	}
+	signed, err := s.storage.PresignGet(ctx, obj.StorageKey, s.cfg.S3PresignTTL)
+	if err != nil {
+		return "", err
+	}
+	return s.externalizeURL(signed), nil
+}
+
+// PrepareUpload reserves a jan_id for a client-side upload: it presigns a PUT
+// URL for the new storage key and records a placeholder row for the id.
+// Only MIME types listed in allowedMIMEs are accepted.
+func (s *Service) PrepareUpload(ctx context.Context, mimeType string, userID string) (*UploadPreparation, error) {
+	ext, allowed := allowedMIMEs[mimeType]
+	if !allowed {
+		return nil, fmt.Errorf("unsupported mime type %s", mimeType)
+	}
+
+	id := mediaid.New()
+	key := fmt.Sprintf("images/%s.%s", id, ext)
+	ttl := s.cfg.S3PresignTTL
+
+	signed, err := s.storage.PresignPut(ctx, key, mimeType, ttl)
+	if err != nil {
+		return nil, err
+	}
+
+	// Bytes is 0 and Sha256 is a per-id placeholder that satisfies the unique index.
+	// NOTE(review): nothing visible here updates them after the upload — confirm.
+	placeholder := &MediaObject{
+		ID:              id,
+		StorageProvider: "s3",
+		StorageKey:      key,
+		MimeType:        mimeType,
+		Bytes:           0,
+		Sha256:          fmt.Sprintf("pending_%s", id),
+		CreatedBy:       userID,
+		RetentionUntil:  time.Now().Add(time.Duration(s.cfg.RetentionDays) * 24 * time.Hour),
+	}
+
+	if err := s.repo.Create(ctx, placeholder); err != nil {
+		return nil, err
+	}
+
+	return &UploadPreparation{
+		ID:        id,
+		UploadURL: s.externalizeURL(signed),
+		MimeType:  mimeType,
+		ExpiresIn: int(ttl.Seconds()),
+	}, nil
+}
+
+// externalizeURL rewrites raw to use the scheme, host and optional path
+// prefix of MEDIA_S3_PUBLIC_ENDPOINT. raw is returned untouched when it is
+// blank, when no public endpoint is configured, when either URL fails to
+// parse, or when the endpoint lacks a scheme or host.
+func (s *Service) externalizeURL(raw string) string {
+	endpoint := strings.TrimSpace(s.cfg.S3PublicEndpoint)
+	if endpoint == "" || strings.TrimSpace(raw) == "" {
+		return raw
+	}
+
+	target, targetErr := url.Parse(raw)
+	public, publicErr := url.Parse(endpoint)
+	if targetErr != nil || publicErr != nil || public.Scheme == "" || public.Host == "" {
+		return raw
+	}
+
+	// Only the location changes; the query string is left untouched.
+	target.Scheme, target.Host = public.Scheme, public.Host
+	// A non-root endpoint path acts as a prefix for the object path.
+	if prefix := strings.TrimSpace(public.Path); prefix != "" && prefix != "/" {
+		target.Path = joinPublicPath(prefix, target.Path)
+	}
+
+	return target.String()
+}
+
+// SupportsPresignedUploads reports the storage backend's own answer on presigned upload support.
+func (s *Service) SupportsPresignedUploads() bool {
+	return s.storage.SupportsPresignedUploads()
+}
+
+// joinPublicPath prepends basePath to objectPath, joined by "/" and starting
+// with "/". An empty or "/" basePath leaves objectPath as is, apart from
+// guaranteeing its leading slash.
+func joinPublicPath(basePath, objectPath string) string {
+	prefix := strings.TrimSuffix(basePath, "/")
+	switch {
+	case prefix == "":
+		return ensureLeadingSlash(objectPath)
+	case !strings.HasPrefix(prefix, "/"):
+		prefix = "/" + prefix
+	}
+	if rest := strings.TrimPrefix(objectPath, "/"); rest != "" {
+		return prefix + "/" + rest
+	}
+	return prefix
+}
+
+// ensureLeadingSlash returns path unchanged when it already starts with "/",
+// and with "/" prepended otherwise.
+func ensureLeadingSlash(path string) string {
+	if strings.HasPrefix(path, "/") {
+		return path
+	}
+	// The empty path lands here too and becomes "/".
+	return "/" + path
+}
+
+// loadBytes reads the media bytes from the data URL or remote URL selected by the case-insensitive source type.
+func (s *Service) loadBytes(ctx context.Context, source Source) ([]byte, error) {
+	switch strings.ToLower(source.Type) {
+	case "data_url", "datauri", "dataurl":
+		return decodeDataURL(source.DataURL)
+	case "remote_url", "remoteuri", "remote":
+		return s.fetchRemote(ctx, source.URL)
+	}
+	return nil, fmt.Errorf("unknown source type %s", source.Type)
+}
+
+// decodeDataURL base64-decodes the part after the first comma of a data URL whose header contains ";base64".
+func decodeDataURL(value string) ([]byte, error) {
+	header, payload, found := strings.Cut(value, ",")
+	switch {
+	case value == "":
+		return nil, errors.New("data_url is required")
+	case !found:
+		return nil, errors.New("invalid data url")
+	case !strings.Contains(header, ";base64"):
+		return nil, errors.New("data url must be base64 encoded")
+	}
+	return base64.StdEncoding.DecodeString(payload)
+}
+
+// fetchRemote downloads url with the service HTTP client and returns at most
+// MaxMediaBytes+1 bytes of the body, which lets the caller's size check
+// detect an oversized file without buffering all of it.
+// NOTE(review): url is caller-supplied and fetched as-is; no allow-list or
+// private-address filtering is visible here — confirm SSRF is handled elsewhere.
+func (s *Service) fetchRemote(ctx context.Context, url string) ([]byte, error) {
+	if url == "" {
+		return nil, errors.New("url is required")
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := s.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	// Only a 2xx response carries the requested media; 1xx/3xx bodies are not the file.
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		return nil, fmt.Errorf("remote fetch error: %s", resp.Status)
+	}
+	return io.ReadAll(io.LimitReader(resp.Body, s.cfg.MaxMediaBytes+1))
+}
diff --git a/services/media-api/internal/infrastructure/auth/validator.go b/services/media-api/internal/infrastructure/auth/validator.go
new file mode 100644
index 00000000..a3e15009
--- /dev/null
+++ b/services/media-api/internal/infrastructure/auth/validator.go
@@ -0,0 +1,148 @@
+package auth
+
+import (
+	"context"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/MicahParks/keyfunc/v2"
+	"github.com/gin-gonic/gin"
+	"github.com/golang-jwt/jwt/v5"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/media-api/internal/config"
+)
+
+// Validator validates JWTs using JWKS.
+type Validator struct {
+	cfg  *config.Config
+	log  zerolog.Logger
+	jwks *keyfunc.JWKS // left nil by NewValidator when auth is disabled
+}
+
+// NewValidator builds the JWT validator.
+//
+// With auth disabled it returns a Validator without keys and does not fetch
+// anything. Otherwise it loads the JWKS from AUTH_JWKS_URL with a one-hour
+// refresh interval and refresh-on-unknown-kid enabled; refresh errors are
+// logged through log.
+func NewValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*Validator, error) {
+	validator := &Validator{cfg: cfg, log: log}
+	if !cfg.AuthEnabled {
+		return validator, nil
+	}
+
+	jwks, err := keyfunc.Get(cfg.AuthJWKSURL, keyfunc.Options{
+		Ctx:               ctx,
+		RefreshInterval:   time.Hour,
+		RefreshUnknownKID: true,
+		RefreshErrorHandler: func(err error) {
+			log.Error().Err(err).Msg("jwks refresh error")
+		},
+	})
+	if err != nil {
+		return nil, err
+	}
+	validator.jwks = jwks
+	return validator, nil
+}
+
+// Middleware enforces JWT auth when enabled; otherwise (or on a nil
+// Validator) the returned handler just calls c.Next().
+//
+// A request passes when its Bearer token verifies against the JWKS with an
+// RS256/RS384/RS512 signature, its "iss" is an accepted issuer and, if ACCOUNT
+// is configured and the token has an "aud" claim, that claim contains ACCOUNT.
+// NOTE(review): tokens without "aud" skip the audience check, and the two
+// hard-coded local Keycloak issuers are accepted everywhere — confirm intent.
+func (v *Validator) Middleware() gin.HandlerFunc {
+	if v == nil || !v.cfg.AuthEnabled {
+		return func(c *gin.Context) {
+			c.Next()
+		}
+	}
+
+	return func(c *gin.Context) {
+		tokenString := bearerToken(c.GetHeader("Authorization"))
+		if tokenString == "" {
+			abortUnauthorized(c, "missing bearer token")
+			return
+		}
+
+		token, err := jwt.Parse(tokenString, v.jwks.Keyfunc,
+			jwt.WithValidMethods([]string{"RS256", "RS384", "RS512"}))
+		if err != nil || !token.Valid {
+			abortUnauthorized(c, "invalid token")
+			return
+		}
+		claims, ok := token.Claims.(jwt.MapClaims)
+		if !ok {
+			abortUnauthorized(c, "invalid token claims")
+			return
+		}
+
+		// The issuer check is skipped when AUTH_ISSUER is blank.
+		if issuer := strings.TrimSpace(v.cfg.AuthIssuer); issuer != "" {
+			issuerClaim, _ := claims["iss"].(string)
+			switch issuerClaim {
+			case issuer, "http://localhost:8085/realms/jan", "http://keycloak:8085/realms/jan":
+				// accepted issuer
+			default:
+				abortUnauthorized(c, "invalid token issuer")
+				return
+			}
+		}
+
+		// "aud" may be a string or a list; any other shape is rejected.
+		if audience := strings.TrimSpace(v.cfg.Account); audience != "" {
+			if audClaim, present := claims["aud"]; present {
+				matched := false
+				switch aud := audClaim.(type) {
+				case string:
+					matched = aud == audience
+				case []any:
+					for _, item := range aud {
+						if s, isStr := item.(string); isStr && s == audience {
+							matched = true
+							break
+						}
+					}
+				}
+				if !matched {
+					abortUnauthorized(c, "invalid token audience")
+					return
+				}
+			}
+		}
+
+		// Expose the parsed token to downstream handlers.
+		c.Set("auth_token", token)
+		c.Next()
+	}
+}
+
+// Ready reports whether the validator can serve requests: always true for a
+// nil Validator or with auth disabled, otherwise true only when a JWKS has
+// been loaded.
+func (v *Validator) Ready() bool {
+	authOff := v == nil || !v.cfg.AuthEnabled
+	return authOff || v.jwks != nil
+}
+
+// bearerToken extracts the credential from an Authorization header value of
+// the form "Bearer <token>". The scheme is matched case-insensitively and the
+// token is trimmed; anything else, including an empty header, yields "".
+func bearerToken(header string) string {
+	scheme, credential, found := strings.Cut(header, " ")
+	if !found || !strings.EqualFold(scheme, "Bearer") {
+		return ""
+	}
+	return strings.TrimSpace(credential)
+}
+
+// abortUnauthorized stops the handler chain with HTTP 401 and a JSON body of
+// the form {"error": message}.
+func abortUnauthorized(c *gin.Context, message string) {
+	c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": message})
+}
diff --git a/services/media-api/internal/infrastructure/database/database.go b/services/media-api/internal/infrastructure/database/database.go
new file mode 100644
index 00000000..ae0e8b81
--- /dev/null
+++ b/services/media-api/internal/infrastructure/database/database.go
@@ -0,0 +1,115 @@
+package database
+
+import (
+	"database/sql"
+	"errors"
+	"fmt"
+	"net/url"
+	"strings"
+	"time"
+
+	_ "github.com/lib/pq"
+	"gorm.io/driver/postgres"
+	"gorm.io/gorm"
+	gormlogger "gorm.io/gorm/logger"
+	"gorm.io/gorm/schema"
+)
+
+// Config controls GORM/PostgreSQL connectivity.
+type Config struct {
+	DSN             string // required; Connect rejects an empty value
+	MaxIdleConns    int // applied only when > 0
+	MaxOpenConns    int // applied only when > 0
+	ConnMaxLifetime time.Duration // applied only when > 0
+	LogLevel        gormlogger.LogLevel // zero value is replaced with gormlogger.Warn
+}
+
+// Connect initializes a GORM connection using the provided config.
+// search_path=media_api is appended to the DSN unless it already mentions
+// search_path, and for URL DSNs the target database is created when missing.
+func Connect(cfg Config) (*gorm.DB, error) {
+	if cfg.DSN == "" {
+		return nil, fmt.Errorf("database DSN is empty")
+	}
+
+	dsn := cfg.DSN
+	isURL := strings.HasPrefix(dsn, "postgres://") || strings.HasPrefix(dsn, "postgresql://")
+	if !strings.Contains(dsn, "search_path") {
+		switch {
+		case !isURL:
+			dsn += " search_path=media_api" // keyword/value DSN: settings are space-separated
+		case strings.Contains(dsn, "?"):
+			dsn += "&search_path=media_api"
+		default:
+			dsn += "?search_path=media_api"
+		}
+	}
+
+	if isURL { // ensureDatabaseExists rewrites the URL path, so it only suits URL DSNs
+		if err := ensureDatabaseExists(dsn); err != nil {
+			return nil, fmt.Errorf("ensure database: %w", err)
+		}
+	}
+	if cfg.LogLevel == 0 {
+		cfg.LogLevel = gormlogger.Warn
+	}
+	db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
+		PrepareStmt:    true,
+		NamingStrategy: schema.NamingStrategy{SingularTable: true},
+		Logger:         gormlogger.Default.LogMode(cfg.LogLevel),
+	})
+	if err != nil {
+		return nil, fmt.Errorf("connect database: %w", err)
+	}
+	sqlDB, err := db.DB()
+	if err != nil {
+		return nil, fmt.Errorf("retrieve sql db: %w", err)
+	}
+	if cfg.MaxIdleConns > 0 {
+		sqlDB.SetMaxIdleConns(cfg.MaxIdleConns)
+	}
+	if cfg.MaxOpenConns > 0 {
+		sqlDB.SetMaxOpenConns(cfg.MaxOpenConns)
+	}
+	if cfg.ConnMaxLifetime > 0 {
+		sqlDB.SetConnMaxLifetime(cfg.ConnMaxLifetime)
+	}
+	return db, nil
+}
+
+// ensureDatabaseExists creates the database named in a postgres:// or postgresql:// URL DSN when it is
+// missing, working through the "postgres" database on the same server. Other DSN forms are skipped.
+func ensureDatabaseExists(dsn string) error {
+	u, err := url.Parse(dsn)
+	// url.Parse accepts keyword/value DSNs as a bare path, so the scheme decides, not the parse error.
+	if err != nil || (u.Scheme != "postgres" && u.Scheme != "postgresql") {
+		return nil
+	}
+
+	dbName := strings.TrimPrefix(u.Path, "/")
+	if dbName == "" || dbName == "postgres" {
+		return nil
+	}
+	adminURL := *u
+	adminURL.Path = "/postgres"
+	sqlDB, err := sql.Open("postgres", adminURL.String())
+	if err != nil {
+		return err
+	}
+	defer sqlDB.Close()
+
+	var exists bool
+	err = sqlDB.QueryRow("SELECT EXISTS (SELECT 1 FROM pg_database WHERE datname = $1)", dbName).Scan(&exists)
+	if err != nil && !errors.Is(err, sql.ErrNoRows) {
+		return err
+	}
+	if exists {
+		return nil
+	}
+	_, err = sqlDB.Exec("CREATE DATABASE " + pqQuoteIdentifier(dbName))
+	return err
+}
+
+func pqQuoteIdentifier(ident string) string { // wraps ident in double quotes, doubling any embedded double quote
+	return `"` + strings.ReplaceAll(ident, `"`, `""`) + `"`
+}
diff --git a/services/media-api/internal/infrastructure/database/entities/media_object.go b/services/media-api/internal/infrastructure/database/entities/media_object.go
new file mode 100644
index 00000000..654b401c
--- /dev/null
+++ b/services/media-api/internal/infrastructure/database/entities/media_object.go
@@ -0,0 +1,21 @@
+package entities
+
+import "time"
+
+// MediaObject represents the persisted media metadata (table media_objects).
+type MediaObject struct {
+	ID              string `gorm:"type:varchar(40);primaryKey"`
+	StorageProvider string `gorm:"type:varchar(32);not null"`
+	StorageKey      string `gorm:"type:varchar(255);not null"`
+	MimeType        string `gorm:"type:varchar(64);not null"`
+	Bytes           int64  `gorm:"not null"`
+	Sha256          string `gorm:"type:char(64);uniqueIndex;not null"` // unique; the service stores "pending_<id>" here for prepared uploads
+	CreatedBy       string `gorm:"type:varchar(64)"`
+	RetentionUntil  time.Time
+	CreatedAt       time.Time `gorm:"autoCreateTime"`
+	UpdatedAt       time.Time `gorm:"autoUpdateTime"`
+}
+
+func (MediaObject) TableName() string { // fixed table name for this entity
+	return "media_objects"
+}
diff --git a/services/media-api/internal/infrastructure/database/migrate.go b/services/media-api/internal/infrastructure/database/migrate.go
new file mode 100644
index 00000000..a4163395
--- /dev/null
+++ b/services/media-api/internal/infrastructure/database/migrate.go
@@ -0,0 +1,120 @@
+package database
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io/fs"
+
+	"github.com/golang-migrate/migrate/v4"
+	"github.com/golang-migrate/migrate/v4/database/postgres"
+	iofs "github.com/golang-migrate/migrate/v4/source/iofs"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+
+	"jan-server/services/media-api/migrations"
+)
+
+// AutoMigrate applies all pending SQL migrations bundled with the service, using ctx to set up the connection.
+func AutoMigrate(ctx context.Context, db *gorm.DB, log zerolog.Logger) (err error) {
+	// err is a named result so the deferred Close calls below can report their failure to the caller.
+	log.Info().Msg("Scanning migration files...")
+	entries, err := fs.ReadDir(migrations.FS, ".")
+	if err != nil {
+		return fmt.Errorf("read migration directory: %w", err)
+	}
+
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			log.Info().Str("file", entry.Name()).Msg("Found migration file")
+		}
+	}
+
+	sqlDB, err := db.DB()
+	if err != nil {
+		return fmt.Errorf("retrieve sql db: %w", err)
+	}
+
+	// Ensure media_api schema exists before running migrations; a failure here is logged, not fatal
+	if err := db.Exec("CREATE SCHEMA IF NOT EXISTS media_api").Error; err != nil {
+		log.Warn().Err(err).Msg("Failed to create media_api schema, may already exist")
+	} else {
+		log.Info().Msg("Created media_api schema")
+	}
+
+	conn, err := sqlDB.Conn(ctx)
+	if err != nil {
+		return fmt.Errorf("acquire dedicated connection: %w", err)
+	}
+
+	driver, err := postgres.WithConnection(ctx, conn, &postgres.Config{
+		MigrationsTable: "schema_migrations",
+		SchemaName:      "media_api",
+	})
+	if err != nil {
+		_ = conn.Close()
+		return fmt.Errorf("initialize postgres driver: %w", err)
+	}
+	defer func() {
+		if closeErr := driver.Close(); err == nil && closeErr != nil {
+			err = fmt.Errorf("close migration connection: %w", closeErr)
+		}
+	}()
+
+	source, err := iofs.New(migrations.FS, ".")
+	if err != nil {
+		return fmt.Errorf("load migrations: %w", err)
+	}
+	defer func() {
+		if closeErr := source.Close(); err == nil && closeErr != nil {
+			err = fmt.Errorf("close migration source: %w", closeErr)
+		}
+	}()
+
+	migrator, err := migrate.NewWithInstance("iofs", source, "postgres", driver)
+	if err != nil {
+		return fmt.Errorf("create migrator: %w", err)
+	}
+
+	// Check current version and dirty state
+	version, dirty, err := migrator.Version()
+	if err != nil && !errors.Is(err, migrate.ErrNilVersion) {
+		log.Warn().Err(err).Msg("Error getting migration version")
+	} else if errors.Is(err, migrate.ErrNilVersion) {
+		log.Info().Msg("No migrations have been applied yet")
+	} else {
+		log.Info().Uint("version", version).Bool("dirty", dirty).Msg("Current migration state")
+	}
+
+	// If database is dirty, force the version to allow re-running
+	if dirty {
+		log.Warn().Uint("version", version).Msg("Database is in dirty state, forcing version...")
+		// Force to the current version to clear dirty state
+		if forceErr := migrator.Force(int(version)); forceErr != nil {
+			return fmt.Errorf("force version %d to clear dirty state: %w", version, forceErr)
+		}
+		log.Info().Msg("Dirty state cleared")
+	}
+
+	log.Info().Msg("Applying migrations...")
+	err = migrator.Up()
+	if err != nil {
+		if errors.Is(err, migrate.ErrNoChange) {
+			log.Info().Msg("No new migrations to apply")
+		} else {
+			log.Error().Err(err).Msg("Failed to apply migrations")
+			return fmt.Errorf("apply migrations: %w", err)
+		}
+	} else {
+		log.Info().Msg("Migrations applied successfully")
+	}
+
+	// Get final version
+	finalVersion, _, versionErr := migrator.Version()
+	if versionErr == nil {
+		log.Info().Uint("version", finalVersion).Msg("Current migration version")
+	}
+
+	log.Info().Msg("applied media object migrations")
+	return nil
+}
diff --git a/services/media-api/internal/infrastructure/logger/logger.go b/services/media-api/internal/infrastructure/logger/logger.go
new file mode 100644
index 00000000..0a1fde27
--- /dev/null
+++ b/services/media-api/internal/infrastructure/logger/logger.go
@@ -0,0 +1,40 @@
+package logger
+
+import (
+	"os"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+
+	"jan-server/services/media-api/internal/config"
+)
+
+// New creates a zerolog.Logger for the media service: console output on stdout, tagged with service and environment.
+func New(cfg *config.Config) zerolog.Logger {
+	level := parseLevel(cfg.LogLevel)
+	output := zerolog.ConsoleWriter{
+		Out:        os.Stdout,
+		TimeFormat: time.RFC3339,
+	}
+	base := log.Output(output).
+		With().
+		Timestamp().
+		Str("service", cfg.ServiceName).
+		Str("environment", cfg.Environment).
+		Logger().
+		Level(level)
+	return base
+}
+
+// parseLevel maps a level name (case-insensitive) to a zerolog.Level,
+// defaulting to info when the name is empty or not recognized.
+func parseLevel(levelString string) zerolog.Level {
+	if levelString != "" {
+		if level, err := zerolog.ParseLevel(strings.ToLower(levelString)); err == nil {
+			return level
+		}
+	}
+	return zerolog.InfoLevel
+}
diff --git a/services/media-api/internal/infrastructure/observability/observability.go b/services/media-api/internal/infrastructure/observability/observability.go
new file mode 100644
index 00000000..5cfc4c80
--- /dev/null
+++ b/services/media-api/internal/infrastructure/observability/observability.go
@@ -0,0 +1,56 @@
+package observability
+
+import (
+	"context"
+
+	"github.com/rs/zerolog"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
+
+	"jan-server/services/media-api/internal/config"
+)
+
+// Shutdown releases telemetry resources; Setup returns a no-op Shutdown when tracing is disabled.
+type Shutdown func(ctx context.Context) error
+
+// Setup configures OpenTelemetry tracing if enabled and returns the matching Shutdown (a no-op when disabled).
+func Setup(ctx context.Context, cfg *config.Config, log zerolog.Logger) (Shutdown, error) {
+	if !cfg.EnableTracing || cfg.OTLPEndpoint == "" {
+		log.Info().Msg("Tracing disabled")
+		return func(context.Context) error { return nil }, nil
+	}
+
+	// NOTE(review): WithEndpoint expects host[:port] without a scheme and WithInsecure
+	// forces plain HTTP — confirm OTEL_EXPORTER_OTLP_ENDPOINT is supplied in that form.
+	exporter, err := otlptracehttp.New(ctx,
+		otlptracehttp.WithEndpoint(cfg.OTLPEndpoint),
+		otlptracehttp.WithInsecure(),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceName(cfg.ServiceName),
+			semconv.DeploymentEnvironment(cfg.Environment),
+		),
+	)
+	if err != nil {
+		// The exporter already exists at this point; stop it instead of leaking it.
+		_ = exporter.Shutdown(ctx)
+		return nil, err
+	}
+
+	tp := sdktrace.NewTracerProvider(
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+		sdktrace.WithBatcher(exporter),
+		sdktrace.WithResource(res),
+	)
+	otel.SetTracerProvider(tp)
+	log.Info().Str("endpoint", cfg.OTLPEndpoint).Msg("Tracing enabled")
+	return tp.Shutdown, nil
+}
diff --git a/services/media-api/internal/infrastructure/repository/media/repository.go b/services/media-api/internal/infrastructure/repository/media/repository.go
new file mode 100644
index 00000000..2fa1b4d7
--- /dev/null
+++ b/services/media-api/internal/infrastructure/repository/media/repository.go
@@ -0,0 +1,107 @@
+package media
+
+import (
+	"context"
+
+	"gorm.io/gorm"
+
+	domain "jan-server/services/media-api/internal/domain/media"
+	"jan-server/services/media-api/internal/infrastructure/database/entities"
+	"jan-server/services/media-api/internal/utils/platformerrors"
+)
+
+// Repository handles media object persistence.
+// All queries go through the wrapped *gorm.DB handle.
+type Repository struct {
+	db *gorm.DB
+}
+
+// NewRepository returns a Repository that issues its queries through db.
+func NewRepository(db *gorm.DB) *Repository {
+	repo := new(Repository)
+	repo.db = db
+	return repo
+}
+
+// FindByHash looks up the media object whose sha256 column equals hash.
+// A missing row is not treated as an error: it yields (nil, nil). Any other
+// database failure is wrapped in a repository-layer platform error.
+func (r *Repository) FindByHash(ctx context.Context, hash string) (*domain.MediaObject, error) {
+	var row entities.MediaObject
+	lookupErr := r.db.WithContext(ctx).Where("sha256 = ?", hash).First(&row).Error
+
+	switch {
+	case lookupErr == nil:
+		found := mapEntity(row)
+		return &found, nil
+	case lookupErr == gorm.ErrRecordNotFound:
+		return nil, nil
+	default:
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to find media by hash",
+			lookupErr,
+			"7a8f3d2e-4b1c-4a9e-8f7d-2c3e4f5a6b7c",
+		)
+	}
+}
+
+func (r *Repository) Create(ctx context.Context, obj *domain.MediaObject) error {
+	entity := entities.MediaObject{
+		ID:              obj.ID,
+		StorageProvider: obj.StorageProvider,
+		StorageKey:      obj.StorageKey,
+		MimeType:        obj.MimeType,
+		Bytes:           obj.Bytes,
+		Sha256:          obj.Sha256,
+		CreatedBy:       obj.CreatedBy,
+		RetentionUntil:  obj.RetentionUntil,
+	}
+	err := r.db.WithContext(ctx).Create(&entity).Error
+	if err != nil {
+		return platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to create media object",
+			err,
+			"9b2e4f5a-6c7d-4e8f-9a0b-1c2d3e4f5a6b",
+		)
+	}
+	return nil
+}
+
+// GetByID loads the media object with the given id.
+// Unlike FindByHash, a missing row is reported as an error: a platform error
+// of type ErrorTypeNotFound. Other database failures become
+// ErrorTypeDatabaseError.
+func (r *Repository) GetByID(ctx context.Context, id string) (*domain.MediaObject, error) {
+	var row entities.MediaObject
+	lookupErr := r.db.WithContext(ctx).Where("id = ?", id).First(&row).Error
+
+	switch {
+	case lookupErr == nil:
+		found := mapEntity(row)
+		return &found, nil
+	case lookupErr == gorm.ErrRecordNotFound:
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeNotFound,
+			"media object not found",
+			lookupErr,
+			"1c2d3e4f-5a6b-4c7d-8e9f-0a1b2c3d4e5f",
+		)
+	default:
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to get media by id",
+			lookupErr,
+			"2d3e4f5a-6b7c-4d8e-9f0a-1b2c3d4e5f6a",
+		)
+	}
+}
+
+func mapEntity(entity entities.MediaObject) domain.MediaObject {
+	return domain.MediaObject{
+		ID:              entity.ID,
+		StorageProvider: entity.StorageProvider,
+		StorageKey:      entity.StorageKey,
+		MimeType:        entity.MimeType,
+		Bytes:           entity.Bytes,
+		Sha256:          entity.Sha256,
+		CreatedBy:       entity.CreatedBy,
+		RetentionUntil:  entity.RetentionUntil,
+		CreatedAt:       entity.CreatedAt,
+		UpdatedAt:       entity.UpdatedAt,
+	}
+}
diff --git a/services/media-api/internal/infrastructure/storage/local_storage.go b/services/media-api/internal/infrastructure/storage/local_storage.go
new file mode 100644
index 00000000..fe47687a
--- /dev/null
+++ b/services/media-api/internal/infrastructure/storage/local_storage.go
@@ -0,0 +1,205 @@
+package storage
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+
+	"jan-server/services/media-api/internal/config"
+)
+
+// errLocalStorageDisabled is returned by ensureEnabled when the storage was
+// constructed without a base path.
+var errLocalStorageDisabled = errors.New("local storage is not configured; set MEDIA_LOCAL_STORAGE_PATH to enable")
+
+// LocalStorage handles uploads and downloads to local filesystem.
+type LocalStorage struct {
+	// basePath is the directory under which storage keys are resolved.
+	basePath string
+	// baseURL, when non-empty, is the prefix PresignGet uses to build URLs.
+	baseURL  string
+	log      zerolog.Logger
+	// disabled is true when no storage path was configured.
+	disabled bool
+}
+
+// NewLocalStorage creates a new local filesystem storage backend.
+//
+// A blank cfg.LocalStoragePath yields a disabled storage and a nil error.
+// Otherwise the directory is created if needed (mode 0755) and the storage is
+// returned ready for use; a failure to create the directory is returned as an
+// error.
+func NewLocalStorage(cfg *config.Config, log zerolog.Logger) (*LocalStorage, error) {
+	scoped := log.With().Str("component", "local-storage").Logger()
+
+	root := strings.TrimSpace(cfg.LocalStoragePath)
+	if root == "" {
+		scoped.Warn().Msg("MEDIA_LOCAL_STORAGE_PATH is not set; local storage will be disabled")
+		return &LocalStorage{log: scoped, disabled: true}, nil
+	}
+
+	// Make sure the storage root exists before handing the backend out.
+	if mkErr := os.MkdirAll(root, 0755); mkErr != nil {
+		return nil, fmt.Errorf("failed to create local storage directory: %w", mkErr)
+	}
+
+	publicURL := strings.TrimSpace(cfg.LocalStorageBaseURL)
+	scoped.Info().
+		Str("path", root).
+		Str("base_url", publicURL).
+		Msg("local storage initialized")
+
+	return &LocalStorage{
+		basePath: root,
+		baseURL:  publicURL,
+		log:      scoped,
+		disabled: false,
+	}, nil
+}
+
+// ensureEnabled returns errLocalStorageDisabled when the storage is disabled
+// and nil otherwise.
+func (l *LocalStorage) ensureEnabled() error {
+	if !l.disabled {
+		return nil
+	}
+	return errLocalStorageDisabled
+}
+
+// Upload stores a file to the local filesystem.
+//
+// key is a slash-separated path relative to the storage root; missing parent
+// directories are created. Keys that would resolve outside the storage root
+// (for example through ".." segments) are rejected. ctx, size and contentType
+// are not used by this implementation. If writing or closing the file fails,
+// the partially written file is removed and the error is returned.
+func (l *LocalStorage) Upload(ctx context.Context, key string, body io.Reader, size int64, contentType string) error {
+	if err := l.ensureEnabled(); err != nil {
+		return err
+	}
+
+	fullPath := filepath.Join(l.basePath, filepath.FromSlash(key))
+
+	// filepath.Join cleans the result, so a key containing ".." can climb out
+	// of the storage root; refuse anything that does not stay beneath it.
+	rel, err := filepath.Rel(l.basePath, fullPath)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return fmt.Errorf("invalid storage key: %s", key)
+	}
+
+	// Ensure directory exists
+	if err := os.MkdirAll(filepath.Dir(fullPath), 0755); err != nil {
+		return fmt.Errorf("failed to create directory: %w", err)
+	}
+
+	// Create the file
+	file, err := os.Create(fullPath)
+	if err != nil {
+		return fmt.Errorf("failed to create file: %w", err)
+	}
+
+	// Copy data to file, then close explicitly: a failed Close can mean the
+	// data never reached disk, so it must not be swallowed by a defer.
+	written, copyErr := io.Copy(file, body)
+	closeErr := file.Close()
+	if copyErr != nil || closeErr != nil {
+		// Best effort: do not leave a truncated object behind.
+		_ = os.Remove(fullPath)
+		if copyErr != nil {
+			return fmt.Errorf("failed to write file: %w", copyErr)
+		}
+		return fmt.Errorf("failed to close file: %w", closeErr)
+	}
+
+	l.log.Debug().
+		Str("key", key).
+		Int64("bytes", written).
+		Msg("file uploaded to local storage")
+
+	return nil
+}
+
+// PresignGet returns a direct URL to the file (no presigning needed for local storage).
+// If LocalStorageBaseURL is set the result is "<base URL>/<key>"; otherwise it
+// is a file:// URL built from the on-disk path. ctx and ttl are not used.
+// An error is returned when storage is disabled, the file does not exist, or
+// the file cannot be inspected.
+func (l *LocalStorage) PresignGet(ctx context.Context, key string, ttl time.Duration) (string, error) {
+	if err := l.ensureEnabled(); err != nil {
+		return "", err
+	}
+
+	// Check if file exists
+	fullPath := filepath.Join(l.basePath, filepath.FromSlash(key))
+	if _, err := os.Stat(fullPath); err != nil {
+		if os.IsNotExist(err) {
+			return "", fmt.Errorf("file not found: %s", key)
+		}
+		// Any other stat failure (e.g. permission denied) means we cannot
+		// vouch for the file, so do not hand out a URL for it.
+		return "", fmt.Errorf("failed to stat file: %w", err)
+	}
+
+	// If base URL is configured, return a URL
+	if l.baseURL != "" {
+		// Normalize the key to use forward slashes for URLs
+		urlKey := filepath.ToSlash(key)
+		return fmt.Sprintf("%s/%s", strings.TrimSuffix(l.baseURL, "/"), urlKey), nil
+	}
+
+	// Otherwise return a file:// URL
+	return fmt.Sprintf("file://%s", fullPath), nil
+}
+
+// PresignPut always fails for local storage: presigned uploads do not exist
+// for this backend. The disabled-storage error takes precedence; otherwise a
+// fixed "not supported" error is returned. The URL result is always empty.
+func (l *LocalStorage) PresignPut(ctx context.Context, key string, contentType string, ttl time.Duration) (string, error) {
+	err := l.ensureEnabled()
+	if err == nil {
+		err = errors.New("presigned PUT not supported for local storage; use direct upload endpoint")
+	}
+	return "", err
+}
+
+// Download reads a file from the local filesystem.
+//
+// key is a slash-separated path relative to the storage root; keys that would
+// resolve outside the root (for example through ".." segments) are rejected.
+// On success it returns the opened file, which the caller must close, together
+// with a content type guessed from the file extension. ctx is not used.
+func (l *LocalStorage) Download(ctx context.Context, key string) (io.ReadCloser, string, error) {
+	if err := l.ensureEnabled(); err != nil {
+		return nil, "", err
+	}
+
+	fullPath := filepath.Join(l.basePath, filepath.FromSlash(key))
+
+	// filepath.Join cleans the result, so a key containing ".." can climb out
+	// of the storage root; never open anything that is not beneath it.
+	rel, err := filepath.Rel(l.basePath, fullPath)
+	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
+		return nil, "", fmt.Errorf("invalid storage key: %s", key)
+	}
+
+	file, err := os.Open(fullPath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, "", fmt.Errorf("file not found: %s", key)
+		}
+		return nil, "", fmt.Errorf("failed to open file: %w", err)
+	}
+
+	// Try to detect content type from extension
+	contentType := detectContentTypeFromPath(fullPath)
+
+	l.log.Debug().
+		Str("key", key).
+		Str("content_type", contentType).
+		Msg("file downloaded from local storage")
+
+	return file, contentType, nil
+}
+
+// Health verifies that the storage directory can be written to by creating
+// and then removing a ".health_check" probe file. A disabled storage always
+// reports healthy.
+func (l *LocalStorage) Health(ctx context.Context) error {
+	if !l.disabled {
+		probe := filepath.Join(l.basePath, ".health_check")
+		if writeErr := os.WriteFile(probe, []byte("ok"), 0644); writeErr != nil {
+			return fmt.Errorf("storage directory not writable: %w", writeErr)
+		}
+
+		// Removal is best effort; a leftover probe file is harmless.
+		_ = os.Remove(probe)
+	}
+	return nil
+}
+
+// SupportsPresignedUploads returns false for local storage.
+// This matches PresignPut, which always returns an error for this backend.
+func (l *LocalStorage) SupportsPresignedUploads() bool {
+	return false
+}
+
+// detectContentTypeFromPath maps the (case-insensitive) file extension of path
+// to an image MIME type. Extensions it does not know, including none at all,
+// yield "application/octet-stream".
+func detectContentTypeFromPath(path string) string {
+	byExtension := map[string]string{
+		".jpg":  "image/jpeg",
+		".jpeg": "image/jpeg",
+		".png":  "image/png",
+		".gif":  "image/gif",
+		".webp": "image/webp",
+		".bmp":  "image/bmp",
+		".tiff": "image/tiff",
+		".tif":  "image/tiff",
+	}
+
+	if mimeType, known := byExtension[strings.ToLower(filepath.Ext(path))]; known {
+		return mimeType
+	}
+	return "application/octet-stream"
+}
diff --git a/services/media-api/internal/infrastructure/storage/s3_storage.go b/services/media-api/internal/infrastructure/storage/s3_storage.go
new file mode 100644
index 00000000..08d6f082
--- /dev/null
+++ b/services/media-api/internal/infrastructure/storage/s3_storage.go
@@ -0,0 +1,168 @@
+package storage
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+	"time"
+
+	"github.com/aws/aws-sdk-go-v2/aws"
+	awsconfig "github.com/aws/aws-sdk-go-v2/config"
+	"github.com/aws/aws-sdk-go-v2/credentials"
+	"github.com/aws/aws-sdk-go-v2/service/s3"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/media-api/internal/config"
+)
+
+// errStorageDisabled is returned by ensureEnabled when the storage was
+// constructed without a bucket or credentials.
+var errStorageDisabled = errors.New("media storage backend is not configured; set MEDIA_S3_* to enable uploads")
+
+// S3Storage handles uploads and downloads to S3-compatible storage.
+type S3Storage struct {
+	bucket    string
+	// client performs object operations (put, get, head bucket).
+	client    *s3.Client
+	// presigner generates presigned GET/PUT URLs; it may be built on a client
+	// that targets a different (public) endpoint than client.
+	presigner *s3.PresignClient
+	log       zerolog.Logger
+	// disabled is true when the bucket or credentials were not configured;
+	// client and presigner are nil in that case.
+	disabled  bool
+}
+
+// NewS3Storage builds the S3-backed storage from cfg.
+//
+// When the bucket, access key, or secret key is blank (after trimming
+// whitespace) no client is created: a disabled storage is returned with a nil
+// error. Otherwise a client is configured with static credentials,
+// cfg.S3Region, an optional custom endpoint (cfg.S3Endpoint) and
+// cfg.S3UsePathStyle. When cfg.S3PublicEndpoint is set, presigned URLs are
+// generated by a second client that targets that endpoint instead.
+//
+// An error is returned only when the AWS configuration cannot be loaded.
+func NewS3Storage(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*S3Storage, error) {
+	logger := log.With().Str("component", "s3-storage").Logger()
+	storage := &S3Storage{
+		bucket: strings.TrimSpace(cfg.S3Bucket),
+		log:    logger,
+	}
+
+	accessKey := strings.TrimSpace(cfg.S3AccessKeyID)
+	secretKey := strings.TrimSpace(cfg.S3SecretKey)
+	if storage.bucket == "" || accessKey == "" || secretKey == "" {
+		logger.Warn().Msg("MEDIA_S3_BUCKET or credentials are not set; media uploads will be disabled until configured")
+		storage.disabled = true
+		return storage, nil
+	}
+
+	// Route every service call to the custom endpoint when one is configured;
+	// otherwise report "not found" so the SDK falls back to its defaults.
+	resolver := aws.EndpointResolverWithOptionsFunc(func(service, region string, options ...interface{}) (aws.Endpoint, error) {
+		if cfg.S3Endpoint != "" {
+			return aws.Endpoint{
+				URL:           cfg.S3Endpoint,
+				PartitionID:   "aws",
+				SigningRegion: cfg.S3Region,
+			}, nil
+		}
+		return aws.Endpoint{}, &aws.EndpointNotFoundError{}
+	})
+
+	// Use the trimmed credentials validated above: passing the raw config
+	// values would let stray whitespace through the check and then break
+	// request signing.
+	awsCfg, err := awsconfig.LoadDefaultConfig(ctx,
+		awsconfig.WithRegion(cfg.S3Region),
+		awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(accessKey, secretKey, "")),
+		awsconfig.WithEndpointResolverWithOptions(resolver),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("load aws config: %w", err)
+	}
+
+	client := s3.NewFromConfig(awsCfg, func(o *s3.Options) {
+		o.UsePathStyle = cfg.S3UsePathStyle
+	})
+
+	// Presigned URLs are handed to clients, so they may need to point at a
+	// different endpoint than the one this service uses itself.
+	presignClient := client
+	if cfg.S3PublicEndpoint != "" {
+		presignClient = s3.NewFromConfig(awsCfg, func(o *s3.Options) {
+			o.UsePathStyle = cfg.S3UsePathStyle
+			o.EndpointResolver = s3.EndpointResolverFromURL(cfg.S3PublicEndpoint)
+		})
+	}
+
+	presigner := s3.NewPresignClient(presignClient)
+
+	storage.client = client
+	storage.presigner = presigner
+	return storage, nil
+}
+
+// ensureEnabled returns errStorageDisabled when the storage is disabled and
+// nil otherwise.
+func (s *S3Storage) ensureEnabled() error {
+	if !s.disabled {
+		return nil
+	}
+	return errStorageDisabled
+}
+
+// Upload writes body to the configured bucket under key with the given
+// content type. The size argument is not used. Errors from the S3 client are
+// returned unchanged.
+func (s *S3Storage) Upload(ctx context.Context, key string, body io.Reader, size int64, contentType string) error {
+	if disabledErr := s.ensureEnabled(); disabledErr != nil {
+		return disabledErr
+	}
+
+	_, putErr := s.client.PutObject(ctx, &s3.PutObjectInput{
+		Bucket:      aws.String(s.bucket),
+		Key:         aws.String(key),
+		Body:        body,
+		ContentType: aws.String(contentType),
+	})
+	return putErr
+}
+
+// PresignGet returns a presigned download URL for key that expires after ttl.
+func (s *S3Storage) PresignGet(ctx context.Context, key string, ttl time.Duration) (string, error) {
+	if disabledErr := s.ensureEnabled(); disabledErr != nil {
+		return "", disabledErr
+	}
+
+	request := &s3.GetObjectInput{
+		Bucket: aws.String(s.bucket),
+		Key:    aws.String(key),
+	}
+	signed, signErr := s.presigner.PresignGetObject(ctx, request, s3.WithPresignExpires(ttl))
+	if signErr != nil {
+		return "", signErr
+	}
+	return signed.URL, nil
+}
+
+// PresignPut returns a presigned upload URL for key that expires after ttl.
+// The content type is part of the presigned request.
+func (s *S3Storage) PresignPut(ctx context.Context, key string, contentType string, ttl time.Duration) (string, error) {
+	if disabledErr := s.ensureEnabled(); disabledErr != nil {
+		return "", disabledErr
+	}
+
+	signed, signErr := s.presigner.PresignPutObject(ctx, &s3.PutObjectInput{
+		Bucket:      aws.String(s.bucket),
+		Key:         aws.String(key),
+		ContentType: aws.String(contentType),
+	}, s3.WithPresignExpires(ttl))
+	if signErr != nil {
+		return "", signErr
+	}
+	return signed.URL, nil
+}
+
+// Download fetches the object stored under key. It returns the object body,
+// which the caller must close, and the content type reported by the store
+// (empty when none was reported).
+func (s *S3Storage) Download(ctx context.Context, key string) (io.ReadCloser, string, error) {
+	if disabledErr := s.ensureEnabled(); disabledErr != nil {
+		return nil, "", disabledErr
+	}
+
+	request := &s3.GetObjectInput{
+		Bucket: aws.String(s.bucket),
+		Key:    aws.String(key),
+	}
+	object, getErr := s.client.GetObject(ctx, request)
+	if getErr != nil {
+		return nil, "", getErr
+	}
+
+	// aws.ToString yields "" for a nil pointer.
+	return object.Body, aws.ToString(object.ContentType), nil
+}
+
+// Health performs a simple HeadBucket request against the configured bucket
+// and returns its error, if any. A disabled storage always reports healthy
+// without contacting S3.
+func (s *S3Storage) Health(ctx context.Context) error {
+	if s.disabled {
+		return nil
+	}
+	_, err := s.client.HeadBucket(ctx, &s3.HeadBucketInput{Bucket: aws.String(s.bucket)})
+	return err
+}
+
+// SupportsPresignedUploads reports whether presigned uploads are available:
+// true when the S3 storage is configured, false when it is disabled.
+func (s *S3Storage) SupportsPresignedUploads() bool {
+	return !s.disabled
+}
diff --git a/services/media-api/internal/interfaces/httpserver/handlers/media_handler.go b/services/media-api/internal/interfaces/httpserver/handlers/media_handler.go
new file mode 100644
index 00000000..cc366721
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/handlers/media_handler.go
@@ -0,0 +1,301 @@
+package handlers
+
+import (
+	"encoding/base64"
+	"encoding/json"
+	"io"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/media-api/internal/config"
+	domain "jan-server/services/media-api/internal/domain/media"
+	"jan-server/services/media-api/internal/interfaces/httpserver/responses"
+	"jan-server/services/media-api/internal/utils/platformerrors"
+)
+
+// MediaHandler exposes media endpoints.
+// Each handler method delegates the actual work to the domain service.
+type MediaHandler struct {
+	cfg     *config.Config
+	service *domain.Service
+	log     zerolog.Logger
+}
+
+// NewMediaHandler builds a MediaHandler whose logger is tagged with
+// component=media-handler.
+func NewMediaHandler(cfg *config.Config, service *domain.Service, log zerolog.Logger) *MediaHandler {
+	scoped := log.With().Str("component", "media-handler").Logger()
+	return &MediaHandler{cfg: cfg, service: service, log: scoped}
+}
+
+// ingestResponse is the JSON body returned by Ingest and DirectUpload.
+// PresignedURL is omitted when no URL could be generated.
+type ingestResponse struct {
+	ID           string `json:"id"`
+	Mime         string `json:"mime"`
+	Bytes        int64  `json:"bytes"`
+	Deduped      bool   `json:"deduped"`
+	PresignedURL string `json:"presigned_url,omitempty"`
+}
+
+// resolveRequest is the JSON body accepted by Resolve; payload is required.
+type resolveRequest struct {
+	Payload JSONPayload `json:"payload" binding:"required"`
+}
+
+// resolveResponse is the JSON body returned by Resolve.
+type resolveResponse struct {
+	Payload JSONPayload `json:"payload"`
+}
+
+// JSONPayload is used to document arbitrary JSON blobs in swagger.
+// It is an alias of json.RawMessage, so the payload is kept as raw bytes.
+type JSONPayload = json.RawMessage
+
+// Ingest godoc
+// @Summary      Upload media
+// @Description  Accepts data URLs or remote URLs and stores content privately.
+// @Tags         media
+// @Accept       json
+// @Produce      json
+// @Param        request  body      domain.IngestRequest  true  "Media request"
+// @Success      200      {object}  ingestResponse
+// @Failure      400      {object}  map[string]string
+// @Security     ApiKeyAuth
+// @Router       /v1/media [post]
+func (h *MediaHandler) Ingest(c *gin.Context) {
+	ctx := c.Request.Context()
+
+	var payload domain.IngestRequest
+	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
+		responses.HandleNewError(c, platformerrors.ErrorTypeValidation, "invalid request body", "1a2b3c4d-5e6f-7a8b-9c0d-1e2f3a4b5c6d")
+		return
+	}
+
+	stored, deduped, ingestErr := h.service.Ingest(ctx, payload)
+	if ingestErr != nil {
+		h.log.Error().Err(ingestErr).Msg("ingest failed")
+		responses.HandleError(c, ingestErr, "failed to ingest media")
+		return
+	}
+
+	// A URL for immediate access is a convenience only: when it cannot be
+	// produced the response simply omits it.
+	signedURL, signErr := h.service.Presign(ctx, stored.ID)
+	if signErr != nil {
+		h.log.Warn().Err(signErr).Msg("failed to generate presigned URL, continuing without it")
+		signedURL = ""
+	}
+
+	body := ingestResponse{
+		ID:           stored.ID,
+		Mime:         stored.MimeType,
+		Bytes:        stored.Bytes,
+		Deduped:      deduped,
+		PresignedURL: signedURL,
+	}
+	c.JSON(http.StatusOK, body)
+}
+
+// Resolve godoc
+// @Summary      Resolve jan_* placeholders
+// @Description  Replaces pseudo data URLs with short-lived signed URLs.
+// @Tags         media
+// @Accept       json
+// @Produce      json
+// @Param        request  body      resolveRequest  true  "Payload to resolve"
+// @Success      200      {object}  resolveResponse
+// @Failure      400      {object}  map[string]string
+// @Security     ApiKeyAuth
+// @Router       /v1/media/resolve [post]
+func (h *MediaHandler) Resolve(c *gin.Context) {
+	var body resolveRequest
+	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
+		responses.HandleNewError(c, platformerrors.ErrorTypeValidation, "invalid request body", "2b3c4d5e-6f7a-8b9c-0d1e-2f3a4b5c6d7e")
+		return
+	}
+
+	// JSONPayload is an alias of json.RawMessage, so no conversion is needed.
+	resolved, resolveErr := h.service.ResolvePayload(c.Request.Context(), body.Payload)
+	if resolveErr != nil {
+		h.log.Error().Err(resolveErr).Msg("resolve failed")
+		responses.HandleError(c, resolveErr, "failed to resolve media payload")
+		return
+	}
+
+	var reply resolveResponse
+	reply.Payload = JSONPayload(resolved)
+	c.JSON(http.StatusOK, reply)
+}
+
+// PrepareUpload godoc
+// @Summary      Request presigned upload URL
+// @Description  Generates a presigned upload URL and reserves a jan_id. Client uploads directly to S3 using the URL. Not available for local storage.
+// @Tags         media
+// @Accept       json
+// @Produce      json
+// @Param        request  body      domain.PrepareUploadRequest  true  "Upload preparation request"
+// @Success      200      {object}  domain.UploadPreparation
+// @Failure      400      {object}  map[string]string
+// @Failure      501      {object}  map[string]string
+// @Security     ApiKeyAuth
+// @Router       /v1/media/prepare-upload [post]
+func (h *MediaHandler) PrepareUpload(c *gin.Context) {
+	// Backends without presigned uploads answer 501 before the body is read.
+	if supported := h.service.SupportsPresignedUploads(); !supported {
+		notSupported := gin.H{
+			"error":   "presigned uploads not supported",
+			"message": "use POST /v1/media/upload for direct upload with local storage",
+		}
+		c.JSON(http.StatusNotImplemented, notSupported)
+		return
+	}
+
+	var body domain.PrepareUploadRequest
+	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
+		responses.HandleNewError(c, platformerrors.ErrorTypeValidation, "invalid request body", "3c4d5e6f-7a8b-9c0d-1e2f-3a4b5c6d7e8f")
+		return
+	}
+
+	preparation, prepErr := h.service.PrepareUpload(c.Request.Context(), body.MimeType, body.UserID)
+	if prepErr != nil {
+		h.log.Error().Err(prepErr).Msg("prepare upload failed")
+		responses.HandleError(c, prepErr, "failed to prepare upload")
+		return
+	}
+
+	c.JSON(http.StatusOK, preparation)
+}
+
+// GetPresignedURL godoc
+// @Summary      Get presigned download URL
+// @Description  Returns a temporary signed URL for downloading media by jan_id.
+// @Tags         media
+// @Produce      json
+// @Param        id   path      string  true  "Media ID (jan_xxx)"
+// @Success      200  {object}  map[string]interface{}
+// @Failure      404  {object}  map[string]string
+// @Security     ApiKeyAuth
+// @Router       /v1/media/{id}/presign [get]
+func (h *MediaHandler) GetPresignedURL(c *gin.Context) {
+	mediaID := c.Param("id")
+
+	signedURL, signErr := h.service.Presign(c.Request.Context(), mediaID)
+	if signErr != nil {
+		h.log.Error().Err(signErr).Str("id", mediaID).Msg("presign failed")
+		responses.HandleError(c, signErr, "failed to generate presigned URL")
+		return
+	}
+
+	// expires_in is the configured presign TTL in whole seconds.
+	ttlSeconds := int(h.cfg.S3PresignTTL.Seconds())
+	c.JSON(http.StatusOK, gin.H{
+		"id":         mediaID,
+		"url":        signedURL,
+		"expires_in": ttlSeconds,
+	})
+}
+
+// Proxy godoc
+// @Summary      Stream media bytes
+// @Description  Streams the object through the media API without exposing storage URLs.
+// @Tags         media
+// @Produce      octet-stream
+// @Param        id   path      string  true  "Media ID"
+// @Success      200  "binary data"
+// @Failure      404  {object}  map[string]string
+// @Security     ApiKeyAuth
+// @Router       /v1/media/{id} [get]
+func (h *MediaHandler) Proxy(c *gin.Context) {
+	id := c.Param("id")
+	ctx := c.Request.Context()
+
+	// With proxying turned off the client is given a URL to fetch instead of
+	// the bytes themselves.
+	if !h.cfg.ProxyDownload {
+		url, err := h.service.Presign(ctx, id)
+		if err != nil {
+			h.log.Error().Err(err).Str("id", id).Msg("presign failed")
+			// HandleError derives the status from the platform error type
+			// (500 for other errors) and keeps internal error text out of
+			// the response, matching the other handlers.
+			responses.HandleError(c, err, "failed to generate presigned URL")
+			return
+		}
+		c.JSON(http.StatusOK, gin.H{"url": url})
+		return
+	}
+
+	reader, mime, err := h.service.Download(ctx, id)
+	if err != nil {
+		h.log.Error().Err(err).Str("id", id).Msg("download failed")
+		responses.HandleError(c, err, "failed to download media")
+		return
+	}
+	defer reader.Close()
+
+	if mime == "" {
+		mime = "application/octet-stream"
+	}
+
+	c.Header("Content-Type", mime)
+	c.Status(http.StatusOK)
+	// The status line is already committed, so a copy failure can only be
+	// logged; the client sees a truncated body.
+	if _, err := io.Copy(c.Writer, reader); err != nil {
+		h.log.Error().Err(err).Str("id", id).Msg("stream error")
+	}
+}
+
+// DirectUpload godoc
+// @Summary      Direct file upload
+// @Description  Accepts multipart file upload for local storage. Alternative to presigned uploads.
+// @Tags         media
+// @Accept       multipart/form-data
+// @Produce      json
+// @Param        file      formData  file    true  "File to upload"
+// @Param        user_id   formData  string  false "User ID"
+// @Success      200       {object}  ingestResponse
+// @Failure      400       {object}  map[string]string
+// @Security     ApiKeyAuth
+// @Router       /v1/media/upload [post]
+func (h *MediaHandler) DirectUpload(c *gin.Context) {
+	file, header, err := c.Request.FormFile("file")
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "file is required"})
+		return
+	}
+	defer file.Close()
+
+	userID := c.Request.FormValue("user_id")
+	if userID == "" {
+		userID = "anonymous"
+	}
+
+	// Read file content. NOTE(review): the whole upload is buffered in memory
+	// and then base64-encoded; no size limit is applied in this handler.
+	data, err := io.ReadAll(file)
+	if err != nil {
+		h.log.Error().Err(err).Msg("failed to read file")
+		c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read file"})
+		return
+	}
+
+	// Determine content type
+	contentType := header.Header.Get("Content-Type")
+	if contentType == "" {
+		contentType = "application/octet-stream"
+	}
+
+	// Re-use the regular ingest path by wrapping the bytes in a data URL.
+	dataURL := "data:" + contentType + ";base64," + base64.StdEncoding.EncodeToString(data)
+
+	req := domain.IngestRequest{
+		Source: domain.Source{
+			Type:    "data_url",
+			DataURL: dataURL,
+		},
+		Filename: header.Filename,
+		UserID:   userID,
+	}
+
+	ctx := c.Request.Context()
+	obj, dedup, err := h.service.Ingest(ctx, req)
+	if err != nil {
+		h.log.Error().Err(err).Msg("ingest failed")
+		// Same handling as Ingest: the status follows the platform error type
+		// (500 for other errors) rather than a blanket 400, and the internal
+		// error text is not echoed to the client.
+		responses.HandleError(c, err, "failed to ingest media")
+		return
+	}
+
+	// Generate presigned/direct URL for immediate access
+	presignedURL, err := h.service.Presign(ctx, obj.ID)
+	if err != nil {
+		h.log.Warn().Err(err).Msg("failed to generate URL, continuing without it")
+		presignedURL = ""
+	}
+
+	c.JSON(http.StatusOK, ingestResponse{
+		ID:           obj.ID,
+		Mime:         obj.MimeType,
+		Bytes:        obj.Bytes,
+		Deduped:      dedup,
+		PresignedURL: presignedURL,
+	})
+}
diff --git a/services/media-api/internal/interfaces/httpserver/handlers/provider.go b/services/media-api/internal/interfaces/httpserver/handlers/provider.go
new file mode 100644
index 00000000..3b51b1c6
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/handlers/provider.go
@@ -0,0 +1,19 @@
+package handlers
+
+import (
+	"github.com/rs/zerolog"
+
+	"jan-server/services/media-api/internal/config"
+	domain "jan-server/services/media-api/internal/domain/media"
+)
+
+// Provider wires HTTP handlers.
+// It currently holds a single handler, Media.
+type Provider struct {
+	Media *MediaHandler
+}
+
+// NewProvider constructs the handler set, passing cfg, service and log on to
+// the media handler.
+func NewProvider(cfg *config.Config, service *domain.Service, log zerolog.Logger) *Provider {
+	mediaHandler := NewMediaHandler(cfg, service, log)
+	return &Provider{Media: mediaHandler}
+}
diff --git a/services/media-api/internal/interfaces/httpserver/http_server.go b/services/media-api/internal/interfaces/httpserver/http_server.go
new file mode 100644
index 00000000..f71ceb2b
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/http_server.go
@@ -0,0 +1,108 @@
+package httpserver
+
+import (
+	"context"
+	"errors"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+	swaggerFiles "github.com/swaggo/files"
+	ginSwagger "github.com/swaggo/gin-swagger"
+
+	mediaapidocs "jan-server/services/media-api/docs/swagger"
+	"jan-server/services/media-api/internal/config"
+	domain "jan-server/services/media-api/internal/domain/media"
+	"jan-server/services/media-api/internal/infrastructure/auth"
+	"jan-server/services/media-api/internal/interfaces/httpserver/handlers"
+	v1 "jan-server/services/media-api/internal/interfaces/httpserver/routes/v1"
+)
+
+// HTTPServer wraps the gin engine with graceful shutdown helpers.
+type HTTPServer struct {
+	cfg    *config.Config
+	engine *gin.Engine
+	log    zerolog.Logger
+	// auth may be nil, in which case no authentication middleware is installed.
+	auth   *auth.Validator
+}
+
+// New constructs the HTTP server with default middleware and routes.
+//
+// Gin is switched to release mode when cfg.Environment is "production".
+// Public routes are registered first; the authentication middleware (if a
+// validator is supplied) is attached afterwards, so it only guards the API
+// routes registered after it.
+func New(cfg *config.Config, log zerolog.Logger, mediaService *domain.Service, authValidator *auth.Validator) *HTTPServer {
+	if cfg.Environment == "production" {
+		gin.SetMode(gin.ReleaseMode)
+	}
+	mediaapidocs.SwaggerInfo.BasePath = "/"
+
+	router := gin.New()
+	router.Use(gin.Recovery(), gin.Logger())
+
+	apiRoutes := v1.NewRoutes(handlers.NewProvider(cfg, mediaService, log), cfg)
+
+	// Health checks and swagger stay reachable without credentials.
+	registerPublicRoutes(router, cfg, authValidator)
+
+	// Everything registered from here on goes through authentication.
+	if authValidator != nil {
+		router.Use(authValidator.Middleware())
+	}
+	apiRoutes.Register(router.Group("/"))
+
+	srv := &HTTPServer{cfg: cfg, engine: router, log: log, auth: authValidator}
+	return srv
+}
+
+// Run starts the HTTP listener and handles graceful shutdown via context cancellation.
+//
+// It blocks until the listener stops on its own (returning that error, or nil
+// for a clean close) or until ctx is cancelled, in which case the server is
+// shut down with a deadline of cfg.ShutdownTimeout and the shutdown result is
+// returned.
+func (s *HTTPServer) Run(ctx context.Context) error {
+	srv := &http.Server{
+		Addr:    s.cfg.Addr(),
+		Handler: s.engine,
+	}
+
+	// Buffered so the serving goroutine can always deliver its result and
+	// exit, even after Run has stopped listening on the channel.
+	serveResult := make(chan error, 1)
+	go func() {
+		s.log.Info().Str("addr", s.cfg.Addr()).Msg("media-api HTTP server listening")
+		serveErr := srv.ListenAndServe()
+		if serveErr == nil || errors.Is(serveErr, http.ErrServerClosed) {
+			serveResult <- nil
+			return
+		}
+		serveResult <- serveErr
+	}()
+
+	select {
+	case serveErr := <-serveResult:
+		return serveErr
+	case <-ctx.Done():
+		s.log.Info().Msg("context cancelled, shutting down HTTP server")
+	}
+
+	// ctx is already cancelled, so the shutdown deadline needs a fresh context.
+	drainCtx, release := context.WithTimeout(context.Background(), s.cfg.ShutdownTimeout)
+	defer release()
+	return srv.Shutdown(drainCtx)
+}
+
+// registerPublicRoutes attaches the unauthenticated endpoints: the service
+// banner at "/", the liveness and readiness probes, the auth readiness check
+// and the swagger UI.
+func registerPublicRoutes(engine *gin.Engine, cfg *config.Config, authValidator *auth.Validator) {
+	// fixedStatus builds a handler that always answers 200 with {"status": s}.
+	fixedStatus := func(s string) gin.HandlerFunc {
+		return func(c *gin.Context) {
+			c.JSON(http.StatusOK, gin.H{"status": s})
+		}
+	}
+
+	engine.GET("/", func(c *gin.Context) {
+		c.JSON(http.StatusOK, gin.H{"service": cfg.ServiceName, "status": "ok"})
+	})
+	engine.GET("/healthz", fixedStatus("healthy"))
+	engine.GET("/readyz", fixedStatus("ready"))
+	engine.GET("/health/auth", func(c *gin.Context) {
+		// Without a validator there is nothing to wait for.
+		if authValidator != nil && !authValidator.Ready() {
+			c.JSON(http.StatusServiceUnavailable, gin.H{"status": "initializing"})
+			return
+		}
+		c.JSON(http.StatusOK, gin.H{"status": "ready"})
+	})
+	engine.GET("/swagger/*any", ginSwagger.WrapHandler(swaggerFiles.Handler))
+}
diff --git a/services/media-api/internal/interfaces/httpserver/middlewares/middlewares.go b/services/media-api/internal/interfaces/httpserver/middlewares/middlewares.go
new file mode 100644
index 00000000..44cc7bfe
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/middlewares/middlewares.go
@@ -0,0 +1,28 @@
+package middlewares
+
+import (
+	"github.com/gin-gonic/gin"
+)
+
+// CORS returns middleware that allows any origin, the methods GET, POST, PUT,
+// DELETE and OPTIONS, and the Content-Type and Authorization headers.
+// OPTIONS requests are answered directly with 204 and go no further.
+func CORS() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		headers := c.Writer.Header()
+		headers.Set("Access-Control-Allow-Origin", "*")
+		headers.Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
+		headers.Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+
+		if c.Request.Method != "OPTIONS" {
+			c.Next()
+			return
+		}
+		c.AbortWithStatus(204)
+	}
+}
+
+// RequestLogger is intended to log incoming requests.
+// NOTE(review): the current implementation is a pass-through placeholder: it
+// only invokes the next handler and emits no log output.
+func RequestLogger() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		c.Next()
+	}
+}
diff --git a/services/media-api/internal/interfaces/httpserver/requests/media_requests.go b/services/media-api/internal/interfaces/httpserver/requests/media_requests.go
new file mode 100644
index 00000000..a0a6ea3e
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/requests/media_requests.go
@@ -0,0 +1,50 @@
+package requests
+
+import (
+	"jan-server/services/media-api/internal/domain/media"
+)
+
+// IngestRequest represents media ingestion request
+// Source is required; Filename and UserID are optional.
+type IngestRequest struct {
+	Source   Source `json:"source" binding:"required"`
+	Filename string `json:"filename"`
+	UserID   string `json:"user_id"`
+}
+
+// Source describes the media source
+// Type is required. DataURL and URL carry the content reference; presumably
+// Type selects which of the two is used — confirm against the domain service.
+type Source struct {
+	Type    string `json:"type" binding:"required"`
+	DataURL string `json:"data_url"`
+	URL     string `json:"url"`
+}
+
+// ToDomain copies the request, field by field, into a newly allocated domain
+// ingest request.
+func (r *IngestRequest) ToDomain() *media.IngestRequest {
+	out := new(media.IngestRequest)
+	out.Source.Type = r.Source.Type
+	out.Source.DataURL = r.Source.DataURL
+	out.Source.URL = r.Source.URL
+	out.Filename = r.Filename
+	out.UserID = r.UserID
+	return out
+}
+
+// ResolveRequest represents media resolution request
+// Payload is required and may hold any JSON value.
+type ResolveRequest struct {
+	Payload interface{} `json:"payload" binding:"required"`
+}
+
+// PrepareUploadRequest represents presigned upload preparation
+// Filename and MimeType are required; SizeKB is optional.
+type PrepareUploadRequest struct {
+	Filename string `json:"filename" binding:"required"`
+	MimeType string `json:"mime_type" binding:"required"`
+	SizeKB   int64  `json:"size_kb"`
+}
+
+// DirectUploadRequest represents direct file upload metadata
+// Both fields are optional.
+type DirectUploadRequest struct {
+	Filename string `json:"filename"`
+	MimeType string `json:"mime_type"`
+}
diff --git a/services/media-api/internal/interfaces/httpserver/responses/error_responses.go b/services/media-api/internal/interfaces/httpserver/responses/error_responses.go
new file mode 100644
index 00000000..27dcfedc
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/responses/error_responses.go
@@ -0,0 +1,63 @@
+package responses
+
+import (
+	"errors"
+	"net/http"
+
+	"jan-server/services/media-api/internal/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+)
+
+// ErrorResponse represents an error response with platform error details.
+// It is the JSON body written by HandleError and HandleNewError.
+type ErrorResponse struct {
+	Code          string `json:"code"` // UUID from PlatformError
+	Error         string `json:"error"`
+	Message       string `json:"message,omitempty"`
+	// ErrorInstance keeps the original error on the Go value; the `json:"-"`
+	// tag excludes it from the serialized response.
+	ErrorInstance error  `json:"-"`
+	RequestID     string `json:"request_id,omitempty"`
+}
+
+// HandleError writes err to the client as a JSON ErrorResponse and aborts the
+// request.
+//
+// When err is, or wraps, a *platformerrors.PlatformError the HTTP status is
+// derived from its error type and the response carries its UUID and request
+// ID. Any other error is answered with 500 Internal Server Error and an empty
+// code. In both cases message is sent as both the "error" and the "message"
+// field; the error itself is kept on ErrorInstance, which is not serialized.
+func HandleError(reqCtx *gin.Context, err error, message string) {
+	var platformErr *platformerrors.PlatformError
+	if !errors.As(err, &platformErr) {
+		// Not a platform error: there is no type to map, report a generic 500.
+		reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, ErrorResponse{
+			Error:         message,
+			Message:       message,
+			ErrorInstance: err,
+		})
+		return
+	}
+
+	body := ErrorResponse{
+		Code:          platformErr.GetUUID(),
+		Error:         message,
+		Message:       message,
+		ErrorInstance: platformErr,
+		RequestID:     platformErr.GetRequestID(),
+	}
+	status := platformerrors.ErrorTypeToHTTPStatus(platformErr.GetErrorType())
+	reqCtx.AbortWithStatusJSON(status, body)
+}
+
+// HandleNewError builds a route-layer PlatformError of the given type and
+// reports it to the client through HandleError.
+//
+// The error is created with the request's context, LayerRoute, a nil cause and
+// the supplied uuid. Because the value handed to HandleError is always a
+// *PlatformError, the response uses the status mapped from errorType and
+// carries the error's UUID and request ID.
+func HandleNewError(reqCtx *gin.Context, errorType platformerrors.ErrorType, message string, uuid string) {
+	routeErr := platformerrors.NewError(
+		reqCtx.Request.Context(),
+		platformerrors.LayerRoute,
+		errorType,
+		message,
+		nil,
+		uuid,
+	)
+	HandleError(reqCtx, routeErr, message)
+}
diff --git a/services/media-api/internal/interfaces/httpserver/responses/media_responses.go b/services/media-api/internal/interfaces/httpserver/responses/media_responses.go
new file mode 100644
index 00000000..3d4482f3
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/responses/media_responses.go
@@ -0,0 +1,79 @@
+package responses
+
+import (
+	"jan-server/services/media-api/internal/domain/media"
+)
+
+// IngestResponse is the JSON body returned after a successful media ingestion.
+// PresignedURL is left out of the output when it is empty.
+type IngestResponse struct {
+	ID           string `json:"id"`
+	Mime         string `json:"mime"`
+	Bytes        int64  `json:"bytes"`
+	Deduped      bool   `json:"deduped"`
+	PresignedURL string `json:"presigned_url,omitempty"`
+}
+
+// BuildIngestResponse copies the ID, MIME type and byte count from obj and
+// attaches the deduped flag and presigned URL.
+// obj must be non-nil: it is dereferenced unconditionally.
+func BuildIngestResponse(obj *media.MediaObject, deduped bool, presignedURL string) *IngestResponse {
+	resp := new(IngestResponse)
+	resp.ID = obj.ID
+	resp.Mime = obj.MimeType
+	resp.Bytes = obj.Bytes
+	resp.Deduped = deduped
+	resp.PresignedURL = presignedURL
+	return resp
+}
+
+// ResolveResponse is the JSON body that carries a resolved payload.
+type ResolveResponse struct {
+	Payload interface{} `json:"payload"`
+}
+
+// BuildResolveResponse wraps payload, unchanged, in a new ResolveResponse.
+func BuildResolveResponse(payload interface{}) *ResolveResponse {
+	resp := new(ResolveResponse)
+	resp.Payload = payload
+	return resp
+}
+
+// PrepareUploadResponse contains presigned upload information.
+// FormFields is left out of the output when it is nil or empty.
+type PrepareUploadResponse struct {
+	UploadURL  string            `json:"upload_url"`
+	UploadID   string            `json:"upload_id"`
+	FormFields map[string]string `json:"form_fields,omitempty"`
+}
+
+// BuildPrepareUploadResponse assembles the upload preparation response from
+// its three parts. The formFields map is stored as given, not copied.
+func BuildPrepareUploadResponse(uploadURL, uploadID string, formFields map[string]string) *PrepareUploadResponse {
+	resp := new(PrepareUploadResponse)
+	resp.UploadURL = uploadURL
+	resp.UploadID = uploadID
+	resp.FormFields = formFields
+	return resp
+}
+
+// DirectUploadResponse contains the result of a direct upload.
+// PresignedURL is left out of the output when it is empty.
+type DirectUploadResponse struct {
+	ID           string `json:"id"`
+	PresignedURL string `json:"presigned_url,omitempty"`
+}
+
+// BuildDirectUploadResponse pairs the media id with its presigned URL.
+func BuildDirectUploadResponse(id, presignedURL string) *DirectUploadResponse {
+	resp := new(DirectUploadResponse)
+	resp.ID = id
+	resp.PresignedURL = presignedURL
+	return resp
+}
+
+// PresignedURLResponse is the JSON body that carries a single presigned URL.
+type PresignedURLResponse struct {
+	URL string `json:"url"`
+}
+
+// BuildPresignedURLResponse wraps url in a new PresignedURLResponse.
+func BuildPresignedURLResponse(url string) *PresignedURLResponse {
+	resp := new(PresignedURLResponse)
+	resp.URL = url
+	return resp
+}
diff --git a/services/media-api/internal/interfaces/httpserver/routes/v1/routes.go b/services/media-api/internal/interfaces/httpserver/routes/v1/routes.go
new file mode 100644
index 00000000..6b714d81
--- /dev/null
+++ b/services/media-api/internal/interfaces/httpserver/routes/v1/routes.go
@@ -0,0 +1,38 @@
+package v1
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/media-api/internal/config"
+	"jan-server/services/media-api/internal/interfaces/httpserver/handlers"
+)
+
+// Routes bundles what is needed to register the versioned routes: the handler
+// provider and the service configuration.
+type Routes struct {
+	handlers *handlers.Provider
+	cfg      *config.Config
+}
+
+// NewRoutes returns a Routes bound to the given handler provider and config.
+// Both pointers are stored as given.
+func NewRoutes(provider *handlers.Provider, cfg *config.Config) *Routes {
+	routes := new(Routes)
+	routes.handlers = provider
+	routes.cfg = cfg
+	return routes
+}
+
+// Register attaches all v1 routes to router under the /v1 prefix.
+//
+// The media endpoints are always registered. A static file route is added
+// only when the configuration reports local storage and a non-empty
+// LocalStoragePath.
+func (r *Routes) Register(router gin.IRouter) {
+	group := router.Group("/v1")
+	group.POST("/media", r.handlers.Media.Ingest)
+	group.POST("/media/upload", r.handlers.Media.DirectUpload)
+	group.POST("/media/prepare-upload", r.handlers.Media.PrepareUpload)
+	group.POST("/media/resolve", r.handlers.Media.Resolve)
+	group.GET("/media/:id/presign", r.handlers.Media.GetPresignedURL)
+	group.GET("/media/:id", r.handlers.Media.Proxy)
+
+	// Serve static files from local storage if configured
+	if r.cfg.IsLocalStorage() && r.cfg.LocalStoragePath != "" {
+		// Registered on the /v1 group, so the files under LocalStoragePath
+		// are exposed at /v1/files/<path>.
+		group.Static("/files", r.cfg.LocalStoragePath)
+	}
+}
diff --git a/services/media-api/internal/utils/platformerrors/errors.go b/services/media-api/internal/utils/platformerrors/errors.go
new file mode 100644
index 00000000..d47c0709
--- /dev/null
+++ b/services/media-api/internal/utils/platformerrors/errors.go
@@ -0,0 +1,210 @@
+package platformerrors
+
+import (
+	"context"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+)
+
+// getRequestIDFromContext returns the string stored in ctx under the key
+// "requestID". It returns "" when the key is absent or holds a value that is
+// not a string.
+func getRequestIDFromContext(ctx context.Context) string {
+	// A failed type assertion leaves requestID at its zero value, "".
+	requestID, _ := ctx.Value("requestID").(string)
+	return requestID
+}
+
+// ErrorType represents the category of error
+type ErrorType string
+
+const (
+	ErrorTypeNotFound       ErrorType = "NOT_FOUND"
+	ErrorTypeTooManyRecords ErrorType = "TOO_MANY_RECORDS"
+	ErrorTypeValidation     ErrorType = "VALIDATION"
+	ErrorTypeConflict       ErrorType = "CONFLICT"
+	ErrorTypeUnauthorized   ErrorType = "UNAUTHORIZED"
+	ErrorTypeForbidden      ErrorType = "FORBIDDEN"
+	ErrorTypeInternal       ErrorType = "INTERNAL"
+	ErrorTypeExternal       ErrorType = "EXTERNAL"
+	ErrorTypeDatabaseError  ErrorType = "DATABASE_ERROR"
+	ErrorTypeNotImplemented ErrorType = "NOT_IMPLEMENTED"
+)
+
+// Layer represents the application layer where the error occurred
+type Layer string
+
+const (
+	LayerRepository     Layer = "repository"
+	LayerDomain         Layer = "domain"
+	LayerHandler        Layer = "handler"
+	LayerRoute          Layer = "route"
+	LayerInfrastructure Layer = "infrastructure"
+	LayerCommon         Layer = "common"
+)
+
+// PlatformError represents an error with context and metadata: a message and
+// an optional wrapped cause, plus classification and tracing information.
+type PlatformError struct {
+	// UUID identifies the error; returned by GetUUID.
+	UUID      string
+	// Type is the error category; ErrorTypeToHTTPStatus maps it to a status.
+	Type      ErrorType
+	// Message is the human-readable description, also used as the log message.
+	Message   string
+	// Err is the wrapped cause (may be nil); exposed through Unwrap.
+	Err       error
+	// Context holds extra key/value fields that LogError adds to the log event.
+	Context   map[string]any
+	// RequestID is read from the context at creation time and may be empty.
+	RequestID string
+	// Layer is the application layer that created the error.
+	Layer     Layer
+	// Timestamp is the creation time, in UTC.
+	Timestamp time.Time
+}
+
+// Error implements the error interface. The result has the form
+// "[layer][type][uuid] message", followed by ": <cause>" when a wrapped
+// error is present.
+func (e *PlatformError) Error() string {
+	if e.Err == nil {
+		return fmt.Sprintf("[%s][%s][%s] %s", e.Layer, e.Type, e.UUID, e.Message)
+	}
+	return fmt.Sprintf("[%s][%s][%s] %s: %v", e.Layer, e.Type, e.UUID, e.Message, e.Err)
+}
+
+// Unwrap returns the underlying error (nil when there is none), which lets
+// errors.Is and errors.As look through a PlatformError.
+func (e *PlatformError) Unwrap() error {
+	return e.Err
+}
+
+// GetErrorType returns the error type (the Type field).
+func (e *PlatformError) GetErrorType() ErrorType {
+	return e.Type
+}
+
+// GetRequestID returns the request ID recorded on the error; it may be empty.
+func (e *PlatformError) GetRequestID() string {
+	return e.RequestID
+}
+
+// GetUUID returns the error UUID (the UUID field).
+func (e *PlatformError) GetUUID() string {
+	return e.UUID
+}
+
+// NewError creates a new PlatformError with the specified parameters.
+// It is shorthand for NewErrorWithContext with no additional context fields;
+// err may be nil and customUUID may be empty.
+func NewError(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string) *PlatformError {
+	return NewErrorWithContext(ctx, layer, errorType, message, err, customUUID, nil)
+}
+
+// NewErrorWithContext creates a new PlatformError with additional context fields
+func NewErrorWithContext(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string, contextFields map[string]any) *PlatformError {
+	requestID := getRequestIDFromContext(ctx)
+
+	errorUUID := customUUID
+	if errorUUID == "" {
+		errorUUID = "auto-generated-uuid"
+	}
+
+	errorContext := make(map[string]any)
+	for k, v := range contextFields {
+		errorContext[k] = v
+	}
+
+	platformError := &PlatformError{
+		UUID:      errorUUID,
+		Type:      errorType,
+		Message:   message,
+		Err:       err,
+		RequestID: requestID,
+		Layer:     layer,
+		Timestamp: time.Now().UTC(),
+		Context:   errorContext,
+	}
+
+	return platformError
+}
+
+// AsError wraps err in a PlatformError attributed to the given layer.
+//
+//   - nil input yields a nil *PlatformError. Callers that store the result in
+//     an `error` variable must check for nil first, otherwise the interface
+//     value will be non-nil.
+//   - If err is, or wraps, a PlatformError, the new error keeps that error's
+//     type and UUID, uses "<message>: <inner message>" as its message and
+//     wraps the PlatformError that was found.
+//   - Otherwise the type is ErrorTypeNotFound for gorm.ErrRecordNotFound and
+//     ErrorTypeInternal for everything else, with no custom UUID.
+func AsError(ctx context.Context, layer Layer, err error, message string) *PlatformError {
+	if err == nil {
+		return nil
+	}
+
+	var inner *PlatformError
+	switch {
+	case errors.As(err, &inner):
+		combined := fmt.Sprintf("%s: %s", message, inner.Message)
+		return NewError(ctx, layer, inner.Type, combined, inner, inner.UUID)
+	case errors.Is(err, gorm.ErrRecordNotFound):
+		return NewError(ctx, layer, ErrorTypeNotFound, message, err, "")
+	default:
+		return NewError(ctx, layer, ErrorTypeInternal, message, err, "")
+	}
+}
+
+// ErrorTypeToHTTPStatus maps an error type to the HTTP status code used in
+// responses. ErrorTypeInternal, ErrorTypeTooManyRecords,
+// ErrorTypeDatabaseError and any unrecognized type map to
+// 500 Internal Server Error.
+func ErrorTypeToHTTPStatus(errorType ErrorType) int {
+	switch errorType {
+	case ErrorTypeNotFound:
+		return http.StatusNotFound
+	case ErrorTypeValidation:
+		return http.StatusBadRequest
+	case ErrorTypeConflict:
+		return http.StatusConflict
+	case ErrorTypeUnauthorized:
+		return http.StatusUnauthorized
+	case ErrorTypeForbidden:
+		return http.StatusForbidden
+	case ErrorTypeNotImplemented:
+		return http.StatusNotImplemented
+	case ErrorTypeExternal:
+		return http.StatusBadGateway
+	}
+
+	// Everything else is reported as a server-side failure.
+	return http.StatusInternalServerError
+}
+
+// IsErrorType reports whether err is, or wraps, a PlatformError whose type
+// equals errorType. It returns false for a nil err and for errors that
+// contain no PlatformError.
+func IsErrorType(err error, errorType ErrorType) bool {
+	// errors.As already returns false for a nil err.
+	var found *PlatformError
+	return errors.As(err, &found) && found.Type == errorType
+}
+
+// LogError writes err to logger at error level as a structured event.
+//
+// The event always carries the UUID, type, layer and timestamp. The request ID
+// is added only when non-empty, every Context entry is added under its own
+// key, and the wrapped cause is attached when present. The error's Message is
+// used as the log message. A nil err is ignored.
+func LogError(logger zerolog.Logger, err *PlatformError) {
+	if err == nil {
+		return
+	}
+
+	entry := logger.Error()
+	entry = entry.Str("error_uuid", err.UUID)
+	entry = entry.Str("error_type", string(err.Type))
+	entry = entry.Str("layer", string(err.Layer))
+	entry = entry.Time("timestamp_utc", err.Timestamp)
+
+	if requestID := err.RequestID; requestID != "" {
+		entry = entry.Str("request_id", requestID)
+	}
+
+	for key, value := range err.Context {
+		entry = entry.Interface(key, value)
+	}
+
+	if cause := err.Err; cause != nil {
+		entry = entry.Err(cause)
+	}
+
+	entry.Msg(err.Message)
+}
diff --git a/services/media-api/migrations/000001_init_schema.down.sql b/services/media-api/migrations/000001_init_schema.down.sql
new file mode 100644
index 00000000..5cdc8373
--- /dev/null
+++ b/services/media-api/migrations/000001_init_schema.down.sql
@@ -0,0 +1,5 @@
+-- Drop tables
+DROP TABLE IF EXISTS media_api.media_objects CASCADE;
+
+-- Drop schema
+DROP SCHEMA IF EXISTS media_api CASCADE;
diff --git a/services/media-api/migrations/000001_init_schema.up.sql b/services/media-api/migrations/000001_init_schema.up.sql
new file mode 100644
index 00000000..23bdc282
--- /dev/null
+++ b/services/media-api/migrations/000001_init_schema.up.sql
@@ -0,0 +1,26 @@
+-- Create schema
+CREATE SCHEMA IF NOT EXISTS media_api;
+
+-- Set search path to media_api schema
+SET search_path TO media_api;
+
+-- ============================================================================
+-- MEDIA OBJECTS
+-- One row per stored media object. Only created_by and retention_until are
+-- nullable; created_at and updated_at default to the insertion time.
+-- NOTE(review): mime_type is capped at 64 characters — confirm that no MIME
+-- type accepted by the service is longer (some vendor types exceed 64).
+-- ============================================================================
+CREATE TABLE media_api.media_objects (
+    id VARCHAR(40) PRIMARY KEY,
+    storage_provider VARCHAR(32) NOT NULL,
+    storage_key VARCHAR(255) NOT NULL,
+    mime_type VARCHAR(64) NOT NULL,
+    bytes BIGINT NOT NULL,
+    sha256 CHAR(64) NOT NULL,
+    created_by VARCHAR(64),
+    retention_until TIMESTAMPTZ,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+CREATE UNIQUE INDEX idx_media_objects_sha256 ON media_api.media_objects(sha256);
+CREATE INDEX idx_media_objects_created_by ON media_api.media_objects(created_by);
+CREATE INDEX idx_media_objects_created_at ON media_api.media_objects(created_at);
+CREATE INDEX idx_media_objects_retention_until ON media_api.media_objects(retention_until) WHERE retention_until IS NOT NULL;
diff --git a/services/media-api/migrations/migrations.go b/services/media-api/migrations/migrations.go
new file mode 100644
index 00000000..91cca1c3
--- /dev/null
+++ b/services/media-api/migrations/migrations.go
@@ -0,0 +1,6 @@
+package migrations
+
+import "embed"
+
+// FS holds every *.sql file of this directory, embedded into the binary at
+// build time.
+//
+//go:embed *.sql
+var FS embed.FS
diff --git a/services/media-api/utils/mediaid/mediaid.go b/services/media-api/utils/mediaid/mediaid.go
new file mode 100644
index 00000000..a6f0acc2
--- /dev/null
+++ b/services/media-api/utils/mediaid/mediaid.go
@@ -0,0 +1,46 @@
+package mediaid
+
+import (
+	"math/rand"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/oklog/ulid/v2"
+)
+
+var (
+	entropyOnce sync.Once
+	// entropyMu serializes use of entropy: neither ulid.MonotonicEntropy nor
+	// the *rand.Rand behind it is safe for concurrent use.
+	entropyMu sync.Mutex
+	entropy   *ulid.MonotonicEntropy
+)
+
+// newEntropy returns the process-wide monotonic entropy source, creating it on
+// first use from a math/rand generator seeded with the current time.
+// Callers must hold entropyMu while reading from the returned source.
+func newEntropy() *ulid.MonotonicEntropy {
+	entropyOnce.Do(func() {
+		source := rand.NewSource(time.Now().UnixNano())
+		entropy = ulid.Monotonic(rand.New(source), 0)
+	})
+	return entropy
+}
+
+// New returns a jan_* ULID string: the prefix "jan_" followed by a lower-cased
+// ULID for the current time. It is safe to call from multiple goroutines.
+//
+// Like ulid.MustNew, it panics if the entropy source fails.
+// NOTE(review): the entropy comes from math/rand, so IDs are unique but not
+// unpredictable — confirm that is acceptable for media IDs.
+func New() string {
+	entropyMu.Lock()
+	// Deferred so the lock is released even if MustNew panics.
+	defer entropyMu.Unlock()
+
+	id := ulid.MustNew(ulid.Timestamp(time.Now()), newEntropy())
+	return "jan_" + strings.ToLower(id.String())
+}
+
+// IsValid reports whether value is exactly "jan_" followed by a well-formed
+// ULID.
+//
+// The ULID part is checked with ulid.ParseStrict, so a suffix of the right
+// length that contains characters outside the ULID alphabet is rejected
+// (ulid.Parse does not validate characters). Unlike Parse, this function does
+// not tolerate surrounding whitespace or an extra "JAN_" prefix.
+func IsValid(value string) bool {
+	if !strings.HasPrefix(value, "jan_") {
+		return false
+	}
+	_, err := ulid.ParseStrict(strings.TrimPrefix(value, "jan_"))
+	return err == nil
+}
+
+// Parse trims surrounding whitespace, strips a leading "jan_" and then a
+// leading "JAN_" prefix if present, and parses the remainder as a ULID.
+func Parse(value string) (ulid.ULID, error) {
+	trimmed := strings.TrimSpace(value)
+	for _, prefix := range []string{"jan_", "JAN_"} {
+		trimmed = strings.TrimPrefix(trimmed, prefix)
+	}
+	return ulid.Parse(trimmed)
+}
diff --git a/services/memory-tools/Dockerfile b/services/memory-tools/Dockerfile
new file mode 100644
index 00000000..5818c202
--- /dev/null
+++ b/services/memory-tools/Dockerfile
@@ -0,0 +1,44 @@
+# Build stage: compiles the memory-tools binary with the Go toolchain.
+# GO_VERSION can be overridden with --build-arg.
+ARG GO_VERSION=1.25
+
+FROM golang:${GO_VERSION}-alpine AS builder
+
+WORKDIR /app
+
+# Install build dependencies
+RUN apk add --no-cache git
+
+# Copy go mod files first so the module download layer is cached until
+# go.mod/go.sum change
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Copy source code
+COPY . .
+
+# Build the binary with cgo disabled
+RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o memory-tools ./cmd/server
+
+# Final stage: runtime image containing the binary, config and migrations
+# NOTE(review): alpine:latest is not pinned, so rebuilds may pick up a
+# different base image — consider pinning a version.
+FROM alpine:latest
+
+# Install runtime dependencies (curl is used by the health check below)
+RUN apk --no-cache add ca-certificates curl
+
+WORKDIR /app
+
+# Copy the binary from builder
+COPY --from=builder /app/memory-tools .
+
+# Copy config and migrations
+COPY config ./config
+COPY migrations ./migrations
+
+# Expose port
+EXPOSE 8090
+
+# Health check: fails when /healthz does not answer successfully within 3s
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+  CMD curl -fsS http://localhost:8090/healthz || exit 1
+
+# Run the binary
+CMD ["./memory-tools"]
diff --git a/services/memory-tools/INTERNAL_FEATURES.md b/services/memory-tools/INTERNAL_FEATURES.md
new file mode 100644
index 00000000..9b319b5f
--- /dev/null
+++ b/services/memory-tools/INTERNAL_FEATURES.md
@@ -0,0 +1,282 @@
+# Memory Tools - Internal Features Documentation
+
+This document describes features that are implemented internally but not exposed as HTTP endpoints.
+
+## Overview
+
+The memory-tools service has several advanced features that are used internally by the service logic but are not directly accessible via REST API. These features are designed to be building blocks for future functionality or are already integrated into existing endpoints.
+
+## Internal-Only Features
+
+### 1. LLM-Based Conversation Summarization
+
+**Location**: `internal/domain/memory/summarization.go`
+
+**Description**: Provides automatic conversation summarization using an LLM to extract:
+- Dialogue summaries (2-3 sentences)
+- Open tasks and action items
+- Entities (people, systems, tools)
+- Decisions and conclusions
+
+**Usage**: Intended for use inside the `Observe` endpoint, to enable automatic summarization in the future.
+
+**Key Components**:
+```go
+type Summarizer struct {
+    config SummarizerConfig
+    llm    LLMClient
+}
+
+func (s *Summarizer) Summarize(ctx context.Context, messages []ConversationItem, previousSummary *ConversationSummary) (*SummarizationResult, error)
+func (s *Summarizer) MergeSummaries(previous *ConversationSummary, new *SummarizationResult) *ConversationSummary
+```
+
+**Configuration**:
+- `TriggerEveryN`: Summarize every N messages (default: 10)
+- `TriggerInterval`: Or every X duration (default: 5 minutes)
+- `MaxWindowSize`: Max messages per summary (default: 50)
+- `Temperature`: LLM temperature (default: 0.3)
+- `Model`: LLM model to use (default: gpt-4)
+
+**Why Internal**: Requires LLM integration and configuration that may not be available in all deployments.
+
+---
+
+### 2. LLM-Based Memory Action Planning
+
+**Location**: `internal/domain/action/planner.go`
+
+**Description**: Uses LLM to analyze conversations and intelligently decide what to store in memory, with:
+- Automatic memory extraction from natural language
+- Conflict detection with existing memories
+- Importance level assignment
+- Memory type classification (user/project/episodic)
+
+**Usage**: Can be integrated into `Observe` endpoint to replace simple pattern matching.
+
+**Key Components**:
+```go
+type Planner struct {
+    scorer *Scorer
+    llm    LLMClient
+    config PlannerConfig
+}
+
+func (p *Planner) PlanActions(ctx context.Context, req memory.MemoryObserveRequest, existingMemory *ExistingMemoryContext) (*memory.MemoryAction, error)
+```
+
+**Fallback**: Includes heuristic-based planning if LLM is unavailable.
+
+**Why Internal**: 
+- Requires LLM API configuration
+- Currently uses simple pattern matching in production
+- Can be enabled via configuration when LLM service is available
+
+---
+
+### 3. Advanced Importance Scoring
+
+**Location**: `internal/domain/action/scorer.go`
+
+**Description**: Analyzes text content to automatically determine importance levels based on:
+- Keyword detection (must, required, critical, security, etc.)
+- Scope and context analysis
+- Confidence scoring for project facts
+
+**Usage**: Used internally by memory upsert operations.
+
+**Key Components**:
+```go
+type Scorer struct{}
+
+func (s *Scorer) ScoreImportance(importance string) int
+func (s *Scorer) AnalyzeTextImportance(text string) string
+func (s *Scorer) ScoreUserMemoryItem(item *memory.UserMemoryItemInput) int
+func (s *Scorer) ScoreProjectFact(fact *memory.ProjectFactInput) float32
+```
+
+**Scoring Levels**:
+- Critical: 5 (security, API keys, passwords, must, required)
+- High: 4 (important, should, prefer, decision, requirement)
+- Medium: 3 (default)
+- Low: 2 (maybe, might, consider, optional)
+- Minimal: 1
+
+**Why Internal**: Automatically applied during memory storage, no need for direct API access.
+
+---
+
+### 4. BGE-M3 Sparse Embeddings
+
+**Location**: `internal/domain/embedding/client.go`
+
+**Description**: Supports BGE-M3 sparse vector embeddings in addition to dense embeddings for hybrid search capabilities.
+
+**Usage**: Internal to embedding client, not currently used in search.
+
+**Key Components**:
+```go
+type SparseEmbedding struct {
+    Indices []int     `json:"indices"`
+    Values  []float32 `json:"values"`
+}
+
+func (c *BGE_M3_Client) EmbedSparse(ctx context.Context, texts []string) ([]SparseEmbedding, error)
+```
+
+**Why Internal**: 
+- Requires BGE-M3 service with sparse embedding support
+- Not currently integrated into search ranking
+- Reserved for future hybrid search implementation
+
+---
+
+### 5. Multi-Vector Search Ranking
+
+**Location**: `internal/domain/search/ranking.go`
+
+**Description**: Combines dense, sparse, and colbert vectors for hybrid search ranking.
+
+**Usage**: Framework exists for future implementation.
+
+**Key Components**:
+```go
+type HybridRanker struct {
+    denseWeight   float32
+    sparseWeight  float32
+    colbertWeight float32
+}
+```
+
+**Default Weights**:
+- Dense: 0.7
+- Sparse: 0.2
+- Colbert: 0.1
+
+**Why Internal**: Awaiting full BGE-M3 integration with sparse/colbert support.
+
+---
+
+### 6. Vector Search Engine
+
+**Location**: `internal/domain/search/vector_search.go`
+
+**Description**: Advanced vector search with filtering, boosting, and hybrid ranking.
+
+**Usage**: Used internally by `Load` endpoint.
+
+**Why Internal**: Abstracted behind the `/v1/memory/load` API.
+
+---
+
+## Integration Status
+
+| Feature | Status | Exposed via API | Notes |
+|---------|--------|-----------------|-------|
+| **Summarization** | ✅ Implemented | ❌ No | Requires LLM integration |
+| **LLM Planner** | ✅ Implemented | ❌ No | Optional, has heuristic fallback |
+| **Importance Scoring** | ✅ Active | ✅ Indirect | Auto-applied in upsert |
+| **Sparse Embeddings** | ✅ Implemented | ❌ No | Awaiting BGE-M3 service |
+| **Hybrid Ranking** | 🔄 Partial | ❌ No | Framework ready |
+| **Vector Search** | ✅ Active | ✅ Yes | Via `/v1/memory/load` |
+
+## Testing Internal Features
+
+These features can be tested through:
+
+1. **Unit Tests**: Test each component in isolation
+2. **Integration Tests**: Test via the endpoints that use them
+3. **Direct Imports**: Import packages in Go test files
+
+### Example: Testing Summarization
+```go
+import "github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+
+func TestSummarization(t *testing.T) {
+    summarizer := memory.NewSummarizer(config, llmClient)
+    result, err := summarizer.Summarize(ctx, messages, nil)
+    // assertions...
+}
+```
+
+### Example: Testing via API
+```bash
+# Observe endpoint uses internal memory extraction
+curl -X POST http://localhost:8090/v1/memory/observe \
+  -H "Content-Type: application/json" \
+  -d '{
+    "user_id": "test_user",
+    "conversation_id": "test_conv",
+    "messages": [...]
+  }'
+
+# Load endpoint uses internal vector search
+curl -X POST http://localhost:8090/v1/memory/load \
+  -H "Content-Type: application/json" \
+  -d '{
+    "user_id": "test_user",
+    "query": "programming preferences"
+  }'
+```
+
+## Future Roadmap
+
+### Planned Endpoint Additions
+
+1. **POST /v1/memory/summarize**
+   - Explicit conversation summarization
+   - Input: conversation_id or messages
+   - Output: SummarizationResult
+
+2. **POST /v1/memory/extract**
+   - LLM-based memory extraction
+   - Input: conversation text
+   - Output: Suggested memory items
+
+3. **GET /v1/memory/conflicts**
+   - Detect conflicting memories
+   - Input: user_id, optional query
+   - Output: List of conflicts
+
+4. **POST /v1/embedding/sparse**
+   - Expose sparse embedding generation
+   - Input: texts array
+   - Output: Sparse vectors
+
+### Configuration Requirements
+
+To enable advanced features:
+
+```yaml
+# config.yaml
+memory_tools:
+  llm:
+    enabled: true
+    endpoint: "http://llm-service:8080"
+    model: "gpt-4"
+    temperature: 0.3
+  
+  embedding:
+    enable_sparse: true
+    enable_colbert: true
+  
+  summarization:
+    enabled: true
+    trigger_every_n: 10
+    trigger_interval: "5m"
+```
+
+## Contributing
+
+When adding new internal features:
+
+1. Document them in this file
+2. Include usage examples
+3. Explain why they're internal-only
+4. Provide roadmap for potential API exposure
+5. Add unit tests in the feature's package
+6. Add integration tests via existing APIs where applicable
+
+## Questions?
+
+See the main [README.md](./README.md) for general service documentation or check the [API documentation](../../docs/api/README.md).
diff --git a/services/memory-tools/Makefile b/services/memory-tools/Makefile
new file mode 100644
index 00000000..7e48d439
--- /dev/null
+++ b/services/memory-tools/Makefile
@@ -0,0 +1,18 @@
+.PHONY: fmt
+fmt:
+	@go fmt ./...
+
+.PHONY: test
+test:
+	@go test ./...
+
+.PHONY: tidy
+tidy:
+	@go mod tidy
+
+.PHONY: run
+run:
+	@go run ./cmd/server
+
+.PHONY: check
+check: fmt test
diff --git a/services/memory-tools/README.md b/services/memory-tools/README.md
new file mode 100644
index 00000000..4ead031a
--- /dev/null
+++ b/services/memory-tools/README.md
@@ -0,0 +1,240 @@
+# Memory Tools Service
+
+The Memory Tools service provides semantic memory capabilities for Jan Server using BGE-M3 embeddings.
+
+## Features
+
+- **BGE-M3 Integration**: Dense (1024-dimensional) and sparse embeddings
+- **Caching Layer**: Redis, in-memory, or no-cache options
+- **Batch Processing**: Efficient batch embedding (up to 32 items)
+- **Circuit Breaker**: Fault tolerance for embedding service failures
+- **Multi-language Support**: 100+ languages including English, Vietnamese, Chinese
+
+## Architecture
+
+```
+┌─────────────┐
+│ Memory Tools│
+│  Service    │
+└──────┬──────┘
+       │
+       │ HTTP Client
+       ▼
+┌──────────────┐
+│ BGE-M3       │
+│ Embedding    │
+│ Service      │
+└──────────────┘
+```
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Description | Default | Required |
+|----------|-------------|---------|----------|
+| `DB_POSTGRESQL_WRITE_DSN` | PostgreSQL connection string for write operations | - | Yes |
+| `DB_POSTGRESQL_READ1_DSN` | PostgreSQL connection string for read replica (optional) | - | No |
+| `EMBEDDING_SERVICE_URL` | URL of BGE-M3 embedding service | - | Yes |
+| `EMBEDDING_SERVICE_API_KEY` | API key for embedding service | - | No |
+| `EMBEDDING_SERVICE_TIMEOUT` | Request timeout | `30s` | No |
+| `EMBEDDING_CACHE_TYPE` | Cache type: `redis`, `memory`, `noop` | `redis` | No |
+| `EMBEDDING_CACHE_REDIS_URL` | Redis connection URL | `redis://redis:6379/3` | If cache=redis |
+| `EMBEDDING_CACHE_TTL` | Cache TTL | `1h` | No |
+| `EMBEDDING_CACHE_MAX_SIZE` | Max cache size (memory only) | `10000` | If cache=memory |
+| `MEMORY_TOOLS_PORT` | HTTP port | `8090` | No |
+
+### Example Configurations
+
+#### Production (with Redis cache)
+```bash
+DB_POSTGRESQL_WRITE_DSN=postgres://user:password@db-host:5432/jan_llm_api?sslmode=require
+# DB_POSTGRESQL_READ1_DSN=postgres://user:password@db-replica:5432/jan_llm_api?sslmode=require
+EMBEDDING_SERVICE_URL=http://bge-m3-service:8091
+EMBEDDING_CACHE_TYPE=redis
+EMBEDDING_CACHE_REDIS_URL=redis://redis:6379/3
+MEMORY_TOOLS_PORT=8090
+```
+
+#### Development (in-memory cache)
+```bash
+DB_POSTGRESQL_WRITE_DSN=postgres://jan_user:jan_password@localhost:5432/jan_llm_api?sslmode=disable
+EMBEDDING_SERVICE_URL=http://localhost:8091
+EMBEDDING_CACHE_TYPE=memory
+EMBEDDING_CACHE_MAX_SIZE=5000
+MEMORY_TOOLS_PORT=8090
+```
+
+#### Testing (no cache)
+```bash
+DB_POSTGRESQL_WRITE_DSN=postgres://jan_user:jan_password@localhost:5432/jan_llm_api?sslmode=disable
+EMBEDDING_SERVICE_URL=http://localhost:8091
+EMBEDDING_CACHE_TYPE=noop
+MEMORY_TOOLS_PORT=8090
+```
+
+## Running the Service
+
+### Local Development
+
+```bash
+# Set environment variables
+export DB_POSTGRESQL_WRITE_DSN=postgres://jan_user:jan_password@localhost:5432/jan_llm_api?sslmode=disable
+export EMBEDDING_SERVICE_URL=http://localhost:8091
+export EMBEDDING_CACHE_TYPE=memory
+
+# Run the service
+cd services/memory-tools
+go run ./cmd/server
+```
+
+### Docker
+
+```bash
+# Build the image
+docker build -t memory-tools:latest .
+
+# Run the container
+docker run -p 8090:8090 \
+  -e DB_POSTGRESQL_WRITE_DSN=postgres://jan_user:jan_password@api-db:5432/jan_llm_api?sslmode=disable \
+  -e EMBEDDING_SERVICE_URL=http://bge-m3:8091 \
+  -e EMBEDDING_CACHE_TYPE=redis \
+  -e EMBEDDING_CACHE_REDIS_URL=redis://redis:6379/3 \
+  memory-tools:latest
+```
+
+### Docker Compose
+
+```bash
+# Start all services including memory-tools
+docker-compose --profile memory up -d
+```
+
+## API Endpoints
+
+### Health Check
+```bash
+GET /healthz
+```
+
+**Response:**
+```json
+{
+  "status": "healthy",
+  "service": "memory-tools"
+}
+```
+
+### Test Embedding
+```bash
+POST /v1/embed/test
+```
+
+**Response:**
+```json
+{
+  "dimension": 1024,
+  "status": "ok"
+}
+```
+
+## Testing
+
+### Unit Tests
+```bash
+cd services/memory-tools
+go test ./...
+```
+
+### Integration Tests (jan-cli api-test)
+```bash
+# Make sure services are running
+docker-compose --profile memory up -d
+
+# Run tests
+jan-cli api-test run tests/automation/bge-m3-integration.postman_collection.json \
+  --env-var "embedding_service_url=http://localhost:8091"
+```
+
+## Performance
+
+### Expected Latencies (with GPU)
+
+| Operation | Target (p95) | Expected |
+|-----------|-------------|----------|
+| Single embed (cache miss) | 50ms | 30-40ms |
+| Single embed (cache hit) | 5ms | 1-2ms |
+| Batch embed (32 items) | 200ms | 150-180ms |
+
+### Cache Hit Rates
+
+- **MVP**: 30-40%
+- **Production**: 70-80%
+
+### Throughput
+
+- **T4 GPU**: 50-100 embeddings/sec
+- **A10G GPU**: 200-300 embeddings/sec
+- **A100 GPU**: 500+ embeddings/sec
+
+## Deployment Options
+
+### Option 1: External Embedding Service (Recommended)
+
+Users provide their own BGE-M3 inference server URL.
+
+**Pros:**
+- ✅ No GPU requirements for Jan Server
+- ✅ Users choose their own infrastructure
+- ✅ Easy to scale independently
+
+**Cons:**
+- ⚠️ Requires users to deploy BGE-M3 separately
+- ⚠️ Network latency if server is remote
+
+### Option 2: Self-Hosted with Jan Server
+
+Jan Server includes BGE-M3 deployment.
+
+**Pros:**
+- ✅ All-in-one deployment
+- ✅ No external dependencies
+
+**Cons:**
+- ⚠️ Requires GPU infrastructure
+- ⚠️ Higher infrastructure costs
+
+## Troubleshooting
+
+### Embedding service not healthy
+
+```bash
+# Check if embedding service is running
+curl http://localhost:8091/health
+
+# Check embedding service logs
+docker logs bge-m3-service
+```
+
+### Cache connection failed
+
+```bash
+# Check Redis connection
+redis-cli -u redis://redis:6379/3 ping
+
+# Switch to in-memory cache
+export EMBEDDING_CACHE_TYPE=memory
+```
+
+### High latency
+
+1. Check cache hit rate in logs
+2. Verify GPU is being used (if applicable)
+3. Consider increasing batch size
+4. Check network latency to embedding service
+
+## References
+
+- [BGE-M3 Model Card](https://huggingface.co/BAAI/bge-m3)
+- [Text Embeddings Inference](https://github.com/huggingface/text-embeddings-inference)
+- [Integration TODO](../../docs/todos/bge-m3-integration.md)
diff --git a/services/memory-tools/cmd/server/application.go b/services/memory-tools/cmd/server/application.go
new file mode 100644
index 00000000..439835ff
--- /dev/null
+++ b/services/memory-tools/cmd/server/application.go
@@ -0,0 +1,197 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"net/http"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+
+	"database/sql"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/configs"
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/embedding"
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/janhq/jan-server/services/memory-tools/internal/infrastructure/database/repository/memoryrepo"
+	"github.com/janhq/jan-server/services/memory-tools/internal/interfaces/httpserver/handlers"
+	"github.com/janhq/jan-server/services/memory-tools/internal/interfaces/httpserver/middleware"
+	"github.com/janhq/jan-server/services/memory-tools/internal/interfaces/httpserver/responses"
+	"github.com/rs/zerolog/log"
+	"gorm.io/driver/postgres"
+	"gorm.io/gorm"
+)
+
+type Application struct {
+	server *http.Server // HTTP server configured in newApplication and run by Start
+	db     *gorm.DB     // GORM handle opened from the write DSN
+	sqlDB  *sql.DB      // connection pool obtained from db.DB(); closed by Start
+}
+// newApplication connects to PostgreSQL, applies the SQL migrations, builds the embedding client and wires the HTTP server.
+func newApplication(cfg *configs.Config) (*Application, error) {
+	ctx := context.Background()
+
+	db, err := gorm.Open(postgres.Open(cfg.GetDatabaseWriteDSN()), &gorm.Config{})
+	if err != nil {
+		return nil, fmt.Errorf("connect database: %w", err)
+	}
+
+	sqlDB, err := db.DB()
+	if err != nil {
+		return nil, fmt.Errorf("database handle: %w", err)
+	}
+	// Ping through database/sql: gorm's Raw() only builds a statement and never executes it on its own.
+	if err := sqlDB.PingContext(ctx); err != nil {
+		return nil, fmt.Errorf("ping database: %w", err)
+	}
+	log.Info().Msg("Database connection established")
+
+	if err := runMigrations(ctx, db, cfg.MigrationsDir); err != nil {
+		return nil, err
+	}
+	log.Info().Msg("Database migrations applied")
+
+	cacheConfig := embedding.CacheConfig{
+		Type:      cfg.EmbeddingCacheType,
+		RedisURL:  cfg.EmbeddingCacheRedisURL,
+		KeyPrefix: cfg.EmbeddingCacheKeyPrefix,
+		MaxSize:   cfg.EmbeddingCacheMaxSize,
+		TTL:       cfg.EmbeddingCacheTTL,
+	}
+
+	embeddingClient, err := embedding.NewBGE_M3_Client(cfg.EmbeddingServiceURL, cacheConfig)
+	if err != nil {
+		return nil, fmt.Errorf("create embedding client: %w", err)
+	}
+
+	if cfg.ValidateEmbedding {
+		validateCtx, cancel := context.WithTimeout(ctx, cfg.ValidateEmbeddingTimeout)
+		defer cancel()
+
+		if err := embeddingClient.ValidateServer(validateCtx); err != nil {
+			return nil, fmt.Errorf("validate embedding server: %w", err)
+		}
+		log.Info().Msg("Embedding server validated successfully")
+	}
+
+	repo := memoryrepo.NewRepository(db)
+	memoryService := memory.NewService(repo, embeddingClient)
+	memoryHandler := handlers.NewMemoryHandler(memoryService)
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/healthz", memoryHandler.HandleHealth)
+	mux.HandleFunc("/v1/memory/load", memoryHandler.HandleLoad)
+	mux.HandleFunc("/v1/memory/observe", memoryHandler.HandleObserve)
+	mux.HandleFunc("/v1/memory/stats", memoryHandler.HandleStats)
+	mux.HandleFunc("/v1/memory/export", memoryHandler.HandleExport)
+	mux.HandleFunc("/v1/memory/user/upsert", memoryHandler.HandleUserUpsert)
+	mux.HandleFunc("/v1/memory/project/upsert", memoryHandler.HandleProjectUpsert)
+	mux.HandleFunc("/v1/memory/delete", memoryHandler.HandleDelete)
+	// Smoke-test route: embeds a fixed string via POST and reports the resulting vector dimension.
+	mux.HandleFunc("/v1/embed/test", func(w http.ResponseWriter, r *http.Request) {
+		logger := log.Ctx(r.Context())
+		if logger == nil {
+			logger = &log.Logger
+		}
+
+		if r.Method != http.MethodPost {
+			responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+			return
+		}
+
+		emb, err := embeddingClient.EmbedSingle(r.Context(), "test query")
+		if err != nil {
+			logger.Error().Err(err).Msg("Failed to embed test query")
+			responses.Error(w, r, http.StatusInternalServerError, "failed to embed test query")
+			return
+		}
+
+		responses.JSON(w, r, http.StatusOK, map[string]interface{}{
+			"dimension": len(emb),
+			"status":    "ok",
+		})
+	})
+	// Wrapping order: the request-ID middleware is outermost, then auth, then the timeout around the mux.
+	handler := middleware.TimeoutMiddleware(cfg.RequestTimeout)(mux)
+	handler = middleware.AuthMiddleware(cfg.APIKey)(handler)
+	handler = middleware.RequestIDMiddleware()(handler)
+
+	server := &http.Server{
+		Addr:         fmt.Sprintf(":%d", cfg.HTTPPort),
+		Handler:      handler,
+		ReadTimeout:  cfg.RequestTimeout,
+		WriteTimeout: cfg.RequestTimeout,
+		IdleTimeout:  cfg.IdleTimeout,
+	}
+
+	return &Application{
+		server: server,
+		db:     db,
+		sqlDB:  sqlDB,
+	}, nil
+}
+// Start serves HTTP until ctx is cancelled or the listener fails, then shuts the server down and closes the DB pool.
+func (a *Application) Start(ctx context.Context) error {
+	log.Info().Msg("Starting Memory Tools Service")
+
+	// Deferred so the pool is also released when listening or shutdown fails.
+	if a.sqlDB != nil {
+		defer func() { _ = a.sqlDB.Close() }()
+	}
+
+	errCh := make(chan error, 1) // buffered so the goroutine can report a failure without blocking
+	go func() {
+		log.Info().Str("addr", a.server.Addr).Msg("Memory Tools Service listening")
+		if err := a.server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+			errCh <- err
+		}
+	}()
+
+	select {
+	case err := <-errCh:
+		return err
+	case <-ctx.Done():
+		log.Info().Msg("Shutdown signal received")
+	}
+
+	shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	if err := a.server.Shutdown(shutdownCtx); err != nil {
+		return fmt.Errorf("shutdown server: %w", err)
+	}
+	log.Info().Msg("Server exited")
+	return nil
+}
+// runMigrations executes every *.sql file found directly in dir, in ascending file-name order.
+func runMigrations(ctx context.Context, db *gorm.DB, dir string) error {
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return fmt.Errorf("read migrations directory: %w", err)
+	}
+
+	sort.Slice(entries, func(i, j int) bool {
+		return entries[i].Name() < entries[j].Name()
+	})
+	// Nothing in this function records which files already ran, so each call re-executes all of them.
+	for _, entry := range entries {
+		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".sql") {
+			continue
+		}
+
+		path := filepath.Join(dir, entry.Name())
+		sqlBytes, err := os.ReadFile(path)
+		if err != nil {
+			return fmt.Errorf("read migration %s: %w", entry.Name(), err)
+		}
+
+		log.Info().Str("migration", entry.Name()).Msg("Applying migration")
+		if err := db.WithContext(ctx).Exec(string(sqlBytes)).Error; err != nil {
+			return fmt.Errorf("apply migration %s: %w", entry.Name(), err)
+		}
+	}
+
+	return nil
+}
diff --git a/services/memory-tools/cmd/server/server.go b/services/memory-tools/cmd/server/server.go
new file mode 100644
index 00000000..f2e583b7
--- /dev/null
+++ b/services/memory-tools/cmd/server/server.go
@@ -0,0 +1,42 @@
+package main
+
+import (
+	"context"
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/configs"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+// main loads the configuration, sets up zerolog, builds the application and runs it until SIGINT or SIGTERM.
+func main() {
+	zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
+
+	cfg, err := configs.Load()
+	if err != nil {
+		log.Fatal().Err(err).Msg("load config")
+	}
+
+	level, err := zerolog.ParseLevel(cfg.LogLevel)
+	if err != nil {
+		level = zerolog.InfoLevel // an unparseable LOG_LEVEL falls back to info instead of aborting
+	}
+	zerolog.SetGlobalLevel(level)
+	if cfg.LogFormat == "console" { // any other value keeps the default global logger output
+		log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
+	}
+
+	app, err := CreateApplication(cfg)
+	if err != nil {
+		log.Fatal().Err(err).Msg("create application")
+	}
+	// ctx is cancelled on SIGINT/SIGTERM and handed to Start as its stop signal.
+	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+	defer stop()
+
+	if err := app.Start(ctx); err != nil && err != context.Canceled {
+		log.Fatal().Err(err).Msg("application exited with error")
+	}
+}
diff --git a/services/memory-tools/cmd/server/wire.go b/services/memory-tools/cmd/server/wire.go
new file mode 100644
index 00000000..b7d1076f
--- /dev/null
+++ b/services/memory-tools/cmd/server/wire.go
@@ -0,0 +1,14 @@
+//go:build wireinject
+// +build wireinject
+
+package main
+
+import (
+	"github.com/google/wire"
+	"github.com/janhq/jan-server/services/memory-tools/internal/configs"
+)
+// CreateApplication is the Wire injector template; this file is only compiled under the wireinject build tag.
+func CreateApplication(cfg *configs.Config) (*Application, error) {
+	wire.Build(newApplication)
+	return nil, nil // placeholder return values for the injector template
+}
diff --git a/services/memory-tools/cmd/server/wire_gen.go b/services/memory-tools/cmd/server/wire_gen.go
new file mode 100644
index 00000000..5f20b541
--- /dev/null
+++ b/services/memory-tools/cmd/server/wire_gen.go
@@ -0,0 +1,14 @@
+// Code generated by Wire. DO NOT EDIT.
+
+//go:build !wireinject
+// +build !wireinject
+
+package main
+
+import "github.com/janhq/jan-server/services/memory-tools/internal/configs"
+
+// Injectors from wire.go:
+
+func CreateApplication(cfg *configs.Config) (*Application, error) {
+	return newApplication(cfg)
+}
diff --git a/services/memory-tools/config/config.yaml b/services/memory-tools/config/config.yaml
new file mode 100644
index 00000000..58428946
--- /dev/null
+++ b/services/memory-tools/config/config.yaml
@@ -0,0 +1,86 @@
+# Memory Tools Service Configuration
+
+# Service settings
+service:
+  name: memory-tools
+  port: 8090
+  log_level: info
+  log_format: json
+
+# Database configuration
+database:
+  url: postgres://jan_user:password@postgres:5432/jan_memory?sslmode=disable
+  max_connections: 50
+  max_idle_connections: 10
+  connection_max_lifetime: 30m
+
+# Embedding service configuration
+embedding:
+  base_url: http://bge-m3-service:8091
+  api_key: ""
+  timeout: 30s
+  validate_on_startup: true
+  expected_model: BAAI/bge-m3
+  expected_dimension: 1024
+  
+  # Retry configuration
+  retry:
+    enabled: true
+    max_attempts: 3
+    initial_backoff: 1s
+    max_backoff: 10s
+  
+  # Cache configuration
+  cache:
+    enabled: true
+    type: redis  # redis, memory, noop
+    
+    redis:
+      url: redis://redis:6379/3
+      key_prefix: "emb:"
+      ttl: 1h
+    
+    memory:
+      max_size: 10000
+      ttl: 1h
+  
+  # Batch processing
+  batch:
+    enabled: true
+    max_size: 32
+    timeout: 5s
+  
+  # Circuit breaker
+  circuit_breaker:
+    enabled: true
+    threshold: 5
+    timeout: 30s
+    max_concurrent: 100
+
+# Memory configuration
+memory:
+  # Search settings
+  search:
+    default_limit: 20
+    min_similarity: 0.5
+    max_user_items: 20
+    max_project_items: 20
+    max_episodic_items: 20
+  
+  # Ranking weights
+  ranking:
+    dense_weight: 0.7
+    sparse_weight: 0.2
+    lexical_weight: 0.1
+  
+  # Episodic memory retention
+  episodic:
+    retention_days: 14
+    max_events_per_user: 1000
+
+# API settings
+api:
+  timeout: 30s
+  max_request_size: 10MB
+  enable_auth: false
+  api_key: ""
diff --git a/services/memory-tools/go.mod b/services/memory-tools/go.mod
new file mode 100644
index 00000000..3aa7e027
--- /dev/null
+++ b/services/memory-tools/go.mod
@@ -0,0 +1,34 @@
+module github.com/janhq/jan-server/services/memory-tools
+
+go 1.25.0
+
+require (
+	github.com/caarlos0/env/v10 v10.0.0
+	github.com/go-redsync/redsync/v4 v4.13.0
+	github.com/google/uuid v1.6.0
+	github.com/google/wire v0.7.0
+	github.com/hashicorp/golang-lru v1.0.2
+	github.com/jackc/pgx/v5 v5.7.6
+	github.com/redis/go-redis/v9 v9.7.0
+	github.com/rs/zerolog v1.34.0
+	gorm.io/driver/postgres v1.5.7
+	gorm.io/gorm v1.25.10
+)
+
+require (
+	github.com/cespare/xxhash/v2 v2.2.0 // indirect
+	github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
+	github.com/hashicorp/errwrap v1.1.0 // indirect
+	github.com/hashicorp/go-multierror v1.1.1 // indirect
+	github.com/jackc/pgpassfile v1.0.0 // indirect
+	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
+	github.com/jackc/puddle/v2 v2.2.2 // indirect
+	github.com/jinzhu/inflection v1.0.0 // indirect
+	github.com/jinzhu/now v1.1.5 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.19 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/sync v0.18.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+)
diff --git a/services/memory-tools/go.sum b/services/memory-tools/go.sum
new file mode 100644
index 00000000..c478030b
--- /dev/null
+++ b/services/memory-tools/go.sum
@@ -0,0 +1,89 @@
+github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
+github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
+github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
+github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
+github.com/caarlos0/env/v10 v10.0.0 h1:yIHUBZGsyqCnpTkbjk8asUlx6RFhhEs+h7TOBdgdzXA=
+github.com/caarlos0/env/v10 v10.0.0/go.mod h1:ZfulV76NvVPw3tm591U4SwL3Xx9ldzBP9aGxzeN7G18=
+github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
+github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
+github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
+github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg=
+github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA=
+github.com/go-redis/redis/v7 v7.4.1 h1:PASvf36gyUpr2zdOUS/9Zqc80GbM+9BDyiJSJDDOrTI=
+github.com/go-redis/redis/v7 v7.4.1/go.mod h1:JDNMw23GTyLNC4GZu9njt15ctBQVn7xjRfnwdHj/Dcg=
+github.com/go-redis/redis/v8 v8.11.5 h1:AcZZR7igkdvfVmQTPnu9WE37LRrO/YrBH5zWyjDC0oI=
+github.com/go-redis/redis/v8 v8.11.5/go.mod h1:gREzHqY1hg6oD9ngVRbLStwAWKhA0FEgq8Jd4h5lpwo=
+github.com/go-redsync/redsync/v4 v4.13.0 h1:49X6GJfnbLGaIpBBREM/zA4uIMDXKAh1NDkvQ1EkZKA=
+github.com/go-redsync/redsync/v4 v4.13.0/go.mod h1:HMW4Q224GZQz6x1Xc7040Yfgacukdzu7ifTDAKiyErQ=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/gomodule/redigo v1.8.9 h1:Sl3u+2BI/kk+VEatbj0scLdrFhjPmbxOc1myhDP41ws=
+github.com/gomodule/redigo v1.8.9/go.mod h1:7ArFNvsTjH8GMMzB4uy1snslv2BwmginuMs06a1uzZE=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
+github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
+github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
+github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
+github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c=
+github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.7.6 h1:rWQc5FwZSPX58r1OQmkuaNicxdmExaEz5A2DO2hUuTk=
+github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
+github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
+github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
+github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
+github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/redis/go-redis/v9 v9.7.0 h1:HhLSs+B6O021gwzl+locl0zEDnyNkxMtf/Z3NNBMa9E=
+github.com/redis/go-redis/v9 v9.7.0/go.mod h1:f6zhXITC7JUJIlPEiBOTXxJgPLdZcA93GewI7inzyWw=
+github.com/redis/rueidis v1.0.19 h1:s65oWtotzlIFN8eMPhyYwxlwLR1lUdhza2KtWprKYSo=
+github.com/redis/rueidis v1.0.19/go.mod h1:8B+r5wdnjwK3lTFml5VtxjzGOQAC+5UmujoD12pDrEo=
+github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
+github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
+github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203 h1:QVqDTf3h2WHt08YuiTGPZLls0Wq99X9bWd0Q5ZSBesM=
+github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203/go.mod h1:oqN97ltKNihBbwlX8dLpwxCl3+HnXKV/R0e+sRLd9C8=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorm.io/driver/postgres v1.5.7 h1:8ptbNJTDbEmhdr62uReG5BGkdQyeasu/FZHxI0IMGnM=
+gorm.io/driver/postgres v1.5.7/go.mod h1:3e019WlBaYI5o5LIdNV+LyxCMNtLOQETBXL2h4chKpA=
+gorm.io/gorm v1.25.10 h1:dQpO+33KalOA+aFYGlK+EfxcI5MbO7EP2yYygwh9h+s=
+gorm.io/gorm v1.25.10/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
diff --git a/services/memory-tools/internal/configs/config.go b/services/memory-tools/internal/configs/config.go
new file mode 100644
index 00000000..6b0cc1a7
--- /dev/null
+++ b/services/memory-tools/internal/configs/config.go
@@ -0,0 +1,70 @@
+package configs
+
+import (
+	"strings"
+	"time"
+
+	"github.com/caarlos0/env/v10"
+)
+
+var global *Config // assigned by Load, returned by GetGlobal
+// Config holds the service settings; Load fills it from the environment variables named in the `env` tags.
+type Config struct {
+	HTTPPort int `env:"MEMORY_TOOLS_PORT" envDefault:"8090"`
+
+	// Database - Read/Write Split (required, no default)
+	DBPostgresqlWriteDSN string `env:"DB_POSTGRESQL_WRITE_DSN,notEmpty"`
+	DBPostgresqlRead1DSN string `env:"DB_POSTGRESQL_READ1_DSN"` // Optional read replica
+	// Embedding service endpoint and embedding cache settings
+	EmbeddingServiceURL     string        `env:"EMBEDDING_SERVICE_URL" envDefault:"http://localhost:8091"`
+	EmbeddingCacheType      string        `env:"EMBEDDING_CACHE_TYPE" envDefault:"memory"`
+	EmbeddingCacheTTL       time.Duration `env:"EMBEDDING_CACHE_TTL" envDefault:"1h"`
+	EmbeddingCacheMaxSize   int           `env:"EMBEDDING_CACHE_MAX_SIZE" envDefault:"10000"`
+	EmbeddingCacheRedisURL  string        `env:"EMBEDDING_CACHE_REDIS_URL" envDefault:"redis://redis:6379/3"`
+	EmbeddingCacheKeyPrefix string        `env:"EMBEDDING_CACHE_KEY_PREFIX" envDefault:"emb:"`
+	// Optional check of the embedding server at startup, with its own timeout
+	ValidateEmbedding        bool          `env:"VALIDATE_EMBEDDING_ON_START" envDefault:"true"`
+	ValidateEmbeddingTimeout time.Duration `env:"VALIDATE_EMBEDDING_TIMEOUT" envDefault:"10s"`
+	// HTTP timeouts
+	RequestTimeout time.Duration `env:"REQUEST_TIMEOUT" envDefault:"30s"`
+	IdleTimeout    time.Duration `env:"IDLE_TIMEOUT" envDefault:"120s"`
+	// API key for the service; no default is declared
+	APIKey string `env:"MEMORY_TOOLS_API_KEY"`
+	// Logging; Load lower-cases and trims both values
+	LogLevel  string `env:"LOG_LEVEL" envDefault:"info"`
+	LogFormat string `env:"LOG_FORMAT" envDefault:"console"`
+	// Directory holding the SQL migration files
+	MigrationsDir string `env:"MIGRATIONS_DIR" envDefault:"migrations"`
+}
+// Load parses environment variables into a Config, normalizes LogLevel/LogFormat and stores the result as the global.
+func Load() (*Config, error) {
+	cfg := &Config{}
+	if err := env.Parse(cfg); err != nil {
+		return nil, err
+	}
+
+	cfg.LogLevel = strings.ToLower(strings.TrimSpace(cfg.LogLevel))
+	cfg.LogFormat = strings.ToLower(strings.TrimSpace(cfg.LogFormat))
+
+	global = cfg
+	return cfg, nil
+}
+// GetGlobal returns the Config stored by the last successful Load, or nil if Load has not succeeded yet.
+func GetGlobal() *Config {
+	return global
+}
+
+// GetDatabaseWriteDSN returns the write database connection string.
+func (c *Config) GetDatabaseWriteDSN() string {
+	return c.DBPostgresqlWriteDSN
+}
+
+// GetDatabaseReadDSN returns the read database connection string.
+// If DB_POSTGRESQL_READ1_DSN is set, it returns that.
+// Otherwise, falls back to write DSN (no replica configured).
+func (c *Config) GetDatabaseReadDSN() string {
+	if c.DBPostgresqlRead1DSN != "" {
+		return c.DBPostgresqlRead1DSN
+	}
+	return c.GetDatabaseWriteDSN()
+}
diff --git a/services/memory-tools/internal/domain/action/planner.go b/services/memory-tools/internal/domain/action/planner.go
new file mode 100644
index 00000000..4604de43
--- /dev/null
+++ b/services/memory-tools/internal/domain/action/planner.go
@@ -0,0 +1,501 @@
+package action
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/rs/zerolog/log"
+)
+
+// Planner handles memory action planning using LLM
+type Planner struct {
+	scorer *Scorer
+	llm    memory.LLMClient
+	config PlannerConfig
+}
+
+// PlannerConfig holds configuration for the planner
+type PlannerConfig struct {
+	Model           string
+	Temperature     float32
+	MaxTokens       int
+	UseHeuristics   bool // Fallback to heuristics if LLM fails
+	IncludeContext  bool // Include existing memory in prompt
+	DetectConflicts bool // Enable conflict detection
+}
+
+// LLMMemoryActionResponse represents the structured LLM response
+type LLMMemoryActionResponse struct {
+	Delete []string `json:"delete"`
+	Add    struct {
+		UserMemory    []memory.UserMemoryItemInput `json:"user_memory"`
+		ProjectMemory []memory.ProjectFactInput    `json:"project_memory"`
+		Episodic      []memory.EpisodicEventInput  `json:"episodic_memory"`
+	} `json:"add"`
+	Reasoning string `json:"reasoning,omitempty"`
+}
+
+// NewPlanner creates a new LLM-based memory action planner, filling zero-valued config fields with defaults
+func NewPlanner(llm memory.LLMClient, config PlannerConfig) *Planner {
+	// Zero values count as "unset", so an explicit Temperature of 0 is also replaced by 0.3
+	if config.Model == "" {
+		config.Model = "gpt-4"
+	}
+	if config.Temperature == 0 {
+		config.Temperature = 0.3
+	}
+	if config.MaxTokens == 0 {
+		config.MaxTokens = 2000
+	}
+
+	return &Planner{
+		scorer: NewScorer(),
+		llm:    llm,
+		config: config,
+	}
+}
+
+// PlanActions analyzes conversation and determines memory actions using LLM
+func (p *Planner) PlanActions(ctx context.Context, req memory.MemoryObserveRequest, existingMemory *ExistingMemoryContext) (*memory.MemoryAction, error) {
+	// Try LLM-based planning first
+	if p.llm != nil {
+		action, err := p.planWithLLM(ctx, req, existingMemory)
+		if err == nil {
+			return action, nil
+		}
+
+		log.Warn().Err(err).Msg("LLM-based planning failed, falling back to heuristics")
+	}
+
+	// Fallback to heuristics if LLM fails or is disabled
+	if p.config.UseHeuristics {
+		return p.planWithHeuristics(ctx, req), nil
+	}
+
+	return nil, fmt.Errorf("LLM planning failed and heuristics disabled")
+}
+
+// planWithLLM prompts the LLM, decodes its JSON reply into a MemoryAction, then adjusts scores and flags conflicts
+func (p *Planner) planWithLLM(ctx context.Context, req memory.MemoryObserveRequest, existingMemory *ExistingMemoryContext) (*memory.MemoryAction, error) {
+	// Build prompt
+	prompt := p.buildMemoryActionPrompt(req, existingMemory)
+
+	log.Debug().
+		Int("message_count", len(req.Messages)).
+		Bool("has_existing_memory", existingMemory != nil).
+		Msg("Planning memory actions with LLM")
+
+	// Call LLM with the planner's configured model, temperature and token limit
+	response, err := p.llm.Complete(ctx, prompt, memory.LLMOptions{
+		Model:          p.config.Model,
+		Temperature:    p.config.Temperature,
+		MaxTokens:      p.config.MaxTokens,
+		ResponseFormat: "json",
+	})
+	if err != nil {
+		return nil, fmt.Errorf("llm completion failed: %w", err)
+	}
+
+	// Parse JSON response; the whole completion must be one JSON document or this returns an error
+	var llmResp LLMMemoryActionResponse
+	if err := json.Unmarshal([]byte(response), &llmResp); err != nil {
+		return nil, fmt.Errorf("failed to parse LLM response: %w", err)
+	}
+
+	// Convert to MemoryAction
+	action := &memory.MemoryAction{
+		Add: memory.MemoryAddActions{
+			UserMemory:    llmResp.Add.UserMemory,
+			ProjectMemory: llmResp.Add.ProjectMemory,
+			Episodic:      llmResp.Add.Episodic,
+		},
+		Delete: llmResp.Delete,
+	}
+
+	// Apply scoring enhancements
+	p.enhanceScoring(action, req)
+
+	// Detect conflicts if enabled and there is existing memory to compare against
+	if p.config.DetectConflicts && existingMemory != nil {
+		p.detectAndResolveConflicts(action, existingMemory)
+	}
+
+	log.Info().
+		Int("user_memory_add", len(action.Add.UserMemory)).
+		Int("project_memory_add", len(action.Add.ProjectMemory)).
+		Int("episodic_add", len(action.Add.Episodic)).
+		Int("delete", len(action.Delete)).
+		Str("reasoning", llmResp.Reasoning).
+		Msg("Memory actions planned with LLM")
+
+	return action, nil
+}
+
+// buildMemoryActionPrompt constructs the LLM prompt for memory action planning
+func (p *Planner) buildMemoryActionPrompt(req memory.MemoryObserveRequest, existingMemory *ExistingMemoryContext) string {
+	prompt := `You are a memory management system. Analyze the conversation and decide what information should be stored in long-term memory.
+
+Your task:
+1. Identify facts worth remembering (user preferences, project decisions, important context)
+2. Detect contradictions with existing memory
+3. Assign appropriate importance/confidence levels
+4. Create episodic events for significant interactions
+
+Rules:
+- Only promote facts mentioned explicitly or repeatedly
+- Mark contradictions for deletion
+- Assign importance: low, medium, high, critical
+- Assign confidence: 0.0-1.0 for project facts
+- Be conservative - don't store trivial information
+
+`
+
+	// Add existing memory context if available
+	if p.config.IncludeContext && existingMemory != nil {
+		prompt += p.formatExistingMemory(existingMemory)
+	}
+
+	// Add conversation
+	prompt += "Recent Conversation:\n"
+	for _, msg := range req.Messages {
+		prompt += fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)
+	}
+
+	// Add response format
+	prompt += `
+Return your analysis as JSON with this exact structure:
+{
+  "delete": ["memory_id_1", "memory_id_2"],
+  "add": {
+    "user_memory": [
+      {
+        "scope": "preference|profile|skill|other",
+        "key": "descriptive_key",
+        "text": "the fact to remember",
+        "importance": "low|medium|high|critical"
+      }
+    ],
+    "project_memory": [
+      {
+        "kind": "decision|assumption|risk|metric|fact",
+        "title": "short title",
+        "text": "detailed description",
+        "confidence": 0.8
+      }
+    ],
+    "episodic_memory": [
+      {
+        "kind": "tool_result|decision|incident|milestone",
+        "text": "what happened"
+      }
+    ]
+  },
+  "reasoning": "brief explanation of your decisions"
+}
+
+Only include items that are truly worth remembering. Empty arrays are fine.
+Ensure the response is valid JSON.`
+
+	return prompt
+}
+
+// formatExistingMemory renders the known user memory and project facts as a prompt section
+func (p *Planner) formatExistingMemory(existing *ExistingMemoryContext) string {
+	var sb strings.Builder
+	sb.WriteString("Existing Memory (check for contradictions):\n\n")
+
+	// Each subsection is written only when it has entries and ends with a blank line.
+	if userItems := existing.UserMemory; len(userItems) > 0 {
+		sb.WriteString("User Memory:\n")
+		for _, entry := range userItems {
+			fmt.Fprintf(&sb, "- [%s] %s: %s (score: %d)\n",
+				entry.ID, entry.Scope, entry.Text, entry.Score)
+		}
+		sb.WriteString("\n")
+	}
+
+	if facts := existing.ProjectFacts; len(facts) > 0 {
+		sb.WriteString("Project Facts:\n")
+		for _, entry := range facts {
+			fmt.Fprintf(&sb, "- [%s] %s: %s (confidence: %.2f)\n",
+				entry.ID, entry.Kind, entry.Text, entry.Confidence)
+		}
+		sb.WriteString("\n")
+	}
+
+	return sb.String()
+}
+
+// enhanceScoring raises importance one step and confidence by 0.1 (max 1.0) after an explicit "remember" request
+func (p *Planner) enhanceScoring(action *memory.MemoryAction, req memory.MemoryObserveRequest) {
+	explicit := false
+	for _, m := range req.Messages {
+		if m.Role != "user" {
+			continue
+		}
+		lowered := strings.ToLower(m.Content)
+		if strings.Contains(lowered, "remember") || strings.Contains(lowered, "don't forget") {
+			explicit = true
+			break
+		}
+	}
+	if !explicit {
+		return // nothing was explicitly requested, so scores stay as planned
+	}
+	for idx := range action.Add.UserMemory {
+		item := &action.Add.UserMemory[idx]
+		switch item.Importance {
+		case "medium":
+			item.Importance = "high"
+		case "low":
+			item.Importance = "medium"
+		}
+	}
+	for idx := range action.Add.ProjectMemory {
+		fact := &action.Add.ProjectMemory[idx]
+		fact.Confidence += 0.1
+		if fact.Confidence > 1.0 {
+			fact.Confidence = 1.0
+		}
+	}
+}
+
+// detectAndResolveConflicts appends to action.Delete the IDs of existing memories that the
+// new items supersede: user items with the same scope and key but different text, and
+// project facts of the same kind whose titles contain one another but whose text differs.
+func (p *Planner) detectAndResolveConflicts(action *memory.MemoryAction, existing *ExistingMemoryContext) {
+	// Track IDs already queued so the same memory is never listed for deletion twice.
+	queued := make(map[string]struct{}, len(action.Delete))
+	for _, id := range action.Delete {
+		queued[id] = struct{}{}
+	}
+	markDeleted := func(id string) bool {
+		if _, seen := queued[id]; seen {
+			return false
+		}
+		queued[id] = struct{}{}
+		action.Delete = append(action.Delete, id)
+		return true
+	}
+
+	for _, newItem := range action.Add.UserMemory {
+		for _, existingItem := range existing.UserMemory {
+			sameSlot := newItem.Scope == existingItem.Scope && newItem.Key == existingItem.Key
+			if sameSlot && !strings.EqualFold(newItem.Text, existingItem.Text) && markDeleted(existingItem.ID) {
+				log.Info().Str("old_id", existingItem.ID).Str("old_text", existingItem.Text).
+					Str("new_text", newItem.Text).Msg("Detected contradiction, marking old memory for deletion")
+			}
+		}
+	}
+
+	for _, newFact := range action.Add.ProjectMemory {
+		newTitle := strings.ToLower(strings.TrimSpace(newFact.Title))
+		for _, existingFact := range existing.ProjectFacts {
+			oldTitle := strings.ToLower(strings.TrimSpace(existingFact.Title))
+			// strings.Contains(s, "") is always true, so an empty title must never count as a match.
+			if newTitle == "" || oldTitle == "" || newFact.Kind != existingFact.Kind {
+				continue
+			}
+			related := strings.Contains(oldTitle, newTitle) || strings.Contains(newTitle, oldTitle)
+			if related && !strings.EqualFold(newFact.Text, existingFact.Text) && markDeleted(existingFact.ID) {
+				log.Info().Str("old_id", existingFact.ID).Str("kind", existingFact.Kind).
+					Msg("Detected potential update, marking old fact for deletion")
+			}
+		}
+	}
+}
+
+// planWithHeuristics builds a MemoryAction using keyword heuristics only; it
+// is the fallback planning path. Messages with role "user" are passed through
+// analyzeUserMessage, and every message, whatever its role, is recorded as an
+// "interaction" episodic event. ctx is accepted but not used here.
+func (p *Planner) planWithHeuristics(ctx context.Context, req memory.MemoryObserveRequest) *memory.MemoryAction {
+	plan := new(memory.MemoryAction)
+	// Start from empty (non-nil) slices.
+	plan.Add.UserMemory = []memory.UserMemoryItemInput{}
+	plan.Add.ProjectMemory = []memory.ProjectFactInput{}
+	plan.Add.Episodic = []memory.EpisodicEventInput{}
+	plan.Delete = []string{}
+
+	for i := range req.Messages {
+		message := &req.Messages[i]
+
+		if message.Role == "user" {
+			p.analyzeUserMessage(message.Content, plan, req.ProjectID)
+		}
+
+		// One episodic event per message, regardless of role.
+		event := memory.EpisodicEventInput{
+			Kind: "interaction",
+			Text: formatEpisodicText(message.Role, message.Content),
+		}
+		plan.Add.Episodic = append(plan.Add.Episodic, event)
+	}
+
+	return plan
+}
+
+// analyzeUserMessage applies the keyword heuristics to one user message and
+// appends whatever it finds to action.Add.
+//
+// A preference phrase adds a user-memory item (scope "preference", key
+// "user_preference"). When projectID is non-empty, decision, requirement and
+// constraint phrases each add a project fact with kind "decision",
+// "assumption" and "risk" respectively. Blank content is ignored. Stored text
+// is the whitespace-trimmed message.
+func (p *Planner) analyzeUserMessage(content string, action *memory.MemoryAction, projectID string) {
+	trimmed := strings.TrimSpace(content)
+	if trimmed == "" {
+		return
+	}
+
+	// The phrase matchers are case-sensitive and expect lower-case input.
+	lowered := strings.ToLower(trimmed)
+
+	if p.isPreference(lowered) {
+		action.Add.UserMemory = append(action.Add.UserMemory, memory.UserMemoryItemInput{
+			Scope:      "preference",
+			Key:        "user_preference",
+			Text:       trimmed,
+			Importance: p.scorer.AnalyzeTextImportance(trimmed),
+		})
+	}
+
+	// Project-level facts are only recorded when a project is known.
+	if projectID == "" {
+		return
+	}
+
+	detectors := []struct {
+		kind    string
+		matches func(string) bool
+	}{
+		{kind: "decision", matches: p.isDecision},
+		{kind: "assumption", matches: p.isRequirement},
+		{kind: "risk", matches: p.isConstraint},
+	}
+
+	for _, detector := range detectors {
+		if !detector.matches(lowered) {
+			continue
+		}
+		fact := memory.ProjectFactInput{
+			Kind:       detector.kind,
+			Title:      extractTitle(trimmed),
+			Text:       trimmed,
+			Confidence: p.calculateConfidence(trimmed),
+		}
+		action.Add.ProjectMemory = append(action.Add.ProjectMemory, fact)
+	}
+}
+
+// Pattern detection functions (heuristic)
+
+// isPreference reports whether text contains one of the phrases that signal a
+// personal preference. Matching is a case-sensitive substring search against
+// lower-case phrases, so text should already be lower-cased.
+func (p *Planner) isPreference(text string) bool {
+	phrases := [...]string{
+		"i prefer", "i like", "i love", "i want",
+		"i always", "i usually", "i typically",
+		"my preference", "i'd rather",
+	}
+
+	for i := range phrases {
+		if strings.Contains(text, phrases[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// isDecision reports whether text contains one of the phrases that signal a
+// team decision. Matching is a case-sensitive substring search against
+// lower-case phrases, so text should already be lower-cased.
+func (p *Planner) isDecision(text string) bool {
+	phrases := [...]string{
+		"we should", "let's use", "we'll use", "we decided",
+		"we're going to", "we will", "let's go with",
+		"we chose", "we selected",
+	}
+
+	for i := range phrases {
+		if strings.Contains(text, phrases[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// isRequirement reports whether text contains one of the phrases that signal
+// a requirement. Matching is a case-sensitive substring search against
+// lower-case phrases, so text should already be lower-cased.
+func (p *Planner) isRequirement(text string) bool {
+	phrases := [...]string{
+		"we need", "must have", "required", "requirement",
+		"has to", "needs to", "should support",
+	}
+
+	for i := range phrases {
+		if strings.Contains(text, phrases[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// isConstraint reports whether text contains one of the phrases that signal a
+// constraint or prohibition. Matching is a case-sensitive substring search
+// against lower-case phrases, so text should already be lower-cased.
+func (p *Planner) isConstraint(text string) bool {
+	phrases := [...]string{
+		"can't", "cannot", "must not", "shouldn't",
+		"limited to", "restricted", "constraint",
+		"not allowed", "forbidden",
+	}
+
+	for i := range phrases {
+		if strings.Contains(text, phrases[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// Helper functions
+
+// calculateConfidence maps hedging words in text to a confidence value. The
+// first matching tier wins: "definitely"/"certainly" give 0.95,
+// "probably"/"likely" give 0.75, "maybe"/"might" give 0.6, and anything else
+// gives the default 0.8. Matching is case-insensitive.
+func (p *Planner) calculateConfidence(text string) float32 {
+	lowered := strings.ToLower(text)
+	mentions := func(word string) bool {
+		return strings.Contains(lowered, word)
+	}
+
+	switch {
+	case mentions("definitely"), mentions("certainly"):
+		return 0.95
+	case mentions("probably"), mentions("likely"):
+		return 0.75
+	case mentions("maybe"), mentions("might"):
+		return 0.6
+	default:
+		return 0.8 // Default confidence
+	}
+}
+
+// extractTitle derives a short title from text: everything before the first
+// "." with surrounding whitespace trimmed. Titles longer than 100 bytes are
+// shortened to at most 97 bytes followed by "...". The cut point is moved
+// back to a UTF-8 character boundary so a multi-byte character is never split
+// in half, which would produce an invalid UTF-8 string.
+func extractTitle(text string) string {
+	// SplitN always returns at least one element, so index 0 is safe even
+	// for an empty string or a string without any ".".
+	title := strings.TrimSpace(strings.SplitN(text, ".", 2)[0])
+	if len(title) <= 100 {
+		return title
+	}
+
+	cut := 97
+	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
+	// until the cut lands on the first byte of a character.
+	for cut > 0 && title[cut]&0xC0 == 0x80 {
+		cut--
+	}
+	return title[:cut] + "..."
+}
+
+// formatEpisodicText renders a message as "<role>: <content>" for storage as
+// an episodic event. Content longer than 500 bytes is shortened to at most
+// 497 bytes followed by "...". The cut point is moved back to a UTF-8
+// character boundary so a multi-byte character is never split in half.
+func formatEpisodicText(role, content string) string {
+	if len(content) > 500 {
+		cut := 497
+		// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step
+		// back until the cut lands on the first byte of a character.
+		for cut > 0 && content[cut]&0xC0 == 0x80 {
+			cut--
+		}
+		content = content[:cut] + "..."
+	}
+	return role + ": " + content
+}
+
+// ExistingMemoryContext holds existing memory for conflict detection.
+// detectAndResolveConflicts compares newly planned items against these slices
+// and queues the IDs of superseded entries for deletion.
+type ExistingMemoryContext struct {
+	// UserMemory is matched against new user-memory items by Scope and Key.
+	UserMemory   []memory.UserMemoryItem
+	// ProjectFacts is matched against new project facts by Kind and Title.
+	ProjectFacts []memory.ProjectFact
+}
diff --git a/services/memory-tools/internal/domain/action/scorer.go b/services/memory-tools/internal/domain/action/scorer.go
new file mode 100644
index 00000000..a00907c6
--- /dev/null
+++ b/services/memory-tools/internal/domain/action/scorer.go
@@ -0,0 +1,115 @@
+package action
+
+import (
+	"math"
+	"strings"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+)
+
+// Scorer handles importance scoring for memory items.
+// It has no fields; all of its methods depend only on their arguments.
+type Scorer struct{}
+
+// NewScorer returns a ready-to-use Scorer.
+func NewScorer() *Scorer {
+	return new(Scorer)
+}
+
+// ScoreImportance converts importance string to numeric score
+func (s *Scorer) ScoreImportance(importance string) int {
+	switch strings.ToLower(importance) {
+	case "critical":
+		return 5
+	case "high":
+		return 4
+	case "medium":
+		return 3
+	case "low":
+		return 2
+	case "minimal":
+		return 1
+	default:
+		return 3 // Default to medium
+	}
+}
+
+// ScoreUserMemoryItem scores a user memory item from its importance label and
+// then nudges the result by scope: "core" items gain one point (capped at 5),
+// "context" items lose one point (floored at 1), and every other scope keeps
+// the base score.
+func (s *Scorer) ScoreUserMemoryItem(item *memory.UserMemoryItemInput) int {
+	base := s.ScoreImportance(item.Importance)
+
+	// Core facts are more important
+	if item.Scope == "core" && base < 5 {
+		return base + 1
+	}
+	// Context is less permanent
+	if item.Scope == "context" && base > 1 {
+		return base - 1
+	}
+	return base
+}
+
+// ScoreProjectFact returns the fact's confidence adjusted by its kind:
+// "decision" adds 0.1 and "requirement" adds 0.05 (both capped at 1.0),
+// "context" subtracts 0.1 (floored at 0.0), and any other kind is returned
+// unchanged.
+func (s *Scorer) ScoreProjectFact(fact *memory.ProjectFactInput) float32 {
+	base := fact.Confidence
+
+	// Decisions are more important
+	if fact.Kind == "decision" {
+		return float32(math.Min(float64(base+0.1), 1.0))
+	}
+	if fact.Kind == "requirement" {
+		return float32(math.Min(float64(base+0.05), 1.0))
+	}
+	if fact.Kind == "context" {
+		return float32(math.Max(float64(base-0.1), 0.0))
+	}
+
+	// "constraint" and unknown kinds keep their confidence as is.
+	return base
+}
+
+// AnalyzeTextImportance classifies text as "critical", "high", "low" or
+// "medium" by case-insensitive keyword search. Tiers are checked in the order
+// critical, high, low, and the first tier with a matching keyword wins; text
+// with no keyword at all is "medium".
+func (s *Scorer) AnalyzeTextImportance(text string) string {
+	lowered := strings.ToLower(text)
+
+	tiers := []struct {
+		level    string
+		keywords []string
+	}{
+		{
+			level: "critical",
+			keywords: []string{
+				"must", "required", "critical", "essential", "mandatory",
+				"always", "never", "security", "password", "api key",
+			},
+		},
+		{
+			level: "high",
+			keywords: []string{
+				"important", "should", "prefer", "recommend",
+				"decision", "requirement", "constraint",
+			},
+		},
+		{
+			level: "low",
+			keywords: []string{
+				"maybe", "might", "consider", "optional",
+				"nice to have", "if possible",
+			},
+		},
+	}
+
+	for _, tier := range tiers {
+		for _, keyword := range tier.keywords {
+			if strings.Contains(lowered, keyword) {
+				return tier.level
+			}
+		}
+	}
+
+	// Default to medium
+	return "medium"
+}
diff --git a/services/memory-tools/internal/domain/embedding/batcher.go b/services/memory-tools/internal/domain/embedding/batcher.go
new file mode 100644
index 00000000..0943b343
--- /dev/null
+++ b/services/memory-tools/internal/domain/embedding/batcher.go
@@ -0,0 +1,148 @@
+package embedding
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/rs/zerolog/log"
+)
+
+// Batcher handles batch processing of embedding requests.
+// Embed queues individual texts; the queue is sent to client.Embed as one
+// call when it reaches batchSize, when the per-batch timer fires, or on the
+// periodic tick of the run goroutine.
+type Batcher struct {
+	client    Client
+	batchSize int
+	timeout   time.Duration
+
+	// mu guards queue and timer.
+	mu       sync.Mutex
+	queue    []batchItem
+	// timer is started by Embed when the first item of a batch is queued.
+	timer    *time.Timer
+	// stopCh is closed by Stop to end the run goroutine.
+	stopCh   chan struct{}
+	// NOTE(review): resultCh is allocated in NewBatcher but nothing in this
+	// file reads from or writes to it; results travel on batchItem.resultCh.
+	resultCh chan batchResult
+}
+
+// batchItem is one queued text together with the channel on which its
+// caller is waiting for the result.
+type batchItem struct {
+	text     string
+	resultCh chan<- batchResult
+}
+
+// batchResult is what flush delivers to each waiting caller: the embedding
+// for that caller's text, or the error from the batch call.
+type batchResult struct {
+	embedding []float32
+	err       error
+}
+
+// NewBatcher creates a new embedding batcher
+func NewBatcher(client Client, batchSize int, timeout time.Duration) *Batcher {
+	b := &Batcher{
+		client:    client,
+		batchSize: batchSize,
+		timeout:   timeout,
+		queue:     make([]batchItem, 0, batchSize),
+		stopCh:    make(chan struct{}),
+		resultCh:  make(chan batchResult, batchSize),
+	}
+
+	go b.run()
+	return b
+}
+
+// Embed adds a text to the batch queue and blocks until its embedding is
+// available or ctx is done, whichever comes first.
+//
+// The first text of a batch starts a timer that flushes the queue after the
+// batcher's timeout; reaching batchSize flushes immediately. The flush runs
+// on its own goroutine so that a caller whose ctx expires is released
+// promptly instead of waiting for the whole batch request to finish.
+//
+// Returns ctx.Err() if ctx is already done or ends while waiting. A text that
+// was queued before ctx ended is still sent with its batch.
+func (b *Batcher) Embed(ctx context.Context, text string) ([]float32, error) {
+	// Do not queue work for a caller that has already given up.
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+
+	// Buffered so flush can always deliver without blocking, even when this
+	// caller has stopped waiting.
+	resultCh := make(chan batchResult, 1)
+
+	b.mu.Lock()
+	b.queue = append(b.queue, batchItem{
+		text:     text,
+		resultCh: resultCh,
+	})
+	queued := len(b.queue)
+	batchFull := queued >= b.batchSize
+
+	switch {
+	case batchFull:
+		// The batch is flushed right away, so the timer is not needed.
+		if b.timer != nil {
+			b.timer.Stop()
+			b.timer = nil
+		}
+	case queued == 1:
+		// First item of a new batch: replace any timer left over from a
+		// previous batch so it cannot fire early for this one.
+		if b.timer != nil {
+			b.timer.Stop()
+		}
+		b.timer = time.AfterFunc(b.timeout, b.flush)
+	}
+	b.mu.Unlock()
+
+	if batchFull {
+		go b.flush()
+	}
+
+	// Wait for result
+	select {
+	case result := <-resultCh:
+		return result.embedding, result.err
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	}
+}
+
+// flush processes the current batch
+func (b *Batcher) flush() {
+	b.mu.Lock()
+	if len(b.queue) == 0 {
+		b.mu.Unlock()
+		return
+	}
+
+	items := b.queue
+	b.queue = make([]batchItem, 0, b.batchSize)
+	b.mu.Unlock()
+
+	// Extract texts
+	texts := make([]string, len(items))
+	for i, item := range items {
+		texts[i] = item.text
+	}
+
+	log.Debug().
+		Int("batch_size", len(texts)).
+		Msg("Processing embedding batch")
+
+	// Batch embed
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	embeddings, err := b.client.Embed(ctx, texts)
+
+	// Send results back
+	for i, item := range items {
+		result := batchResult{err: err}
+		if err == nil && i < len(embeddings) {
+			result.embedding = embeddings[i]
+		}
+
+		select {
+		case item.resultCh <- result:
+		default:
+			log.Warn().Msg("Failed to send batch result")
+		}
+	}
+}
+
+// run is the background loop started by NewBatcher. It flushes the queue on
+// every tick of a timeout-length ticker, and when stopCh is closed it flushes
+// one last time and returns.
+func (b *Batcher) run() {
+	tick := time.NewTicker(b.timeout)
+	defer tick.Stop()
+
+	for {
+		select {
+		case <-b.stopCh:
+			b.flush()
+			return
+		case <-tick.C:
+			b.flush()
+		}
+	}
+}
+
+// Stop stops the batcher's background goroutine, which flushes any queued
+// texts one last time before exiting. Stop may be called more than once;
+// calls after the first are no-ops rather than a panic from closing an
+// already-closed channel.
+func (b *Batcher) Stop() {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	select {
+	case <-b.stopCh:
+		// Already closed by an earlier Stop call.
+	default:
+		close(b.stopCh)
+	}
+}
diff --git a/services/memory-tools/internal/domain/embedding/client.go b/services/memory-tools/internal/domain/embedding/client.go
new file mode 100644
index 00000000..f15dcfbd
--- /dev/null
+++ b/services/memory-tools/internal/domain/embedding/client.go
@@ -0,0 +1,345 @@
+package embedding
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"sync"
+	"time"
+
+	lru "github.com/hashicorp/golang-lru"
+	"github.com/rs/zerolog/log"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/infrastructure/cache"
+)
+
+// Cache interface for embedding storage.
+// Implementations in this file: RedisCache, MemoryCache and NoOpsCache.
+type Cache interface {
+	// Get returns the embedding stored under key; the bool is false on a miss.
+	Get(key string) ([]float32, bool)
+	// Set stores value under key with the given time-to-live.
+	Set(key string, value []float32, ttl time.Duration)
+}
+
+// CacheConfig selects and configures the Cache built by NewCache.
+// NewCache passes RedisURL, KeyPrefix and TTL to the "redis" cache and
+// MaxSize and TTL to the "memory" cache; "noop" uses none of the fields.
+type CacheConfig struct {
+	Type      string // "redis", "memory", "noop"
+	RedisURL  string
+	KeyPrefix string
+	MaxSize   int
+	TTL       time.Duration
+}
+
+// Cache implementations
+
+// 1. Redis Cache (recommended for production) - using infrastructure cache
+//
+// RedisCache adapts cache.EmbeddingCache to the Cache interface; each method
+// forwards directly to the wrapped value.
+type RedisCache struct {
+	cache *cache.EmbeddingCache
+}
+
+// NewRedisCache creates a RedisCache by calling cache.NewEmbeddingCache with
+// the given URL, key prefix and TTL. A construction failure is returned
+// wrapped with the prefix "create embedding cache".
+func NewRedisCache(redisURL, prefix string, ttl time.Duration) (*RedisCache, error) {
+	embCache, err := cache.NewEmbeddingCache(redisURL, prefix, ttl)
+	if err != nil {
+		return nil, fmt.Errorf("create embedding cache: %w", err)
+	}
+
+	return &RedisCache{
+		cache: embCache,
+	}, nil
+}
+
+// Get forwards to the underlying embedding cache.
+func (c *RedisCache) Get(key string) ([]float32, bool) {
+	return c.cache.Get(key)
+}
+
+// Set forwards to the underlying embedding cache.
+func (c *RedisCache) Set(key string, value []float32, ttl time.Duration) {
+	c.cache.Set(key, value, ttl)
+}
+
+// 2. In-Memory LRU Cache (alternative, no Redis required)
+//
+// MemoryCache stores embeddings in an LRU cache and layers per-entry expiry
+// on top by saving an expiry time alongside each value.
+type MemoryCache struct {
+	cache *lru.Cache
+	// ttl is the value given to NewMemoryCache.
+	ttl   time.Duration
+	mu    sync.RWMutex
+}
+
+// cacheEntry is the value stored in the LRU cache: the embedding plus the
+// moment after which Get treats it as expired.
+type cacheEntry struct {
+	value     []float32
+	expiresAt time.Time
+}
+
+// NewMemoryCache creates an in-memory cache backed by an LRU cache of
+// maxSize entries. The error from lru.New is returned unchanged.
+func NewMemoryCache(maxSize int, ttl time.Duration) (*MemoryCache, error) {
+	store, err := lru.New(maxSize)
+	if err != nil {
+		return nil, err
+	}
+
+	mc := new(MemoryCache)
+	mc.cache = store
+	mc.ttl = ttl
+	return mc, nil
+}
+
+// Get returns the embedding stored under key. An entry whose expiry time has
+// passed is removed from the cache and reported as a miss.
+func (c *MemoryCache) Get(key string) ([]float32, bool) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	if raw, ok := c.cache.Get(key); ok {
+		entry := raw.(cacheEntry)
+		if !time.Now().After(entry.expiresAt) {
+			return entry.value, true
+		}
+		// Expired
+		c.cache.Remove(key)
+	}
+
+	return nil, false
+}
+
+// Set stores value under key until ttl has elapsed. A non-positive ttl would
+// create an entry that is already expired, so in that case the TTL the cache
+// was constructed with is used instead.
+func (c *MemoryCache) Set(key string, value []float32, ttl time.Duration) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	if ttl <= 0 {
+		ttl = c.ttl
+	}
+
+	entry := cacheEntry{
+		value:     value,
+		expiresAt: time.Now().Add(ttl),
+	}
+	c.cache.Add(key, entry)
+}
+
+// 3. NoOps Cache (disable caching)
+//
+// NoOpsCache satisfies the Cache interface without storing anything.
+type NoOpsCache struct{}
+
+// NewNoOpsCache returns a cache that never stores or returns a value.
+func NewNoOpsCache() *NoOpsCache {
+	return &NoOpsCache{}
+}
+
+// Get always reports a miss.
+func (c *NoOpsCache) Get(key string) ([]float32, bool) {
+	return nil, false // Always cache miss
+}
+
+// Set discards the value.
+func (c *NoOpsCache) Set(key string, value []float32, ttl time.Duration) {
+	// Do nothing
+}
+
+// NewCache is the cache factory: it builds the Cache selected by config.Type
+// ("redis", "memory" or "noop") and returns an error for any other type.
+//
+// When a constructor fails, the returned Cache is a true nil interface.
+// Returning the constructor's (*T)(nil) directly would wrap a nil pointer in
+// a non-nil interface value, which compares unequal to nil.
+func NewCache(config CacheConfig) (Cache, error) {
+	switch config.Type {
+	case "redis":
+		redisCache, err := NewRedisCache(config.RedisURL, config.KeyPrefix, config.TTL)
+		if err != nil {
+			return nil, err
+		}
+		return redisCache, nil
+	case "memory":
+		memoryCache, err := NewMemoryCache(config.MaxSize, config.TTL)
+		if err != nil {
+			return nil, err
+		}
+		return memoryCache, nil
+	case "noop":
+		return NewNoOpsCache(), nil
+	default:
+		return nil, fmt.Errorf("unknown cache type: %s", config.Type)
+	}
+}
+
+// Client interface and implementation
+
+// Client is the embedding service abstraction used by the batcher and the
+// memory service. BGE_M3_Client is the implementation in this file.
+type Client interface {
+	// Embed returns one dense embedding per input text, in input order.
+	Embed(ctx context.Context, texts []string) ([][]float32, error)
+	// EmbedSingle returns the dense embedding of a single text.
+	EmbedSingle(ctx context.Context, text string) ([]float32, error)
+	// EmbedSparse returns one sparse embedding per input text.
+	EmbedSparse(ctx context.Context, texts []string) ([]SparseEmbedding, error)
+	// ValidateServer checks that the embedding server is usable.
+	ValidateServer(ctx context.Context) error
+}
+
+// BGE_M3_Client is an HTTP implementation of Client. It calls the /embed,
+// /embed_sparse, /health and /info endpoints under baseURL and caches dense
+// embeddings by input text.
+type BGE_M3_Client struct {
+	baseURL    string
+	httpClient *http.Client
+	cache      Cache
+}
+
+// EmbedRequest is the JSON body posted to the /embed and /embed_sparse
+// endpoints.
+type EmbedRequest struct {
+	Inputs    interface{} `json:"inputs"` // string or []string
+	Normalize bool        `json:"normalize"`
+	Truncate  bool        `json:"truncate"`
+}
+
+// EmbedResponse is the JSON body decoded from /embed: one dense vector per
+// input.
+type EmbedResponse [][]float32
+
+// SparseEmbedding is a sparse vector stored as parallel slices: Values[i] is
+// the weight at position Indices[i]. EmbedSparse always fills both slices
+// with the same length.
+type SparseEmbedding struct {
+	Indices []int     `json:"indices"`
+	Values  []float32 `json:"values"`
+}
+
+// ModelInfo is the part of the /info response that ValidateServer reads.
+type ModelInfo struct {
+	ModelID string `json:"model_id"`
+}
+
+// NewBGE_M3_Client builds a client for the embedding server at baseURL. The
+// cache is created from cacheConfig via NewCache, and HTTP requests use a
+// 30-second timeout. A cache construction failure is returned wrapped with
+// the prefix "initialize cache".
+func NewBGE_M3_Client(baseURL string, cacheConfig CacheConfig) (*BGE_M3_Client, error) {
+	store, err := NewCache(cacheConfig)
+	if err != nil {
+		return nil, fmt.Errorf("initialize cache: %w", err)
+	}
+
+	client := &BGE_M3_Client{baseURL: baseURL, cache: store}
+	client.httpClient = &http.Client{Timeout: 30 * time.Second}
+	return client, nil
+}
+
+// Embed returns one dense embedding per input text, in input order.
+//
+// Texts already in the cache are served from it. The remaining texts are
+// sent to the /embed endpoint in one request (normalize and truncate
+// enabled), and each returned embedding is cached under its text for one
+// hour.
+//
+// Returns an error if the request cannot be built or executed, if the
+// service answers with a status other than 200, if the response cannot be
+// decoded, or if the service returns a different number of embeddings than
+// texts sent. The last case previously caused an index-out-of-range panic.
+func (c *BGE_M3_Client) Embed(ctx context.Context, texts []string) ([][]float32, error) {
+	// Check cache first
+	cachedResults := make([][]float32, len(texts))
+	uncachedIndices := []int{}
+	uncachedTexts := []string{}
+
+	for i, text := range texts {
+		if cached, found := c.cache.Get(text); found {
+			cachedResults[i] = cached
+		} else {
+			uncachedIndices = append(uncachedIndices, i)
+			uncachedTexts = append(uncachedTexts, text)
+		}
+	}
+
+	if len(uncachedTexts) == 0 {
+		return cachedResults, nil
+	}
+
+	// Call BGE-M3 API for uncached items
+	reqBody := EmbedRequest{
+		Inputs:    uncachedTexts,
+		Normalize: true,
+		Truncate:  true,
+	}
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+"/embed", bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("embedding service returned status %d", resp.StatusCode)
+	}
+
+	var embeddings EmbedResponse
+	if err := json.NewDecoder(resp.Body).Decode(&embeddings); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+
+	// The merge below indexes embeddings by position, so the counts must
+	// match exactly.
+	if len(embeddings) != len(uncachedTexts) {
+		return nil, fmt.Errorf("embedding service returned %d embeddings for %d inputs",
+			len(embeddings), len(uncachedTexts))
+	}
+
+	// Merge results and cache
+	for i, idx := range uncachedIndices {
+		cachedResults[idx] = embeddings[i]
+		c.cache.Set(uncachedTexts[i], embeddings[i], 1*time.Hour)
+	}
+
+	return cachedResults, nil
+}
+
+// EmbedSingle embeds one text by calling Embed with a one-element slice and
+// returning the first result. Errors from Embed are returned unchanged.
+func (c *BGE_M3_Client) EmbedSingle(ctx context.Context, text string) ([]float32, error) {
+	vectors, err := c.Embed(ctx, []string{text})
+	if err == nil {
+		return vectors[0], nil
+	}
+	return nil, err
+}
+
+// EmbedSparse posts texts to the /embed_sparse endpoint (truncate enabled)
+// and converts the response into one SparseEmbedding per returned row. The
+// service is expected to answer with a JSON array of arrays of
+// {"index", "value"} objects. Results are not cached.
+//
+// Returns an error if the request cannot be built or executed, if the
+// service answers with a status other than 200, or if the response cannot be
+// decoded.
+func (c *BGE_M3_Client) EmbedSparse(ctx context.Context, texts []string) ([]SparseEmbedding, error) {
+	payload, err := json.Marshal(EmbedRequest{
+		Inputs:   texts,
+		Truncate: true,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	endpoint := c.baseURL + "/embed_sparse"
+	req, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewBuffer(payload))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("embedding service returned status %d", resp.StatusCode)
+	}
+
+	// Wire format of one non-zero component.
+	type weightedIndex struct {
+		Index int     `json:"index"`
+		Value float32 `json:"value"`
+	}
+
+	var rows [][]weightedIndex
+	if err := json.NewDecoder(resp.Body).Decode(&rows); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+
+	// Convert to SparseEmbedding format
+	converted := make([]SparseEmbedding, 0, len(rows))
+	for _, row := range rows {
+		emb := SparseEmbedding{
+			Indices: make([]int, len(row)),
+			Values:  make([]float32, len(row)),
+		}
+		for j := range row {
+			emb.Indices[j] = row[j].Index
+			emb.Values[j] = row[j].Value
+		}
+		converted = append(converted, emb)
+	}
+
+	return converted, nil
+}
+
+// ValidateServer checks that the embedding server is usable:
+//
+//  1. GET /health must succeed with status 200.
+//  2. GET /info must return JSON containing the model ID.
+//  3. A model ID other than "BAAI/bge-m3" is logged as a warning only.
+//  4. Embedding the text "test" must yield a 1024-dimensional vector.
+//
+// Both GET requests are bound to ctx, and every response body is closed on
+// all paths, including a non-200 health response.
+func (c *BGE_M3_Client) ValidateServer(ctx context.Context) error {
+	get := func(path string) (*http.Response, error) {
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+path, nil)
+		if err != nil {
+			return nil, err
+		}
+		return c.httpClient.Do(req)
+	}
+
+	// 1. Check health endpoint
+	healthResp, err := get("/health")
+	if err != nil {
+		return fmt.Errorf("embedding server not healthy: %w", err)
+	}
+	// Only the status code matters, so release the body immediately.
+	healthResp.Body.Close()
+	if healthResp.StatusCode != http.StatusOK {
+		return fmt.Errorf("embedding server not healthy: status %d", healthResp.StatusCode)
+	}
+
+	// 2. Check model info
+	infoResp, err := get("/info")
+	if err != nil {
+		return fmt.Errorf("failed to get model info: %w", err)
+	}
+	defer infoResp.Body.Close()
+
+	var info ModelInfo
+	if err := json.NewDecoder(infoResp.Body).Decode(&info); err != nil {
+		return fmt.Errorf("failed to decode model info: %w", err)
+	}
+
+	// 3. Verify it's BGE-M3
+	if info.ModelID != "BAAI/bge-m3" {
+		log.Warn().Str("model", info.ModelID).Msg("Expected BGE-M3, got different model")
+	}
+
+	// 4. Test embedding
+	embeddings, err := c.Embed(ctx, []string{"test"})
+	if err != nil {
+		return fmt.Errorf("test embedding failed: %w", err)
+	}
+	// Check for an empty result before indexing into it.
+	if len(embeddings) == 0 {
+		return fmt.Errorf("expected 1024 dimensions, got no embeddings")
+	}
+	if dim := len(embeddings[0]); dim != 1024 {
+		return fmt.Errorf("expected 1024 dimensions, got %d", dim)
+	}
+
+	return nil
+}
diff --git a/services/memory-tools/internal/domain/memory/models.go b/services/memory-tools/internal/domain/memory/models.go
new file mode 100644
index 00000000..2c779477
--- /dev/null
+++ b/services/memory-tools/internal/domain/memory/models.go
@@ -0,0 +1,198 @@
+package memory
+
+import (
+	"context"
+	"time"
+)
+
+// UserMemoryItem represents a user's personal memory item.
+// Embedding and IsDeleted are excluded from JSON output.
+type UserMemoryItem struct {
+	ID        string    `json:"id"`
+	UserID    string    `json:"user_id"`
+	Scope     string    `json:"scope"` // "core", "preference", "context"
+	Key       string    `json:"key"`
+	Text      string    `json:"text"`
+	Score     int       `json:"score"` // Importance: 1-5
+	Embedding []float32 `json:"-"`
+	IsDeleted bool      `json:"-"`
+	CreatedAt time.Time `json:"created_at"`
+	UpdatedAt time.Time `json:"updated_at"`
+
+	// Computed fields
+	// Similarity is omitted from JSON when zero and carries db:"-".
+	Similarity float32 `json:"similarity,omitempty" db:"-"`
+}
+
+// ProjectFact represents a project-level fact or decision.
+// Embedding and IsDeleted are excluded from JSON output.
+//
+// NOTE(review): the Kind comment below lists "decision", "requirement",
+// "constraint" and "context", but the heuristic planner in the action package
+// emits "decision", "assumption" and "risk" — confirm which set is canonical.
+type ProjectFact struct {
+	ID                   string    `json:"id"`
+	ProjectID            string    `json:"project_id"`
+	Kind                 string    `json:"kind"` // "decision", "requirement", "constraint", "context"
+	Title                string    `json:"title"`
+	Text                 string    `json:"text"`
+	Confidence           float32   `json:"confidence"` // 0.0-1.0
+	Embedding            []float32 `json:"-"`
+	SourceConversationID string    `json:"source_conversation_id"`
+	IsDeleted            bool      `json:"-"`
+	CreatedAt            time.Time `json:"created_at"`
+	UpdatedAt            time.Time `json:"updated_at"`
+
+	// Computed fields
+	// Similarity is omitted from JSON when zero and carries db:"-".
+	Similarity float32 `json:"similarity,omitempty" db:"-"`
+}
+
+// EpisodicEvent represents a time-bound event or interaction.
+// Embedding and IsDeleted are excluded from JSON output; ProjectID is omitted
+// from JSON when empty.
+type EpisodicEvent struct {
+	ID             string    `json:"id"`
+	UserID         string    `json:"user_id"`
+	ProjectID      string    `json:"project_id,omitempty"`
+	ConversationID string    `json:"conversation_id"`
+	Time           time.Time `json:"time"`
+	Text           string    `json:"text"`
+	Kind           string    `json:"kind"` // "interaction", "decision", "milestone"
+	Embedding      []float32 `json:"-"`
+	IsDeleted      bool      `json:"-"`
+	CreatedAt      time.Time `json:"created_at"`
+
+	// Computed fields
+	// Similarity is omitted from JSON when zero and carries db:"-".
+	Similarity float32 `json:"similarity,omitempty" db:"-"`
+}
+
+// ConversationItem represents a single message in a conversation.
+// ToolCalls is a string (documented as a JSON array), not a decoded
+// structure, and is omitted from JSON when empty.
+type ConversationItem struct {
+	ID             string    `json:"id"`
+	ConversationID string    `json:"conversation_id"`
+	Role           string    `json:"role"` // "user", "assistant", "system"
+	Content        string    `json:"content"`
+	ToolCalls      string    `json:"tool_calls,omitempty"` // JSON array
+	CreatedAt      time.Time `json:"created_at"`
+}
+
+// ConversationSummary represents a summary of a conversation.
+// Every field carries both a json and a db tag. OpenTasks, Entities and
+// Decisions are untyped lists; their element shape is not defined here.
+type ConversationSummary struct {
+	ID              string        `json:"id" db:"id"`
+	ConversationID  string        `json:"conversation_id" db:"conversation_id"`
+	DialogueSummary string        `json:"dialogue_summary" db:"dialogue_summary"`
+	OpenTasks       []interface{} `json:"open_tasks" db:"open_tasks"`
+	Entities        []interface{} `json:"entities" db:"entities"`
+	Decisions       []interface{} `json:"decisions" db:"decisions"`
+	UpdatedAt       time.Time     `json:"updated_at" db:"updated_at"`
+}
+
+// MemoryLoadRequest represents a request to load relevant memories.
+// Service.Load embeds Query and uses the resulting vector for the similarity
+// searches. ProjectID and ConversationID are optional in the JSON form.
+type MemoryLoadRequest struct {
+	UserID         string            `json:"user_id"`
+	ProjectID      string            `json:"project_id,omitempty"`
+	ConversationID string            `json:"conversation_id,omitempty"`
+	Query          string            `json:"query"`
+	Options        MemoryLoadOptions `json:"options"`
+}
+
+// MemoryLoadOptions contains options for memory loading.
+// Service.Load replaces zero values with defaults: 20 for each of the three
+// Max*Items limits and 0.5 for MinSimilarity.
+type MemoryLoadOptions struct {
+	AugmentWithMemory bool    `json:"augment_with_memory"`
+	MaxUserItems      int     `json:"max_user_items"`
+	MaxProjectItems   int     `json:"max_project_items"`
+	MaxEpisodicItems  int     `json:"max_episodic_items"`
+	MinSimilarity     float32 `json:"min_similarity"`
+}
+
+// MemoryLoadResponse contains the loaded memories, grouped by memory type:
+// user memory items, episodic events and project facts.
+type MemoryLoadResponse struct {
+	CoreMemory     []UserMemoryItem `json:"core_memory"`
+	EpisodicMemory []EpisodicEvent  `json:"episodic_memory"`
+	SemanticMemory []ProjectFact    `json:"semantic_memory"`
+}
+
+// MemoryObserveRequest represents a request to observe and store conversation.
+// ProjectID and ToolCalls are optional in the JSON form. The heuristic planner
+// only extracts project facts when ProjectID is non-empty.
+type MemoryObserveRequest struct {
+	UserID         string             `json:"user_id"`
+	ProjectID      string             `json:"project_id,omitempty"`
+	ConversationID string             `json:"conversation_id"`
+	Messages       []ConversationItem `json:"messages"`
+	ToolCalls      []ToolCall         `json:"tool_calls,omitempty"`
+}
+
+// MemoryObserveResponse represents the response from observe endpoint:
+// a status string plus a human-readable message.
+type MemoryObserveResponse struct {
+	Status  string `json:"status"`
+	Message string `json:"message"`
+}
+
+// ToolCall represents a tool invocation: the tool name, its arguments as a
+// free-form map, and an optional result string (omitted from JSON when empty).
+type ToolCall struct {
+	Name      string                 `json:"name"`
+	Arguments map[string]interface{} `json:"arguments"`
+	Result    string                 `json:"result,omitempty"`
+}
+
+// MemoryAction represents an action to take on memory: a set of items to add
+// and a list of existing item IDs to delete. It is the value produced by the
+// planner in the action package.
+type MemoryAction struct {
+	Add    MemoryAddActions `json:"add"`
+	Delete []string         `json:"delete"` // Memory item IDs to delete
+}
+
+// MemoryAddActions contains items to add to different memory types, one
+// slice per type.
+type MemoryAddActions struct {
+	UserMemory    []UserMemoryItemInput `json:"user_memory"`
+	ProjectMemory []ProjectFactInput    `json:"project_memory"`
+	Episodic      []EpisodicEventInput  `json:"episodic"`
+}
+
+// UserMemoryItemInput represents input for creating a user memory item.
+// Importance is a label rather than a number; Scorer.ScoreImportance in the
+// action package maps it to 1-5 and also recognises "minimal".
+type UserMemoryItemInput struct {
+	Scope      string `json:"scope"`
+	Key        string `json:"key"`
+	Text       string `json:"text"`
+	Importance string `json:"importance"` // "low", "medium", "high", "critical"
+}
+
+// ProjectFactInput represents input for creating a project fact.
+// The heuristic planner fills Confidence with values between 0.6 and 0.95.
+type ProjectFactInput struct {
+	Kind       string  `json:"kind"`
+	Title      string  `json:"title"`
+	Text       string  `json:"text"`
+	Confidence float32 `json:"confidence"`
+}
+
+// EpisodicEventInput represents input for creating an episodic event.
+// The heuristic planner emits one per message with Kind "interaction".
+type EpisodicEventInput struct {
+	Text string `json:"text"`
+	Kind string `json:"kind"`
+}
+
+// UserMemoryUpsertRequest represents a request to upsert user memories:
+// the owning user and the items to write.
+type UserMemoryUpsertRequest struct {
+	UserID string                `json:"user_id"`
+	Items  []UserMemoryItemInput `json:"items"`
+}
+
+// ProjectFactUpsertRequest represents a request to upsert project facts:
+// the owning project and the facts to write.
+type ProjectFactUpsertRequest struct {
+	ProjectID string             `json:"project_id"`
+	Facts     []ProjectFactInput `json:"facts"`
+}
+
+// DeleteRequest represents a request to delete memories, identified by ID.
+type DeleteRequest struct {
+	IDs []string `json:"ids"`
+}
+
+// DeleteResponse represents the response from delete endpoint: a status
+// string, a human-readable message and a count of deleted items.
+type DeleteResponse struct {
+	Status       string `json:"status"`
+	Message      string `json:"message"`
+	DeletedCount int    `json:"deleted_count"`
+}
+
+// LLMClient interface for calling LLM services.
+type LLMClient interface {
+	// Complete sends prompt with the given options and returns the model's
+	// reply as a string.
+	Complete(ctx context.Context, prompt string, options LLMOptions) (string, error)
+}
+
+// LLMOptions for LLM completion.
+// The struct has no JSON tags; it is passed in-process to LLMClient.Complete.
+type LLMOptions struct {
+	Model          string
+	Temperature    float32
+	MaxTokens      int
+	ResponseFormat string // "json" or "text"
+}
diff --git a/services/memory-tools/internal/domain/memory/repository.go b/services/memory-tools/internal/domain/memory/repository.go
new file mode 100644
index 00000000..815d2d55
--- /dev/null
+++ b/services/memory-tools/internal/domain/memory/repository.go
@@ -0,0 +1,30 @@
+package memory
+
+import (
+	"context"
+)
+
+// Repository defines the interface for memory storage operations.
+// Each memory type has list, write, delete and similarity-search methods.
+// The Search* methods take a query embedding, a result limit and a minimum
+// similarity.
+type Repository interface {
+	// User Memory
+	GetUserMemoryItems(ctx context.Context, userID string) ([]UserMemoryItem, error)
+	// UpsertUserMemoryItem returns a string alongside the error (presumably
+	// the stored item's ID — confirm against the implementation).
+	UpsertUserMemoryItem(ctx context.Context, item *UserMemoryItem) (string, error)
+	DeleteUserMemoryItem(ctx context.Context, id string) error
+	SearchUserMemory(ctx context.Context, userID string, queryEmbedding []float32, limit int, minSimilarity float32) ([]UserMemoryItem, error)
+
+	// Project Facts
+	GetProjectFacts(ctx context.Context, projectID string) ([]ProjectFact, error)
+	// UpsertProjectFact returns a string alongside the error (presumably the
+	// stored fact's ID — confirm against the implementation).
+	UpsertProjectFact(ctx context.Context, fact *ProjectFact) (string, error)
+	DeleteProjectFact(ctx context.Context, id string) error
+	SearchProjectFacts(ctx context.Context, projectID string, queryEmbedding []float32, limit int, minSimilarity float32) ([]ProjectFact, error)
+
+	// Episodic Events
+	GetEpisodicEvents(ctx context.Context, userID string, limit int) ([]EpisodicEvent, error)
+	CreateEpisodicEvent(ctx context.Context, event *EpisodicEvent) error
+	DeleteEpisodicEvent(ctx context.Context, id string) error
+	SearchEpisodicEvents(ctx context.Context, userID string, queryEmbedding []float32, limit int, minSimilarity float32) ([]EpisodicEvent, error)
+
+	// Conversation Items
+	CreateConversationItem(ctx context.Context, item *ConversationItem) error
+	GetConversationItems(ctx context.Context, conversationID string) ([]ConversationItem, error)
+}
diff --git a/services/memory-tools/internal/domain/memory/service.go b/services/memory-tools/internal/domain/memory/service.go
new file mode 100644
index 00000000..d522915f
--- /dev/null
+++ b/services/memory-tools/internal/domain/memory/service.go
@@ -0,0 +1,622 @@
+package memory
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"sort"
+	"strings"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/embedding"
+	"github.com/rs/zerolog/log"
+)
+
+// Service handles memory operations.
+//
+// It pairs a storage Repository with an embedding client that turns text
+// into vectors for storage and similarity search.
+type Service struct {
+	repo            Repository       // storage backend for all memory records
+	embeddingClient embedding.Client // embeds queries and texts before they are searched or stored
+}
+
+// NewService builds a memory Service on top of the given repository and
+// embedding client. Neither argument is validated here.
+func NewService(repo Repository, embeddingClient embedding.Client) *Service {
+	svc := new(Service)
+	svc.repo = repo
+	svc.embeddingClient = embeddingClient
+	return svc
+}
+
+// Load retrieves relevant memories for a given query.
+//
+// The query is embedded once and used for three similarity searches: user
+// memory, project facts (only when req.ProjectID is set) and episodic events.
+// Whenever a search returns nothing, the corresponding unfiltered listing is
+// used as a fallback. All three result slices in the response are non-nil.
+//
+// An error is returned when embedding or any of the similarity searches
+// fails; errors from the fallback listings are ignored.
+func (s *Service) Load(ctx context.Context, req MemoryLoadRequest) (*MemoryLoadResponse, error) {
+	// Set defaults
+	// A zero value means "not set", so callers cannot explicitly request 0.
+	if req.Options.MaxUserItems == 0 {
+		req.Options.MaxUserItems = 20
+	}
+	if req.Options.MaxProjectItems == 0 {
+		req.Options.MaxProjectItems = 20
+	}
+	if req.Options.MaxEpisodicItems == 0 {
+		req.Options.MaxEpisodicItems = 20
+	}
+	if req.Options.MinSimilarity == 0 {
+		req.Options.MinSimilarity = 0.5
+	}
+
+	// Embed the query
+	queryEmbedding, err := s.embeddingClient.EmbedSingle(ctx, req.Query)
+	if err != nil {
+		return nil, fmt.Errorf("embed query: %w", err)
+	}
+
+	log.Debug().
+		Str("user_id", req.UserID).
+		Str("query", req.Query).
+		Int("embedding_dim", len(queryEmbedding)).
+		Msg("Query embedded successfully")
+
+	// Search user memory
+	userMemory, err := s.repo.SearchUserMemory(
+		ctx,
+		req.UserID,
+		queryEmbedding,
+		req.Options.MaxUserItems,
+		req.Options.MinSimilarity,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("search user memory: %w", err)
+	}
+
+	// Fallback: nothing passed the similarity search, so return the plain
+	// listing instead, truncated to MaxUserItems. A listing error is ignored
+	// and simply leaves the result empty.
+	if len(userMemory) == 0 {
+		allUserMemory, err := s.repo.GetUserMemoryItems(ctx, req.UserID)
+		if err == nil && len(allUserMemory) > 0 {
+			if req.Options.MaxUserItems > 0 && len(allUserMemory) > req.Options.MaxUserItems {
+				allUserMemory = allUserMemory[:req.Options.MaxUserItems]
+			}
+			userMemory = allUserMemory
+		}
+	}
+
+	// Search project facts if project_id provided
+	var projectFacts []ProjectFact
+	if req.ProjectID != "" {
+		projectFacts, err = s.repo.SearchProjectFacts(
+			ctx,
+			req.ProjectID,
+			queryEmbedding,
+			req.Options.MaxProjectItems,
+			req.Options.MinSimilarity,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("search project facts: %w", err)
+		}
+
+		// Same fallback as for user memory, truncated to MaxProjectItems.
+		if len(projectFacts) == 0 {
+			allFacts, err := s.repo.GetProjectFacts(ctx, req.ProjectID)
+			if err == nil && len(allFacts) > 0 {
+				if req.Options.MaxProjectItems > 0 && len(allFacts) > req.Options.MaxProjectItems {
+					allFacts = allFacts[:req.Options.MaxProjectItems]
+				}
+				projectFacts = allFacts
+			}
+		}
+	}
+
+	// Search episodic events
+	episodicEvents, err := s.repo.SearchEpisodicEvents(
+		ctx,
+		req.UserID,
+		queryEmbedding,
+		req.Options.MaxEpisodicItems,
+		req.Options.MinSimilarity,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("search episodic events: %w", err)
+	}
+
+	// Fallback listing; here the limit is passed to the repository directly.
+	if len(episodicEvents) == 0 {
+		allEvents, err := s.repo.GetEpisodicEvents(ctx, req.UserID, req.Options.MaxEpisodicItems)
+		if err == nil && len(allEvents) > 0 {
+			episodicEvents = allEvents
+		}
+	}
+
+	// Items with a zero Similarity get a placeholder value: 1.0 for user
+	// memory, the fact's Confidence (or 1.0 when that is zero too) for project
+	// facts, and 0.8 for episodic events.
+	// NOTE(review): presumably these are the items that came from the fallback
+	// listings, which carry no similarity — confirm against the repository.
+	for i := range userMemory {
+		if userMemory[i].Similarity == 0 {
+			userMemory[i].Similarity = 1.0
+		}
+	}
+	for i := range projectFacts {
+		if projectFacts[i].Similarity == 0 {
+			projectFacts[i].Similarity = projectFacts[i].Confidence
+			if projectFacts[i].Similarity == 0 {
+				projectFacts[i].Similarity = 1.0
+			}
+		}
+	}
+	for i := range episodicEvents {
+		if episodicEvents[i].Similarity == 0 {
+			episodicEvents[i].Similarity = 0.8
+		}
+	}
+
+	log.Info().
+		Int("user_memory_count", len(userMemory)).
+		Int("project_facts_count", len(projectFacts)).
+		Int("episodic_events_count", len(episodicEvents)).
+		Msg("Memory search completed")
+
+	// Replace nil slices with empty ones so the response never carries nil
+	// collections.
+	if userMemory == nil {
+		userMemory = []UserMemoryItem{}
+	}
+	if projectFacts == nil {
+		projectFacts = []ProjectFact{}
+	}
+	if episodicEvents == nil {
+		episodicEvents = []EpisodicEvent{}
+	}
+
+	return &MemoryLoadResponse{
+		CoreMemory:     userMemory,
+		SemanticMemory: projectFacts,
+		EpisodicMemory: episodicEvents,
+	}, nil
+}
+
+// Observe stores the conversation messages and extracts memories from them.
+//
+// Steps, in order:
+//  1. every message is persisted as a conversation item (best effort: a
+//     storage failure is logged and processing continues);
+//  2. memory actions are extracted from the messages;
+//  3. additions are embedded and stored;
+//  4. deletions are attempted against user memory and project facts.
+//
+// An error is returned only when extraction or the processing of additions
+// fails. Failed deletions are logged, never returned.
+func (s *Service) Observe(ctx context.Context, req MemoryObserveRequest) error {
+	// Store conversation items
+	for i := range req.Messages {
+		// Work on a copy: the caller's slice stays untouched and the repository
+		// never receives a pointer to a reused loop variable.
+		item := req.Messages[i]
+		item.ConversationID = req.ConversationID
+		if err := s.repo.CreateConversationItem(ctx, &item); err != nil {
+			log.Error().Err(err).Msg("Failed to store conversation item")
+			// Continue processing even if storage fails
+		}
+	}
+
+	// Extract memory actions from conversation
+	memoryAction, err := s.extractMemoryActions(ctx, req)
+	if err != nil {
+		return fmt.Errorf("extract memory actions: %w", err)
+	}
+
+	// Process additions
+	if err := s.processMemoryAdditions(ctx, req, memoryAction.Add); err != nil {
+		return fmt.Errorf("process memory additions: %w", err)
+	}
+
+	// Process deletions
+	for _, itemID := range memoryAction.Delete {
+		// The ID's table is not known, so both are tried (soft delete). A
+		// failure in one may simply mean the ID lives in the other, hence the
+		// debug level; the error is recorded instead of being dropped.
+		if err := s.repo.DeleteUserMemoryItem(ctx, itemID); err != nil {
+			log.Debug().Err(err).Str("id", itemID).Msg("Delete from user memory did not succeed")
+		}
+		if err := s.repo.DeleteProjectFact(ctx, itemID); err != nil {
+			log.Debug().Err(err).Str("id", itemID).Msg("Delete from project facts did not succeed")
+		}
+		// Note: We don't delete episodic events as they're historical
+	}
+
+	log.Info().
+		Int("user_memory_added", len(memoryAction.Add.UserMemory)).
+		Int("project_facts_added", len(memoryAction.Add.ProjectMemory)).
+		Int("episodic_added", len(memoryAction.Add.Episodic)).
+		Int("deleted", len(memoryAction.Delete)).
+		Msg("Memory observation completed")
+
+	return nil
+}
+
+// extractMemoryActions analyzes conversation and determines what to remember
+func (s *Service) extractMemoryActions(ctx context.Context, req MemoryObserveRequest) (*MemoryAction, error) {
+	// For now, use a simple heuristic-based approach
+	// In production, this would call an LLM to analyze the conversation
+
+	action := &MemoryAction{
+		Add: MemoryAddActions{
+			UserMemory:    []UserMemoryItemInput{},
+			ProjectMemory: []ProjectFactInput{},
+			Episodic:      []EpisodicEventInput{},
+		},
+		Delete: []string{},
+	}
+
+	// Extract user preferences and context from messages
+	for _, msg := range req.Messages {
+		if msg.Role == "user" {
+			// Simple pattern matching for demonstration
+			content := strings.ToLower(msg.Content)
+
+			// Detect preferences
+			if strings.Contains(content, "i prefer") || strings.Contains(content, "i like") {
+				action.Add.UserMemory = append(action.Add.UserMemory, UserMemoryItemInput{
+					Scope:      "preference",
+					Key:        "user_preference",
+					Text:       msg.Content,
+					Importance: "medium",
+				})
+			}
+
+			// Detect project decisions
+			if strings.Contains(content, "we should") || strings.Contains(content, "let's use") {
+				action.Add.ProjectMemory = append(action.Add.ProjectMemory, ProjectFactInput{
+					Kind:       "decision",
+					Title:      "Project decision",
+					Text:       msg.Content,
+					Confidence: 0.8,
+				})
+			}
+		}
+
+		// Always create episodic event for the interaction
+		action.Add.Episodic = append(action.Add.Episodic, EpisodicEventInput{
+			Text: fmt.Sprintf("%s: %s", msg.Role, msg.Content),
+			Kind: "interaction",
+		})
+	}
+
+	return action, nil
+}
+
+// processMemoryAdditions processes and stores new memory items
+func (s *Service) processMemoryAdditions(ctx context.Context, req MemoryObserveRequest, additions MemoryAddActions) error {
+	// Collect all texts to embed
+	var textsToEmbed []string
+	var textTypes []string // Track what type each text is
+
+	for _, item := range additions.UserMemory {
+		textsToEmbed = append(textsToEmbed, item.Text)
+		textTypes = append(textTypes, "user_memory")
+	}
+
+	for _, fact := range additions.ProjectMemory {
+		textsToEmbed = append(textsToEmbed, fact.Text)
+		textTypes = append(textTypes, "project_fact")
+	}
+
+	for _, event := range additions.Episodic {
+		textsToEmbed = append(textsToEmbed, event.Text)
+		textTypes = append(textTypes, "episodic")
+	}
+
+	if len(textsToEmbed) == 0 {
+		return nil
+	}
+
+	// Batch embed all texts
+	embeddings, err := s.embeddingClient.Embed(ctx, textsToEmbed)
+	if err != nil {
+		return fmt.Errorf("batch embed: %w", err)
+	}
+
+	log.Debug().
+		Int("texts_embedded", len(textsToEmbed)).
+		Msg("Batch embedding completed")
+
+	// Process embeddings and store
+	embeddingIndex := 0
+
+	// Store user memory items
+	for _, item := range additions.UserMemory {
+		userItem := &UserMemoryItem{
+			UserID:    req.UserID,
+			Scope:     item.Scope,
+			Key:       item.Key,
+			Text:      item.Text,
+			Score:     importanceToScore(item.Importance),
+			Embedding: embeddings[embeddingIndex],
+		}
+		embeddingIndex++
+
+		if _, err := s.repo.UpsertUserMemoryItem(ctx, userItem); err != nil {
+			log.Error().Err(err).Msg("Failed to store user memory item")
+		}
+	}
+
+	// Store project facts
+	for _, fact := range additions.ProjectMemory {
+		projectFact := &ProjectFact{
+			ProjectID:            req.ProjectID,
+			Kind:                 fact.Kind,
+			Title:                fact.Title,
+			Text:                 fact.Text,
+			Confidence:           fact.Confidence,
+			Embedding:            embeddings[embeddingIndex],
+			SourceConversationID: req.ConversationID,
+		}
+		embeddingIndex++
+
+		if _, err := s.repo.UpsertProjectFact(ctx, projectFact); err != nil {
+			log.Error().Err(err).Msg("Failed to store project fact")
+		}
+	}
+
+	// Store episodic events
+	for _, event := range additions.Episodic {
+		episodicEvent := &EpisodicEvent{
+			UserID:         req.UserID,
+			ProjectID:      req.ProjectID,
+			ConversationID: req.ConversationID,
+			Time:           req.Messages[len(req.Messages)-1].CreatedAt,
+			Text:           event.Text,
+			Kind:           event.Kind,
+			Embedding:      embeddings[embeddingIndex],
+		}
+		embeddingIndex++
+
+		if err := s.repo.CreateEpisodicEvent(ctx, episodicEvent); err != nil {
+			log.Error().Err(err).Msg("Failed to store episodic event")
+		}
+	}
+
+	return nil
+}
+
+// Helper function to convert importance string to score
+func importanceToScore(importance string) int {
+	switch strings.ToLower(importance) {
+	case "critical":
+		return 5
+	case "high":
+		return 4
+	case "medium":
+		return 3
+	case "low":
+		return 2
+	default:
+		return 3
+	}
+}
+
+// RankResults merges the three memory kinds into one list ordered by score,
+// highest first.
+//
+// Scores: user memory = Similarity * Score / 5, project facts =
+// Similarity * Confidence, episodic events = Similarity * 0.8. The returned
+// slice holds the original items (not the scores) and is never nil.
+func (s *Service) RankResults(userMemory []UserMemoryItem, projectFacts []ProjectFact, episodic []EpisodicEvent) []interface{} {
+	type scored struct {
+		value  interface{}
+		weight float32
+	}
+
+	pool := make([]scored, 0, len(userMemory)+len(projectFacts)+len(episodic))
+
+	// User memory: similarity weighted by importance.
+	for _, m := range userMemory {
+		pool = append(pool, scored{value: m, weight: m.Similarity * float32(m.Score) / 5.0})
+	}
+
+	// Project facts: similarity weighted by confidence.
+	for _, f := range projectFacts {
+		pool = append(pool, scored{value: f, weight: f.Similarity * f.Confidence})
+	}
+
+	// Episodic events: slightly lower weight.
+	for _, e := range episodic {
+		pool = append(pool, scored{value: e, weight: e.Similarity * 0.8})
+	}
+
+	// Sort by score descending
+	byWeightDesc := func(a, b int) bool { return pool[a].weight > pool[b].weight }
+	sort.Slice(pool, byWeightDesc)
+
+	ordered := make([]interface{}, len(pool))
+	for idx := range pool {
+		ordered[idx] = pool[idx].value
+	}
+	return ordered
+}
+
+// FormatMemoryForContext renders a load response as Markdown-style text for
+// an LLM prompt.
+//
+// Each non-empty memory kind gets its own "##" section followed by one bullet
+// per item and a blank line; empty kinds are omitted. resp must not be nil.
+func (s *Service) FormatMemoryForContext(resp *MemoryLoadResponse) string {
+	var out strings.Builder
+
+	if core := resp.CoreMemory; len(core) > 0 {
+		out.WriteString("## Core Memory (User Preferences & Context)\n\n")
+		for i := range core {
+			fmt.Fprintf(&out, "- [%s] %s (importance: %d/5, similarity: %.2f)\n",
+				core[i].Scope, core[i].Text, core[i].Score, core[i].Similarity)
+		}
+		out.WriteString("\n")
+	}
+
+	if semantic := resp.SemanticMemory; len(semantic) > 0 {
+		out.WriteString("## Semantic Memory (Project Facts & Decisions)\n\n")
+		for i := range semantic {
+			fmt.Fprintf(&out, "- [%s] %s: %s (confidence: %.2f, similarity: %.2f)\n",
+				semantic[i].Kind, semantic[i].Title, semantic[i].Text, semantic[i].Confidence, semantic[i].Similarity)
+		}
+		out.WriteString("\n")
+	}
+
+	if episodes := resp.EpisodicMemory; len(episodes) > 0 {
+		out.WriteString("## Episodic Memory (Recent Interactions)\n\n")
+		for i := range episodes {
+			fmt.Fprintf(&out, "- [%s] %s: %s (similarity: %.2f)\n",
+				episodes[i].Time.Format("2006-01-02 15:04"), episodes[i].Kind, episodes[i].Text, episodes[i].Similarity)
+		}
+		out.WriteString("\n")
+	}
+
+	return out.String()
+}
+
+// GetMemoryStats reports how many memories are stored for a user and,
+// optionally, a project.
+//
+// Possible keys: "user_memory_count", "project_facts_count" (only when
+// projectID is non-empty) and "episodic_events_count" (counted over at most
+// 100 events). A key is left out when its lookup fails; the returned error is
+// always nil.
+func (s *Service) GetMemoryStats(ctx context.Context, userID, projectID string) (map[string]interface{}, error) {
+	stats := map[string]interface{}{}
+
+	if items, err := s.repo.GetUserMemoryItems(ctx, userID); err == nil {
+		stats["user_memory_count"] = len(items)
+	}
+
+	if projectID != "" {
+		if facts, err := s.repo.GetProjectFacts(ctx, projectID); err == nil {
+			stats["project_facts_count"] = len(facts)
+		}
+	}
+
+	if events, err := s.repo.GetEpisodicEvents(ctx, userID, 100); err == nil {
+		stats["episodic_events_count"] = len(events)
+	}
+
+	return stats, nil
+}
+
+// ExportMemory serializes a user's memory as indented JSON (for data
+// portability).
+//
+// The document has two keys: "user_memory" and "episodic_events" (at most
+// 1000 events are requested). Any lookup or marshalling failure is returned
+// wrapped, together with an empty string.
+func (s *Service) ExportMemory(ctx context.Context, userID string) (string, error) {
+	items, err := s.repo.GetUserMemoryItems(ctx, userID)
+	if err != nil {
+		return "", fmt.Errorf("get user memory: %w", err)
+	}
+
+	events, err := s.repo.GetEpisodicEvents(ctx, userID, 1000)
+	if err != nil {
+		return "", fmt.Errorf("get episodic events: %w", err)
+	}
+
+	payload := map[string]interface{}{
+		"user_memory":     items,
+		"episodic_events": events,
+	}
+
+	encoded, err := json.MarshalIndent(payload, "", "  ")
+	if err != nil {
+		return "", fmt.Errorf("marshal export: %w", err)
+	}
+	return string(encoded), nil
+}
+
+// UpsertUserMemories upserts user memory items (for LLM tools).
+//
+// All item texts are embedded in one batch, then each item is upserted on its
+// own. An item that fails to store is logged and skipped, so the returned ID
+// list may be shorter than req.Items. An empty request returns an empty list
+// without contacting the embedder.
+//
+// An error is returned when embedding fails or when the embedder returns a
+// different number of vectors than texts.
+func (s *Service) UpsertUserMemories(ctx context.Context, req UserMemoryUpsertRequest) ([]string, error) {
+	ids := make([]string, 0, len(req.Items))
+	if len(req.Items) == 0 {
+		// Nothing to store; skip the embedding round trip entirely.
+		return ids, nil
+	}
+
+	// Collect all texts for batch embedding
+	texts := make([]string, len(req.Items))
+	for i, item := range req.Items {
+		texts[i] = item.Text
+	}
+
+	// Batch embed all texts
+	embeddings, err := s.embeddingClient.Embed(ctx, texts)
+	if err != nil {
+		return nil, fmt.Errorf("embed texts: %w", err)
+	}
+	// Guard the embeddings[i] lookups below against a short response.
+	if len(embeddings) != len(texts) {
+		return nil, fmt.Errorf("embed texts: got %d embeddings for %d texts", len(embeddings), len(texts))
+	}
+
+	log.Info().
+		Str("user_id", req.UserID).
+		Int("item_count", len(req.Items)).
+		Msg("Upserting user memories")
+
+	// Upsert each item
+	for i, item := range req.Items {
+		userItem := &UserMemoryItem{
+			UserID:    req.UserID,
+			Scope:     item.Scope,
+			Key:       item.Key,
+			Text:      item.Text,
+			Score:     importanceToScore(item.Importance),
+			Embedding: embeddings[i],
+		}
+
+		id, err := s.repo.UpsertUserMemoryItem(ctx, userItem)
+		if err != nil {
+			log.Error().Err(err).Str("text", item.Text).Msg("Failed to upsert user memory item")
+			continue
+		}
+
+		ids = append(ids, id)
+	}
+
+	log.Info().
+		Str("user_id", req.UserID).
+		Int("upserted_count", len(ids)).
+		Msg("User memories upserted successfully")
+
+	return ids, nil
+}
+
+// UpsertProjectFacts upserts project facts (for LLM tools).
+//
+// All fact texts are embedded in one batch, then each fact is upserted on its
+// own. A fact that fails to store is logged and skipped, so the returned ID
+// list may be shorter than req.Facts. An empty request returns an empty list
+// without contacting the embedder.
+//
+// An error is returned when embedding fails or when the embedder returns a
+// different number of vectors than texts.
+func (s *Service) UpsertProjectFacts(ctx context.Context, req ProjectFactUpsertRequest) ([]string, error) {
+	ids := make([]string, 0, len(req.Facts))
+	if len(req.Facts) == 0 {
+		// Nothing to store; skip the embedding round trip entirely.
+		return ids, nil
+	}
+
+	// Collect all texts for batch embedding
+	texts := make([]string, len(req.Facts))
+	for i, fact := range req.Facts {
+		texts[i] = fact.Text
+	}
+
+	// Batch embed all texts
+	embeddings, err := s.embeddingClient.Embed(ctx, texts)
+	if err != nil {
+		return nil, fmt.Errorf("embed texts: %w", err)
+	}
+	// Guard the embeddings[i] lookups below against a short response.
+	if len(embeddings) != len(texts) {
+		return nil, fmt.Errorf("embed texts: got %d embeddings for %d texts", len(embeddings), len(texts))
+	}
+
+	log.Info().
+		Str("project_id", req.ProjectID).
+		Int("fact_count", len(req.Facts)).
+		Msg("Upserting project facts")
+
+	// Upsert each fact
+	for i, fact := range req.Facts {
+		projectFact := &ProjectFact{
+			ProjectID:  req.ProjectID,
+			Kind:       fact.Kind,
+			Title:      fact.Title,
+			Text:       fact.Text,
+			Confidence: fact.Confidence,
+			Embedding:  embeddings[i],
+		}
+
+		id, err := s.repo.UpsertProjectFact(ctx, projectFact)
+		if err != nil {
+			log.Error().Err(err).Str("title", fact.Title).Msg("Failed to upsert project fact")
+			continue
+		}
+
+		ids = append(ids, id)
+	}
+
+	log.Info().
+		Str("project_id", req.ProjectID).
+		Int("upserted_count", len(ids)).
+		Msg("Project facts upserted successfully")
+
+	return ids, nil
+}
+
+// DeleteMemories soft deletes memories by ID (for LLM tools) and returns how
+// many IDs were deleted.
+//
+// For each ID the stores are tried in a fixed order (user memory, project
+// facts, episodic events); the first one whose delete call returns no error
+// counts as the hit and the remaining stores are not tried. IDs rejected by
+// all three are logged as not found. The returned error is always nil.
+//
+// NOTE(review): this relies on the repository returning an error for an
+// unknown ID. If a delete of a missing row returns nil, every ID would be
+// counted against user memory — confirm against the repository.
+func (s *Service) DeleteMemories(ctx context.Context, req DeleteRequest) (int, error) {
+	// Attempt order matters: the first store that accepts the ID wins.
+	attempts := []func(id string) error{
+		func(id string) error { return s.repo.DeleteUserMemoryItem(ctx, id) },
+		func(id string) error { return s.repo.DeleteProjectFact(ctx, id) },
+		func(id string) error { return s.repo.DeleteEpisodicEvent(ctx, id) },
+	}
+
+	log.Info().
+		Int("id_count", len(req.IDs)).
+		Msg("Deleting memories")
+
+	removed := 0
+	for _, id := range req.IDs {
+		hit := false
+		for _, attempt := range attempts {
+			if attempt(id) == nil {
+				hit = true
+				break
+			}
+		}
+
+		if hit {
+			removed++
+			continue
+		}
+		log.Warn().Str("id", id).Msg("Memory ID not found in any table")
+	}
+
+	log.Info().
+		Int("deleted_count", removed).
+		Int("requested_count", len(req.IDs)).
+		Msg("Memories deleted")
+
+	return removed, nil
+}
diff --git a/services/memory-tools/internal/domain/memory/summarization.go b/services/memory-tools/internal/domain/memory/summarization.go
new file mode 100644
index 00000000..449daf67
--- /dev/null
+++ b/services/memory-tools/internal/domain/memory/summarization.go
@@ -0,0 +1,261 @@
+package memory
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/rs/zerolog/log"
+)
+
+// SummarizerConfig holds configuration for conversation summarization.
+//
+// Zero values for every field except LLMEndpoint are replaced with defaults
+// by NewSummarizer.
+type SummarizerConfig struct {
+	LLMEndpoint     string        // NOTE(review): not read by the summarizer code in this file — confirm where it is used
+	Model           string        // Model name passed to the LLM client (default "gpt-4")
+	TriggerEveryN   int           // Summarize every N messages
+	TriggerInterval time.Duration // Or every X duration
+	MaxWindowSize   int           // Max messages to include in summary
+	Temperature     float32       // Sampling temperature passed to the LLM client (default 0.3)
+	MaxTokens       int           // Token limit passed to the LLM client (default 1000)
+}
+
+// Summarizer handles conversation summarization.
+//
+// It builds a prompt from conversation messages, sends it to an LLM client
+// and parses the JSON answer into a SummarizationResult.
+type Summarizer struct {
+	config SummarizerConfig // settings with defaults already applied by NewSummarizer
+	llm    LLMClient        // client used to run the completion
+}
+
+// SummarizationResult represents the structured output from summarization.
+//
+// Its JSON field names match the structure requested from the LLM in the
+// summarization prompt.
+type SummarizationResult struct {
+	DialogueSummary string   `json:"dialogue_summary"` // short prose summary of the conversation
+	OpenTasks       []string `json:"open_tasks"`       // open tasks or action items
+	Entities        []string `json:"entities"`         // people, systems, services or tools mentioned
+	Decisions       []string `json:"decisions"`        // decisions or conclusions reached
+}
+
+// NewSummarizer returns a Summarizer that uses the given LLM client.
+//
+// Unset (zero) configuration fields are filled with defaults: model "gpt-4",
+// trigger every 10 messages or 5 minutes, window of 50 messages, temperature
+// 0.3 and 1000 max tokens.
+func NewSummarizer(config SummarizerConfig, llm LLMClient) *Summarizer {
+	cfg := config
+
+	if cfg.Model == "" {
+		cfg.Model = "gpt-4"
+	}
+	if cfg.TriggerEveryN == 0 {
+		cfg.TriggerEveryN = 10
+	}
+	if cfg.TriggerInterval == 0 {
+		cfg.TriggerInterval = 5 * time.Minute
+	}
+	if cfg.MaxWindowSize == 0 {
+		cfg.MaxWindowSize = 50
+	}
+	if cfg.Temperature == 0 {
+		cfg.Temperature = 0.3
+	}
+	if cfg.MaxTokens == 0 {
+		cfg.MaxTokens = 1000
+	}
+
+	summarizer := &Summarizer{llm: llm}
+	summarizer.config = cfg
+	return summarizer
+}
+
+// ShouldSummarize reports whether a new summary is due.
+//
+// It is due once messageCount reaches TriggerEveryN, or once at least
+// TriggerInterval has elapsed since lastSummarizedAt.
+func (s *Summarizer) ShouldSummarize(messageCount int, lastSummarizedAt time.Time) bool {
+	// The count is checked first; the clock is consulted only when it has not
+	// been reached.
+	return messageCount >= s.config.TriggerEveryN ||
+		time.Since(lastSummarizedAt) >= s.config.TriggerInterval
+}
+
+// Summarize generates a summary of the conversation
+func (s *Summarizer) Summarize(ctx context.Context, messages []ConversationItem, previousSummary *ConversationSummary) (*SummarizationResult, error) {
+	// Build the prompt
+	prompt := s.buildSummarizationPrompt(messages, previousSummary)
+
+	log.Debug().
+		Int("message_count", len(messages)).
+		Bool("has_previous_summary", previousSummary != nil).
+		Msg("Generating conversation summary")
+
+	// Call LLM
+	response, err := s.llm.Complete(ctx, prompt, LLMOptions{
+		Model:          s.config.Model,
+		Temperature:    s.config.Temperature,
+		MaxTokens:      s.config.MaxTokens,
+		ResponseFormat: "json",
+	})
+	if err != nil {
+		return nil, fmt.Errorf("llm completion failed: %w", err)
+	}
+
+	// Parse JSON response
+	var result SummarizationResult
+	if err := json.Unmarshal([]byte(response), &result); err != nil {
+		return nil, fmt.Errorf("failed to parse summarization result: %w", err)
+	}
+
+	log.Info().
+		Str("summary", result.DialogueSummary).
+		Int("open_tasks", len(result.OpenTasks)).
+		Int("entities", len(result.Entities)).
+		Int("decisions", len(result.Decisions)).
+		Msg("Conversation summarized successfully")
+
+	return &result, nil
+}
+
+// buildSummarizationPrompt constructs the LLM prompt for summarization.
+//
+// The prompt consists of fixed instructions, the previous summary (only when
+// one exists and its DialogueSummary is non-empty), the conversation window
+// as "role: content" lines, and the JSON structure the model must return.
+//
+// The window is capped at config.MaxWindowSize messages by keeping the tail
+// of the slice, so the prompt stays bounded for long conversations. This
+// assumes messages are in chronological order — confirm with callers.
+func (s *Summarizer) buildSummarizationPrompt(messages []ConversationItem, previousSummary *ConversationSummary) string {
+	// Apply the configured window; a non-positive limit means "no cap".
+	if limit := s.config.MaxWindowSize; limit > 0 && len(messages) > limit {
+		messages = messages[len(messages)-limit:]
+	}
+
+	prompt := `You are analyzing a conversation to extract key information. Your task is to:
+1. Provide a concise 2-3 sentence summary of the conversation
+2. List any open tasks or action items mentioned
+3. Identify people, systems, services, or tools mentioned
+4. Note any decisions or conclusions reached
+
+Be precise and factual. Only include information explicitly mentioned in the conversation.
+
+`
+
+	// Add previous summary if exists
+	if previousSummary != nil && previousSummary.DialogueSummary != "" {
+		prompt += fmt.Sprintf(`Previous Summary:
+%s
+
+Previous Open Tasks:
+%s
+
+Previous Entities:
+%s
+
+Previous Decisions:
+%s
+
+`, previousSummary.DialogueSummary,
+			formatList(previousSummary.OpenTasks),
+			formatList(previousSummary.Entities),
+			formatList(previousSummary.Decisions))
+	}
+
+	// Add conversation window
+	prompt += "Recent Conversation:\n"
+	for _, msg := range messages {
+		prompt += fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)
+	}
+
+	prompt += `
+Return your analysis as JSON with this exact structure:
+{
+  "dialogue_summary": "2-3 sentence summary of the conversation",
+  "open_tasks": ["task 1", "task 2"],
+  "entities": ["entity 1", "entity 2"],
+  "decisions": ["decision 1", "decision 2"]
+}
+
+Ensure the response is valid JSON.`
+
+	return prompt
+}
+
+// formatList formats a JSON array for display
+func formatList(items []interface{}) string {
+	if len(items) == 0 {
+		return "(none)"
+	}
+
+	result := ""
+	for i, item := range items {
+		if str, ok := item.(string); ok {
+			result += fmt.Sprintf("- %s\n", str)
+		} else {
+			// Handle JSON objects
+			bytes, _ := json.Marshal(item)
+			result += fmt.Sprintf("- %s\n", string(bytes))
+		}
+		if i >= 9 { // Limit to 10 items
+			result += "- ...\n"
+			break
+		}
+	}
+	return result
+}
+
+// MergeSummaries folds a fresh summarization result into the previous
+// conversation summary and returns a new ConversationSummary.
+//
+// The dialogue summary and open tasks always come from the new result (old
+// tasks are assumed completed). Entities and decisions are the new ones when
+// there is no previous summary; otherwise they are the previous string
+// entries followed by the new ones, de-duplicated. UpdatedAt is set to the
+// current time. Only these five fields are populated in the returned value.
+func (s *Summarizer) MergeSummaries(previous *ConversationSummary, new *SummarizationResult) *ConversationSummary {
+	entities, decisions := new.Entities, new.Decisions
+
+	if previous != nil {
+		// Merge entities and decisions (deduplicate)
+		entities = mergeUnique(interfaceToStringSlice(previous.Entities), new.Entities)
+		decisions = mergeUnique(interfaceToStringSlice(previous.Decisions), new.Decisions)
+	}
+
+	merged := &ConversationSummary{
+		DialogueSummary: new.DialogueSummary,
+		OpenTasks:       stringSliceToInterface(new.OpenTasks),
+		Entities:        stringSliceToInterface(entities),
+		Decisions:       stringSliceToInterface(decisions),
+		UpdatedAt:       time.Now(),
+	}
+	return merged
+}
+
+// Helper functions
+
+// stringSliceToInterface copies a string slice into a []interface{} of the
+// same length. The result is never nil, even for a nil input.
+func stringSliceToInterface(strs []string) []interface{} {
+	boxed := make([]interface{}, 0, len(strs))
+	for idx := range strs {
+		boxed = append(boxed, strs[idx])
+	}
+	return boxed
+}
+
+func interfaceToStringSlice(items []interface{}) []string {
+	result := make([]string, 0, len(items))
+	for _, item := range items {
+		if str, ok := item.(string); ok {
+			result = append(result, str)
+		}
+	}
+	return result
+}
+
+// mergeUnique concatenates existing and new, keeping only the first
+// occurrence of each string. Order is preserved: existing entries first, then
+// unseen new ones. The result is never nil.
+func mergeUnique(existing, new []string) []string {
+	seen := map[string]bool{}
+	merged := []string{}
+
+	// Walk both inputs with the same "first occurrence wins" rule.
+	for _, source := range [][]string{existing, new} {
+		for _, entry := range source {
+			if seen[entry] {
+				continue
+			}
+			seen[entry] = true
+			merged = append(merged, entry)
+		}
+	}
+
+	return merged
+}
diff --git a/services/memory-tools/internal/domain/search/ranking.go b/services/memory-tools/internal/domain/search/ranking.go
new file mode 100644
index 00000000..45d3bc85
--- /dev/null
+++ b/services/memory-tools/internal/domain/search/ranking.go
@@ -0,0 +1,143 @@
+package search
+
+import (
+	"sort"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+)
+
+// RankedResult represents a search result with a combined score.
+//
+// Item holds the original memory value; Type names its kind so that
+// SeparateByType can recover the concrete type.
+type RankedResult struct {
+	Item  interface{} // memory.UserMemoryItem, memory.ProjectFact or memory.EpisodicEvent
+	Score float32     // weighted score; higher sorts first in CombineAndRank
+	Type  string      // "user_memory", "project_fact", "episodic"
+}
+
+// Ranker handles result ranking and fusion.
+//
+// NOTE(review): the three weights are set by NewRanker but are not read by
+// any ranking method in this file — confirm whether they are used elsewhere
+// or reserved for a future hybrid search.
+type Ranker struct {
+	denseWeight   float32 // set to 0.7 by NewRanker
+	sparseWeight  float32 // set to 0.2 by NewRanker
+	lexicalWeight float32 // set to 0.1 by NewRanker
+}
+
+// NewRanker returns a Ranker initialised with the default weights
+// (dense 0.7, sparse 0.2, lexical 0.1).
+func NewRanker() *Ranker {
+	ranker := new(Ranker)
+	ranker.denseWeight = 0.7
+	ranker.sparseWeight = 0.2
+	ranker.lexicalWeight = 0.1
+	return ranker
+}
+
+// RankUserMemory scores user memory items.
+//
+// Each item's score is Similarity * (Score / 5.0). The output keeps the input
+// order (no sorting happens here) and is tagged "user_memory".
+func (r *Ranker) RankUserMemory(items []memory.UserMemoryItem) []RankedResult {
+	ranked := make([]RankedResult, 0, len(items))
+
+	for idx := range items {
+		entry := items[idx]
+		ranked = append(ranked, RankedResult{
+			Item:  entry,
+			Score: entry.Similarity * (float32(entry.Score) / 5.0),
+			Type:  "user_memory",
+		})
+	}
+
+	return ranked
+}
+
+// RankProjectFacts scores project facts.
+//
+// Each fact's score is Similarity * Confidence. The output keeps the input
+// order (no sorting happens here) and is tagged "project_fact".
+func (r *Ranker) RankProjectFacts(facts []memory.ProjectFact) []RankedResult {
+	ranked := make([]RankedResult, 0, len(facts))
+
+	for idx := range facts {
+		entry := facts[idx]
+		ranked = append(ranked, RankedResult{
+			Item:  entry,
+			Score: entry.Similarity * entry.Confidence,
+			Type:  "project_fact",
+		})
+	}
+
+	return ranked
+}
+
+// RankEpisodicEvents scores episodic events.
+//
+// Each event's score is Similarity * 0.8, a slightly lower weight than the
+// other memory kinds. The output keeps the input order (no sorting happens
+// here) and is tagged "episodic".
+func (r *Ranker) RankEpisodicEvents(events []memory.EpisodicEvent) []RankedResult {
+	ranked := make([]RankedResult, 0, len(events))
+
+	for idx := range events {
+		entry := events[idx]
+		ranked = append(ranked, RankedResult{
+			Item:  entry,
+			Score: entry.Similarity * 0.8,
+			Type:  "episodic",
+		})
+	}
+
+	return ranked
+}
+
+// CombineAndRank scores all three memory kinds, merges them into one list and
+// orders it by score, highest first.
+//
+// The result is nil when all three inputs are empty.
+func (r *Ranker) CombineAndRank(
+	userMemory []memory.UserMemoryItem,
+	projectFacts []memory.ProjectFact,
+	episodicEvents []memory.EpisodicEvent,
+) []RankedResult {
+	var merged []RankedResult
+
+	// Score each kind and append in a fixed order: user memory, project
+	// facts, episodic events.
+	for _, group := range [][]RankedResult{
+		r.RankUserMemory(userMemory),
+		r.RankProjectFacts(projectFacts),
+		r.RankEpisodicEvents(episodicEvents),
+	} {
+		merged = append(merged, group...)
+	}
+
+	// Sort by score descending
+	higherFirst := func(a, b int) bool { return merged[a].Score > merged[b].Score }
+	sort.Slice(merged, higherFirst)
+
+	return merged
+}
+
+// GetTopK returns the first k results.
+//
+// k is clamped to the range [0, len(results)]: a k larger than the list
+// returns everything, and a zero or negative k returns an empty slice instead
+// of panicking. The returned slice shares its backing array with results.
+func (r *Ranker) GetTopK(results []RankedResult, k int) []RankedResult {
+	if k <= 0 {
+		// Slicing with a negative bound would panic; treat it like zero.
+		return results[:0]
+	}
+	if k > len(results) {
+		k = len(results)
+	}
+	return results[:k]
+}
+
+// SeparateByType splits ranked results back into typed slices, preserving
+// their relative order.
+//
+// A result is kept only when its Type tag and the concrete type of its Item
+// agree; anything else (an unknown tag or a mismatched Item) is dropped. A
+// returned slice stays nil when nothing matched it.
+func (r *Ranker) SeparateByType(results []RankedResult) (
+	userMemory []memory.UserMemoryItem,
+	projectFacts []memory.ProjectFact,
+	episodicEvents []memory.EpisodicEvent,
+) {
+	for idx := range results {
+		entry := results[idx]
+
+		if item, ok := entry.Item.(memory.UserMemoryItem); ok && entry.Type == "user_memory" {
+			userMemory = append(userMemory, item)
+			continue
+		}
+		if fact, ok := entry.Item.(memory.ProjectFact); ok && entry.Type == "project_fact" {
+			projectFacts = append(projectFacts, fact)
+			continue
+		}
+		if event, ok := entry.Item.(memory.EpisodicEvent); ok && entry.Type == "episodic" {
+			episodicEvents = append(episodicEvents, event)
+		}
+	}
+
+	return userMemory, projectFacts, episodicEvents
+}
diff --git a/services/memory-tools/internal/domain/search/vector_search.go b/services/memory-tools/internal/domain/search/vector_search.go
new file mode 100644
index 00000000..942b9d4e
--- /dev/null
+++ b/services/memory-tools/internal/domain/search/vector_search.go
@@ -0,0 +1,184 @@
+package search
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/jackc/pgx/v5/pgxpool"
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+)
+
+// VectorSearcher handles vector similarity search operations.
+//
+// Its queries run against the user_memory_items, project_facts and
+// episodic_events tables and compare embeddings with the `<=>` operator.
+type VectorSearcher struct {
+	db *pgxpool.Pool // connection pool used for every query
+}
+
+// NewVectorSearcher returns a VectorSearcher that queries through the given
+// connection pool.
+func NewVectorSearcher(db *pgxpool.Pool) *VectorSearcher {
+	searcher := new(VectorSearcher)
+	searcher.db = db
+	return searcher
+}
+
+// SearchUserMemory performs vector similarity search on user memory.
+//
+// It returns the user's non-deleted items with score >= 2 whose similarity to
+// queryEmbedding (computed as 1 - `<=>` distance) is at least minSimilarity,
+// closest first, capped at limit rows. The Embedding field of the returned
+// items is not populated.
+//
+// An error is returned when the query fails, a row cannot be scanned, or the
+// result stream ends with an error.
+func (s *VectorSearcher) SearchUserMemory(
+	ctx context.Context,
+	userID string,
+	queryEmbedding []float32,
+	limit int,
+	minSimilarity float32,
+) ([]memory.UserMemoryItem, error) {
+	query := `
+		SELECT 
+			id, user_id, scope, key, text, score, created_at, updated_at,
+			1 - (embedding <=> $1::vector) AS similarity
+		FROM user_memory_items
+		WHERE user_id = $2 
+		  AND is_deleted = false
+		  AND score >= 2
+		  AND 1 - (embedding <=> $1::vector) >= $3
+		ORDER BY embedding <=> $1::vector
+		LIMIT $4
+	`
+
+	rows, err := s.db.Query(ctx, query,
+		embeddingToString(queryEmbedding),
+		userID,
+		minSimilarity,
+		limit,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("search user memory: %w", err)
+	}
+	defer rows.Close()
+
+	var items []memory.UserMemoryItem
+	for rows.Next() {
+		var item memory.UserMemoryItem
+		err := rows.Scan(
+			&item.ID, &item.UserID, &item.Scope, &item.Key,
+			&item.Text, &item.Score, &item.CreatedAt, &item.UpdatedAt,
+			&item.Similarity,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("scan row: %w", err)
+		}
+		items = append(items, item)
+	}
+	// rows.Next also returns false when iteration fails midway; without this
+	// check a truncated result would be reported as success.
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterate rows: %w", err)
+	}
+
+	return items, nil
+}
+
+// SearchProjectFacts performs vector similarity search on project facts.
+//
+// It returns the project's non-deleted facts with confidence >= 0.7 whose
+// similarity to queryEmbedding (computed as 1 - `<=>` distance) is at least
+// minSimilarity, closest first, capped at limit rows. The Embedding field of
+// the returned facts is not populated.
+//
+// An error is returned when the query fails, a row cannot be scanned, or the
+// result stream ends with an error.
+func (s *VectorSearcher) SearchProjectFacts(
+	ctx context.Context,
+	projectID string,
+	queryEmbedding []float32,
+	limit int,
+	minSimilarity float32,
+) ([]memory.ProjectFact, error) {
+	query := `
+		SELECT 
+			id, project_id, kind, title, text, confidence,
+			source_conversation_id, created_at, updated_at,
+			1 - (embedding <=> $1::vector) AS similarity
+		FROM project_facts
+		WHERE project_id = $2 
+		  AND is_deleted = false
+		  AND confidence >= 0.7
+		  AND 1 - (embedding <=> $1::vector) >= $3
+		ORDER BY embedding <=> $1::vector
+		LIMIT $4
+	`
+
+	rows, err := s.db.Query(ctx, query,
+		embeddingToString(queryEmbedding),
+		projectID,
+		minSimilarity,
+		limit,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("search project facts: %w", err)
+	}
+	defer rows.Close()
+
+	var facts []memory.ProjectFact
+	for rows.Next() {
+		var fact memory.ProjectFact
+		err := rows.Scan(
+			&fact.ID, &fact.ProjectID, &fact.Kind, &fact.Title,
+			&fact.Text, &fact.Confidence, &fact.SourceConversationID,
+			&fact.CreatedAt, &fact.UpdatedAt, &fact.Similarity,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("scan row: %w", err)
+		}
+		facts = append(facts, fact)
+	}
+	// rows.Next also returns false when iteration fails midway; without this
+	// check a truncated result would be reported as success.
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterate rows: %w", err)
+	}
+
+	return facts, nil
+}
+
+// SearchEpisodicEvents performs vector similarity search on episodic events.
+//
+// It returns the user's non-deleted events from the last two weeks whose
+// similarity to queryEmbedding (computed as 1 - `<=>` distance) is at least
+// minSimilarity, closest first, capped at limit rows. The Embedding field of
+// the returned events is not populated.
+//
+// An error is returned when the query fails, a row cannot be scanned, or the
+// result stream ends with an error.
+func (s *VectorSearcher) SearchEpisodicEvents(
+	ctx context.Context,
+	userID string,
+	queryEmbedding []float32,
+	limit int,
+	minSimilarity float32,
+) ([]memory.EpisodicEvent, error) {
+	query := `
+		SELECT 
+			id, user_id, project_id, conversation_id, time, text, kind, created_at,
+			1 - (embedding <=> $1::vector) AS similarity
+		FROM episodic_events
+		WHERE user_id = $2 
+		  AND is_deleted = false
+		  AND time > NOW() - INTERVAL '2 weeks'
+		  AND 1 - (embedding <=> $1::vector) >= $3
+		ORDER BY embedding <=> $1::vector
+		LIMIT $4
+	`
+
+	rows, err := s.db.Query(ctx, query,
+		embeddingToString(queryEmbedding),
+		userID,
+		minSimilarity,
+		limit,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("search episodic events: %w", err)
+	}
+	defer rows.Close()
+
+	var events []memory.EpisodicEvent
+	for rows.Next() {
+		var event memory.EpisodicEvent
+		err := rows.Scan(
+			&event.ID, &event.UserID, &event.ProjectID, &event.ConversationID,
+			&event.Time, &event.Text, &event.Kind, &event.CreatedAt,
+			&event.Similarity,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("scan row: %w", err)
+		}
+		events = append(events, event)
+	}
+	// rows.Next also returns false when iteration fails midway; without this
+	// check a truncated result would be reported as success.
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterate rows: %w", err)
+	}
+
+	return events, nil
+}
+
+// embeddingToString converts an embedding to the bracketed, comma-separated
+// text form that the queries in this file cast with `::vector`.
+//
+// Each component is written with %g, the shortest text that round-trips the
+// float32 exactly, so no precision is lost (the fixed six decimals of %f
+// would truncate small components). Very small or very large values use
+// exponent notation. An empty or nil embedding yields "[]".
+func embeddingToString(embedding []float32) string {
+	if len(embedding) == 0 {
+		return "[]"
+	}
+
+	// Build into one byte buffer; repeated string concatenation would copy
+	// the whole prefix for every component.
+	buf := make([]byte, 0, len(embedding)*12+2)
+	buf = append(buf, '[')
+	for i, val := range embedding {
+		if i > 0 {
+			buf = append(buf, ',')
+		}
+		buf = append(buf, fmt.Sprintf("%g", val)...)
+	}
+	buf = append(buf, ']')
+	return string(buf)
+}
diff --git a/services/memory-tools/internal/infrastructure/cache/embedding_cache.go b/services/memory-tools/internal/infrastructure/cache/embedding_cache.go
new file mode 100644
index 00000000..3305b2c8
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/cache/embedding_cache.go
@@ -0,0 +1,112 @@
+package cache
+
+import (
+	"context"
+	"encoding/binary"
+	"math"
+	"time"
+)
+
+// EmbeddingCache wraps RedisCache for embedding-specific operations.
+//
+// cache is the underlying Redis-backed cache, keyPrefix is prepended to every
+// key before it is sent to Redis, and ttl is the expiry Set falls back to
+// when its caller passes a zero ttl.
+type EmbeddingCache struct {
+	cache     *RedisCache
+	keyPrefix string
+	ttl       time.Duration
+}
+
+// NewEmbeddingCache builds an EmbeddingCache on top of a RedisCache created
+// from redisURL. keyPrefix is prepended to every key and ttl becomes the
+// default expiry. An error from NewRedisCache is returned unchanged.
+func NewEmbeddingCache(redisURL, keyPrefix string, ttl time.Duration) (*EmbeddingCache, error) {
+	backend, err := NewRedisCache(redisURL)
+	if err != nil {
+		return nil, err
+	}
+
+	ec := &EmbeddingCache{cache: backend, keyPrefix: keyPrefix, ttl: ttl}
+	return ec, nil
+}
+
+// Get retrieves an embedding from cache.
+//
+// The value stored under c.keyPrefix+key is decoded as consecutive
+// little-endian float32 values, the layout Set writes. The second result is
+// false when the Redis read returns any error, or when the payload length is
+// not a multiple of four bytes. Set never writes such a payload, and it was
+// previously decoded into a silently truncated embedding.
+func (c *EmbeddingCache) Get(key string) ([]float32, bool) {
+	const bytesPerFloat = 4
+
+	ctx := context.Background()
+	data, err := c.cache.client.Get(ctx, c.keyPrefix+key).Bytes()
+	if err != nil {
+		return nil, false
+	}
+
+	// A length that is not a whole number of float32 values means the entry is
+	// corrupt or was written by something else; treat it as a miss.
+	if len(data)%bytesPerFloat != 0 {
+		return nil, false
+	}
+
+	// Deserialize float32 array
+	embedding := make([]float32, len(data)/bytesPerFloat)
+	for i := range embedding {
+		bits := binary.LittleEndian.Uint32(data[i*bytesPerFloat:])
+		embedding[i] = math.Float32frombits(bits)
+	}
+
+	return embedding, true
+}
+
+// Set stores an embedding in cache under c.keyPrefix+key.
+//
+// A zero ttl selects the cache's default ttl. The outcome of the Redis
+// command is not inspected, so a failed write is not reported to the caller.
+func (c *EmbeddingCache) Set(key string, value []float32, ttl time.Duration) {
+	expiry := ttl
+	if expiry == 0 {
+		expiry = c.ttl
+	}
+
+	// Pack every float32 as its bit pattern, four little-endian bytes each.
+	payload := make([]byte, 4*len(value))
+	for idx := range value {
+		binary.LittleEndian.PutUint32(payload[idx*4:], math.Float32bits(value[idx]))
+	}
+
+	c.cache.client.Set(context.Background(), c.keyPrefix+key, payload, expiry)
+}
+
+// Delete removes the embedding stored under c.keyPrefix+key and returns the
+// error reported by the underlying cache, if any.
+func (c *EmbeddingCache) Delete(key string) error {
+	fullKey := c.keyPrefix + key
+	return c.cache.Delete(context.Background(), fullKey)
+}
+
+// Clear removes every key matching c.keyPrefix+"*" through the underlying
+// cache's DeletePattern and returns its error, if any.
+func (c *EmbeddingCache) Clear() error {
+	pattern := c.keyPrefix + "*"
+	return c.cache.DeletePattern(context.Background(), pattern)
+}
+
+// Stats returns cache statistics.
+//
+// The result holds the cache type ("redis"), the configured key prefix, the
+// number of keys matching keyPrefix+"*", the default TTL rendered as a string,
+// and the raw reply of the Redis INFO "stats" command.
+//
+// Returns an error when the INFO command fails or when the key scan stops
+// with an error. A failed scan used to be reported as an undercount with a
+// nil error.
+func (c *EmbeddingCache) Stats() (map[string]interface{}, error) {
+	ctx := context.Background()
+
+	info, err := c.cache.client.Info(ctx, "stats").Result()
+	if err != nil {
+		return nil, err
+	}
+
+	// Count keys with our prefix
+	var count int64
+	iter := c.cache.client.Scan(ctx, 0, c.keyPrefix+"*", 0).Iterator()
+	for iter.Next(ctx) {
+		count++
+	}
+	// Next returns false both when the scan is finished and when it failed;
+	// only Err tells the two apart.
+	if err := iter.Err(); err != nil {
+		return nil, err
+	}
+
+	return map[string]interface{}{
+		"type":       "redis",
+		"key_prefix": c.keyPrefix,
+		"key_count":  count,
+		"ttl":        c.ttl.String(),
+		"info":       info,
+	}, nil
+}
+
+// Close delegates to the wrapped RedisCache's Close and returns its error.
+func (c *EmbeddingCache) Close() error {
+	backend := c.cache
+	return backend.Close()
+}
+
+// HealthCheck reports the result of the wrapped RedisCache's HealthCheck for
+// the given context; a nil error means the check passed.
+func (c *EmbeddingCache) HealthCheck(ctx context.Context) error {
+	err := c.cache.HealthCheck(ctx)
+	return err
+}
diff --git a/apps/jan-api-gateway/application/app/infrastructure/cache/redis_cache_service.go b/services/memory-tools/internal/infrastructure/cache/redis_cache.go
similarity index 51%
rename from apps/jan-api-gateway/application/app/infrastructure/cache/redis_cache_service.go
rename to services/memory-tools/internal/infrastructure/cache/redis_cache.go
index 4bbd5d89..88873a39 100644
--- a/apps/jan-api-gateway/application/app/infrastructure/cache/redis_cache_service.go
+++ b/services/memory-tools/internal/infrastructure/cache/redis_cache.go
@@ -2,6 +2,7 @@ package cache
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
@@ -9,36 +10,28 @@ import (
 	"github.com/go-redsync/redsync/v4"
 	"github.com/go-redsync/redsync/v4/redis/goredis/v9"
 	"github.com/redis/go-redis/v9"
-	"menlo.ai/jan-api-gateway/app/utils/logger"
-	"menlo.ai/jan-api-gateway/config/environment_variables"
+	"github.com/rs/zerolog/log"
 )
 
-type RedisCacheService struct {
+const CacheVersion = "v1"
+
+type RedisCache struct {
 	client redis.UniversalClient
 	rs     *redsync.Redsync
 }
 
-func NewRedisCacheService() *RedisCacheService {
-	redisURL := environment_variables.EnvironmentVariables.REDIS_URL
+func NewRedisCache(redisURL string) (*RedisCache, error) {
 	if redisURL == "" {
-		panic("REDIS_URL environment variable must be set")
+		return nil, fmt.Errorf("Redis URL must be provided")
 	}
 
 	opts, err := buildUniversalOptions(redisURL)
 	if err != nil {
-		panic(fmt.Sprintf("failed to parse Redis URL: %v", err))
-	}
-
-	if pwd := environment_variables.EnvironmentVariables.REDIS_PASSWORD; pwd != "" {
-		opts.Password = pwd
-	}
-
-	if dbVal := environment_variables.EnvironmentVariables.REDIS_DB; dbVal != 0 {
-		opts.DB = dbVal
+		return nil, fmt.Errorf("failed to parse Redis URL: %w", err)
 	}
 
 	if len(opts.Addrs) > 1 && opts.DB != 0 {
-		logger.GetLogger().Warn("Ignoring non-zero REDIS_DB when using Redis Cluster configuration")
+		log.Warn().Msg("Ignoring non-zero DB when using Redis Cluster configuration")
 		opts.DB = 0
 	}
 
@@ -48,17 +41,15 @@ func NewRedisCacheService() *RedisCacheService {
 	defer cancel()
 
 	if err := client.Ping(ctx).Err(); err != nil {
-		panic(fmt.Sprintf("failed to connect to Redis: %v", err))
+		return nil, fmt.Errorf("failed to connect to Redis: %w", err)
 	}
 
-	logger.GetLogger().Info("Successfully connected to Redis")
-
+	log.Info().Msg("Successfully connected to Redis cache")
 	rs := redsync.New(goredis.NewPool(client))
-
-	return &RedisCacheService{
+	return &RedisCache{
 		client: client,
 		rs:     rs,
-	}
+	}, nil
 }
 
 func buildUniversalOptions(raw string) (*redis.UniversalOptions, error) {
@@ -126,23 +117,44 @@ func buildUniversalOptions(raw string) (*redis.UniversalOptions, error) {
 	return opts, nil
 }
 
-func (r *RedisCacheService) Set(ctx context.Context, key string, value string, expiration time.Duration) error {
+func (r *RedisCache) Set(ctx context.Context, key string, value string, expiration time.Duration) error {
 	return r.client.Set(ctx, key, value, expiration).Err()
 }
 
-func (r *RedisCacheService) Get(ctx context.Context, key string) (string, error) {
+// SetWithTimeout stores value under key with the given expiration, using a
+// context derived from ctx that is cancelled after timeout. It returns the
+// error of the Redis SET command.
+func (r *RedisCache) SetWithTimeout(ctx context.Context, key string, value string, expiration time.Duration, timeout time.Duration) error {
+	boundedCtx, release := context.WithTimeout(ctx, timeout)
+	defer release()
+
+	cmd := r.client.Set(boundedCtx, key, value, expiration)
+	return cmd.Err()
+}
+
+func (r *RedisCache) Get(ctx context.Context, key string) (string, error) {
 	val, err := r.client.Get(ctx, key).Result()
 	if err != nil {
+		// Cache miss is a normal condition in cache-aside pattern - return redis.Nil as-is
+		// Callers should check with errors.Is(err, redis.Nil)
 		if err == redis.Nil {
-			return "", fmt.Errorf("key not found: %s", key)
+			return "", redis.Nil
 		}
-		return "", fmt.Errorf("failed to get value: %w", err)
+		return "", fmt.Errorf("failed to get value from cache: %w", err)
 	}
 
 	return val, nil
 }
 
-func (r *RedisCacheService) GetWithFallback(ctx context.Context, key string, fallback func() (string, error), expiration time.Duration) (string, error) {
+// GetJSON reads the string stored at key through rdb.Get and decodes it as
+// JSON into a freshly allocated T.
+//
+// An error from rdb.Get is returned unchanged; a decoding failure is wrapped
+// with a descriptive message.
+func GetJSON[T any](ctx context.Context, rdb *RedisCache, key string) (*T, error) {
+	raw, err := rdb.Get(ctx, key)
+	if err != nil {
+		return nil, err
+	}
+
+	decoded := new(T)
+	if err := json.Unmarshal([]byte(raw), decoded); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal JSON from cache: %w", err)
+	}
+	return decoded, nil
+}
+
+func (r *RedisCache) GetWithFallback(ctx context.Context, key string, fallback func() (string, error), expiration time.Duration) (string, error) {
 	result, err := r.Get(ctx, key)
 	if err == nil {
 		return result, nil
@@ -154,21 +166,21 @@ func (r *RedisCacheService) GetWithFallback(ctx context.Context, key string, fal
 	}
 
 	if err := r.Set(ctx, key, result, expiration); err != nil {
-		logger.GetLogger().Error(fmt.Sprintf("Failed to cache value: %v", err))
+		log.Error().Err(err).Msg("Failed to cache value")
 	}
 
 	return result, nil
 }
 
-func (r *RedisCacheService) Delete(ctx context.Context, key string) error {
+func (r *RedisCache) Delete(ctx context.Context, key string) error {
 	return r.client.Del(ctx, key).Err()
 }
 
-func (r *RedisCacheService) Unlink(ctx context.Context, key string) error {
+func (r *RedisCache) Unlink(ctx context.Context, key string) error {
 	return r.client.Unlink(ctx, key).Err()
 }
 
-func (r *RedisCacheService) DeletePattern(ctx context.Context, pattern string) error {
+func (r *RedisCache) DeletePattern(ctx context.Context, pattern string) error {
 	var cursor uint64
 	for {
 		keys, next, err := r.client.Scan(ctx, cursor, pattern, 1000).Result()
@@ -192,7 +204,7 @@ func (r *RedisCacheService) DeletePattern(ctx context.Context, pattern string) e
 	return nil
 }
 
-func (r *RedisCacheService) Exists(ctx context.Context, key string) (bool, error) {
+func (r *RedisCache) Exists(ctx context.Context, key string) (bool, error) {
 	result, err := r.client.Exists(ctx, key).Result()
 	if err != nil {
 		return false, fmt.Errorf("failed to check key existence: %w", err)
@@ -200,20 +212,38 @@ func (r *RedisCacheService) Exists(ctx context.Context, key string) (bool, error
 	return result > 0, nil
 }
 
-func (r *RedisCacheService) Close() error {
-	return r.client.Close()
+func (r *RedisCache) Incr(ctx context.Context, key string) (int64, error) {
+	return r.client.Incr(ctx, key).Result()
 }
 
-func (r *RedisCacheService) HealthCheck(ctx context.Context) error {
-	return r.client.Ping(ctx).Err()
+// IncrWithTimeout calls Incr for key with a context derived from ctx that is
+// cancelled after timeout, and returns Incr's results.
+func (r *RedisCache) IncrWithTimeout(ctx context.Context, key string, timeout time.Duration) (int64, error) {
+	boundedCtx, release := context.WithTimeout(ctx, timeout)
+	defer release()
+
+	next, err := r.Incr(boundedCtx, key)
+	return next, err
+}
+
+func (r *RedisCache) Expires(ctx context.Context, key string, duration time.Duration) error {
+	return r.client.Expire(ctx, key, duration).Err()
 }
 
-func (r *RedisCacheService) NewMutex(name string, options ...redsync.Option) *redsync.Mutex {
-	return r.rs.NewMutex(name, options...)
+// EvalSha forwards to the Redis client's EvalSha with the given script hash,
+// keys and arguments, and returns the command's result and error.
+func (r *RedisCache) EvalSha(ctx context.Context, sha1 string, keys []string, args ...interface{}) (any, error) {
+	cmd := r.client.EvalSha(ctx, sha1, keys, args...)
+	return cmd.Result()
+}
+
+// ScriptLoad forwards script to the Redis client's ScriptLoad and returns the
+// command's string result and error.
+func (r *RedisCache) ScriptLoad(ctx context.Context, script string) (string, error) {
+	cmd := r.client.ScriptLoad(ctx, script)
+	return cmd.Result()
+}
+
+// Close closes the Redis client and returns its error, if any.
+func (r *RedisCache) Close() error {
+	err := r.client.Close()
+	return err
+}
+
+func (r *RedisCache) HealthCheck(ctx context.Context) error {
+	return r.client.Ping(ctx).Err()
 }
 
-func WithLock(cache RedisCacheService, lockName string, fn func() error, ttl time.Duration) error {
-	mutex := cache.NewMutex(lockName, redsync.WithExpiry(ttl))
+func WithLock(cache *RedisCache, lockName string, ttl time.Duration, fn func() error) error {
+	mutex := cache.rs.NewMutex(lockName, redsync.WithExpiry(ttl))
 
 	if err := mutex.Lock(); err != nil {
 		return err
@@ -221,6 +251,7 @@ func WithLock(cache RedisCacheService, lockName string, fn func() error, ttl tim
 
 	defer func() {
 		if _, err := mutex.Unlock(); err != nil {
+			log.Error().Err(err).Msg("Failed to unlock mutex")
 		}
 	}()
 
diff --git a/services/memory-tools/internal/infrastructure/database/dbschema/conversation_item.go b/services/memory-tools/internal/infrastructure/database/dbschema/conversation_item.go
new file mode 100644
index 00000000..0e697191
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/dbschema/conversation_item.go
@@ -0,0 +1,46 @@
+package dbschema
+
+import (
+	"time"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+)
+
+// ConversationItem is the storage-layer shape of one conversation item. Each
+// field carries a `db` tag naming its column: id, conversation_id, role,
+// content, tool_calls and created_at.
+// NOTE(review): ToolCalls is a plain string; its encoding is not visible in
+// this file — confirm against the code that writes it.
+type ConversationItem struct {
+	ID             string    `db:"id"`
+	ConversationID string    `db:"conversation_id"`
+	Role           string    `db:"role"`
+	Content        string    `db:"content"`
+	ToolCalls      string    `db:"tool_calls"`
+	CreatedAt      time.Time `db:"created_at"`
+}
+
+// NewSchemaConversationItem copies a domain conversation item into its
+// storage-layer form. A nil input yields nil.
+func NewSchemaConversationItem(d *memory.ConversationItem) *ConversationItem {
+	if d == nil {
+		return nil
+	}
+
+	row := new(ConversationItem)
+	row.ID = d.ID
+	row.ConversationID = d.ConversationID
+	row.Role = d.Role
+	row.Content = d.Content
+	row.ToolCalls = d.ToolCalls
+	row.CreatedAt = d.CreatedAt
+	return row
+}
+
+// EtoD converts the storage-layer item back into a domain
+// memory.ConversationItem. A nil receiver yields nil.
+func (s *ConversationItem) EtoD() *memory.ConversationItem {
+	if s == nil {
+		return nil
+	}
+
+	var item memory.ConversationItem
+	item.ID = s.ID
+	item.ConversationID = s.ConversationID
+	item.Role = s.Role
+	item.Content = s.Content
+	item.ToolCalls = s.ToolCalls
+	item.CreatedAt = s.CreatedAt
+	return &item
+}
diff --git a/services/memory-tools/internal/infrastructure/database/dbschema/episodic_event.go b/services/memory-tools/internal/infrastructure/database/dbschema/episodic_event.go
new file mode 100644
index 00000000..f1947938
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/dbschema/episodic_event.go
@@ -0,0 +1,58 @@
+package dbschema
+
+import (
+	"time"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+)
+
+// EpisodicEvent is the storage-layer shape of one episodic event. Each field
+// carries a `db` tag naming its column. Embedding holds the event's vector
+// and IsDeleted is the soft-delete flag.
+type EpisodicEvent struct {
+	ID             string    `db:"id"`
+	UserID         string    `db:"user_id"`
+	ProjectID      string    `db:"project_id"`
+	ConversationID string    `db:"conversation_id"`
+	Time           time.Time `db:"time"`
+	Text           string    `db:"text"`
+	Kind           string    `db:"kind"`
+	Embedding      []float32 `db:"embedding"`
+	IsDeleted      bool      `db:"is_deleted"`
+	CreatedAt      time.Time `db:"created_at"`
+}
+
+// NewSchemaEpisodicEvent copies a domain episodic event into its
+// storage-layer form. The Embedding slice is shared, not cloned. A nil input
+// yields nil.
+func NewSchemaEpisodicEvent(d *memory.EpisodicEvent) *EpisodicEvent {
+	if d == nil {
+		return nil
+	}
+
+	row := new(EpisodicEvent)
+	row.ID = d.ID
+	row.UserID = d.UserID
+	row.ProjectID = d.ProjectID
+	row.ConversationID = d.ConversationID
+	row.Time = d.Time
+	row.Text = d.Text
+	row.Kind = d.Kind
+	row.Embedding = d.Embedding
+	row.IsDeleted = d.IsDeleted
+	row.CreatedAt = d.CreatedAt
+	return row
+}
+
+// EtoD converts the storage-layer event back into a domain
+// memory.EpisodicEvent. The Embedding slice is shared, not cloned. A nil
+// receiver yields nil.
+func (s *EpisodicEvent) EtoD() *memory.EpisodicEvent {
+	if s == nil {
+		return nil
+	}
+
+	var event memory.EpisodicEvent
+	event.ID = s.ID
+	event.UserID = s.UserID
+	event.ProjectID = s.ProjectID
+	event.ConversationID = s.ConversationID
+	event.Time = s.Time
+	event.Text = s.Text
+	event.Kind = s.Kind
+	event.Embedding = s.Embedding
+	event.IsDeleted = s.IsDeleted
+	event.CreatedAt = s.CreatedAt
+	return &event
+}
diff --git a/services/memory-tools/internal/infrastructure/database/dbschema/project_fact.go b/services/memory-tools/internal/infrastructure/database/dbschema/project_fact.go
new file mode 100644
index 00000000..445cbb9e
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/dbschema/project_fact.go
@@ -0,0 +1,61 @@
+package dbschema
+
+import (
+	"time"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+)
+
+// ProjectFact is the storage-layer shape of one project fact. Each field
+// carries a `db` tag naming its column. Embedding holds the fact's vector and
+// IsDeleted is the soft-delete flag.
+// NOTE(review): the valid range of Confidence is not defined here — confirm
+// against the domain model.
+type ProjectFact struct {
+	ID                   string    `db:"id"`
+	ProjectID            string    `db:"project_id"`
+	Kind                 string    `db:"kind"`
+	Title                string    `db:"title"`
+	Text                 string    `db:"text"`
+	Confidence           float32   `db:"confidence"`
+	Embedding            []float32 `db:"embedding"`
+	SourceConversationID string    `db:"source_conversation_id"`
+	IsDeleted            bool      `db:"is_deleted"`
+	CreatedAt            time.Time `db:"created_at"`
+	UpdatedAt            time.Time `db:"updated_at"`
+}
+
+// NewSchemaProjectFact copies a domain project fact into its storage-layer
+// form. The Embedding slice is shared, not cloned. A nil input yields nil.
+func NewSchemaProjectFact(d *memory.ProjectFact) *ProjectFact {
+	if d == nil {
+		return nil
+	}
+
+	row := new(ProjectFact)
+	row.ID = d.ID
+	row.ProjectID = d.ProjectID
+	row.Kind = d.Kind
+	row.Title = d.Title
+	row.Text = d.Text
+	row.Confidence = d.Confidence
+	row.Embedding = d.Embedding
+	row.SourceConversationID = d.SourceConversationID
+	row.IsDeleted = d.IsDeleted
+	row.CreatedAt = d.CreatedAt
+	row.UpdatedAt = d.UpdatedAt
+	return row
+}
+
+// EtoD converts the storage-layer fact back into a domain memory.ProjectFact.
+// The Embedding slice is shared, not cloned. A nil receiver yields nil.
+func (s *ProjectFact) EtoD() *memory.ProjectFact {
+	if s == nil {
+		return nil
+	}
+
+	var fact memory.ProjectFact
+	fact.ID = s.ID
+	fact.ProjectID = s.ProjectID
+	fact.Kind = s.Kind
+	fact.Title = s.Title
+	fact.Text = s.Text
+	fact.Confidence = s.Confidence
+	fact.Embedding = s.Embedding
+	fact.SourceConversationID = s.SourceConversationID
+	fact.IsDeleted = s.IsDeleted
+	fact.CreatedAt = s.CreatedAt
+	fact.UpdatedAt = s.UpdatedAt
+	return &fact
+}
diff --git a/services/memory-tools/internal/infrastructure/database/dbschema/user_memory_item.go b/services/memory-tools/internal/infrastructure/database/dbschema/user_memory_item.go
new file mode 100644
index 00000000..b2852c40
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/dbschema/user_memory_item.go
@@ -0,0 +1,58 @@
+package dbschema
+
+import (
+	"time"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+)
+
+// UserMemoryItem is the storage-layer shape of one user memory item. Each
+// field carries a `db` tag naming its column. Embedding holds the item's
+// vector and IsDeleted is the soft-delete flag.
+// NOTE(review): the meaning and range of Score and the allowed Scope values
+// are not defined here — confirm against the domain model.
+type UserMemoryItem struct {
+	ID        string    `db:"id"`
+	UserID    string    `db:"user_id"`
+	Scope     string    `db:"scope"`
+	Key       string    `db:"key"`
+	Text      string    `db:"text"`
+	Score     int       `db:"score"`
+	Embedding []float32 `db:"embedding"`
+	IsDeleted bool      `db:"is_deleted"`
+	CreatedAt time.Time `db:"created_at"`
+	UpdatedAt time.Time `db:"updated_at"`
+}
+
+// NewSchemaUserMemoryItem copies a domain user memory item into its
+// storage-layer form. The Embedding slice is shared, not cloned. A nil input
+// yields nil.
+func NewSchemaUserMemoryItem(d *memory.UserMemoryItem) *UserMemoryItem {
+	if d == nil {
+		return nil
+	}
+
+	row := new(UserMemoryItem)
+	row.ID = d.ID
+	row.UserID = d.UserID
+	row.Scope = d.Scope
+	row.Key = d.Key
+	row.Text = d.Text
+	row.Score = d.Score
+	row.Embedding = d.Embedding
+	row.IsDeleted = d.IsDeleted
+	row.CreatedAt = d.CreatedAt
+	row.UpdatedAt = d.UpdatedAt
+	return row
+}
+
+// EtoD converts the storage-layer item back into a domain
+// memory.UserMemoryItem. The Embedding slice is shared, not cloned. A nil
+// receiver yields nil.
+func (s *UserMemoryItem) EtoD() *memory.UserMemoryItem {
+	if s == nil {
+		return nil
+	}
+
+	var item memory.UserMemoryItem
+	item.ID = s.ID
+	item.UserID = s.UserID
+	item.Scope = s.Scope
+	item.Key = s.Key
+	item.Text = s.Text
+	item.Score = s.Score
+	item.Embedding = s.Embedding
+	item.IsDeleted = s.IsDeleted
+	item.CreatedAt = s.CreatedAt
+	item.UpdatedAt = s.UpdatedAt
+	return &item
+}
diff --git a/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/conversation_item_repository.go b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/conversation_item_repository.go
new file mode 100644
index 00000000..4add6d8e
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/conversation_item_repository.go
@@ -0,0 +1,57 @@
+package memoryrepo
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/janhq/jan-server/services/memory-tools/internal/infrastructure/database/dbschema"
+)
+
+// CreateConversationItem inserts item into conversation_items.
+//
+// A missing ID is replaced with a new UUID and a zero CreatedAt with the
+// current time; both defaults are written back to item. The insert error, if
+// any, is returned wrapped.
+func (r *Repository) CreateConversationItem(ctx context.Context, item *memory.ConversationItem) error {
+	if item.ID == "" {
+		item.ID = uuid.New().String()
+	}
+	if item.CreatedAt.IsZero() {
+		item.CreatedAt = time.Now()
+	}
+
+	row := dbschema.NewSchemaConversationItem(item)
+	values := map[string]any{
+		"id":              row.ID,
+		"conversation_id": row.ConversationID,
+		"role":            row.Role,
+		"content":         row.Content,
+		"tool_calls":      row.ToolCalls,
+		"created_at":      row.CreatedAt,
+	}
+
+	err := r.db.WithContext(ctx).Table("conversation_items").Create(values).Error
+	if err != nil {
+		return fmt.Errorf("create conversation item: %w", err)
+	}
+	return nil
+}
+
+// GetConversationItems returns all items of the given conversation, ordered
+// by created_at ascending. The result is an empty (non-nil) slice when no
+// rows match. The query error, if any, is returned wrapped.
+func (r *Repository) GetConversationItems(ctx context.Context, conversationID string) ([]memory.ConversationItem, error) {
+	var records []dbschema.ConversationItem
+
+	lookup := r.db.WithContext(ctx).
+		Table("conversation_items").
+		Select("id, conversation_id, role, content, tool_calls, created_at").
+		Where("conversation_id = ?", conversationID).
+		Order("created_at ASC")
+	if err := lookup.Find(&records).Error; err != nil {
+		return nil, fmt.Errorf("query conversation items: %w", err)
+	}
+
+	items := make([]memory.ConversationItem, len(records))
+	for i := range records {
+		items[i] = *records[i].EtoD()
+	}
+	return items, nil
+}
diff --git a/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/episodic_event_repository.go b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/episodic_event_repository.go
new file mode 100644
index 00000000..195d3238
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/episodic_event_repository.go
@@ -0,0 +1,109 @@
+package memoryrepo
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/janhq/jan-server/services/memory-tools/internal/infrastructure/database/dbschema"
+	"gorm.io/gorm/clause"
+)
+
+// GetEpisodicEvents returns the non-deleted episodic events of userID,
+// ordered by time descending and limited with limit. Embeddings are not
+// selected. The result is an empty (non-nil) slice when no rows match; the
+// query error, if any, is returned wrapped.
+func (r *Repository) GetEpisodicEvents(ctx context.Context, userID string, limit int) ([]memory.EpisodicEvent, error) {
+	var records []dbschema.EpisodicEvent
+
+	lookup := r.db.WithContext(ctx).
+		Table("episodic_events").
+		Select("id, user_id, project_id, conversation_id, time, text, kind, created_at").
+		Where("user_id = ? AND is_deleted = false", userID).
+		Order("time DESC").
+		Limit(limit)
+	if err := lookup.Find(&records).Error; err != nil {
+		return nil, fmt.Errorf("query episodic events: %w", err)
+	}
+
+	events := make([]memory.EpisodicEvent, len(records))
+	for i := range records {
+		events[i] = *records[i].EtoD()
+	}
+	return events, nil
+}
+
+// CreateEpisodicEvent inserts event into episodic_events.
+//
+// A missing ID is replaced with a new UUID and a zero CreatedAt with the
+// current time; both defaults are written back to event. The embedding is
+// stored through embeddingToString. The insert error, if any, is returned
+// wrapped.
+func (r *Repository) CreateEpisodicEvent(ctx context.Context, event *memory.EpisodicEvent) error {
+	if event.ID == "" {
+		event.ID = uuid.New().String()
+	}
+	if event.CreatedAt.IsZero() {
+		event.CreatedAt = time.Now()
+	}
+
+	row := dbschema.NewSchemaEpisodicEvent(event)
+	values := map[string]any{
+		"id":              row.ID,
+		"user_id":         row.UserID,
+		"project_id":      row.ProjectID,
+		"conversation_id": row.ConversationID,
+		"time":            row.Time,
+		"text":            row.Text,
+		"kind":            row.Kind,
+		"embedding":       embeddingToString(row.Embedding),
+		"is_deleted":      row.IsDeleted,
+		"created_at":      row.CreatedAt,
+	}
+
+	err := r.db.WithContext(ctx).Table("episodic_events").Create(values).Error
+	if err != nil {
+		return fmt.Errorf("create episodic event: %w", err)
+	}
+	return nil
+}
+
+// DeleteEpisodicEvent soft-deletes the episodic event with the given id by
+// setting is_deleted to true. It returns the database error unchanged, or an
+// "episodic event not found" error when no row was updated.
+func (r *Repository) DeleteEpisodicEvent(ctx context.Context, id string) error {
+	outcome := r.db.WithContext(ctx).
+		Table("episodic_events").
+		Where("id = ?", id).
+		Update("is_deleted", true)
+
+	switch {
+	case outcome.Error != nil:
+		return outcome.Error
+	case outcome.RowsAffected == 0:
+		return fmt.Errorf("episodic event not found")
+	default:
+		return nil
+	}
+}
+
+// SearchEpisodicEvents runs a vector similarity search over the episodic
+// events of userID.
+//
+// Similarity is computed in SQL as 1 - (embedding <=> query). Only rows that
+// are not soft-deleted, have a `time` within the last two weeks and reach
+// minSimilarity are considered. Results are ordered nearest first and capped
+// at limit. Embeddings are not selected.
+//
+// Returns the matching events with Similarity populated (an empty, non-nil
+// slice when nothing matches), or the wrapped query error.
+func (r *Repository) SearchEpisodicEvents(
+	ctx context.Context,
+	userID string,
+	queryEmbedding []float32,
+	limit int,
+	minSimilarity float32,
+) ([]memory.EpisodicEvent, error) {
+	var rows []struct {
+		dbschema.EpisodicEvent
+		Similarity float32 `db:"similarity"`
+	}
+
+	// Serialize the query vector once; it is bound in three places below.
+	vec := embeddingToString(queryEmbedding)
+
+	if err := r.db.WithContext(ctx).
+		Table("episodic_events").
+		Select("id, user_id, project_id, conversation_id, time, text, kind, created_at, 1 - (embedding <=> ?::vector) AS similarity", vec).
+		Where("user_id = ? AND is_deleted = false AND time > NOW() - INTERVAL '2 weeks' AND 1 - (embedding <=> ?::vector) >= ?", userID, vec, minSimilarity).
+		// gorm's Order does not accept a clause.Expr: it ignores value types
+		// it does not recognize, so the ordering was never applied and LIMIT
+		// picked arbitrary rows. An expression has to be attached as a
+		// clause.OrderBy instead.
+		Clauses(clause.OrderBy{
+			Expression: clause.Expr{SQL: "embedding <=> ?::vector", Vars: []any{vec}},
+		}).
+		Limit(limit).
+		Scan(&rows).Error; err != nil {
+		return nil, fmt.Errorf("search episodic events: %w", err)
+	}
+
+	events := make([]memory.EpisodicEvent, 0, len(rows))
+	for _, row := range rows {
+		event := row.EpisodicEvent.EtoD()
+		event.Similarity = row.Similarity
+		events = append(events, *event)
+	}
+
+	return events, nil
+}
diff --git a/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/project_fact_repository.go b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/project_fact_repository.go
new file mode 100644
index 00000000..efb8ff41
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/project_fact_repository.go
@@ -0,0 +1,123 @@
+package memoryrepo
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/janhq/jan-server/services/memory-tools/internal/infrastructure/database/dbschema"
+	"gorm.io/gorm/clause"
+)
+
+// GetProjectFacts returns the non-deleted facts of projectID, ordered by
+// confidence and then updated_at, both descending. Embeddings are not
+// selected. The result is an empty (non-nil) slice when no rows match; the
+// query error, if any, is returned wrapped.
+func (r *Repository) GetProjectFacts(ctx context.Context, projectID string) ([]memory.ProjectFact, error) {
+	columns := `
+		id, project_id, kind, title, text, confidence, 
+		source_conversation_id, created_at, updated_at
+	`
+
+	var records []dbschema.ProjectFact
+	lookup := r.db.WithContext(ctx).
+		Table("project_facts").
+		Select(columns).
+		Where("project_id = ? AND is_deleted = false", projectID).
+		Order("confidence DESC, updated_at DESC")
+	if err := lookup.Find(&records).Error; err != nil {
+		return nil, fmt.Errorf("query project facts: %w", err)
+	}
+
+	facts := make([]memory.ProjectFact, len(records))
+	for i := range records {
+		facts[i] = *records[i].EtoD()
+	}
+	return facts, nil
+}
+
+// UpsertProjectFact inserts fact into project_facts, or on an id conflict
+// updates kind, title, text, confidence, embedding, is_deleted and
+// updated_at of the existing row.
+//
+// A missing ID is replaced with a new UUID, a zero CreatedAt with the current
+// time, and UpdatedAt is always set to the current time; these values are
+// written back to fact. Returns the fact's ID, or the wrapped database error.
+func (r *Repository) UpsertProjectFact(ctx context.Context, fact *memory.ProjectFact) (string, error) {
+	if fact.ID == "" {
+		fact.ID = uuid.New().String()
+	}
+
+	stamp := time.Now()
+	if fact.CreatedAt.IsZero() {
+		fact.CreatedAt = stamp
+	}
+	fact.UpdatedAt = stamp
+
+	row := dbschema.NewSchemaProjectFact(fact)
+
+	// On an id conflict only these columns are overwritten.
+	onConflict := clause.OnConflict{
+		Columns:   []clause.Column{{Name: "id"}},
+		DoUpdates: clause.AssignmentColumns([]string{"kind", "title", "text", "confidence", "embedding", "is_deleted", "updated_at"}),
+	}
+	values := map[string]any{
+		"id":                     row.ID,
+		"project_id":             row.ProjectID,
+		"kind":                   row.Kind,
+		"title":                  row.Title,
+		"text":                   row.Text,
+		"confidence":             row.Confidence,
+		"embedding":              embeddingToString(row.Embedding),
+		"source_conversation_id": row.SourceConversationID,
+		"is_deleted":             row.IsDeleted,
+		"created_at":             row.CreatedAt,
+		"updated_at":             row.UpdatedAt,
+	}
+
+	err := r.db.WithContext(ctx).Table("project_facts").Clauses(onConflict).Create(values).Error
+	if err != nil {
+		return "", fmt.Errorf("upsert project fact: %w", err)
+	}
+	return row.ID, nil
+}
+
+// DeleteProjectFact soft-deletes the project fact with the given id by
+// setting is_deleted to true and updated_at to the current time. It returns
+// the database error unchanged, or a "project fact not found" error when no
+// row was updated.
+func (r *Repository) DeleteProjectFact(ctx context.Context, id string) error {
+	changes := map[string]any{
+		"is_deleted": true,
+		"updated_at": time.Now(),
+	}
+	outcome := r.db.WithContext(ctx).
+		Table("project_facts").
+		Where("id = ?", id).
+		Updates(changes)
+
+	switch {
+	case outcome.Error != nil:
+		return outcome.Error
+	case outcome.RowsAffected == 0:
+		return fmt.Errorf("project fact not found")
+	default:
+		return nil
+	}
+}
+
+// SearchProjectFacts runs a vector similarity search over the facts of
+// projectID.
+//
+// Similarity is computed in SQL as 1 - (embedding <=> query). Only rows that
+// are not soft-deleted, have confidence >= 0.7 and reach minSimilarity are
+// considered. Results are ordered nearest first and capped at limit.
+// Embeddings are not selected.
+//
+// Returns the matching facts with Similarity populated (an empty, non-nil
+// slice when nothing matches), or the wrapped query error.
+func (r *Repository) SearchProjectFacts(
+	ctx context.Context,
+	projectID string,
+	queryEmbedding []float32,
+	limit int,
+	minSimilarity float32,
+) ([]memory.ProjectFact, error) {
+	var rows []struct {
+		dbschema.ProjectFact
+		Similarity float32 `db:"similarity"`
+	}
+
+	// Serialize the query vector once; it is bound in three places below.
+	vec := embeddingToString(queryEmbedding)
+
+	if err := r.db.WithContext(ctx).
+		Table("project_facts").
+		Select("id, project_id, kind, title, text, confidence, source_conversation_id, created_at, updated_at, 1 - (embedding <=> ?::vector) AS similarity", vec).
+		Where("project_id = ? AND is_deleted = false AND confidence >= 0.7 AND 1 - (embedding <=> ?::vector) >= ?", projectID, vec, minSimilarity).
+		// gorm's Order does not accept a clause.Expr: it ignores value types
+		// it does not recognize, so the ordering was never applied and LIMIT
+		// picked arbitrary rows. An expression has to be attached as a
+		// clause.OrderBy instead.
+		Clauses(clause.OrderBy{
+			Expression: clause.Expr{SQL: "embedding <=> ?::vector", Vars: []any{vec}},
+		}).
+		Limit(limit).
+		Scan(&rows).Error; err != nil {
+		return nil, fmt.Errorf("search project facts: %w", err)
+	}
+
+	facts := make([]memory.ProjectFact, 0, len(rows))
+	for _, row := range rows {
+		fact := row.ProjectFact.EtoD()
+		fact.Similarity = row.Similarity
+		facts = append(facts, *fact)
+	}
+
+	return facts, nil
+}
diff --git a/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/repository.go b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/repository.go
new file mode 100644
index 00000000..722feda6
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/repository.go
@@ -0,0 +1,52 @@
+package memoryrepo
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"gorm.io/gorm"
+)
+
+// Repository groups the memory persistence operations of this package
+// (user memory items, project facts, episodic events and conversation items)
+// around a single gorm database handle.
+type Repository struct {
+	db *gorm.DB
+}
+
+// NewRepository returns a Repository that issues its queries through db.
+func NewRepository(db *gorm.DB) *Repository {
+	repo := new(Repository)
+	repo.db = db
+	return repo
+}
+
+// embeddingToString converts an embedding to a pgvector text literal, e.g.
+// "[0.1,0.25,3]". A nil or empty embedding yields "[]".
+//
+// Components are written with %g, which for a float32 emits the shortest text
+// that parses back to the same value. The fixed six decimals of %f rounded
+// small components away (1e-7 became 0.000000).
+func embeddingToString(embedding []float32) string {
+	if len(embedding) == 0 {
+		return "[]"
+	}
+
+	var sb strings.Builder
+	// Rough guess of ~12 bytes per component to limit regrowth.
+	sb.Grow(2 + len(embedding)*12)
+	sb.WriteByte('[')
+	for i, val := range embedding {
+		if i > 0 {
+			sb.WriteByte(',')
+		}
+		fmt.Fprintf(&sb, "%g", val)
+	}
+	sb.WriteByte(']')
+	return sb.String()
+}
+
+// Compile-time check that *Repository provides every method listed below.
+// The blank variable is never used at runtime; the build fails if a method
+// is missing or its signature drifts from the one written here.
+var _ interface {
+	GetUserMemoryItems(ctx context.Context, userID string) ([]memory.UserMemoryItem, error)
+	UpsertUserMemoryItem(ctx context.Context, item *memory.UserMemoryItem) (string, error)
+	DeleteUserMemoryItem(ctx context.Context, id string) error
+	SearchUserMemory(ctx context.Context, userID string, queryEmbedding []float32, limit int, minSimilarity float32) ([]memory.UserMemoryItem, error)
+
+	GetProjectFacts(ctx context.Context, projectID string) ([]memory.ProjectFact, error)
+	UpsertProjectFact(ctx context.Context, fact *memory.ProjectFact) (string, error)
+	DeleteProjectFact(ctx context.Context, id string) error
+	SearchProjectFacts(ctx context.Context, projectID string, queryEmbedding []float32, limit int, minSimilarity float32) ([]memory.ProjectFact, error)
+
+	GetEpisodicEvents(ctx context.Context, userID string, limit int) ([]memory.EpisodicEvent, error)
+	CreateEpisodicEvent(ctx context.Context, event *memory.EpisodicEvent) error
+	DeleteEpisodicEvent(ctx context.Context, id string) error
+	SearchEpisodicEvents(ctx context.Context, userID string, queryEmbedding []float32, limit int, minSimilarity float32) ([]memory.EpisodicEvent, error)
+
+	CreateConversationItem(ctx context.Context, item *memory.ConversationItem) error
+	GetConversationItems(ctx context.Context, conversationID string) ([]memory.ConversationItem, error)
+} = (*Repository)(nil)
diff --git a/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/user_memory_repository.go b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/user_memory_repository.go
new file mode 100644
index 00000000..6e5a38a9
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/database/repository/memoryrepo/user_memory_repository.go
@@ -0,0 +1,121 @@
+package memoryrepo
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/janhq/jan-server/services/memory-tools/internal/infrastructure/database/dbschema"
+	"gorm.io/gorm/clause"
+)
+
+// GetUserMemoryItems returns the non-deleted memory items stored for userID,
+// ordered by score and then by last update, both descending. The embedding
+// column is not selected. A query failure is returned wrapped as
+// "query user memory".
+func (r *Repository) GetUserMemoryItems(ctx context.Context, userID string) ([]memory.UserMemoryItem, error) {
+	// Column list handed to Select; kept as the same raw literal.
+	columns := `
+		id, user_id, scope, key, text, score, created_at, updated_at
+	`
+
+	var records []dbschema.UserMemoryItem
+	tx := r.db.WithContext(ctx).
+		Table("user_memory_items").
+		Select(columns).
+		Where("user_id = ? AND is_deleted = false", userID).
+		Order("score DESC, updated_at DESC").
+		Find(&records)
+	if tx.Error != nil {
+		return nil, fmt.Errorf("query user memory: %w", tx.Error)
+	}
+
+	// Convert each schema row into its domain representation.
+	result := make([]memory.UserMemoryItem, len(records))
+	for i := range records {
+		result[i] = *records[i].EtoD()
+	}
+	return result, nil
+}
+
+// UpsertUserMemoryItem inserts item into user_memory_items, or updates the
+// existing row when one with the same id is already present.
+//
+// The item is modified in place: an empty ID is replaced with a new UUID, a
+// zero CreatedAt is set to the current time, and UpdatedAt is always set to
+// the current time. On an id conflict only scope, key, text, score, embedding,
+// is_deleted and updated_at are overwritten; user_id and created_at are not in
+// the update list. Returns the ID of the written row.
+func (r *Repository) UpsertUserMemoryItem(ctx context.Context, item *memory.UserMemoryItem) (string, error) {
+	stamp := time.Now()
+	if item.ID == "" {
+		item.ID = uuid.New().String()
+	}
+	if item.CreatedAt.IsZero() {
+		item.CreatedAt = stamp
+	}
+	item.UpdatedAt = stamp
+
+	record := dbschema.NewSchemaUserMemoryItem(item)
+
+	// Conflict target is the primary key; the listed columns take the new values.
+	onConflict := clause.OnConflict{
+		Columns:   []clause.Column{{Name: "id"}},
+		DoUpdates: clause.AssignmentColumns([]string{"scope", "key", "text", "score", "embedding", "is_deleted", "updated_at"}),
+	}
+
+	// The row is written as a map so the embedding can be passed in its
+	// string form produced by embeddingToString.
+	values := map[string]any{
+		"id":         record.ID,
+		"user_id":    record.UserID,
+		"scope":      record.Scope,
+		"key":        record.Key,
+		"text":       record.Text,
+		"score":      record.Score,
+		"embedding":  embeddingToString(record.Embedding),
+		"is_deleted": record.IsDeleted,
+		"created_at": record.CreatedAt,
+		"updated_at": record.UpdatedAt,
+	}
+
+	err := r.db.WithContext(ctx).
+		Table("user_memory_items").
+		Clauses(onConflict).
+		Create(values).Error
+	if err != nil {
+		return "", fmt.Errorf("upsert user memory item: %w", err)
+	}
+
+	return record.ID, nil
+}
+
+// DeleteUserMemoryItem soft-deletes the user memory item with the given id by
+// setting is_deleted to true and refreshing updated_at; the row itself is kept.
+//
+// Returns the database error wrapped as "delete user memory item" if the
+// update fails, or a "user memory item not found" error when no row matched
+// the id.
+func (r *Repository) DeleteUserMemoryItem(ctx context.Context, id string) error {
+	result := r.db.WithContext(ctx).
+		Table("user_memory_items").
+		Where("id = ?", id).
+		Updates(map[string]any{
+			"is_deleted": true,
+			"updated_at": time.Now(),
+		})
+	if result.Error != nil {
+		// Wrap with the operation name, as the other repository methods do;
+		// %w keeps the underlying error reachable through errors.Is/As.
+		return fmt.Errorf("delete user memory item: %w", result.Error)
+	}
+	if result.RowsAffected == 0 {
+		return fmt.Errorf("user memory item not found")
+	}
+	return nil
+}
+
+// SearchUserMemory returns up to limit non-deleted memory items of userID
+// whose similarity to queryEmbedding is at least minSimilarity, closest first.
+//
+// Similarity is computed in SQL as 1 - (embedding <=> query), where <=> is the
+// pgvector distance operator applied to the stored embedding. Only items with
+// score >= 2 are considered. Each returned item carries its computed
+// similarity in the Similarity field.
+func (r *Repository) SearchUserMemory(
+	ctx context.Context,
+	userID string,
+	queryEmbedding []float32,
+	limit int,
+	minSimilarity float32,
+) ([]memory.UserMemoryItem, error) {
+	// Serialize the query vector once; it is bound three times below
+	// (SELECT, WHERE and ORDER BY).
+	queryVector := embeddingToString(queryEmbedding)
+
+	var rows []struct {
+		dbschema.UserMemoryItem
+		// GORM reads the `gorm` tag, so the computed column is mapped explicitly.
+		Similarity float32 `gorm:"column:similarity"`
+	}
+
+	if err := r.db.WithContext(ctx).
+		Table("user_memory_items").
+		Select("id, user_id, scope, key, text, score, created_at, updated_at, 1 - (embedding <=> ?::vector) AS similarity", queryVector).
+		Where("user_id = ? AND is_deleted = false AND score >= 2 AND 1 - (embedding <=> ?::vector) >= ?", userID, queryVector, minSimilarity).
+		Order(clause.Expr{SQL: "embedding <=> ?::vector", Vars: []any{queryVector}}).
+		Limit(limit).
+		Scan(&rows).Error; err != nil {
+		return nil, fmt.Errorf("search user memory: %w", err)
+	}
+
+	items := make([]memory.UserMemoryItem, 0, len(rows))
+	for _, row := range rows {
+		item := row.UserMemoryItem.EtoD()
+		item.Similarity = row.Similarity
+		items = append(items, *item)
+	}
+
+	return items, nil
+}
diff --git a/services/memory-tools/internal/infrastructure/http/embedding_client.go b/services/memory-tools/internal/infrastructure/http/embedding_client.go
new file mode 100644
index 00000000..c1badd25
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/http/embedding_client.go
@@ -0,0 +1,208 @@
+package http
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog/log"
+)
+
+// EmbeddingClient is an HTTP client for the BGE-M3 embedding service
+type EmbeddingClient struct {
+	baseURL    string       // service root; paths such as "/embed" are appended to it as-is
+	apiKey     string       // sent by Embed as a Bearer token when non-empty
+	httpClient *http.Client // client used for every call; carries the configured timeout
+}
+
+// EmbedRequest represents a request to the embedding service.
+// It is the JSON body that Embed posts to the /embed endpoint.
+type EmbedRequest struct {
+	Inputs    interface{} `json:"inputs"` // string or []string
+	Normalize bool        `json:"normalize"` // presumably asks the service for normalized vectors — confirm against the service API
+	Truncate  bool        `json:"truncate"`  // presumably lets the service truncate over-long inputs — confirm against the service API
+}
+
+// EmbedResponse represents the response from the embedding service:
+// a JSON array of float vectors. EmbedSingle treats element 0 as the
+// embedding of its single input.
+type EmbedResponse [][]float32
+
+// ModelInfo represents model information, decoded from the JSON body
+// returned by the /info endpoint.
+type ModelInfo struct {
+	ModelID        string `json:"model_id"`         // compared against "BAAI/bge-m3" by ValidateServer
+	MaxInputLength int    `json:"max_input_length"` // reported by the service; only logged by this client
+}
+
+// NewEmbeddingClient creates a new embedding HTTP client
+func NewEmbeddingClient(baseURL, apiKey string, timeout time.Duration) *EmbeddingClient {
+	if timeout == 0 {
+		timeout = 30 * time.Second
+	}
+
+	return &EmbeddingClient{
+		baseURL: baseURL,
+		apiKey:  apiKey,
+		httpClient: &http.Client{
+			Timeout: timeout,
+		},
+	}
+}
+
+// Embed generates embeddings for the given texts.
+//
+// It posts texts to <baseURL>/embed with normalize and truncate enabled, adds
+// a Bearer Authorization header when an API key is configured, and decodes
+// the JSON response into one vector per returned element.
+//
+// Errors are returned when the request cannot be built or executed, when the
+// response body cannot be read, when the service answers with a status other
+// than 200 (the body is included in the error), or when the body is not valid
+// JSON for [][]float32.
+func (c *EmbeddingClient) Embed(ctx context.Context, texts []string) ([][]float32, error) {
+	endpoint := c.baseURL + "/embed"
+
+	reqBody := EmbedRequest{
+		Inputs:    texts,
+		Normalize: true,
+		Truncate:  true,
+	}
+	log.Info().
+		Int("text_count", len(texts)).
+		Str("endpoint", endpoint).
+		Msg("embedding request")
+
+	jsonData, err := json.Marshal(reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, "POST", endpoint, bytes.NewBuffer(jsonData))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+
+	req.Header.Set("Content-Type", "application/json")
+	if c.apiKey != "" {
+		req.Header.Set("Authorization", "Bearer "+c.apiKey)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	// A failed or truncated read must not be mistaken for a malformed payload,
+	// so it is reported on its own together with the status that was received.
+	bodyBytes, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("read response (status %d): %w", resp.StatusCode, err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		log.Error().
+			Int("status", resp.StatusCode).
+			Str("endpoint", endpoint).
+			Msg("embedding request failed")
+		return nil, fmt.Errorf("embedding service returned status %d: %s", resp.StatusCode, string(bodyBytes))
+	}
+
+	var embeddings EmbedResponse
+	if err := json.Unmarshal(bodyBytes, &embeddings); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+
+	// Dimension of the first vector, or 0 when the service returned none.
+	dimension := 0
+	if len(embeddings) > 0 {
+		dimension = len(embeddings[0])
+	}
+
+	log.Info().
+		Int("status", resp.StatusCode).
+		Int("embeddings", len(embeddings)).
+		Int("dimension", dimension).
+		Msg("embedding response")
+
+	return embeddings, nil
+}
+
+// EmbedSingle embeds one text by calling Embed with a single-element slice
+// and returning the first vector. Errors from Embed are passed through
+// unchanged; an empty result yields a "no embeddings returned" error.
+func (c *EmbeddingClient) EmbedSingle(ctx context.Context, text string) ([]float32, error) {
+	vectors, err := c.Embed(ctx, []string{text})
+	switch {
+	case err != nil:
+		return nil, err
+	case len(vectors) == 0:
+		return nil, fmt.Errorf("no embeddings returned")
+	default:
+		return vectors[0], nil
+	}
+}
+
+// Health issues GET <baseURL>/health and reports an error unless the service
+// answers with status 200. No Authorization header is sent.
+func (c *EmbeddingClient) Health(ctx context.Context) error {
+	healthReq, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+"/health", nil)
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+
+	healthResp, err := c.httpClient.Do(healthReq)
+	if err != nil {
+		return fmt.Errorf("execute request: %w", err)
+	}
+	defer healthResp.Body.Close()
+
+	if code := healthResp.StatusCode; code != http.StatusOK {
+		return fmt.Errorf("health check failed with status %d", code)
+	}
+	return nil
+}
+
+// Info retrieves model information
+func (c *EmbeddingClient) Info(ctx context.Context) (*ModelInfo, error) {
+	req, err := http.NewRequestWithContext(ctx, "GET", c.baseURL+"/info", nil)
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+
+	resp, err := c.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("execute request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("info request failed with status %d", resp.StatusCode)
+	}
+
+	var info ModelInfo
+	if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
+		return nil, fmt.Errorf("decode response: %w", err)
+	}
+
+	return &info, nil
+}
+
+// ValidateServer validates the embedding server
+func (c *EmbeddingClient) ValidateServer(ctx context.Context) error {
+	// Check health
+	if err := c.Health(ctx); err != nil {
+		return fmt.Errorf("health check failed: %w", err)
+	}
+
+	// Check model info
+	info, err := c.Info(ctx)
+	if err != nil {
+		return fmt.Errorf("info check failed: %w", err)
+	}
+
+	// Verify it's BGE-M3
+	if info.ModelID != "BAAI/bge-m3" {
+		log.Warn().Str("model", info.ModelID).Msg("Expected BGE-M3, got different model")
+	}
+
+	// Test embedding
+	embeddings, err := c.Embed(ctx, []string{"test"})
+	if err != nil {
+		return fmt.Errorf("test embedding failed: %w", err)
+	}
+
+	if len(embeddings) == 0 || len(embeddings[0]) != 1024 {
+		return fmt.Errorf("expected 1024 dimensions, got %d", len(embeddings[0]))
+	}
+
+	log.Info().
+		Str("model", info.ModelID).
+		Int("max_input_length", info.MaxInputLength).
+		Msg("Embedding server validated")
+
+	return nil
+}
diff --git a/services/memory-tools/internal/infrastructure/llm/client.go b/services/memory-tools/internal/infrastructure/llm/client.go
new file mode 100644
index 00000000..152f62b8
--- /dev/null
+++ b/services/memory-tools/internal/infrastructure/llm/client.go
@@ -0,0 +1,184 @@
+package llm
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/rs/zerolog/log"
+)
+
+// Client implements LLM client for internal service calls
+type Client struct {
+	baseURL    string       // API root; "/v1/chat/completions" and "/health" are appended to it as-is
+	apiKey     string       // sent by Complete as a Bearer token when non-empty
+	httpClient *http.Client // client used for every call; carries the configured timeout
+}
+
+// ChatCompletionRequest represents a request to the LLM API.
+// It is the JSON body that Complete posts to /v1/chat/completions.
+//
+// NOTE(review): Temperature and MaxTokens are tagged omitempty, so a zero
+// value is left out of the JSON body instead of being sent as 0. An explicit
+// temperature of 0 therefore cannot be expressed with this struct — confirm
+// that falling back to the server default is intended.
+type ChatCompletionRequest struct {
+	Model          string          `json:"model"`
+	Messages       []Message       `json:"messages"`
+	Temperature    float32         `json:"temperature,omitempty"`
+	MaxTokens      int             `json:"max_tokens,omitempty"`
+	ResponseFormat *ResponseFormat `json:"response_format,omitempty"` // nil leaves the field out of the body
+}
+
+// Message represents a chat message. It is used both for the messages sent
+// in a request and for the message carried by each response choice.
+type Message struct {
+	Role    string `json:"role"` // Complete sends "system" and "user"
+	Content string `json:"content"`
+}
+
+// ResponseFormat specifies the response format. Complete sets it to
+// "json_object" only when the caller's options request "json".
+type ResponseFormat struct {
+	Type string `json:"type"` // "json_object" or "text"
+}
+
+// ChatCompletionResponse represents the LLM API response body as decoded by
+// Complete. Of these fields Complete reads Choices, Model and Usage.
+type ChatCompletionResponse struct {
+	ID      string   `json:"id"`
+	Object  string   `json:"object"`
+	Created int64    `json:"created"`
+	Model   string   `json:"model"`
+	Choices []Choice `json:"choices"` // Complete returns the content of element 0
+	Usage   Usage    `json:"usage"`
+}
+
+// Choice represents a completion choice, one element of the response's
+// "choices" array.
+type Choice struct {
+	Index        int     `json:"index"`
+	Message      Message `json:"message"` // Complete returns Message.Content of the first choice
+	FinishReason string  `json:"finish_reason"`
+}
+
+// Usage represents token usage as reported in the response's "usage" object.
+// Complete logs the prompt and completion counts.
+type Usage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
+// NewClient creates a new LLM client
+func NewClient(baseURL, apiKey string, timeout time.Duration) *Client {
+	if timeout == 0 {
+		timeout = 30 * time.Second
+	}
+
+	return &Client{
+		baseURL: baseURL,
+		apiKey:  apiKey,
+		httpClient: &http.Client{
+			Timeout: timeout,
+		},
+	}
+}
+
+// Complete implements memory.LLMClient interface.
+//
+// It sends prompt as the user message of a two-message chat (preceded by a
+// fixed system message) to <baseURL>/v1/chat/completions, using the model,
+// temperature and max-token settings from options. When options.ResponseFormat
+// is "json" the request asks for a "json_object" response. The content of the
+// first returned choice is the result.
+//
+// Errors are returned for request construction or transport failures, for an
+// unreadable body, for any status other than 200 (the body is included), for
+// an undecodable body, and for a response without choices.
+func (c *Client) Complete(ctx context.Context, prompt string, options memory.LLMOptions) (string, error) {
+	payload := ChatCompletionRequest{
+		Model:       options.Model,
+		Temperature: options.Temperature,
+		MaxTokens:   options.MaxTokens,
+		Messages: []Message{
+			{Role: "system", Content: "You are a helpful assistant that provides structured, accurate responses."},
+			{Role: "user", Content: prompt},
+		},
+	}
+	if options.ResponseFormat == "json" {
+		payload.ResponseFormat = &ResponseFormat{Type: "json_object"}
+	}
+
+	encoded, err := json.Marshal(payload)
+	if err != nil {
+		return "", fmt.Errorf("marshal request: %w", err)
+	}
+
+	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/v1/chat/completions", bytes.NewReader(encoded))
+	if err != nil {
+		return "", fmt.Errorf("create request: %w", err)
+	}
+	httpReq.Header.Set("Content-Type", "application/json")
+	if c.apiKey != "" {
+		httpReq.Header.Set("Authorization", "Bearer "+c.apiKey)
+	}
+
+	log.Debug().
+		Str("model", options.Model).
+		Str("endpoint", c.baseURL).
+		Msg("Calling LLM API")
+
+	httpResp, err := c.httpClient.Do(httpReq)
+	if err != nil {
+		return "", fmt.Errorf("execute request: %w", err)
+	}
+	defer httpResp.Body.Close()
+
+	// The body is read before the status check so it can be quoted in the error.
+	raw, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		return "", fmt.Errorf("read response: %w", err)
+	}
+	if httpResp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("LLM API returned status %d: %s", httpResp.StatusCode, string(raw))
+	}
+
+	var parsed ChatCompletionResponse
+	if err := json.Unmarshal(raw, &parsed); err != nil {
+		return "", fmt.Errorf("unmarshal response: %w", err)
+	}
+	if len(parsed.Choices) == 0 {
+		return "", fmt.Errorf("no choices in response")
+	}
+
+	log.Info().
+		Str("model", parsed.Model).
+		Int("prompt_tokens", parsed.Usage.PromptTokens).
+		Int("completion_tokens", parsed.Usage.CompletionTokens).
+		Msg("LLM completion successful")
+
+	return parsed.Choices[0].Message.Content, nil
+}
+
+// Health issues GET <baseURL>/health and reports an error unless the LLM
+// service answers with status 200. No Authorization header is sent.
+func (c *Client) Health(ctx context.Context) error {
+	healthReq, err := http.NewRequestWithContext(ctx, http.MethodGet, c.baseURL+"/health", nil)
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+
+	healthResp, err := c.httpClient.Do(healthReq)
+	if err != nil {
+		return fmt.Errorf("execute request: %w", err)
+	}
+	defer healthResp.Body.Close()
+
+	if code := healthResp.StatusCode; code != http.StatusOK {
+		return fmt.Errorf("health check failed with status %d", code)
+	}
+	return nil
+}
diff --git a/services/memory-tools/internal/interfaces/httpserver/handlers/memory_handler.go b/services/memory-tools/internal/interfaces/httpserver/handlers/memory_handler.go
new file mode 100644
index 00000000..ac5fa0fd
--- /dev/null
+++ b/services/memory-tools/internal/interfaces/httpserver/handlers/memory_handler.go
@@ -0,0 +1,333 @@
+package handlers
+
+import (
+	"encoding/json"
+	"net/http"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/domain/memory"
+	"github.com/janhq/jan-server/services/memory-tools/internal/interfaces/httpserver/responses"
+	"github.com/rs/zerolog/log"
+)
+
+// MemoryHandler groups the HTTP handlers of the memory API. Every handler
+// validates its request and then delegates to the memory service.
+type MemoryHandler struct {
+	service *memory.Service // domain service the handlers call
+}
+
+// NewMemoryHandler returns a MemoryHandler whose handlers call service.
+func NewMemoryHandler(service *memory.Service) *MemoryHandler {
+	handler := new(MemoryHandler)
+	handler.service = service
+	return handler
+}
+
+// HandleLoad handles POST /v1/memory/load.
+//
+// The JSON body must decode into a memory.MemoryLoadRequest with non-empty
+// user_id and query. Responses: 405 for any other method, 400 for an
+// undecodable body or a missing field, 500 when the service fails, and 200
+// with the service result otherwise.
+func (h *MemoryHandler) HandleLoad(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	reqLog := log.Ctx(ctx)
+	if reqLog == nil {
+		reqLog = &log.Logger
+	}
+
+	if r.Method != http.MethodPost {
+		responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+		return
+	}
+
+	var payload memory.MemoryLoadRequest
+	if decodeErr := json.NewDecoder(r.Body).Decode(&payload); decodeErr != nil {
+		reqLog.Error().Err(decodeErr).Msg("Failed to decode load request")
+		responses.Error(w, r, http.StatusBadRequest, "invalid request body")
+		return
+	}
+
+	// Required fields, checked in order; only the first missing one is reported.
+	switch {
+	case payload.UserID == "":
+		responses.Error(w, r, http.StatusBadRequest, "user_id is required")
+		return
+	case payload.Query == "":
+		responses.Error(w, r, http.StatusBadRequest, "query is required")
+		return
+	}
+
+	reqLog.Info().
+		Str("user_id", payload.UserID).
+		Str("project_id", payload.ProjectID).
+		Str("query", payload.Query).
+		Msg("Memory load request received")
+
+	result, loadErr := h.service.Load(ctx, payload)
+	if loadErr != nil {
+		reqLog.Error().Err(loadErr).Msg("Failed to load memories")
+		responses.Error(w, r, http.StatusInternalServerError, "failed to load memories")
+		return
+	}
+
+	responses.JSON(w, r, http.StatusOK, result)
+}
+
+// HandleObserve handles POST /v1/memory/observe.
+//
+// The JSON body must decode into a memory.MemoryObserveRequest with non-empty
+// user_id and conversation_id and at least one message. Responses: 405 for
+// any other method, 400 for an undecodable body or a missing field, 500 when
+// the service fails, and 200 with a fixed success payload otherwise.
+func (h *MemoryHandler) HandleObserve(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	reqLog := log.Ctx(ctx)
+	if reqLog == nil {
+		reqLog = &log.Logger
+	}
+
+	if r.Method != http.MethodPost {
+		responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+		return
+	}
+
+	var payload memory.MemoryObserveRequest
+	if decodeErr := json.NewDecoder(r.Body).Decode(&payload); decodeErr != nil {
+		reqLog.Error().Err(decodeErr).Msg("Failed to decode observe request")
+		responses.Error(w, r, http.StatusBadRequest, "invalid request body")
+		return
+	}
+
+	// Required fields, checked in order; only the first missing one is reported.
+	switch {
+	case payload.UserID == "":
+		responses.Error(w, r, http.StatusBadRequest, "user_id is required")
+		return
+	case payload.ConversationID == "":
+		responses.Error(w, r, http.StatusBadRequest, "conversation_id is required")
+		return
+	case len(payload.Messages) == 0:
+		responses.Error(w, r, http.StatusBadRequest, "messages are required")
+		return
+	}
+
+	reqLog.Info().
+		Str("user_id", payload.UserID).
+		Str("project_id", payload.ProjectID).
+		Str("conversation_id", payload.ConversationID).
+		Int("message_count", len(payload.Messages)).
+		Msg("Memory observe request received")
+
+	if observeErr := h.service.Observe(ctx, payload); observeErr != nil {
+		reqLog.Error().Err(observeErr).Msg("Failed to observe memories")
+		responses.Error(w, r, http.StatusInternalServerError, "failed to observe memories")
+		return
+	}
+
+	body := map[string]interface{}{
+		"status":  "success",
+		"message": "Memory observation completed",
+	}
+	responses.JSON(w, r, http.StatusOK, body)
+}
+
+// HandleStats handles GET /v1/memory/stats.
+//
+// The user_id query parameter is required; project_id is optional and passed
+// through as given. Responses: 405 for any other method, 400 when user_id is
+// missing, 500 when the service fails, and 200 with the stats otherwise.
+func (h *MemoryHandler) HandleStats(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	reqLog := log.Ctx(ctx)
+	if reqLog == nil {
+		reqLog = &log.Logger
+	}
+
+	if r.Method != http.MethodGet {
+		responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+		return
+	}
+
+	params := r.URL.Query()
+	userID := params.Get("user_id")
+	if userID == "" {
+		responses.Error(w, r, http.StatusBadRequest, "user_id query parameter is required")
+		return
+	}
+	projectID := params.Get("project_id")
+
+	stats, statsErr := h.service.GetMemoryStats(ctx, userID, projectID)
+	if statsErr != nil {
+		reqLog.Error().Err(statsErr).Msg("Failed to get memory stats")
+		responses.Error(w, r, http.StatusInternalServerError, "failed to get memory stats")
+		return
+	}
+
+	responses.JSON(w, r, http.StatusOK, stats)
+}
+
+// HandleExport handles GET /v1/memory/export.
+//
+// The user_id query parameter is required. On success the exported data is
+// written as an application/json attachment named memory_export.json.
+// Responses: 405 for any other method, 400 when user_id is missing, and 500
+// when the service fails. A failure while writing the body is logged; the
+// status line has already been sent at that point and cannot be changed.
+func (h *MemoryHandler) HandleExport(w http.ResponseWriter, r *http.Request) {
+	logger := log.Ctx(r.Context())
+	if logger == nil {
+		logger = &log.Logger
+	}
+
+	if r.Method != http.MethodGet {
+		responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+		return
+	}
+
+	userID := r.URL.Query().Get("user_id")
+	if userID == "" {
+		responses.Error(w, r, http.StatusBadRequest, "user_id query parameter is required")
+		return
+	}
+
+	exportData, err := h.service.ExportMemory(r.Context(), userID)
+	if err != nil {
+		logger.Error().Err(err).Msg("Failed to export memory")
+		responses.Error(w, r, http.StatusInternalServerError, "failed to export memory")
+		return
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.Header().Set("Content-Disposition", "attachment; filename=memory_export.json")
+	w.WriteHeader(http.StatusOK)
+	if _, err := w.Write([]byte(exportData)); err != nil {
+		// Previously dropped silently; surface it so truncated exports are visible.
+		logger.Error().Err(err).Msg("Failed to write memory export")
+	}
+}
+
+// HandleHealth handles GET /healthz. It always answers 200 with a fixed JSON
+// payload naming the service; the request method is not checked and no
+// dependency is probed.
+func (h *MemoryHandler) HandleHealth(w http.ResponseWriter, r *http.Request) {
+	body := map[string]interface{}{
+		"status":  "healthy",
+		"service": "memory-tools",
+	}
+	responses.JSON(w, r, http.StatusOK, body)
+}
+
+// HandleUserUpsert handles POST /v1/memory/user/upsert.
+//
+// The JSON body must decode into a memory.UserMemoryUpsertRequest with a
+// non-empty user_id and at least one item. Responses: 405 for any other
+// method, 400 for an undecodable body or a missing field, 500 when the
+// service fails, and 200 with the upserted ids otherwise.
+func (h *MemoryHandler) HandleUserUpsert(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	reqLog := log.Ctx(ctx)
+	if reqLog == nil {
+		reqLog = &log.Logger
+	}
+
+	if r.Method != http.MethodPost {
+		responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+		return
+	}
+
+	var payload memory.UserMemoryUpsertRequest
+	if decodeErr := json.NewDecoder(r.Body).Decode(&payload); decodeErr != nil {
+		reqLog.Error().Err(decodeErr).Msg("Failed to decode user upsert request")
+		responses.Error(w, r, http.StatusBadRequest, "invalid request body")
+		return
+	}
+
+	// Required fields, checked in order; only the first missing one is reported.
+	switch {
+	case payload.UserID == "":
+		responses.Error(w, r, http.StatusBadRequest, "user_id is required")
+		return
+	case len(payload.Items) == 0:
+		responses.Error(w, r, http.StatusBadRequest, "items are required")
+		return
+	}
+
+	reqLog.Info().
+		Str("user_id", payload.UserID).
+		Int("item_count", len(payload.Items)).
+		Msg("User memory upsert request received")
+
+	ids, upsertErr := h.service.UpsertUserMemories(ctx, payload)
+	if upsertErr != nil {
+		reqLog.Error().Err(upsertErr).Msg("Failed to upsert user memories")
+		responses.Error(w, r, http.StatusInternalServerError, "failed to upsert user memories")
+		return
+	}
+
+	body := map[string]interface{}{
+		"status":  "success",
+		"message": "User memories upserted successfully",
+		"ids":     ids,
+	}
+	responses.JSON(w, r, http.StatusOK, body)
+}
+
+// HandleProjectUpsert handles POST /v1/memory/project/upsert.
+//
+// The JSON body must decode into a memory.ProjectFactUpsertRequest with a
+// non-empty project_id and at least one fact. Responses: 405 for any other
+// method, 400 for an undecodable body or a missing field, 500 when the
+// service fails, and 200 with the upserted ids otherwise.
+func (h *MemoryHandler) HandleProjectUpsert(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	reqLog := log.Ctx(ctx)
+	if reqLog == nil {
+		reqLog = &log.Logger
+	}
+
+	if r.Method != http.MethodPost {
+		responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+		return
+	}
+
+	var payload memory.ProjectFactUpsertRequest
+	if decodeErr := json.NewDecoder(r.Body).Decode(&payload); decodeErr != nil {
+		reqLog.Error().Err(decodeErr).Msg("Failed to decode project upsert request")
+		responses.Error(w, r, http.StatusBadRequest, "invalid request body")
+		return
+	}
+
+	// Required fields, checked in order; only the first missing one is reported.
+	switch {
+	case payload.ProjectID == "":
+		responses.Error(w, r, http.StatusBadRequest, "project_id is required")
+		return
+	case len(payload.Facts) == 0:
+		responses.Error(w, r, http.StatusBadRequest, "facts are required")
+		return
+	}
+
+	reqLog.Info().
+		Str("project_id", payload.ProjectID).
+		Int("fact_count", len(payload.Facts)).
+		Msg("Project fact upsert request received")
+
+	ids, upsertErr := h.service.UpsertProjectFacts(ctx, payload)
+	if upsertErr != nil {
+		reqLog.Error().Err(upsertErr).Msg("Failed to upsert project facts")
+		responses.Error(w, r, http.StatusInternalServerError, "failed to upsert project facts")
+		return
+	}
+
+	body := map[string]interface{}{
+		"status":  "success",
+		"message": "Project facts upserted successfully",
+		"ids":     ids,
+	}
+	responses.JSON(w, r, http.StatusOK, body)
+}
+
+// HandleDelete handles DELETE /v1/memory/delete; POST is accepted as well.
+//
+// The JSON body must decode into a memory.DeleteRequest with at least one id.
+// Responses: 405 for any other method, 400 for an undecodable body or an
+// empty id list, 500 when the service fails, and 200 with the number of
+// deleted memories otherwise.
+func (h *MemoryHandler) HandleDelete(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	reqLog := log.Ctx(ctx)
+	if reqLog == nil {
+		reqLog = &log.Logger
+	}
+
+	switch r.Method {
+	case http.MethodDelete, http.MethodPost:
+		// allowed
+	default:
+		responses.Error(w, r, http.StatusMethodNotAllowed, "method not allowed")
+		return
+	}
+
+	var payload memory.DeleteRequest
+	if decodeErr := json.NewDecoder(r.Body).Decode(&payload); decodeErr != nil {
+		reqLog.Error().Err(decodeErr).Msg("Failed to decode delete request")
+		responses.Error(w, r, http.StatusBadRequest, "invalid request body")
+		return
+	}
+
+	idCount := len(payload.IDs)
+	if idCount == 0 {
+		responses.Error(w, r, http.StatusBadRequest, "ids are required")
+		return
+	}
+
+	reqLog.Info().
+		Int("id_count", idCount).
+		Msg("Memory delete request received")
+
+	deleted, deleteErr := h.service.DeleteMemories(ctx, payload)
+	if deleteErr != nil {
+		reqLog.Error().Err(deleteErr).Msg("Failed to delete memories")
+		responses.Error(w, r, http.StatusInternalServerError, "failed to delete memories")
+		return
+	}
+
+	body := map[string]interface{}{
+		"status":        "success",
+		"message":       "Memories deleted successfully",
+		"deleted_count": deleted,
+	}
+	responses.JSON(w, r, http.StatusOK, body)
+}
diff --git a/services/memory-tools/internal/interfaces/httpserver/middleware/auth.go b/services/memory-tools/internal/interfaces/httpserver/middleware/auth.go
new file mode 100644
index 00000000..d5111095
--- /dev/null
+++ b/services/memory-tools/internal/interfaces/httpserver/middleware/auth.go
@@ -0,0 +1,47 @@
+package middleware
+
+import (
+	"crypto/subtle"
+	"net/http"
+	"strings"
+)
+
+// AuthMiddleware returns middleware that guards handlers with a static API
+// key supplied as "Authorization: Bearer <key>".
+//
+// Requests to /healthz always pass through. When apiKey is empty,
+// authentication is disabled and every request passes through, so a key must
+// be configured wherever the service is reachable by untrusted clients.
+// Otherwise a missing header, a malformed header, or a wrong key yields
+// 401 Unauthorized with a plain-text message.
+//
+// The scheme name is matched case-insensitively, and the key is compared in
+// constant time so response timing does not reveal how much of a guess
+// matched.
+func AuthMiddleware(apiKey string) func(http.Handler) http.Handler {
+	expected := []byte(apiKey)
+
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			// Skip auth for health check
+			if r.URL.Path == "/healthz" {
+				next.ServeHTTP(w, r)
+				return
+			}
+
+			// If no API key configured, skip auth
+			if len(expected) == 0 {
+				next.ServeHTTP(w, r)
+				return
+			}
+
+			// Check Authorization header
+			authHeader := r.Header.Get("Authorization")
+			if authHeader == "" {
+				http.Error(w, "Missing authorization header", http.StatusUnauthorized)
+				return
+			}
+
+			// Extract token; everything after the first space is the credential.
+			parts := strings.SplitN(authHeader, " ", 2)
+			if len(parts) != 2 || !strings.EqualFold(parts[0], "Bearer") {
+				http.Error(w, "Invalid authorization header format", http.StatusUnauthorized)
+				return
+			}
+
+			if subtle.ConstantTimeCompare([]byte(parts[1]), expected) != 1 {
+				http.Error(w, "Invalid API key", http.StatusUnauthorized)
+				return
+			}
+
+			next.ServeHTTP(w, r)
+		})
+	}
+}
diff --git a/services/memory-tools/internal/interfaces/httpserver/middleware/request_id.go b/services/memory-tools/internal/interfaces/httpserver/middleware/request_id.go
new file mode 100644
index 00000000..5ff2ad95
--- /dev/null
+++ b/services/memory-tools/internal/interfaces/httpserver/middleware/request_id.go
@@ -0,0 +1,43 @@
+package middleware
+
+import (
+	"context"
+	"net/http"
+
+	"github.com/google/uuid"
+	"github.com/rs/zerolog/log"
+)
+
+// requestIDKey is the context key under which RequestIDMiddleware stores the
+// request ID and from which GetRequestID reads it. Being an unexported type,
+// it cannot be used as a key by code outside this package.
+type requestIDKey struct{}
+
+// RequestIDMiddleware ensures every request has an ID and logger bound to the context.
+func RequestIDMiddleware() func(http.Handler) http.Handler {
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			requestID := r.Header.Get("X-Request-ID")
+			if requestID == "" {
+				requestID = uuid.NewString()
+			}
+
+			ctx := context.WithValue(r.Context(), requestIDKey{}, requestID)
+			logger := log.With().Str("request_id", requestID).Logger()
+			ctx = logger.WithContext(ctx)
+
+			w.Header().Set("X-Request-ID", requestID)
+			next.ServeHTTP(w, r.WithContext(ctx))
+		})
+	}
+}
+
+// GetRequestID returns the request ID stored in ctx by RequestIDMiddleware.
+// It returns the empty string when ctx is nil or carries no request ID.
+func GetRequestID(ctx context.Context) string {
+	if ctx != nil {
+		if value, found := ctx.Value(requestIDKey{}).(string); found {
+			return value
+		}
+	}
+	return ""
+}
diff --git a/services/memory-tools/internal/interfaces/httpserver/middleware/timeout.go b/services/memory-tools/internal/interfaces/httpserver/middleware/timeout.go
new file mode 100644
index 00000000..98711c04
--- /dev/null
+++ b/services/memory-tools/internal/interfaces/httpserver/middleware/timeout.go
@@ -0,0 +1,33 @@
+package middleware
+
+import (
+	"context"
+	"net/http"
+	"time"
+)
+
+// TimeoutMiddleware adds a timeout to HTTP requests.
+//
+// The request context is replaced with one that is cancelled after timeout,
+// and the wrapped handler runs in its own goroutine. If the context finishes
+// before the handler does, a 504 "Request timeout" response is written and
+// this handler returns without waiting for the goroutine. Because the context
+// derives from the request's own context, cancellation of that parent takes
+// the same path.
+//
+// NOTE(review): on the timeout path the goroutine may still be using w while
+// http.Error writes to it, and after this handler has returned; nothing here
+// synchronizes the two. The wrapped handler also runs outside the goroutine
+// that serves the request, so any panic recovery applied there would not
+// cover it — verify. http.TimeoutHandler addresses this class of problem
+// (it replies 503 rather than 504); consider switching.
+func TimeoutMiddleware(timeout time.Duration) func(http.Handler) http.Handler {
+	return func(next http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			ctx, cancel := context.WithTimeout(r.Context(), timeout)
+			defer cancel()
+
+			// Downstream handlers observe the deadline through r.Context().
+			r = r.WithContext(ctx)
+
+			// done is closed once the wrapped handler has returned.
+			done := make(chan struct{})
+			go func() {
+				next.ServeHTTP(w, r)
+				close(done)
+			}()
+
+			select {
+			case <-done:
+				return
+			case <-ctx.Done():
+				// The handler goroutine is not stopped here; it keeps running
+				// until it returns on its own.
+				http.Error(w, "Request timeout", http.StatusGatewayTimeout)
+				return
+			}
+		})
+	}
+}
diff --git a/services/memory-tools/internal/interfaces/httpserver/responses/responses.go b/services/memory-tools/internal/interfaces/httpserver/responses/responses.go
new file mode 100644
index 00000000..a77d27da
--- /dev/null
+++ b/services/memory-tools/internal/interfaces/httpserver/responses/responses.go
@@ -0,0 +1,37 @@
+package responses
+
+import (
+	"encoding/json"
+	"net/http"
+
+	"github.com/janhq/jan-server/services/memory-tools/internal/interfaces/httpserver/middleware"
+	"github.com/rs/zerolog/log"
+)
+
+// errorResponse is the JSON body written by Error.
+type errorResponse struct {
+	Error     string `json:"error"`                // message passed to Error
+	RequestID string `json:"request_id,omitempty"` // left out of the body when no request ID is in the context
+}
+
+// JSON writes payload as a JSON response with the given status code.
+//
+// When the request context carries a request ID it is set as the X-Request-ID
+// response header. The status line is sent before encoding, so an encoding
+// failure cannot change the status and is only logged.
+func JSON(w http.ResponseWriter, r *http.Request, status int, payload interface{}) {
+	ctx := r.Context()
+	headers := w.Header()
+	if id := middleware.GetRequestID(ctx); id != "" {
+		headers.Set("X-Request-ID", id)
+	}
+	headers.Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+
+	encodeErr := json.NewEncoder(w).Encode(payload)
+	if encodeErr == nil {
+		return
+	}
+	log.Ctx(ctx).Error().Err(encodeErr).Msg("failed to write response")
+}
+
+// Error writes a JSON error body containing message and, when available, the
+// request ID from the request context, using the given status code.
+func Error(w http.ResponseWriter, r *http.Request, status int, message string) {
+	JSON(w, r, status, errorResponse{
+		Error:     message,
+		RequestID: middleware.GetRequestID(r.Context()),
+	})
+}
diff --git a/services/memory-tools/migrations/001_create_memory_tables.sql b/services/memory-tools/migrations/001_create_memory_tables.sql
new file mode 100644
index 00000000..ec22acfd
--- /dev/null
+++ b/services/memory-tools/migrations/001_create_memory_tables.sql
@@ -0,0 +1,139 @@
+-- Migration: Create memory tables with pgvector support
+-- Version: 001
+-- Date: 2025-11-20
+
+-- Create the schema first so the extension and tables below can be placed in it
+CREATE SCHEMA IF NOT EXISTS memory_tools;
+
+-- Install pgvector into memory_tools (skipped with a notice if an extension named vector already exists)
+CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA memory_tools;
+
+-- NOTE(review): IF NOT EXISTS matches on the extension name, so this is a no-op once vector is installed in any schema - confirm it is still wanted
+CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
+
+-- Resolve unqualified names (such as the vector type) via memory_tools first, then public
+SET search_path TO memory_tools, public;
+
+-- User Memory Items Table (rows are soft-deleted via is_deleted; embedding is nullable)
+CREATE TABLE IF NOT EXISTS memory_tools.user_memory_items (
+    id VARCHAR(255) PRIMARY KEY,
+    user_id VARCHAR(255) NOT NULL,
+    scope VARCHAR(50) NOT NULL, -- 'core', 'preference', 'context'; 'profile' and 'skill' are added by the ALTER further down
+    key VARCHAR(255) NOT NULL,
+    text TEXT NOT NULL,
+    score INTEGER NOT NULL DEFAULT 3, -- 1-5, importance level
+    embedding vector(1024), -- BGE-M3 embeddings
+    is_deleted BOOLEAN NOT NULL DEFAULT FALSE,
+    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMP NOT NULL DEFAULT NOW(), -- default applies on insert; nothing in this migration refreshes it
+    
+    CONSTRAINT user_memory_scope_check CHECK (scope IN ('core', 'preference', 'context')),
+    CONSTRAINT user_memory_score_check CHECK (score >= 1 AND score <= 5)
+);
+
+-- Indexes for user_memory_items (partial on is_deleted = FALSE, except updated_at which covers all rows)
+CREATE INDEX IF NOT EXISTS idx_user_memory_user_id ON memory_tools.user_memory_items(user_id) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_user_memory_scope ON memory_tools.user_memory_items(scope) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_user_memory_score ON memory_tools.user_memory_items(score DESC) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_user_memory_updated_at ON memory_tools.user_memory_items(updated_at DESC);
+
+-- Vector similarity index: IVFFlat with cosine distance and 100 lists, approximate search over non-deleted rows
+CREATE INDEX IF NOT EXISTS idx_user_memory_embedding ON memory_tools.user_memory_items 
+USING ivfflat (embedding vector_cosine_ops) 
+WITH (lists = 100)
+WHERE is_deleted = FALSE;
+
+-- Project Facts Table (rows are soft-deleted via is_deleted; embedding is nullable)
+CREATE TABLE IF NOT EXISTS memory_tools.project_facts (
+    id VARCHAR(255) PRIMARY KEY,
+    project_id VARCHAR(255) NOT NULL,
+    kind VARCHAR(50) NOT NULL, -- 'decision', 'requirement', 'constraint', 'context'; 'assumption', 'risk', 'fact' are added by the ALTER further down
+    title VARCHAR(500) NOT NULL,
+    text TEXT NOT NULL,
+    confidence REAL NOT NULL DEFAULT 0.8, -- 0.0-1.0
+    embedding vector(1024), -- BGE-M3 embeddings
+    source_conversation_id VARCHAR(255), -- nullable; no foreign key is declared
+    is_deleted BOOLEAN NOT NULL DEFAULT FALSE,
+    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMP NOT NULL DEFAULT NOW(), -- default applies on insert; nothing in this migration refreshes it
+    
+    CONSTRAINT project_fact_kind_check CHECK (kind IN ('decision', 'requirement', 'constraint', 'context')),
+    CONSTRAINT project_fact_confidence_check CHECK (confidence >= 0.0 AND confidence <= 1.0)
+);
+
+-- Indexes for project_facts (partial on is_deleted = FALSE, except updated_at which covers all rows)
+CREATE INDEX IF NOT EXISTS idx_project_facts_project_id ON memory_tools.project_facts(project_id) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_project_facts_kind ON memory_tools.project_facts(kind) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_project_facts_confidence ON memory_tools.project_facts(confidence DESC) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_project_facts_updated_at ON memory_tools.project_facts(updated_at DESC);
+
+-- Vector similarity index: IVFFlat with cosine distance and 100 lists, approximate search over non-deleted rows
+CREATE INDEX IF NOT EXISTS idx_project_facts_embedding ON memory_tools.project_facts 
+USING ivfflat (embedding vector_cosine_ops) 
+WITH (lists = 100)
+WHERE is_deleted = FALSE;
+
+-- Widen the CHECK constraints to every supported scope/kind; drop + re-add so tables that already existed are updated too
+ALTER TABLE memory_tools.user_memory_items DROP CONSTRAINT IF EXISTS user_memory_scope_check;
+ALTER TABLE memory_tools.user_memory_items ADD CONSTRAINT user_memory_scope_check CHECK (scope IN ('core', 'preference', 'context', 'profile', 'skill'));
+
+ALTER TABLE memory_tools.project_facts DROP CONSTRAINT IF EXISTS project_fact_kind_check;
+ALTER TABLE memory_tools.project_facts ADD CONSTRAINT project_fact_kind_check CHECK (kind IN ('decision', 'requirement', 'constraint', 'context', 'assumption', 'risk', 'fact'));
+
+-- Episodic Events Table (rows are soft-deleted via is_deleted; there is no updated_at column)
+CREATE TABLE IF NOT EXISTS memory_tools.episodic_events (
+    id VARCHAR(255) PRIMARY KEY,
+    user_id VARCHAR(255) NOT NULL,
+    project_id VARCHAR(255), -- nullable: an event does not have to reference a project
+    conversation_id VARCHAR(255) NOT NULL,
+    time TIMESTAMP NOT NULL, -- supplied by the writer (no default), separate from created_at
+    text TEXT NOT NULL,
+    kind VARCHAR(50) NOT NULL, -- 'interaction', 'decision', 'milestone'
+    embedding vector(1024), -- BGE-M3 embeddings
+    is_deleted BOOLEAN NOT NULL DEFAULT FALSE,
+    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    
+    CONSTRAINT episodic_event_kind_check CHECK (kind IN ('interaction', 'decision', 'milestone'))
+);
+
+-- Indexes for episodic_events (partial on is_deleted = FALSE, except conversation_id which covers all rows)
+CREATE INDEX IF NOT EXISTS idx_episodic_events_user_id ON memory_tools.episodic_events(user_id) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_episodic_events_project_id ON memory_tools.episodic_events(project_id) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_episodic_events_conversation_id ON memory_tools.episodic_events(conversation_id);
+CREATE INDEX IF NOT EXISTS idx_episodic_events_time ON memory_tools.episodic_events(time DESC) WHERE is_deleted = FALSE;
+CREATE INDEX IF NOT EXISTS idx_episodic_events_kind ON memory_tools.episodic_events(kind) WHERE is_deleted = FALSE;
+
+-- Vector similarity index: IVFFlat with cosine distance and 100 lists, approximate search over non-deleted rows
+CREATE INDEX IF NOT EXISTS idx_episodic_events_embedding ON memory_tools.episodic_events 
+USING ivfflat (embedding vector_cosine_ops) 
+WITH (lists = 100)
+WHERE is_deleted = FALSE;
+
+-- Conversation Items Table (raw conversation history; no is_deleted or embedding column)
+CREATE TABLE IF NOT EXISTS memory_tools.conversation_items (
+    id VARCHAR(255) PRIMARY KEY,
+    conversation_id VARCHAR(255) NOT NULL,
+    role VARCHAR(50) NOT NULL, -- 'user', 'assistant', 'system'
+    content TEXT NOT NULL,
+    tool_calls TEXT, -- JSON array of tool calls, kept as TEXT so the database does not validate it as JSON
+    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    
+    CONSTRAINT conversation_item_role_check CHECK (role IN ('user', 'assistant', 'system'))
+);
+
+-- Indexes for conversation_items (lookup by conversation, and by creation time ascending)
+CREATE INDEX IF NOT EXISTS idx_conversation_items_conversation_id ON memory_tools.conversation_items(conversation_id);
+CREATE INDEX IF NOT EXISTS idx_conversation_items_created_at ON memory_tools.conversation_items(created_at ASC);
+
+-- Comments for documentation (schema-qualified like every other statement, so they never depend on search_path)
+COMMENT ON TABLE memory_tools.user_memory_items IS 'User-specific memory items (preferences, context, core facts)';
+COMMENT ON TABLE memory_tools.project_facts IS 'Project-level facts, decisions, and requirements';
+COMMENT ON TABLE memory_tools.episodic_events IS 'Time-bound events and interactions';
+COMMENT ON TABLE memory_tools.conversation_items IS 'Raw conversation history for memory extraction';
+
+COMMENT ON COLUMN memory_tools.user_memory_items.embedding IS 'BGE-M3 1024-dimensional embedding vector';
+COMMENT ON COLUMN memory_tools.project_facts.embedding IS 'BGE-M3 1024-dimensional embedding vector';
+COMMENT ON COLUMN memory_tools.episodic_events.embedding IS 'BGE-M3 1024-dimensional embedding vector';
+
+COMMENT ON COLUMN memory_tools.user_memory_items.score IS 'Importance level: 1=low, 2=medium, 3=normal, 4=high, 5=critical';
+COMMENT ON COLUMN memory_tools.project_facts.confidence IS 'Confidence level: 0.0-1.0, higher is more confident';
diff --git a/services/response-api/Dockerfile b/services/response-api/Dockerfile
new file mode 100644
index 00000000..61443ea8
--- /dev/null
+++ b/services/response-api/Dockerfile
@@ -0,0 +1,20 @@
+ARG GO_VERSION=1.25
+# Build stage: compile the server with CGO disabled for linux/amd64.
+FROM golang:${GO_VERSION} AS build
+WORKDIR /src
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/response-api ./cmd/server
+# Runtime stage: slim Debian with CA certificates and curl installed.
+FROM debian:bookworm-slim
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends ca-certificates curl && \
+    rm -rf /var/lib/apt/lists/*
+# Dedicated system user (uid 10003) so the service does not run as root.
+RUN useradd --system --home /app --no-create-home --uid 10003 response
+WORKDIR /app
+COPY --from=build /out/response-api /app/response-api
+EXPOSE 8082
+USER response
+ENTRYPOINT ["/app/response-api"]
diff --git a/services/response-api/Makefile b/services/response-api/Makefile
new file mode 100644
index 00000000..c5f83f2f
--- /dev/null
+++ b/services/response-api/Makefile
@@ -0,0 +1,28 @@
+SERVICE := response-api
+BIN := bin/$(SERVICE)
+
+.PHONY: run build test tidy wire swagger clean
+
+run: # start the service from source with go run
+	@echo "Starting $(SERVICE)..."
+	go run ./cmd/server
+
+build: # compile ./cmd/server into $(BIN)
+	go build -o $(BIN) ./cmd/server
+
+test: # run every Go test in this module
+	go test ./...
+
+tidy: # sync go.mod / go.sum with the imports in use
+	go mod tidy
+
+wire: # run wire on ./cmd/server; fails with an install hint when the wire CLI is missing
+	@if ! command -v wire >/dev/null 2>&1; then echo "wire not installed (go install github.com/google/wire/cmd/wire@latest)"; exit 1; fi
+	wire ./cmd/server
+
+swagger: # run swag init into docs/swagger; fails with an install hint when the swag CLI is missing
+	@if ! command -v swag >/dev/null 2>&1; then echo "swag CLI not installed (go install github.com/swaggo/swag/cmd/swag@latest)"; exit 1; fi
+	swag init -g cmd/server/server.go -o docs/swagger
+
+clean: # remove the binary produced by build
+	rm -rf $(BIN)
diff --git a/services/response-api/README.md b/services/response-api/README.md
new file mode 100644
index 00000000..8b949ad2
--- /dev/null
+++ b/services/response-api/README.md
@@ -0,0 +1,459 @@
+# response-api
+
+`response-api` is the Jan Responses API microservice. It follows the OpenAI Responses contract, orchestrates multi-step tool calls through `mcp-tools`, and delegates language generation to `llm-api`.
+
+## Key Features
+
+- **OpenAI-Compatible Background Mode**: Async response generation with webhook notifications
+- **Environment-driven config** with sensible defaults (see `internal/config`)
+- **Structured Zerolog logging** plus optional OTEL tracing
+- **PostgreSQL persistence** for responses, conversations, and tool executions (GORM)
+- **PostgreSQL-backed Task Queue**: Reliable background job processing using `FOR UPDATE SKIP LOCKED`
+- **Worker Pool**: Configurable concurrent workers for background tasks (default: 4)
+- **Webhook Notifications**: HTTP POST callbacks on task completion/failure
+- **JSON-RPC integration** with `services/mcp-tools` for tool discovery/calls
+- **HTTP client** for `services/llm-api` chat completions
+- **Gin HTTP server** exposing `/v1/responses` CRUD plus SSE streaming
+- **Optional Keycloak/OIDC JWT** enforcement
+- **Wire-ready DI** entrypoint, Dockerfile, Makefile, and example env file
+
+## Quick start
+
+```bash
+# From repo root
+make env-create            # populates .env from .env.template
+
+cd services/response-api
+go mod tidy
+make run
+
+# Smoke check
+curl http://localhost:8082/healthz
+curl -X POST http://localhost:8082/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{"model":"gpt-4o-mini","input":"ping"}'
+
+```
+
+Useful targets:
+
+- `make wire` - regenerate DI after editing `cmd/server/wire.go`.
+- `make swagger` - regenerate OpenAPI docs from annotations.
+- `make test` - unit/integration test suite.
+
+## Configuration
+
+| Variable | Description | Default |
+| --- | --- | --- |
+| `SERVICE_NAME` | Logical service name | `response-api` |
+| `HTTP_PORT` | HTTP listen port | `8082` |
+| `RESPONSE_DATABASE_URL` | PostgreSQL DSN | `postgres://postgres:postgres@localhost:5432/response_api?sslmode=disable` |
+| `LLM_API_URL` | Base URL for `llm-api` | `http://localhost:8080` |
+| `MCP_TOOLS_URL` | Base URL for `mcp-tools` | `http://localhost:8091` |
+| `MAX_TOOL_EXECUTION_DEPTH` | Max recursive tool chain depth | `8` |
+| `TOOL_EXECUTION_TIMEOUT` | Per-tool call timeout | `45s` |
+| `BACKGROUND_WORKER_COUNT` | Number of concurrent background workers | `4` |
+| `BACKGROUND_TASK_TIMEOUT` | Max execution time per background task | `600s` |
+| `BACKGROUND_POLL_INTERVAL` | How often workers poll for tasks | `2s` |
+| `WEBHOOK_TIMEOUT` | HTTP timeout for webhook delivery | `10s` |
+| `WEBHOOK_MAX_RETRIES` | Number of webhook retry attempts | `3` |
+| `WEBHOOK_RETRY_DELAY` | Delay between webhook retries | `2s` |
+| `AUTH_ENABLED` + `AUTH_*` | Toggle and configure OIDC validation | disabled |
+
+See `.env.template` in the repo root for the full list including tracing/logging knobs.
+
+### Recommended Settings
+
+**Development:**
+```bash
+BACKGROUND_WORKER_COUNT=2
+BACKGROUND_TASK_TIMEOUT=300s
+```
+
+**Production:**
+```bash
+BACKGROUND_WORKER_COUNT=8
+BACKGROUND_TASK_TIMEOUT=600s
+# Monitor queue depth and adjust worker count as needed
+```
+
+## Database
+
+On startup the service runs migrations for:
+
+- `responses`
+- `conversations`
+- `conversation_items`
+- `tool_executions`
+
+Each table uses JSONB columns for flexible payload storage. Point `RESPONSE_DATABASE_URL` at your cluster before starting the service.
+
+## Authentication
+
+- Set `AUTH_ENABLED=true` to enforce Bearer tokens. Provide `AUTH_ISSUER`, `ACCOUNT`, and `AUTH_JWKS_URL`.
+- With auth disabled the service treats callers as `guest` unless a `user` field is provided in the request body.
+
+## Background Mode
+
+The Response API supports OpenAI-compatible background mode for asynchronous response generation. This allows clients to submit long-running requests without holding open HTTP connections.
+
+### Architecture
+
+**Components:**
+1. **PostgreSQL-backed Queue**: Uses the `responses` table with `SELECT FOR UPDATE SKIP LOCKED` for reliable task distribution
+2. **Worker Pool**: Fixed-size pool of background workers (default: 4) that poll for queued tasks
+3. **Webhook Notifications**: HTTP POST callbacks when tasks complete or fail
+4. **Graceful Cancellation**: Queued or in-progress tasks can be cancelled
+
+**Task Lifecycle:**
+```
+Client Request (background=true, store=true)
+    ↓
+Create Response (status=queued, queued_at=now)
+    ↓
+Return Response Immediately (201 Created)
+    ↓
+Worker Dequeues Task
+    ↓
+Mark Processing (status=in_progress, started_at=now)
+    ↓
+Execute LLM Orchestration with Tool Calls
+    ↓
+Update Status (completed/failed, completed_at=now)
+    ↓
+Send Webhook Notification (async, non-blocking)
+```
+
+### API Usage
+
+#### Creating a Background Response
+
+**Request:**
+```http
+POST /v1/responses
+Content-Type: application/json
+Authorization: Bearer <token>
+
+{
+  "model": "gpt-4",
+  "input": "Write a comprehensive analysis of...",
+  "background": true,
+  "store": true,
+  "metadata": {
+    "webhook_url": "https://example.com/webhooks/responses"
+  }
+}
+```
+
+**Response (201 Created):**
+```json
+{
+  "id": "resp_abc123",
+  "object": "response",
+  "status": "queued",
+  "background": true,
+  "store": true,
+  "queued_at": 1705315800,
+  "created_at": 1705315800,
+  "metadata": {
+    "webhook_url": "https://example.com/webhooks/responses"
+  }
+}
+```
+
+#### Polling for Status
+
+**Request:**
+```http
+GET /v1/responses/resp_abc123
+Authorization: Bearer <token>
+```
+
+**Response (In Progress):**
+```json
+{
+  "id": "resp_abc123",
+  "status": "in_progress",
+  "started_at": 1705315805,
+  ...
+}
+```
+
+**Response (Completed):**
+```json
+{
+  "id": "resp_abc123",
+  "status": "completed",
+  "output": "The comprehensive analysis...",
+  "usage": {
+    "prompt_tokens": 150,
+    "completion_tokens": 500,
+    "total_tokens": 650
+  },
+  "started_at": 1705315805,
+  "completed_at": 1705316122,
+  ...
+}
+```
+
+#### Cancelling a Background Task
+
+**Request:**
+```http
+POST /v1/responses/resp_abc123/cancel
+Authorization: Bearer <token>
+```
+
+**Response:**
+```json
+{
+  "id": "resp_abc123",
+  "status": "cancelled",
+  "cancelled_at": 1705315860,
+  ...
+}
+```
+
+**Cancellation Behavior:**
+- If status is `queued`: Immediately marks cancelled, prevents worker pickup
+- If status is `in_progress`: Marks cancelled, but task may complete normally (cooperative cancellation)
+- If status is `completed` or `failed`: No-op, returns current state
+
+### Webhook Notifications
+
+**Webhook Payload (Completed):**
+```json
+{
+  "id": "resp_abc123",
+  "event": "response.completed",
+  "status": "completed",
+  "output": "The response content...",
+  "metadata": {...},
+  "completed_at": 1705316122
+}
+```
+
+**Webhook Payload (Failed):**
+```json
+{
+  "id": "resp_abc123",
+  "event": "response.failed",
+  "status": "failed",
+  "error": {
+    "code": "execution_failed",
+    "message": "LLM provider timeout"
+  },
+  "metadata": {...}
+}
+```
+
+**Webhook Delivery:**
+- **Method**: HTTP POST
+- **Content-Type**: `application/json`
+- **Headers**:
+  - `User-Agent: jan-response-api/1.0`
+  - `X-Jan-Event: response.completed` (or `response.failed`)
+  - `X-Jan-Response-ID: resp_abc123`
+- **Retries**: Up to 3 attempts with 2-second delays
+- **Timeout**: 10 seconds per attempt
+- **Non-blocking**: Webhook failures are logged but don't affect task completion
+
+### Constraints
+
+- **Background mode requires store=true**: Background tasks must be persisted to the database
+- **API Key Authentication**: The user's API key (Bearer token or X-API-Key header) is stored securely and used for LLM API calls during background execution
+- **Task Timeout**: Tasks exceeding `BACKGROUND_TASK_TIMEOUT` will be marked as failed
+- **Queue Ordering**: Tasks are processed in FIFO order based on `queued_at` timestamp
+
+## Testing
+
+### Quick Test
+
+```bash
+# Create background task
+curl -X POST http://localhost:8082/v1/responses \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer <token>" \
+  -d '{
+    "model": "gpt-4",
+    "input": "Write a story",
+    "background": true,
+    "store": true,
+    "metadata": {"webhook_url": "https://webhook.site/your-id"}
+  }'
+
+# Poll status (replace resp_xxx with actual ID)
+curl http://localhost:8082/v1/responses/resp_xxx \
+  -H "Authorization: Bearer <token>"
+```
+
+### Automated Tests
+
+Comprehensive test suite available at `tests/automation/responses-background-webhook.json`:
+
+**Test Suites:**
+1. Setup & Authentication
+2. Basic Background Mode
+3. Background with Webhooks
+4. Background with Tool Calling
+5. Cancellation
+6. Conversation Continuity
+7. Error Handling
+8. Complex Scenarios
+9. Monitoring & Observability
+10. Long-Running Research Task
+
+**Running Tests:**
+
+```bash
+# Run all tests
+jan-cli api-test run tests/automation/responses-background-webhook.json \
+  --timeout-request 60000
+
+# Run with verbose output
+jan-cli api-test run tests/automation/responses-background-webhook.json \
+  --timeout-request 60000 \
+  --verbose
+
+# Export results to JSON
+jan-cli api-test run tests/automation/responses-background-webhook.json \
+  --timeout-request 60000 \
+  --reporters cli,json
+```
+
+### Test Webhook Server
+
+Use webhook.site for testing:
+
+```bash
+# Get unique webhook URL
+curl -X POST https://webhook.site/token
+
+# Use in request
+{
+  "model": "gpt-4",
+  "input": "Test input",
+  "background": true,
+  "store": true,
+  "metadata": {
+    "webhook_url": "https://webhook.site/<your-unique-id>"
+  }
+}
+
+# View received webhooks at https://webhook.site/<your-unique-id>
+```
+
+**Local Webhook Server:**
+
+```python
+# webhook_server.py
+from flask import Flask, request
+import json
+
+app = Flask(__name__)
+
+@app.route('/webhook', methods=['POST'])
+def webhook():
+    data = request.get_json()
+    print(json.dumps(data, indent=2))
+    return '', 200
+
+if __name__ == '__main__':
+    app.run(port=9000)
+```
+
+```bash
+# Run local webhook server
+python webhook_server.py
+
+# Use http://host.docker.internal:9000/webhook in Docker
+```
+
+### CI/CD Integration
+
+```yaml
+# .github/workflows/test-background-mode.yml
+name: Background Mode Tests
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      
+      - name: Start services
+        run: docker compose up -d response-api
+      
+      - name: Wait for health check
+        run: |
+          timeout 60 bash -c 'until curl -f http://localhost:8082/healthz; do sleep 2; done'
+      
+      - name: Run tests
+        run: |
+          jan-cli api-test run tests/automation/responses-background-webhook.json \
+            --timeout-request 60000 \
+            --reporters cli
+      
+      - name: Publish test results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          files: test-results.xml
+```
+
+## Troubleshooting
+
+### Background Tasks Stuck in Queued
+
+**Issue**: Tasks remain in `queued` status and never get processed
+
+**Solutions**:
+1. Check worker logs: `docker logs <container> --tail 100`
+2. Verify workers are running: Look for "worker started" messages
+3. Check database connectivity: Workers need access to PostgreSQL
+4. Verify `BACKGROUND_WORKER_COUNT > 0`
+
+### Workers Not Picking Up Tasks
+
+**Issue**: Workers running but not dequeuing tasks
+
+**Solutions**:
+1. Check for database locks: `SELECT * FROM pg_locks WHERE granted = false;`
+2. Verify `BACKGROUND_POLL_INTERVAL` setting
+3. Check worker logs for errors
+4. Verify tasks have `background=true` and `store=true`
+
+### Webhook Delivery Failures
+
+**Issue**: Tasks complete but webhooks not received
+
+**Solutions**:
+1. Check webhook URL is accessible from Docker network
+2. Use `http://host.docker.internal:<port>` for local development
+3. Check response-api logs for webhook delivery errors
+4. Verify webhook endpoint returns 2xx status code
+5. Test webhook URL with curl: `curl -X POST <webhook_url> -d '{"test":"data"}'`
+
+### Tasks Timing Out
+
+**Issue**: Tasks marked as failed with timeout errors
+
+**Solutions**:
+1. Increase `BACKGROUND_TASK_TIMEOUT` (default: 600s)
+2. Optimize LLM prompts to reduce processing time
+3. Check LLM API availability and response times
+4. Monitor tool execution times in logs
+
+### High Queue Depth
+
+**Issue**: Many tasks queued, slow processing
+
+**Solutions**:
+1. Increase `BACKGROUND_WORKER_COUNT`
+2. Scale horizontally: Run multiple response-api instances
+3. Monitor database performance
+4. Check LLM API rate limits
+5. Consider task prioritization (future enhancement)
+
diff --git a/services/response-api/bin/response-api b/services/response-api/bin/response-api
new file mode 100644
index 00000000..fb708863
Binary files /dev/null and b/services/response-api/bin/response-api differ
diff --git a/services/response-api/bin/template-api b/services/response-api/bin/template-api
new file mode 100644
index 00000000..a96cedc4
Binary files /dev/null and b/services/response-api/bin/template-api differ
diff --git a/services/response-api/cmd/server/server.go b/services/response-api/cmd/server/server.go
new file mode 100644
index 00000000..42f3624c
--- /dev/null
+++ b/services/response-api/cmd/server/server.go
@@ -0,0 +1,161 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/joho/godotenv"
+	"github.com/rs/zerolog"
+	gormlogger "gorm.io/gorm/logger"
+
+	"jan-server/services/response-api/internal/config"
+	"jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/domain/tool"
+	"jan-server/services/response-api/internal/infrastructure/auth"
+	"jan-server/services/response-api/internal/infrastructure/database"
+	"jan-server/services/response-api/internal/infrastructure/llmprovider"
+	"jan-server/services/response-api/internal/infrastructure/logger"
+	"jan-server/services/response-api/internal/infrastructure/mcp"
+	"jan-server/services/response-api/internal/infrastructure/observability"
+	"jan-server/services/response-api/internal/infrastructure/queue"
+	conversationrepo "jan-server/services/response-api/internal/infrastructure/repository/conversation"
+	respRepo "jan-server/services/response-api/internal/infrastructure/repository/response"
+	"jan-server/services/response-api/internal/interfaces/httpserver"
+	"jan-server/services/response-api/internal/webhook"
+	"jan-server/services/response-api/internal/worker"
+)
+
+// @title Response API
+// @version 1.0
+// @description Orchestrates LLM responses with MCP tool integration, conversation context, and streaming support.
+// @contact.name Jan Server Team
+// @contact.url https://github.com/janhq/jan-server
+// @BasePath /
+// @securityDefinitions.apikey BearerAuth
+// @in header
+// @name Authorization
+type Application struct {
+	httpServer *httpserver.HTTPServer // server that Start runs
+	log        zerolog.Logger         // stored by NewApplication; not read in server.go
+}
+
+// NewApplication bundles the HTTP server and logger into an Application.
+// Wire also uses it as the final provider in BuildApplication (wire.go).
+func NewApplication(httpServer *httpserver.HTTPServer, log zerolog.Logger) *Application {
+	app := &Application{httpServer: httpServer, log: log}
+	return app
+}
+
+func (a *Application) Start(ctx context.Context) error {
+	return a.httpServer.Run(ctx) // delegates to the HTTP server; its error is returned unchanged
+}
+
+// main wires the service's dependencies together and serves until shutdown.
+func main() {
+	// Registered first so it runs last: a failed run exits non-zero only after
+	// the deferred cleanup below has finished (log.Fatal would skip the defers).
+	exitCode := 0
+	defer func() {
+		if exitCode != 0 {
+			os.Exit(exitCode)
+		}
+	}()
+
+	loadEnvFiles()
+
+	cfg, err := config.Load()
+	if err != nil {
+		panic(err)
+	}
+
+	log := logger.New(cfg)
+
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer stop()
+
+	shutdownTelemetry, err := observability.Setup(ctx, cfg, log)
+	if err != nil {
+		log.Fatal().Err(err).Msg("initialize observability")
+	}
+	defer func() {
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
+		defer cancel()
+		if err := shutdownTelemetry(shutdownCtx); err != nil {
+			log.Error().Err(err).Msg("shutdown telemetry")
+		}
+	}()
+
+	db, err := database.Connect(database.Config{
+		DSN:             cfg.GetDatabaseWriteDSN(),
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        gormlogger.Warn,
+	})
+	if err != nil {
+		log.Fatal().Err(err).Msg("connect database")
+	}
+
+	if err := database.AutoMigrate(ctx, db, log); err != nil {
+		log.Fatal().Err(err).Msg("migrate database")
+	}
+
+	authValidator, err := auth.NewValidator(ctx, cfg, log)
+	if err != nil {
+		log.Fatal().Err(err).Msg("initialize auth validator")
+	}
+
+	responseRepository := respRepo.NewPostgresRepository(db)
+	conversationRepository := conversationrepo.NewRepository(db)
+	conversationItemRepository := conversationrepo.NewItemRepository(db)
+	llmClient := llmprovider.NewClient(cfg.LLMAPIURL)
+	mcpClient := mcp.NewClient(cfg.MCPToolsURL)
+	orchestrator := tool.NewOrchestrator(llmClient, mcpClient, cfg.MaxToolDepth, cfg.ToolTimeout)
+
+	// Initialize the webhook service and the response service that uses it
+	webhookService := webhook.NewHTTPService(log)
+	responseService := response.NewService(
+		responseRepository, conversationRepository, conversationItemRepository,
+		responseRepository, orchestrator, mcpClient, webhookService, log,
+	)
+
+	// Initialize background task infrastructure
+	taskQueue := queue.NewPostgresQueue(db, log)
+	workerPool := worker.NewPool(taskQueue, responseService, worker.Config{
+		WorkerCount: cfg.BackgroundWorkerCount,
+		TaskTimeout: cfg.BackgroundTaskTimeout,
+	}, log)
+
+	// Start worker pool
+	workerPool.Start(ctx)
+	defer func() {
+		log.Info().Msg("stopping worker pool")
+		workerPool.Stop()
+	}()
+
+	httpServer := httpserver.New(cfg, log, responseService, authValidator)
+	app := NewApplication(httpServer, log)
+
+	if err := app.Start(ctx); err != nil {
+		// Fatal level without the exit, so the worker pool stops and telemetry shuts down first.
+		log.WithLevel(zerolog.FatalLevel).Err(err).Msg("application stopped with error")
+		exitCode = 1
+		return
+	}
+
+	log.Info().Msg("application exited cleanly")
+}
+
+// loadEnvFiles feeds .env and then ../.env to godotenv.Overload when os.Stat finds them; load errors only warn on stderr.
+func loadEnvFiles() {
+	for _, envFile := range []string{".env", "../.env"} {
+		if _, statErr := os.Stat(envFile); statErr == nil {
+			if loadErr := godotenv.Overload(envFile); loadErr != nil {
+				fmt.Fprintf(os.Stderr, "warning: failed to load %s: %v\n", envFile, loadErr)
+			}
+		}
+	}
+}
diff --git a/services/response-api/cmd/server/wire.go b/services/response-api/cmd/server/wire.go
new file mode 100644
index 00000000..b40b21ee
--- /dev/null
+++ b/services/response-api/cmd/server/wire.go
@@ -0,0 +1,114 @@
+//go:build wireinject
+
+package main
+
+import (
+	"context"
+
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+	gormlogger "gorm.io/gorm/logger"
+
+	"jan-server/services/response-api/internal/config"
+	"jan-server/services/response-api/internal/domain/conversation"
+	"jan-server/services/response-api/internal/domain/llm"
+	responseDomain "jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/domain/tool"
+	"jan-server/services/response-api/internal/infrastructure/auth"
+	"jan-server/services/response-api/internal/infrastructure/database"
+	"jan-server/services/response-api/internal/infrastructure/llmprovider"
+	"jan-server/services/response-api/internal/infrastructure/logger"
+	"jan-server/services/response-api/internal/infrastructure/mcp"
+	conversationrepo "jan-server/services/response-api/internal/infrastructure/repository/conversation"
+	responseRepo "jan-server/services/response-api/internal/infrastructure/repository/response"
+	"jan-server/services/response-api/internal/interfaces/httpserver"
+	"jan-server/services/response-api/internal/webhook"
+)
+
+var responseSet = wire.NewSet( // providers and interface bindings used to build responseDomain.Service
+	responseRepo.NewPostgresRepository,
+	wire.Bind(new(responseDomain.Repository), new(*responseRepo.PostgresRepository)),
+	wire.Bind(new(responseDomain.ToolExecutionRepository), new(*responseRepo.PostgresRepository)), // same repository type serves both interfaces
+	conversationrepo.NewRepository,
+	wire.Bind(new(conversation.Repository), new(*conversationrepo.Repository)),
+	conversationrepo.NewItemRepository,
+	wire.Bind(new(conversation.ItemRepository), new(*conversationrepo.ItemRepository)),
+	newLLMProvider,
+	wire.Bind(new(llm.Provider), new(*llmprovider.Client)),
+	newMCPClient,
+	wire.Bind(new(tool.MCPClient), new(*mcp.Client)),
+	newOrchestrator,
+	newWebhookService,
+	wire.Bind(new(webhook.Service), new(*webhook.HTTPService)),
+	newResponseService, // consumes the bindings above and returns responseDomain.Service
+)
+
+// BuildApplication is the Wire injector for Application; unlike main in server.go, this graph has no task queue or worker pool providers.
+func BuildApplication(ctx context.Context) (*Application, error) {
+	wire.Build(
+		config.Load,
+		logger.New,
+		newDatabaseConfig,
+		newGormDB, // connects and runs AutoMigrate before returning the handle
+		newAuthValidator,
+		responseSet,
+		httpserver.New,
+		NewApplication,
+	)
+	return nil, nil // placeholder values; the generated implementation lives in wire_gen.go
+}
+
+func newDatabaseConfig(cfg *config.Config) database.Config {
+	return database.Config{
+		DSN:             cfg.GetDatabaseWriteDSN(), // the write DSN, as main in server.go also uses
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        gormlogger.Warn, // fixed at Warn; not taken from cfg
+	}
+}
+
+func newGormDB(ctx context.Context, cfg database.Config, log zerolog.Logger) (*gorm.DB, error) {
+	db, err := database.Connect(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := database.AutoMigrate(ctx, db, log); err != nil { // migrate before handing the handle to other providers
+		return nil, err // the opened db is not closed on this path
+	}
+	return db, nil
+}
+
+func newAuthValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*auth.Validator, error) {
+	return auth.NewValidator(ctx, cfg, log) // pass-through to auth.NewValidator
+}
+
+func newLLMProvider(cfg *config.Config) *llmprovider.Client {
+	return llmprovider.NewClient(cfg.LLMAPIURL) // client built from the configured LLMAPIURL
+}
+
+func newMCPClient(cfg *config.Config) *mcp.Client {
+	return mcp.NewClient(cfg.MCPToolsURL) // client built from the configured MCPToolsURL
+}
+
+func newOrchestrator(cfg *config.Config, provider llm.Provider, mcpClient tool.MCPClient) *tool.Orchestrator {
+	return tool.NewOrchestrator(provider, mcpClient, cfg.MaxToolDepth, cfg.ToolTimeout) // depth and timeout come from cfg
+}
+
+func newWebhookService(log zerolog.Logger) *webhook.HTTPService {
+	return webhook.NewHTTPService(log) // only the logger is passed; no cfg values are supplied here
+}
+
+func newResponseService(
+	repo responseDomain.Repository,
+	conversations conversation.Repository,
+	conversationItems conversation.ItemRepository,
+	toolRepo responseDomain.ToolExecutionRepository, // bound in responseSet to the same *PostgresRepository type as repo
+	orchestrator *tool.Orchestrator,
+	mcpClient tool.MCPClient,
+	webhookService webhook.Service,
+	log zerolog.Logger,
+) responseDomain.Service {
+	return responseDomain.NewService(repo, conversations, conversationItems, toolRepo, orchestrator, mcpClient, webhookService, log) // arguments forwarded in declaration order
+}
diff --git a/services/response-api/cmd/server/wire_gen.go b/services/response-api/cmd/server/wire_gen.go
new file mode 100644
index 00000000..3b5685bc
--- /dev/null
+++ b/services/response-api/cmd/server/wire_gen.go
@@ -0,0 +1,120 @@
+// Code generated by Wire. DO NOT EDIT.
+
+//go:generate go run -mod=mod github.com/google/wire/cmd/wire
+//go:build !wireinject
+// +build !wireinject
+
+package main
+
+import (
+	"context"
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+	logger2 "gorm.io/gorm/logger"
+	"jan-server/services/response-api/internal/config"
+	conversation2 "jan-server/services/response-api/internal/domain/conversation"
+	"jan-server/services/response-api/internal/domain/llm"
+	response2 "jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/domain/tool"
+	"jan-server/services/response-api/internal/infrastructure/auth"
+	"jan-server/services/response-api/internal/infrastructure/database"
+	"jan-server/services/response-api/internal/infrastructure/llmprovider"
+	"jan-server/services/response-api/internal/infrastructure/logger"
+	"jan-server/services/response-api/internal/infrastructure/mcp"
+	"jan-server/services/response-api/internal/infrastructure/repository/conversation"
+	"jan-server/services/response-api/internal/infrastructure/repository/response"
+	"jan-server/services/response-api/internal/interfaces/httpserver"
+	"jan-server/services/response-api/internal/webhook"
+)
+
+// Injectors from wire.go:
+
+// BuildApplication demonstrates how to assemble the response service with Wire.
+func BuildApplication(ctx context.Context) (*Application, error) {
+	configConfig, err := config.Load()
+	if err != nil {
+		return nil, err
+	}
+	zerologLogger := logger.New(configConfig)
+	databaseConfig := newDatabaseConfig(configConfig)
+	db, err := newGormDB(ctx, databaseConfig, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	postgresRepository := response.NewPostgresRepository(db)
+	repository := conversation.NewRepository(db)
+	itemRepository := conversation.NewItemRepository(db)
+	client := newLLMProvider(configConfig)
+	mcpClient := newMCPClient(configConfig)
+	orchestrator := newOrchestrator(configConfig, client, mcpClient)
+	httpService := newWebhookService(zerologLogger)
+	service := newResponseService(postgresRepository, repository, itemRepository, postgresRepository, orchestrator, mcpClient, httpService, zerologLogger)
+	validator, err := newAuthValidator(ctx, configConfig, zerologLogger)
+	if err != nil {
+		return nil, err
+	}
+	httpServer := httpserver.New(configConfig, zerologLogger, service, validator)
+	application := NewApplication(httpServer, zerologLogger)
+	return application, nil
+}
+
+// wire.go:
+
+var responseSet = wire.NewSet(response.NewPostgresRepository, wire.Bind(new(response2.Repository), new(*response.PostgresRepository)), wire.Bind(new(response2.ToolExecutionRepository), new(*response.PostgresRepository)), conversation.NewRepository, wire.Bind(new(conversation2.Repository), new(*conversation.Repository)), conversation.NewItemRepository, wire.Bind(new(conversation2.ItemRepository), new(*conversation.ItemRepository)), newLLMProvider, wire.Bind(new(llm.Provider), new(*llmprovider.Client)), newMCPClient, wire.Bind(new(tool.MCPClient), new(*mcp.Client)), newOrchestrator,
+	newWebhookService, wire.Bind(new(webhook.Service), new(*webhook.HTTPService)), newResponseService,
+)
+
+func newDatabaseConfig(cfg *config.Config) database.Config {
+	return database.Config{
+		DSN:             cfg.GetDatabaseWriteDSN(),
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        logger2.Warn,
+	}
+}
+
+func newGormDB(ctx context.Context, cfg database.Config, log zerolog.Logger) (*gorm.DB, error) {
+	db, err := database.Connect(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := database.AutoMigrate(ctx, db, log); err != nil {
+		return nil, err
+	}
+	return db, nil
+}
+
+func newAuthValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*auth.Validator, error) {
+	return auth.NewValidator(ctx, cfg, log)
+}
+
+func newLLMProvider(cfg *config.Config) *llmprovider.Client {
+	return llmprovider.NewClient(cfg.LLMAPIURL)
+}
+
+func newMCPClient(cfg *config.Config) *mcp.Client {
+	return mcp.NewClient(cfg.MCPToolsURL)
+}
+
+func newOrchestrator(cfg *config.Config, provider llm.Provider, mcpClient tool.MCPClient) *tool.Orchestrator {
+	return tool.NewOrchestrator(provider, mcpClient, cfg.MaxToolDepth, cfg.ToolTimeout)
+}
+
+func newWebhookService(log zerolog.Logger) *webhook.HTTPService {
+	return webhook.NewHTTPService(log)
+}
+
+func newResponseService(
+	repo response2.Repository,
+	conversations conversation2.Repository,
+	conversationItems conversation2.ItemRepository,
+	toolRepo response2.ToolExecutionRepository,
+	orchestrator *tool.Orchestrator,
+	mcpClient tool.MCPClient,
+	webhookService webhook.Service,
+	log zerolog.Logger,
+) response2.Service {
+	return response2.NewService(repo, conversations, conversationItems, toolRepo, orchestrator, mcpClient, webhookService, log)
+}
diff --git a/services/response-api/doc.go b/services/response-api/doc.go
new file mode 100644
index 00000000..696b9486
--- /dev/null
+++ b/services/response-api/doc.go
@@ -0,0 +1,2 @@
+// Package responseapi provides module-level docs to satisfy tooling.
+package responseapi
diff --git a/services/response-api/docs/swagger/docs.go b/services/response-api/docs/swagger/docs.go
new file mode 100644
index 00000000..2f5baa42
--- /dev/null
+++ b/services/response-api/docs/swagger/docs.go
@@ -0,0 +1,370 @@
+// Code generated by swaggo/swag. DO NOT EDIT.
+
+package swagger
+
+import "github.com/swaggo/swag"
+
+const docTemplate = `{
+    "schemes": {{ marshal .Schemes }},
+    "swagger": "2.0",
+    "info": {
+        "description": "{{escape .Description}}",
+        "title": "{{.Title}}",
+        "contact": {
+            "name": "Jan Server Team",
+            "url": "https://github.com/janhq/jan-server"
+        },
+        "version": "{{.Version}}"
+    },
+    "host": "{{.Host}}",
+    "basePath": "{{.BasePath}}",
+    "paths": {
+        "/v1/responses": {
+            "post": {
+                "description": "Creates a response and orchestrates MCP tool calls when required.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Create a response",
+                "parameters": [
+                    {
+                        "description": "Create request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requests.CreateResponseRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}": {
+            "get": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Get a response by ID",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    },
+                    "404": {
+                        "description": "Not Found",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            },
+            "delete": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Delete/Cancel a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/cancel": {
+            "post": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Cancel a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/input_items": {
+            "get": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "List conversation input items",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "type": "array",
+                            "items": {
+                                "$ref": "#/definitions/response.ConversationItem"
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "requests.CreateResponseRequest": {
+            "type": "object",
+            "required": [
+                "input",
+                "model"
+            ],
+            "properties": {
+                "background": {
+                    "type": "boolean"
+                },
+                "conversation": {
+                    "type": "string"
+                },
+                "input": {},
+                "max_tokens": {
+                    "type": "integer"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "model": {
+                    "type": "string"
+                },
+                "previous_response_id": {
+                    "type": "string"
+                },
+                "store": {
+                    "type": "boolean"
+                },
+                "stream": {
+                    "type": "boolean"
+                },
+                "system_prompt": {
+                    "type": "string"
+                },
+                "temperature": {
+                    "type": "number"
+                },
+                "tool_choice": {
+                    "$ref": "#/definitions/requests.ToolChoice"
+                },
+                "tools": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/requests.ToolDefinition"
+                    }
+                },
+                "user": {
+                    "type": "string"
+                }
+            }
+        },
+        "requests.ToolChoice": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "type": "object",
+                    "properties": {
+                        "name": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "requests.ToolDefinition": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "$ref": "#/definitions/requests.ToolFunctionDefinition"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "requests.ToolFunctionDefinition": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "parameters": {
+                    "type": "object",
+                    "additionalProperties": true
+                }
+            }
+        },
+        "response.ConversationItem": {
+            "type": "object",
+            "properties": {
+                "content": {},
+                "role": {
+                    "type": "string"
+                },
+                "status": {
+                    "type": "string"
+                }
+            }
+        },
+        "responses.ResponsePayload": {
+            "type": "object",
+            "properties": {
+                "background": {
+                    "type": "boolean"
+                },
+                "conversation_id": {
+                    "type": "string"
+                },
+                "created": {
+                    "type": "integer"
+                },
+                "created_at": {
+                    "description": "Same as Created, for compatibility",
+                    "type": "integer"
+                },
+                "error": {},
+                "id": {
+                    "type": "string"
+                },
+                "input": {},
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "model": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "output": {},
+                "previous_response_id": {
+                    "type": "string"
+                },
+                "status": {
+                    "type": "string"
+                },
+                "store": {
+                    "type": "boolean"
+                },
+                "stream": {
+                    "type": "boolean"
+                },
+                "system_prompt": {
+                    "type": "string"
+                },
+                "usage": {}
+            }
+        }
+    },
+    "securityDefinitions": {
+        "BearerAuth": {
+            "type": "apiKey",
+            "name": "Authorization",
+            "in": "header"
+        }
+    }
+}`
+
+// SwaggerInfo holds exported Swagger Info so clients can modify it
+var SwaggerInfo = &swag.Spec{
+	Version:          "1.0",
+	Host:             "",
+	BasePath:         "/",
+	Schemes:          []string{},
+	Title:            "Response API",
+	Description:      "Orchestrates LLM responses with MCP tool integration, conversation context, and streaming support.",
+	InfoInstanceName: "swagger",
+	SwaggerTemplate:  docTemplate,
+}
+
+func init() {
+	swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo)
+}
diff --git a/services/response-api/docs/swagger/swagger.json b/services/response-api/docs/swagger/swagger.json
new file mode 100644
index 00000000..9e96fcb9
--- /dev/null
+++ b/services/response-api/docs/swagger/swagger.json
@@ -0,0 +1,346 @@
+{
+    "swagger": "2.0",
+    "info": {
+        "description": "Orchestrates LLM responses with MCP tool integration, conversation context, and streaming support.",
+        "title": "Response API",
+        "contact": {
+            "name": "Jan Server Team",
+            "url": "https://github.com/janhq/jan-server"
+        },
+        "version": "1.0"
+    },
+    "basePath": "/",
+    "paths": {
+        "/v1/responses": {
+            "post": {
+                "description": "Creates a response and orchestrates MCP tool calls when required.",
+                "consumes": [
+                    "application/json"
+                ],
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Create a response",
+                "parameters": [
+                    {
+                        "description": "Create request",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/requests.CreateResponseRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    },
+                    "400": {
+                        "description": "Bad Request",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}": {
+            "get": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Get a response by ID",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    },
+                    "404": {
+                        "description": "Not Found",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            },
+            "delete": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Delete/Cancel a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/cancel": {
+            "post": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "Cancel a response",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "$ref": "#/definitions/responses.ResponsePayload"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/responses/{response_id}/input_items": {
+            "get": {
+                "produces": [
+                    "application/json"
+                ],
+                "tags": [
+                    "Responses"
+                ],
+                "summary": "List conversation input items",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Response ID",
+                        "name": "response_id",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "schema": {
+                            "type": "array",
+                            "items": {
+                                "$ref": "#/definitions/response.ConversationItem"
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "requests.CreateResponseRequest": {
+            "type": "object",
+            "required": [
+                "input",
+                "model"
+            ],
+            "properties": {
+                "background": {
+                    "type": "boolean"
+                },
+                "conversation": {
+                    "type": "string"
+                },
+                "input": {},
+                "max_tokens": {
+                    "type": "integer"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "model": {
+                    "type": "string"
+                },
+                "previous_response_id": {
+                    "type": "string"
+                },
+                "store": {
+                    "type": "boolean"
+                },
+                "stream": {
+                    "type": "boolean"
+                },
+                "system_prompt": {
+                    "type": "string"
+                },
+                "temperature": {
+                    "type": "number"
+                },
+                "tool_choice": {
+                    "$ref": "#/definitions/requests.ToolChoice"
+                },
+                "tools": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/requests.ToolDefinition"
+                    }
+                },
+                "user": {
+                    "type": "string"
+                }
+            }
+        },
+        "requests.ToolChoice": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "type": "object",
+                    "properties": {
+                        "name": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "requests.ToolDefinition": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "$ref": "#/definitions/requests.ToolFunctionDefinition"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "requests.ToolFunctionDefinition": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "parameters": {
+                    "type": "object",
+                    "additionalProperties": true
+                }
+            }
+        },
+        "response.ConversationItem": {
+            "type": "object",
+            "properties": {
+                "content": {},
+                "role": {
+                    "type": "string"
+                },
+                "status": {
+                    "type": "string"
+                }
+            }
+        },
+        "responses.ResponsePayload": {
+            "type": "object",
+            "properties": {
+                "background": {
+                    "type": "boolean"
+                },
+                "conversation_id": {
+                    "type": "string"
+                },
+                "created": {
+                    "type": "integer"
+                },
+                "created_at": {
+                    "description": "Same as Created, for compatibility",
+                    "type": "integer"
+                },
+                "error": {},
+                "id": {
+                    "type": "string"
+                },
+                "input": {},
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "model": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "output": {},
+                "previous_response_id": {
+                    "type": "string"
+                },
+                "status": {
+                    "type": "string"
+                },
+                "store": {
+                    "type": "boolean"
+                },
+                "stream": {
+                    "type": "boolean"
+                },
+                "system_prompt": {
+                    "type": "string"
+                },
+                "usage": {}
+            }
+        }
+    },
+    "securityDefinitions": {
+        "BearerAuth": {
+            "type": "apiKey",
+            "name": "Authorization",
+            "in": "header"
+        }
+    }
+}
\ No newline at end of file
diff --git a/services/response-api/docs/swagger/swagger.yaml b/services/response-api/docs/swagger/swagger.yaml
new file mode 100644
index 00000000..fb25e6de
--- /dev/null
+++ b/services/response-api/docs/swagger/swagger.yaml
@@ -0,0 +1,230 @@
+basePath: /
+definitions:
+  requests.CreateResponseRequest:
+    properties:
+      background:
+        type: boolean
+      conversation:
+        type: string
+      input: {}
+      max_tokens:
+        type: integer
+      metadata:
+        additionalProperties: true
+        type: object
+      model:
+        type: string
+      previous_response_id:
+        type: string
+      store:
+        type: boolean
+      stream:
+        type: boolean
+      system_prompt:
+        type: string
+      temperature:
+        type: number
+      tool_choice:
+        $ref: '#/definitions/requests.ToolChoice'
+      tools:
+        items:
+          $ref: '#/definitions/requests.ToolDefinition'
+        type: array
+      user:
+        type: string
+    required:
+    - input
+    - model
+    type: object
+  requests.ToolChoice:
+    properties:
+      function:
+        properties:
+          name:
+            type: string
+        type: object
+      type:
+        type: string
+    type: object
+  requests.ToolDefinition:
+    properties:
+      function:
+        $ref: '#/definitions/requests.ToolFunctionDefinition'
+      type:
+        type: string
+    type: object
+  requests.ToolFunctionDefinition:
+    properties:
+      description:
+        type: string
+      name:
+        type: string
+      parameters:
+        additionalProperties: true
+        type: object
+    type: object
+  response.ConversationItem:
+    properties:
+      content: {}
+      role:
+        type: string
+      status:
+        type: string
+    type: object
+  responses.ResponsePayload:
+    properties:
+      background:
+        type: boolean
+      conversation_id:
+        type: string
+      created:
+        type: integer
+      created_at:
+        description: Same as Created, for compatibility
+        type: integer
+      error: {}
+      id:
+        type: string
+      input: {}
+      metadata:
+        additionalProperties: true
+        type: object
+      model:
+        type: string
+      object:
+        type: string
+      output: {}
+      previous_response_id:
+        type: string
+      status:
+        type: string
+      store:
+        type: boolean
+      stream:
+        type: boolean
+      system_prompt:
+        type: string
+      usage: {}
+    type: object
+info:
+  contact:
+    name: Jan Server Team
+    url: https://github.com/janhq/jan-server
+  description: Orchestrates LLM responses with MCP tool integration, conversation
+    context, and streaming support.
+  title: Response API
+  version: "1.0"
+paths:
+  /v1/responses:
+    post:
+      consumes:
+      - application/json
+      description: Creates a response and orchestrates MCP tool calls when required.
+      parameters:
+      - description: Create request
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/requests.CreateResponseRequest'
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/responses.ResponsePayload'
+        "400":
+          description: Bad Request
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      summary: Create a response
+      tags:
+      - Responses
+  /v1/responses/{response_id}:
+    delete:
+      parameters:
+      - description: Response ID
+        in: path
+        name: response_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/responses.ResponsePayload'
+      summary: Delete/Cancel a response
+      tags:
+      - Responses
+    get:
+      parameters:
+      - description: Response ID
+        in: path
+        name: response_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/responses.ResponsePayload'
+        "404":
+          description: Not Found
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      summary: Get a response by ID
+      tags:
+      - Responses
+  /v1/responses/{response_id}/cancel:
+    post:
+      parameters:
+      - description: Response ID
+        in: path
+        name: response_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            $ref: '#/definitions/responses.ResponsePayload'
+      summary: Cancel a response
+      tags:
+      - Responses
+  /v1/responses/{response_id}/input_items:
+    get:
+      parameters:
+      - description: Response ID
+        in: path
+        name: response_id
+        required: true
+        type: string
+      produces:
+      - application/json
+      responses:
+        "200":
+          description: OK
+          schema:
+            items:
+              $ref: '#/definitions/response.ConversationItem'
+            type: array
+      summary: List conversation input items
+      tags:
+      - Responses
+securityDefinitions:
+  BearerAuth:
+    in: header
+    name: Authorization
+    type: apiKey
+swagger: "2.0"
diff --git a/services/response-api/go.mod b/services/response-api/go.mod
new file mode 100644
index 00000000..6cbe4681
--- /dev/null
+++ b/services/response-api/go.mod
@@ -0,0 +1,91 @@
+module jan-server/services/response-api
+
+go 1.25.0
+
+require (
+	github.com/MicahParks/keyfunc/v2 v2.1.0
+	github.com/caarlos0/env/v10 v10.0.0
+	github.com/gin-gonic/gin v1.10.0
+	github.com/go-resty/resty/v2 v2.11.0
+	github.com/golang-jwt/jwt/v5 v5.3.0
+	github.com/golang-migrate/migrate/v4 v4.19.0
+	github.com/google/uuid v1.6.0
+	github.com/google/wire v0.7.0
+	github.com/joho/godotenv v1.5.1
+	github.com/lib/pq v1.10.9
+	github.com/rs/zerolog v1.31.0
+	github.com/swaggo/files v1.0.1
+	github.com/swaggo/gin-swagger v1.6.0
+	github.com/swaggo/swag v1.16.4
+	go.opentelemetry.io/otel v1.37.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0
+	go.opentelemetry.io/otel/sdk v1.29.0
+	gorm.io/datatypes v1.2.7
+	gorm.io/driver/postgres v1.5.7
+	gorm.io/gorm v1.30.0
+)
+
+require (
+	filippo.io/edwards25519 v1.1.0 // indirect
+	github.com/KyleBanks/depth v1.2.1 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
+	github.com/bytedance/sonic v1.11.6 // indirect
+	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
+	github.com/cloudwego/base64x v0.1.4 // indirect
+	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
+	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-openapi/jsonpointer v0.19.5 // indirect
+	github.com/go-openapi/jsonreference v0.19.6 // indirect
+	github.com/go-openapi/spec v0.20.4 // indirect
+	github.com/go-openapi/swag v0.19.15 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.20.0 // indirect
+	github.com/go-sql-driver/mysql v1.8.1 // indirect
+	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
+	github.com/hashicorp/errwrap v1.1.0 // indirect
+	github.com/hashicorp/go-multierror v1.1.1 // indirect
+	github.com/jackc/pgpassfile v1.0.0 // indirect
+	github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 // indirect
+	github.com/jackc/pgx/v5 v5.5.5 // indirect
+	github.com/jackc/puddle/v2 v2.2.1 // indirect
+	github.com/jinzhu/inflection v1.0.0 // indirect
+	github.com/jinzhu/now v1.1.5 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/mailru/easyjson v0.7.6 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
+	github.com/ugorji/go/codec v1.2.12 // indirect
+	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 // indirect
+	go.opentelemetry.io/otel/metric v1.37.0 // indirect
+	go.opentelemetry.io/otel/trace v1.37.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
+	golang.org/x/arch v0.8.0 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/sync v0.18.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	golang.org/x/tools v0.38.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
+	google.golang.org/grpc v1.67.0 // indirect
+	google.golang.org/protobuf v1.34.2 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+	gorm.io/driver/mysql v1.5.6 // indirect
+)
diff --git a/services/response-api/go.sum b/services/response-api/go.sum
new file mode 100644
index 00000000..c268c6e3
--- /dev/null
+++ b/services/response-api/go.sum
@@ -0,0 +1,327 @@
+filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
+filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
+github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/MicahParks/keyfunc/v2 v2.1.0 h1:6ZXKb9Rp6qp1bDbJefnG7cTH8yMN1IC/4nf+GVjO99k=
+github.com/MicahParks/keyfunc/v2 v2.1.0/go.mod h1:rW42fi+xgLJ2FRRXAfNx9ZA8WpD4OeE/yHVMteCkw9k=
+github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
+github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
+github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
+github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
+github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
+github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
+github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
+github.com/caarlos0/env/v10 v10.0.0 h1:yIHUBZGsyqCnpTkbjk8asUlx6RFhhEs+h7TOBdgdzXA=
+github.com/caarlos0/env/v10 v10.0.0/go.mod h1:ZfulV76NvVPw3tm591U4SwL3Xx9ldzBP9aGxzeN7G18=
+github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
+github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
+github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
+github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
+github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
+github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
+github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
+github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
+github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4=
+github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU=
+github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
+github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
+github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI=
+github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
+github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
+github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
+github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
+github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
+github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
+github.com/gin-contrib/gzip v0.0.6 h1:NjcunTcGAj5CO1gn4N8jHOSIeRFHIbn51z6K+xaN4d4=
+github.com/gin-contrib/gzip v0.0.6/go.mod h1:QOJlmV2xmayAjkNS2Y8NQsMneuRShOU/kjovCXNuzzk=
+github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
+github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
+github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
+github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
+github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
+github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
+github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
+github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
+github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
+github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
+github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
+github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/go-resty/resty/v2 v2.11.0 h1:i7jMfNOJYMp69lq7qozJP+bjgzfAzeOhuGlyDrqxT/8=
+github.com/go-resty/resty/v2 v2.11.0/go.mod h1:iiP/OpA0CkcL3IGt1O0+/SIItFUbkkyw5BGXiVdTu+A=
+github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI=
+github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
+github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
+github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
+github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
+github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
+github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
+github.com/golang-migrate/migrate/v4 v4.19.0 h1:RcjOnCGz3Or6HQYEJ/EEVLfWnmw9KnoigPSjzhCuaSE=
+github.com/golang-migrate/migrate/v4 v4.19.0/go.mod h1:9dyEcu+hO+G9hPSw8AIg50yg622pXJsoHItQnDGZkI0=
+github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA=
+github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
+github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A=
+github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
+github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
+github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
+github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
+github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
+github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA=
+github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
+github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
+github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
+github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
+github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
+github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
+github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
+github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
+github.com/microsoft/go-mssqldb v1.7.2 h1:CHkFJiObW7ItKTJfHo1QX7QBBD1iV+mn1eOyRP3b/PA=
+github.com/microsoft/go-mssqldb v1.7.2/go.mod h1:kOvZKUdrhhFQmxLZqbwUV0rHkNkZpthMITIb2Ko1IoA=
+github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
+github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
+github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
+github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
+github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
+github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
+github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
+github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
+github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
+github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
+github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
+github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
+github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
+github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg=
+github.com/swaggo/gin-swagger v1.6.0 h1:y8sxvQ3E20/RCyrXeFfg60r6H0Z+SwpTjMYsMm+zy8M=
+github.com/swaggo/gin-swagger v1.6.0/go.mod h1:BG00cCEy294xtVpyIAHG6+e2Qzj/xKlRdOqDkvq0uzo=
+github.com/swaggo/swag v1.16.4 h1:clWJtd9LStiG3VeijiCfOVODP6VpHtKdQy9ELFG3s1A=
+github.com/swaggo/swag v1.16.4/go.mod h1:VBsHJRsDvfYvqoiMKnsdwhNV9LEMHgEDZcyVYX0sxPg=
+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
+github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
+go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8=
+go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
+go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0 h1:dIIDULZJpgdiHz5tXrTgKIMLkus6jEFa7x5SOKcyR7E=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.29.0/go.mod h1:jlRVBe7+Z1wyxFSUs48L6OBQZ5JwH2Hg/Vbl+t9rAgI=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0 h1:Xw8U6u2f8DK2XAkGRFV7BBLENgnTGX9i4rQRxJf+/vs=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0/go.mod h1:6KW1Fm6R/s6Z3PGXwSJN2K4eT6wQB3vXX6CVnYX9NmM=
+go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
+go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
+go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
+go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
+go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
+go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
+go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
+go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
+golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
+golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
+golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
+golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
+golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
+golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 h1:wKguEg1hsxI2/L3hUYrpo1RVi48K+uTyzKqprwLXsb8=
+google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142/go.mod h1:d6be+8HhtEtucleCbxpPW9PA9XwISACu8nvpPqF0BVo=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
+google.golang.org/grpc v1.67.0 h1:IdH9y6PF5MPSdAntIcpjQ+tXO41pcQsfZV2RxtQgVcw=
+google.golang.org/grpc v1.67.0/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorm.io/datatypes v1.2.7 h1:ww9GAhF1aGXZY3EB3cJPJ7//JiuQo7DlQA7NNlVaTdk=
+gorm.io/datatypes v1.2.7/go.mod h1:M2iO+6S3hhi4nAyYe444Pcb0dcIiOMJ7QHaUXxyiNZY=
+gorm.io/driver/mysql v1.5.6 h1:Ld4mkIickM+EliaQZQx3uOJDJHtrd70MxAUqWqlx3Y8=
+gorm.io/driver/mysql v1.5.6/go.mod h1:sEtPWMiqiN1N1cMXoXmBbd8C6/l+TESwriotuRRpkDM=
+gorm.io/driver/postgres v1.5.7 h1:8ptbNJTDbEmhdr62uReG5BGkdQyeasu/FZHxI0IMGnM=
+gorm.io/driver/postgres v1.5.7/go.mod h1:3e019WlBaYI5o5LIdNV+LyxCMNtLOQETBXL2h4chKpA=
+gorm.io/driver/sqlite v1.4.3 h1:HBBcZSDnWi5BW3B3rwvVTc510KGkBkexlOg0QrmLUuU=
+gorm.io/driver/sqlite v1.4.3/go.mod h1:0Aq3iPO+v9ZKbcdiz8gLWRw5VOPcBOPUQJFLq5e2ecI=
+gorm.io/driver/sqlserver v1.6.0 h1:VZOBQVsVhkHU/NzNhRJKoANt5pZGQAS1Bwc6m6dgfnc=
+gorm.io/driver/sqlserver v1.6.0/go.mod h1:WQzt4IJo/WHKnckU9jXBLMJIVNMVeTu25dnOzehntWw=
+gorm.io/gorm v1.25.7/go.mod h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
+gorm.io/gorm v1.30.0 h1:qbT5aPv1UH8gI99OsRlvDToLxW5zR7FzS9acZDOZcgs=
+gorm.io/gorm v1.30.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE=
+nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/services/response-api/internal/config/config.go b/services/response-api/internal/config/config.go
new file mode 100644
index 00000000..2a4b9c93
--- /dev/null
+++ b/services/response-api/internal/config/config.go
@@ -0,0 +1,99 @@
+package config
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/caarlos0/env/v10"
+)
+
+// Config holds the environment driven configuration for the response service.
+//
+// Each field is bound to the environment variable named in its `env` tag;
+// `envDefault` gives the value used when that variable is not set.
+type Config struct {
+	// Service Configuration
+	ServiceName     string        `env:"SERVICE_NAME" envDefault:"response-api"`
+	Environment     string        `env:"ENVIRONMENT" envDefault:"development"`
+	HTTPPort        int           `env:"RESPONSE_API_PORT" envDefault:"8082"`
+	LogLevel        string        `env:"RESPONSE_LOG_LEVEL" envDefault:"info"`
+	EnableTracing   bool          `env:"ENABLE_TRACING" envDefault:"false"`
+	OTLPEndpoint    string        `env:"OTEL_EXPORTER_OTLP_ENDPOINT" envDefault:""`
+	ShutdownTimeout time.Duration `env:"SHUTDOWN_TIMEOUT" envDefault:"10s"`
+
+	// Database - Read/Write Split (required, no defaults)
+	// The write DSN is tagged notEmpty; the read DSN may be left unset, in
+	// which case GetDatabaseReadDSN falls back to the write DSN.
+	DBPostgresqlWriteDSN string `env:"DB_POSTGRESQL_WRITE_DSN,notEmpty"`
+	DBPostgresqlRead1DSN string `env:"DB_POSTGRESQL_READ1_DSN"` // Optional read replica
+
+	// Database Connection Pool
+	DBMaxIdleConns int           `env:"DB_MAX_IDLE_CONNS" envDefault:"5"`
+	DBMaxOpenConns int           `env:"DB_MAX_OPEN_CONNS" envDefault:"15"`
+	DBConnLifetime time.Duration `env:"DB_CONN_MAX_LIFETIME" envDefault:"30m"`
+
+	// Authentication
+	// Load rejects the config when AuthEnabled is true and either AuthIssuer
+	// or AuthJWKSURL is blank.
+	AuthEnabled bool   `env:"AUTH_ENABLED" envDefault:"false"`
+	AuthIssuer  string `env:"AUTH_ISSUER"`
+	Account     string `env:"ACCOUNT"`
+	AuthJWKSURL string `env:"AUTH_JWKS_URL"`
+
+	// External Services
+	LLMAPIURL   string `env:"RESPONSE_LLM_API_URL" envDefault:"http://localhost:8080"`
+	MCPToolsURL string `env:"RESPONSE_MCP_TOOLS_URL" envDefault:"http://localhost:8091"`
+
+	// Tool Execution
+	// Load replaces non-positive values of either field with its default.
+	MaxToolDepth int           `env:"RESPONSE_MAX_TOOL_DEPTH" envDefault:"8"`
+	ToolTimeout  time.Duration `env:"TOOL_EXECUTION_TIMEOUT" envDefault:"300s"`
+
+	// Background Task Processing
+	BackgroundWorkerCount  int           `env:"BACKGROUND_WORKER_COUNT" envDefault:"4"`
+	BackgroundTaskTimeout  time.Duration `env:"BACKGROUND_TASK_TIMEOUT" envDefault:"600s"`
+	BackgroundPollInterval time.Duration `env:"BACKGROUND_POLL_INTERVAL" envDefault:"2s"`
+	WebhookTimeout         time.Duration `env:"WEBHOOK_TIMEOUT" envDefault:"10s"`
+	WebhookMaxRetries      int           `env:"WEBHOOK_MAX_RETRIES" envDefault:"3"`
+	WebhookRetryDelay      time.Duration `env:"WEBHOOK_RETRY_DELAY" envDefault:"2s"`
+}
+
+// Load builds a Config from the process environment.
+//
+// It returns an error when parsing fails, or when authentication is enabled
+// without an issuer or JWKS URL. Non-positive tool depth and tool timeout
+// values are silently replaced by their defaults (8 and 300s).
+func Load() (*Config, error) {
+	var loaded Config
+	if err := env.Parse(&loaded); err != nil {
+		return nil, fmt.Errorf("parse env config: %w", err)
+	}
+
+	// Issuer and JWKS URL only become mandatory once auth is switched on.
+	if loaded.AuthEnabled {
+		switch {
+		case strings.TrimSpace(loaded.AuthIssuer) == "":
+			return nil, fmt.Errorf("AUTH_ISSUER is required when AUTH_ENABLED is true")
+		case strings.TrimSpace(loaded.AuthJWKSURL) == "":
+			return nil, fmt.Errorf("AUTH_JWKS_URL is required when AUTH_ENABLED is true")
+		}
+	}
+
+	// Clamp nonsensical tool limits back to the documented defaults.
+	if loaded.MaxToolDepth < 1 {
+		loaded.MaxToolDepth = 8
+	}
+	if loaded.ToolTimeout <= 0 {
+		loaded.ToolTimeout = 300 * time.Second
+	}
+
+	return &loaded, nil
+}
+
+// GetDatabaseWriteDSN exposes the DSN configured via DB_POSTGRESQL_WRITE_DSN.
+func (c *Config) GetDatabaseWriteDSN() string {
+	writeDSN := c.DBPostgresqlWriteDSN
+	return writeDSN
+}
+
+// GetDatabaseReadDSN picks the DSN to use for read queries.
+// A non-empty DB_POSTGRESQL_READ1_DSN wins; when no replica is configured the
+// write DSN is returned instead.
+func (c *Config) GetDatabaseReadDSN() string {
+	if replica := c.DBPostgresqlRead1DSN; replica != "" {
+		return replica
+	}
+	return c.DBPostgresqlWriteDSN
+}
+
+// Addr formats the configured HTTP port as a ":<port>" listen address.
+func (c *Config) Addr() string {
+	// Sprint inserts no space here because one operand is a string.
+	return fmt.Sprint(":", c.HTTPPort)
+}
diff --git a/services/response-api/internal/domain/conversation/model.go b/services/response-api/internal/domain/conversation/model.go
new file mode 100644
index 00000000..bd6967e9
--- /dev/null
+++ b/services/response-api/internal/domain/conversation/model.go
@@ -0,0 +1,40 @@
+package conversation
+
+import "time"
+
+// Conversation represents a logical chat thread for the Responses API.
+//
+// ID is excluded from JSON output (`json:"-"`); PublicID is what serialises
+// as "id".
+type Conversation struct {
+	ID        uint                   `json:"-"`
+	PublicID  string                 `json:"id"`
+	UserID    string                 `json:"user_id"`
+	Metadata  map[string]interface{} `json:"metadata,omitempty"`
+	CreatedAt time.Time              `json:"created_at"`
+	UpdatedAt time.Time              `json:"updated_at"`
+}
+
+// ItemRole indicates who authored the conversation item.
+type ItemRole string
+
+// ItemStatus tracks whether the item is finalised.
+type ItemStatus string
+
+// Known values for ItemRole and ItemStatus. Both are plain string types, so
+// other values can be stored as well.
+const (
+	RoleSystem    ItemRole = "system"
+	RoleUser      ItemRole = "user"
+	RoleAssistant ItemRole = "assistant"
+	RoleTool      ItemRole = "tool"
+
+	ItemStatusCompleted ItemStatus = "completed"
+	ItemStatusPending   ItemStatus = "pending"
+)
+
+// Item contains individual conversation message state.
+//
+// ConversationID holds the parent conversation's numeric ID (not its public
+// ID). Content is a free-form JSON object.
+type Item struct {
+	ID             uint                   `json:"-"`
+	ConversationID uint                   `json:"conversation_id"`
+	Role           ItemRole               `json:"role"`
+	Status         ItemStatus             `json:"status"`
+	Content        map[string]interface{} `json:"content"`
+	// Sequence is the item's position within its conversation.
+	Sequence       int                    `json:"sequence"`
+	CreatedAt      time.Time              `json:"created_at"`
+}
diff --git a/services/response-api/internal/domain/conversation/repository.go b/services/response-api/internal/domain/conversation/repository.go
new file mode 100644
index 00000000..00f4b0c9
--- /dev/null
+++ b/services/response-api/internal/domain/conversation/repository.go
@@ -0,0 +1,15 @@
+package conversation
+
+import "context"
+
+// Repository exposes CRUD operations for conversation metadata.
+//
+// Only creation and lookup by public ID are part of the contract here.
+// NOTE(review): the result of a lookup that finds nothing (nil vs. error) is
+// decided by the implementation — confirm before relying on it.
+type Repository interface {
+	Create(ctx context.Context, conversation *Conversation) error
+	FindByPublicID(ctx context.Context, publicID string) (*Conversation, error)
+}
+
+// ItemRepository persists individual conversation messages.
+//
+// conversationID is the numeric Conversation.ID, not the public ID.
+// NOTE(review): the ordering of ListByConversationID results is not stated
+// here; callers presumably expect Sequence order — confirm.
+type ItemRepository interface {
+	BulkInsert(ctx context.Context, items []Item) error
+	ListByConversationID(ctx context.Context, conversationID uint) ([]Item, error)
+}
diff --git a/services/response-api/internal/domain/llm/context.go b/services/response-api/internal/domain/llm/context.go
new file mode 100644
index 00000000..39cafa34
--- /dev/null
+++ b/services/response-api/internal/domain/llm/context.go
@@ -0,0 +1,26 @@
+package llm
+
+import "context"
+
+// contextKey is a private type for context keys so values stored by this
+// package cannot collide with keys defined by other packages.
+type contextKey string
+
+// authTokenKey is the context key under which the Authorization header value
+// is stored.
+const authTokenKey contextKey = "llm-auth-token"
+
+// ContextWithAuthToken attaches an Authorization header value to ctx so that
+// downstream LLM calls can forward it. A nil ctx or an empty authHeader is
+// returned untouched.
+func ContextWithAuthToken(ctx context.Context, authHeader string) context.Context {
+	if ctx != nil && authHeader != "" {
+		return context.WithValue(ctx, authTokenKey, authHeader)
+	}
+	return ctx
+}
+
+// AuthTokenFromContext reads back the Authorization header value stored by
+// ContextWithAuthToken. It yields "" for a nil ctx or when nothing was stored.
+func AuthTokenFromContext(ctx context.Context) string {
+	if ctx != nil {
+		if header, found := ctx.Value(authTokenKey).(string); found {
+			return header
+		}
+	}
+	return ""
+}
diff --git a/services/response-api/internal/domain/llm/types.go b/services/response-api/internal/domain/llm/types.go
new file mode 100644
index 00000000..0c512218
--- /dev/null
+++ b/services/response-api/internal/domain/llm/types.go
@@ -0,0 +1,122 @@
+package llm
+
+import (
+	"context"
+	"encoding/json"
+)
+
+// Provider defines the contract for calling the LLM API /v1/chat/completions endpoint.
+//
+// CreateChatCompletion returns the full response in one call;
+// CreateChatCompletionStream returns a Stream that the caller reads
+// incrementally and must Close.
+type Provider interface {
+	CreateChatCompletion(reqCtx context.Context, req ChatCompletionRequest) (*ChatCompletionResponse, error)
+	CreateChatCompletionStream(reqCtx context.Context, req ChatCompletionRequest) (Stream, error)
+}
+
+// Stream abstracts an SSE or chunked response from the LLM API.
+//
+// Recv yields the next delta; Close releases the stream.
+// NOTE(review): how end-of-stream is signalled (presumably io.EOF from Recv)
+// is up to the implementation — confirm.
+type Stream interface {
+	Recv() (*ChatCompletionDelta, error)
+	Close() error
+}
+
+// ChatCompletionRequest mirrors the OpenAI-compatible request shape exposed by llm-api.
+//
+// Pointer and slice fields tagged omitempty are dropped from the JSON body
+// when nil/empty; Stream is always serialised.
+type ChatCompletionRequest struct {
+	Model       string           `json:"model"`
+	Messages    []ChatMessage    `json:"messages"`
+	Tools       []ToolDefinition `json:"tools,omitempty"`
+	ToolChoice  *ToolChoice      `json:"tool_choice,omitempty"`
+	Temperature *float64         `json:"temperature,omitempty"`
+	MaxTokens   *int             `json:"max_tokens,omitempty"`
+	Stream      bool             `json:"stream"`
+}
+
+// ChatMessage represents a single message in the conversation history.
+//
+// Content is untyped: it may be a plain string or any JSON-compatible value.
+// GetContentAsString produces a string form of it.
+type ChatMessage struct {
+	Role       string      `json:"role"`
+	Content    interface{} `json:"content"`
+	ToolCalls  []ToolCall  `json:"tool_calls,omitempty"`
+	ToolCallID *string     `json:"tool_call_id,omitempty"`
+}
+
+// GetContentAsString renders the message content as a string.
+// String content is returned as-is, nil content becomes "", and anything else
+// (arrays, objects, ...) is JSON-encoded. This keeps the value compatible with
+// the OpenAI SDK, which expects string content.
+func (m *ChatMessage) GetContentAsString() string {
+	if m.Content == nil {
+		return ""
+	}
+	if text, isString := m.Content.(string); isString {
+		return text
+	}
+	// A marshalling failure is deliberately ignored and yields "".
+	encoded, _ := json.Marshal(m.Content)
+	return string(encoded)
+}
+
+// ToolCall mirrors the OpenAI tool call format.
+//
+// ID identifies the call; ChatMessage.ToolCallID is the matching field on
+// the message side.
+type ToolCall struct {
+	ID       string       `json:"id"`
+	Type     string       `json:"type"`
+	Function ToolFunction `json:"function"`
+}
+
+// ToolFunction contains the function name and JSON arguments.
+//
+// Arguments is kept as raw JSON and is not decoded by this type.
+// NOTE(review): OpenAI-style payloads usually send arguments as a JSON-encoded
+// string, in which case the raw value here includes the surrounding quotes —
+// confirm how consumers decode it.
+type ToolFunction struct {
+	Name      string          `json:"name"`
+	Arguments json.RawMessage `json:"arguments"`
+}
+
+// ToolDefinition is the OpenAI compatible representation of an MCP tool.
+//
+// It is the element type of ChatCompletionRequest.Tools.
+type ToolDefinition struct {
+	Type     string             `json:"type"`
+	Function ToolFunctionSchema `json:"function"`
+}
+
+// ToolFunctionSchema declares the function contract passed to the LLM.
+//
+// Parameters is always serialised (no omitempty), so a nil map is emitted as
+// JSON null.
+// NOTE(review): presumably a JSON Schema object — confirm against the MCP
+// tool source.
+type ToolFunctionSchema struct {
+	Name        string                 `json:"name"`
+	Description string                 `json:"description,omitempty"`
+	Parameters  map[string]interface{} `json:"parameters"`
+}
+
+// ToolChoice allows forcing a specific tool or auto mode.
+//
+// Only the object form ({"type": ..., "function": {"name": ...}}) is
+// modelled here.
+// NOTE(review): the OpenAI API also accepts bare strings such as "auto" or
+// "none" for tool_choice; this struct cannot represent those — confirm
+// whether that is handled elsewhere.
+type ToolChoice struct {
+	Type     string `json:"type"`
+	Function struct {
+		Name string `json:"name"`
+	} `json:"function"`
+}
+
+// ChatCompletionResponse captures the non-streaming completion payload.
+//
+// Usage is a pointer and is nil when the provider does not report token
+// counts.
+type ChatCompletionResponse struct {
+	ID      string                 `json:"id"`
+	Object  string                 `json:"object"`
+	Created int64                  `json:"created"`
+	Model   string                 `json:"model"`
+	Choices []ChatCompletionChoice `json:"choices"`
+	Usage   *Usage                 `json:"usage,omitempty"`
+}
+
+// ChatCompletionChoice represents one completion choice.
+//
+// Message carries the generated message, including any tool calls.
+type ChatCompletionChoice struct {
+	Index        int         `json:"index"`
+	Message      ChatMessage `json:"message"`
+	FinishReason string      `json:"finish_reason"`
+}
+
+// Usage contains token accounting metadata.
+//
+// The values are stored exactly as reported; nothing here checks that
+// TotalTokens equals the sum of the other two.
+type Usage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
+// ChatCompletionDelta represents a streaming chunk.
+//
+// It is the value returned by Stream.Recv.
+type ChatCompletionDelta struct {
+	Choices []ChatCompletionDeltaChoice `json:"choices"`
+}
+
+// ChatCompletionDeltaChoice mirrors OpenAI streaming deltas.
+//
+// Delta reuses ChatMessage to carry the incremental fragment. FinishReason
+// stays "" when the chunk omits it or sends JSON null.
+type ChatCompletionDeltaChoice struct {
+	Delta        ChatMessage `json:"delta"`
+	FinishReason string      `json:"finish_reason"`
+	Index        int         `json:"index"`
+}
diff --git a/services/response-api/internal/domain/response/model.go b/services/response-api/internal/domain/response/model.go
new file mode 100644
index 00000000..c6ec6b5f
--- /dev/null
+++ b/services/response-api/internal/domain/response/model.go
@@ -0,0 +1,98 @@
+package response
+
+import (
+	"context"
+	"time"
+
+	"jan-server/services/response-api/internal/domain/llm"
+	"jan-server/services/response-api/internal/domain/tool"
+)
+
+// Status represents the lifecycle of a response.
+type Status string
+
+// Lifecycle states. Cancel treats completed, failed and cancelled as terminal
+// and leaves responses in those states untouched.
+// NOTE(review): StatusPending is declared but its use is not visible in this
+// package's service code — confirm where it applies.
+const (
+	StatusPending    Status = "pending"
+	StatusQueued     Status = "queued"
+	StatusInProgress Status = "in_progress"
+	StatusCompleted  Status = "completed"
+	StatusFailed     Status = "failed"
+	StatusCancelled  Status = "cancelled"
+)
+
+// Response is the main aggregate persisted to the database.
+//
+// Fields tagged `json:"-"` (ID, APIKey, ConversationID) are never serialised;
+// clients see PublicID as "id" and ConversationPublicID as "conversation_id".
+type Response struct {
+	ID                   uint                   `json:"-"`
+	PublicID             string                 `json:"id"`
+	Object               string                 `json:"object"`
+	UserID               string                 `json:"user_id"`
+	Model                string                 `json:"model"`
+	SystemPrompt         *string                `json:"system_prompt,omitempty"`
+	// Input and Output are untyped so they can hold whatever JSON shape the
+	// request supplied or the model produced.
+	Input                interface{}            `json:"input"`
+	Output               interface{}            `json:"output,omitempty"`
+	Status               Status                 `json:"status"`
+	Stream               bool                   `json:"stream"`
+	Background           bool                   `json:"background"`
+	Store                bool                   `json:"store"`
+	APIKey               *string                `json:"-"` // API key (X-API-Key or Bearer token) for background LLM calls
+	Metadata             map[string]interface{} `json:"metadata,omitempty"`
+	Usage                *llm.Usage             `json:"usage,omitempty"`
+	// Error is set when the response ends in the failed state.
+	Error                *ErrorDetails          `json:"error,omitempty"`
+	ConversationID       *uint                  `json:"-"`
+	ConversationPublicID *string                `json:"conversation_id,omitempty"`
+	PreviousResponseID   *string                `json:"previous_response_id,omitempty"`
+	CreatedAt            time.Time              `json:"created_at"`
+	UpdatedAt            time.Time              `json:"updated_at"`
+	// Lifecycle timestamps; each stays nil until the matching transition
+	// is recorded.
+	QueuedAt             *time.Time             `json:"queued_at,omitempty"`
+	StartedAt            *time.Time             `json:"started_at,omitempty"`
+	CompletedAt          *time.Time             `json:"completed_at,omitempty"`
+	CancelledAt          *time.Time             `json:"cancelled_at,omitempty"`
+	FailedAt             *time.Time             `json:"failed_at,omitempty"`
+}
+
+// ErrorDetails contains machine readable error info surfaced to clients.
+//
+// Code is a short identifier (the service uses "response_failed"); Message is
+// the human-readable error text.
+type ErrorDetails struct {
+	Code    string `json:"code"`
+	Message string `json:"message"`
+}
+
+// CreateParams contains inputs collected from the HTTP layer.
+//
+// Background=true requires Store=true; Service.Create rejects the request
+// otherwise.
+type CreateParams struct {
+	UserID             string
+	Model              string
+	// Input may be a string, a single message object, or a list of message
+	// objects; other shapes are JSON-encoded into one user message.
+	Input              interface{}
+	SystemPrompt       *string
+	Temperature        *float64
+	MaxTokens          *int
+	Stream             bool
+	Background         bool
+	Store              bool
+	APIKey             *string // API key (X-API-Key or Bearer token) for background LLM calls
+	ToolChoice         *llm.ToolChoice
+	// Tools overrides tool discovery: when empty, the synchronous path
+	// fetches the available tools from the MCP client instead.
+	Tools              []llm.ToolDefinition
+	// PreviousResponseID takes precedence over ConversationID when it
+	// resolves to a conversation.
+	PreviousResponseID *string
+	ConversationID     *string
+	Metadata           map[string]interface{}
+	// StreamObserver is optional; nil disables streaming callbacks.
+	StreamObserver     StreamObserver
+}
+
+// Service exposes the Response domain operations.
+//
+// publicID arguments are the externally visible response ID ("id" in JSON),
+// not the numeric database ID.
+type Service interface {
+	Create(ctx context.Context, params CreateParams) (*Response, error)
+	GetByPublicID(ctx context.Context, publicID string) (*Response, error)
+	Cancel(ctx context.Context, publicID string) (*Response, error)
+	ListConversationItems(ctx context.Context, publicID string) ([]ConversationItem, error)
+}
+
+// ConversationItem is returned when listing stored conversation history.
+//
+// It is a trimmed view of a stored conversation item: role, content and
+// status only.
+type ConversationItem struct {
+	Role    string      `json:"role"`
+	Content interface{} `json:"content"`
+	Status  string      `json:"status"`
+}
+
+// StreamObserver receives streaming lifecycle events.
+//
+// It extends tool.StreamObserver with OnResponseCreated, which the
+// synchronous create path calls once the Response has been built.
+type StreamObserver interface {
+	tool.StreamObserver
+	OnResponseCreated(resp *Response)
+}
diff --git a/services/response-api/internal/domain/response/repository.go b/services/response-api/internal/domain/response/repository.go
new file mode 100644
index 00000000..ab4b862a
--- /dev/null
+++ b/services/response-api/internal/domain/response/repository.go
@@ -0,0 +1,20 @@
+package response
+
+import (
+	"context"
+
+	"jan-server/services/response-api/internal/domain/tool"
+)
+
+// Repository defines persistence operations for responses.
+//
+// NOTE(review): MarkCancelled receives the *Response and presumably updates
+// its Status/CancelledAt in place as well as in storage — confirm against the
+// implementation.
+type Repository interface {
+	Create(ctx context.Context, response *Response) error
+	Update(ctx context.Context, response *Response) error
+	FindByPublicID(ctx context.Context, publicID string) (*Response, error)
+	MarkCancelled(ctx context.Context, response *Response) error
+}
+
+// ToolExecutionRepository persists tool execution metadata.
+//
+// responseID is the numeric Response.ID the executions belong to.
+type ToolExecutionRepository interface {
+	RecordExecutions(ctx context.Context, responseID uint, executions []tool.Execution) error
+}
diff --git a/services/response-api/internal/domain/response/service.go b/services/response-api/internal/domain/response/service.go
new file mode 100644
index 00000000..e7932389
--- /dev/null
+++ b/services/response-api/internal/domain/response/service.go
@@ -0,0 +1,633 @@
+package response
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/response-api/internal/domain/conversation"
+	"jan-server/services/response-api/internal/domain/llm"
+	"jan-server/services/response-api/internal/domain/tool"
+	"jan-server/services/response-api/internal/webhook"
+)
+
+// ServiceImpl provides the domain implementation.
+//
+// It coordinates the response, conversation and tool-execution repositories
+// with the tool orchestrator and MCP client. All fields are set once by
+// NewService.
+type ServiceImpl struct {
+	responses         Repository
+	conversations     conversation.Repository
+	conversationItems conversation.ItemRepository
+	toolExecutions    ToolExecutionRepository
+	orchestrator      *tool.Orchestrator
+	// mcpClient is used to discover tools when a request supplies none.
+	mcpClient         tool.MCPClient
+	webhookService    webhook.Service
+	log               zerolog.Logger
+}
+
+// NewService assembles a ServiceImpl from its collaborators.
+// The supplied logger is tagged with component="response-service".
+func NewService(
+	responses Repository,
+	conversations conversation.Repository,
+	conversationItems conversation.ItemRepository,
+	toolExecutions ToolExecutionRepository,
+	orchestrator *tool.Orchestrator,
+	mcpClient tool.MCPClient,
+	webhookService webhook.Service,
+	log zerolog.Logger,
+) *ServiceImpl {
+	svc := new(ServiceImpl)
+
+	// Persistence collaborators.
+	svc.responses = responses
+	svc.conversations = conversations
+	svc.conversationItems = conversationItems
+	svc.toolExecutions = toolExecutions
+
+	// Execution collaborators.
+	svc.orchestrator = orchestrator
+	svc.mcpClient = mcpClient
+	svc.webhookService = webhookService
+
+	svc.log = log.With().Str("component", "response-service").Logger()
+	return svc
+}
+
+// Create is the entry point for producing a response.
+// Foreground requests run synchronously to completion. Background requests
+// are only queued and returned immediately, and require Store=true.
+func (s *ServiceImpl) Create(ctx context.Context, params CreateParams) (*Response, error) {
+	if !params.Background {
+		return s.createSync(ctx, params)
+	}
+
+	// A background response has to be stored to be retrievable later.
+	if !params.Store {
+		return nil, errors.New("background mode requires store=true")
+	}
+	return s.createAsync(ctx, params)
+}
+
+// createAsync creates a response record with status=queued and returns immediately.
+//
+// The conversation is resolved in this order: the conversation of
+// PreviousResponseID (best effort, failures are only logged), then an
+// explicit ConversationID (failure is returned), otherwise a new
+// conversation is created. No LLM call is made here.
+//
+// NOTE(review): neither lookup checks that the conversation belongs to
+// params.UserID — confirm ownership is enforced elsewhere.
+func (s *ServiceImpl) createAsync(ctx context.Context, params CreateParams) (*Response, error) {
+	var conv *conversation.Conversation
+	var err error
+
+	// Handle conversation context
+	if params.PreviousResponseID != nil && strings.TrimSpace(*params.PreviousResponseID) != "" {
+		// `:=` declares an err scoped to this block; the assignment to conv
+		// below still targets the function-level variable.
+		prevResp, err := s.responses.FindByPublicID(ctx, *params.PreviousResponseID)
+		if err != nil {
+			s.log.Warn().Err(err).Str("previous_response_id", *params.PreviousResponseID).Msg("failed to load previous response")
+		} else if prevResp.ConversationPublicID != nil {
+			conv, err = s.conversations.FindByPublicID(ctx, *prevResp.ConversationPublicID)
+			if err != nil {
+				s.log.Warn().Err(err).Str("conversation_id", *prevResp.ConversationPublicID).Msg("failed to load conversation")
+			}
+		}
+	}
+
+	// Fall back to the explicit conversation ID, or start a new conversation.
+	if conv == nil {
+		if params.ConversationID != nil && strings.TrimSpace(*params.ConversationID) != "" {
+			conv, err = s.conversations.FindByPublicID(ctx, *params.ConversationID)
+			if err != nil {
+				return nil, fmt.Errorf("fetch conversation: %w", err)
+			}
+		} else {
+			conv = &conversation.Conversation{
+				PublicID: newPublicID("conv"),
+				UserID:   params.UserID,
+			}
+			if err := s.conversations.Create(ctx, conv); err != nil {
+				return nil, fmt.Errorf("create conversation: %w", err)
+			}
+		}
+	}
+
+	// One timestamp is shared by CreatedAt, UpdatedAt and QueuedAt.
+	now := time.Now()
+	responseModel := &Response{
+		PublicID:             newPublicID("resp"),
+		Object:               "response",
+		UserID:               params.UserID,
+		Model:                params.Model,
+		SystemPrompt:         params.SystemPrompt,
+		Input:                params.Input,
+		Status:               StatusQueued,
+		Stream:               params.Stream,
+		Background:           params.Background,
+		Store:                params.Store,
+		APIKey:               params.APIKey, // Store API key for background execution
+		Metadata:             params.Metadata,
+		ConversationID:       &conv.ID,
+		ConversationPublicID: &conv.PublicID,
+		PreviousResponseID:   params.PreviousResponseID,
+		CreatedAt:            now,
+		UpdatedAt:            now,
+		QueuedAt:             &now,
+	}
+
+	if err := s.responses.Create(ctx, responseModel); err != nil {
+		return nil, fmt.Errorf("create response: %w", err)
+	}
+
+	s.log.Info().
+		Str("response_id", responseModel.PublicID).
+		Str("user_id", params.UserID).
+		Str("model", params.Model).
+		Msg("background response queued")
+
+	return responseModel, nil
+}
+
+// createSync executes the response synchronously (original behavior).
+//
+// Steps: resolve or create the conversation, load its stored items, persist
+// the response as in_progress, build the message list, run the tool
+// orchestrator (retrying once without tools when the provider rejects them),
+// then persist the completed response, the tool executions and the new
+// conversation items.
+//
+// Failures after the response row exists go through failResponse, which
+// marks the response failed and returns (nil, err). Failures while storing
+// tool executions or conversation items are logged only.
+//
+// NOTE(review): neither conversation lookup checks that the conversation
+// belongs to params.UserID — confirm ownership is enforced elsewhere.
+func (s *ServiceImpl) createSync(ctx context.Context, params CreateParams) (*Response, error) {
+	var conv *conversation.Conversation
+	var err error
+
+	// If PreviousResponseID is provided, load that response's conversation for context
+	if params.PreviousResponseID != nil && strings.TrimSpace(*params.PreviousResponseID) != "" {
+		// `:=` declares an err scoped to this block; the assignment to conv
+		// below still targets the function-level variable.
+		prevResp, err := s.responses.FindByPublicID(ctx, *params.PreviousResponseID)
+		if err != nil {
+			s.log.Warn().Err(err).Str("previous_response_id", *params.PreviousResponseID).Msg("failed to load previous response, continuing without context")
+		} else if prevResp.ConversationPublicID != nil {
+			// Use the previous response's conversation to maintain context
+			conv, err = s.conversations.FindByPublicID(ctx, *prevResp.ConversationPublicID)
+			if err != nil {
+				s.log.Warn().Err(err).Str("conversation_id", *prevResp.ConversationPublicID).Msg("failed to load conversation, creating new one")
+			}
+		}
+	}
+
+	// If we still don't have a conversation, check for explicit conversation_id or create new
+	if conv == nil {
+		if params.ConversationID != nil && strings.TrimSpace(*params.ConversationID) != "" {
+			conv, err = s.conversations.FindByPublicID(ctx, *params.ConversationID)
+			if err != nil {
+				return nil, fmt.Errorf("fetch conversation: %w", err)
+			}
+		} else {
+			conv = &conversation.Conversation{
+				PublicID: newPublicID("conv"),
+				UserID:   params.UserID,
+			}
+			if err := s.conversations.Create(ctx, conv); err != nil {
+				return nil, fmt.Errorf("create conversation: %w", err)
+			}
+		}
+	}
+
+	// Stored items become the chat history and fix the next sequence number.
+	existingItems, err := s.conversationItems.ListByConversationID(ctx, conv.ID)
+	if err != nil {
+		return nil, fmt.Errorf("list conversation items: %w", err)
+	}
+
+	// Unlike createAsync, the API key is not copied onto the record here.
+	responseModel := &Response{
+		PublicID:             newPublicID("resp"),
+		Object:               "response",
+		UserID:               params.UserID,
+		Model:                params.Model,
+		SystemPrompt:         params.SystemPrompt,
+		Input:                params.Input,
+		Status:               StatusInProgress,
+		Stream:               params.Stream,
+		Background:           params.Background,
+		Store:                params.Store,
+		Metadata:             params.Metadata,
+		ConversationID:       &conv.ID,
+		ConversationPublicID: &conv.PublicID,
+		PreviousResponseID:   params.PreviousResponseID,
+		CreatedAt:            time.Now(),
+		UpdatedAt:            time.Now(),
+	}
+
+	// NOTE(review): the observer is notified before the row is persisted, so
+	// a failing Create below leaves the client told about a response that
+	// was never stored — confirm this ordering is intended.
+	if params.StreamObserver != nil {
+		params.StreamObserver.OnResponseCreated(responseModel)
+	}
+
+	if err := s.responses.Create(ctx, responseModel); err != nil {
+		return nil, fmt.Errorf("create response: %w", err)
+	}
+
+	baseMessages, err := s.buildBaseMessages(params.SystemPrompt, existingItems)
+	if err != nil {
+		return s.failResponse(ctx, responseModel, fmt.Errorf("build base messages: %w", err))
+	}
+
+	userMessages, convoItems, err := s.convertInputToMessages(conv.ID, len(existingItems), params.Input)
+	if err != nil {
+		return s.failResponse(ctx, responseModel, err)
+	}
+	messages := append(baseMessages, userMessages...)
+	// Length of the prompt sent to the orchestrator; used below to slice off
+	// the messages the orchestrator appended.
+	initialLength := len(messages)
+
+	// Caller-supplied tools win; otherwise discover them from the MCP client.
+	toolDefs := params.Tools
+	if len(toolDefs) == 0 {
+		if toolDefs, err = s.fetchAvailableTools(ctx); err != nil {
+			return s.failResponse(ctx, responseModel, err)
+		}
+	}
+
+	// execParams builds the orchestrator input so the retry below can reuse
+	// everything except the tool definitions and tool choice.
+	execParams := func(defs []llm.ToolDefinition, toolChoice *llm.ToolChoice) tool.ExecuteParams {
+		return tool.ExecuteParams{
+			Ctx:             ctx,
+			Model:           params.Model,
+			Messages:        messages,
+			Temperature:     params.Temperature,
+			MaxTokens:       params.MaxTokens,
+			ToolChoice:      toolChoice,
+			ToolDefinitions: defs,
+			StreamObserver:  params.StreamObserver,
+		}
+	}
+
+	orchestratorResult, err := s.orchestrator.Execute(execParams(toolDefs, params.ToolChoice))
+	if err != nil && shouldRetryWithoutTools(err) && len(toolDefs) > 0 {
+		s.log.Warn().Err(err).Str("response_id", responseModel.PublicID).Msg("llm provider rejected tool definitions, retrying without tools")
+		orchestratorResult, err = s.orchestrator.Execute(execParams(nil, nil))
+	}
+	if err != nil {
+		return s.failResponse(ctx, responseModel, err)
+	}
+
+	responseModel.Status = StatusCompleted
+	responseModel.Output = orchestratorResult.FinalMessage.Content
+	responseModel.Usage = orchestratorResult.Usage
+	now := time.Now()
+	responseModel.CompletedAt = &now
+	responseModel.UpdatedAt = now
+
+	if err := s.responses.Update(ctx, responseModel); err != nil {
+		return nil, err
+	}
+
+	// Best effort: the response is already completed, so only log failures.
+	if err := s.toolExecutions.RecordExecutions(ctx, responseModel.ID, orchestratorResult.Executions); err != nil {
+		s.log.Error().Err(err).Str("response_id", responseModel.PublicID).Msg("store tool executions failed")
+	}
+
+	// NOTE(review): this slice panics if the orchestrator ever returns fewer
+	// than initialLength messages — presumably it only appends; confirm.
+	newMessages := orchestratorResult.Messages[initialLength:]
+	newItems := append(convoItems, s.convertMessagesToItems(conv.ID, len(existingItems)+len(convoItems), newMessages)...)
+	if err := s.conversationItems.BulkInsert(ctx, newItems); err != nil {
+		s.log.Error().Err(err).Str("response_id", responseModel.PublicID).Msg("store conversation items failed")
+	}
+
+	return responseModel, nil
+}
+
+// GetByPublicID looks a response up by its public identifier, passing the
+// repository's result and error through unchanged.
+func (s *ServiceImpl) GetByPublicID(ctx context.Context, publicID string) (*Response, error) {
+	found, err := s.responses.FindByPublicID(ctx, publicID)
+	return found, err
+}
+
+// Cancel marks the response as cancelled.
+// For queued tasks, this prevents them from being picked up by workers.
+// For in-progress tasks, workers should periodically check cancellation status.
+//
+// A response already in a terminal state (completed, cancelled or failed) is
+// returned unchanged with a nil error. Lookup and persistence errors are
+// returned as-is.
+func (s *ServiceImpl) Cancel(ctx context.Context, publicID string) (*Response, error) {
+	resp, err := s.responses.FindByPublicID(ctx, publicID)
+	if err != nil {
+		return nil, err
+	}
+
+	// Already in terminal state
+	switch resp.Status {
+	case StatusCompleted, StatusCancelled, StatusFailed:
+		return resp, nil
+	}
+
+	// Capture the status before cancelling: MarkCancelled receives the pointer
+	// and may overwrite resp.Status, which would make the log line below
+	// report the new status as the "previous" one.
+	previousStatus := resp.Status
+
+	// Cancel the response
+	if err := s.responses.MarkCancelled(ctx, resp); err != nil {
+		return nil, err
+	}
+
+	s.log.Info().
+		Str("response_id", resp.PublicID).
+		Str("previous_status", string(previousStatus)).
+		Msg("response cancelled")
+
+	return resp, nil
+}
+
+// ListConversationItems returns the stored history of the conversation that
+// the given response belongs to, reduced to role, content and status.
+// It fails when the response cannot be loaded, has no conversation attached,
+// or the items cannot be listed.
+func (s *ServiceImpl) ListConversationItems(ctx context.Context, publicID string) ([]ConversationItem, error) {
+	resp, err := s.responses.FindByPublicID(ctx, publicID)
+	if err != nil {
+		return nil, err
+	}
+	if resp.ConversationID == nil {
+		return nil, errors.New("response has no conversation")
+	}
+
+	stored, err := s.conversationItems.ListByConversationID(ctx, *resp.ConversationID)
+	if err != nil {
+		return nil, err
+	}
+
+	// Pre-sized so the result is a non-nil slice even with no stored items.
+	view := make([]ConversationItem, len(stored))
+	for i := range stored {
+		view[i] = ConversationItem{
+			Role:    string(stored[i].Role),
+			Content: stored[i].Content,
+			Status:  string(stored[i].Status),
+		}
+	}
+	return view, nil
+}
+
+// failResponse records failure on resp and persists it.
+//
+// It sets the status to failed, stamps FailedAt and UpdatedAt with the same
+// instant, and stores the error text under the "response_failed" code. A
+// persistence error is logged only, so the original failure is what callers
+// see. It always returns (nil, failure), which lets callers write
+// `return s.failResponse(...)`.
+func (s *ServiceImpl) failResponse(ctx context.Context, resp *Response, failure error) (*Response, error) {
+	now := time.Now()
+	resp.Status = StatusFailed
+	resp.FailedAt = &now
+	// Keep UpdatedAt in step with the status change, as the completion path does.
+	resp.UpdatedAt = now
+	resp.Error = &ErrorDetails{
+		Code:    "response_failed",
+		Message: failure.Error(),
+	}
+	if err := s.responses.Update(ctx, resp); err != nil {
+		s.log.Error().Err(err).Str("response_id", resp.PublicID).Msg("update failed response")
+	}
+	return nil, failure
+}
+
+// buildBaseMessages turns the optional system prompt and the stored
+// conversation items into the leading chat messages of an LLM request.
+// A nil or whitespace-only system prompt is skipped. The returned error is
+// always nil.
+func (s *ServiceImpl) buildBaseMessages(systemPrompt *string, items []conversation.Item) ([]llm.ChatMessage, error) {
+	history := make([]llm.ChatMessage, 0, len(items)+1)
+
+	if systemPrompt != nil {
+		if prompt := strings.TrimSpace(*systemPrompt); prompt != "" {
+			history = append(history, llm.ChatMessage{Role: "system", Content: prompt})
+		}
+	}
+
+	for i := range items {
+		history = append(history, llm.ChatMessage{
+			Role:    string(items[i].Role),
+			Content: contentToLLM(items[i].Content),
+		})
+	}
+	return history, nil
+}
+
+// convertInputToMessages translates the request input into chat messages and
+// the matching conversation items to persist.
+//
+// Accepted shapes: a string (trimmed, sent as a user message), a single
+// message object, or a list of message objects. Anything else is JSON-encoded
+// into one user message. Items are numbered consecutively from startingSeq.
+// An invalid message object aborts the conversion with an error.
+func (s *ServiceImpl) convertInputToMessages(conversationID uint, startingSeq int, input interface{}) ([]llm.ChatMessage, []conversation.Item, error) {
+	var (
+		messages   []llm.ChatMessage
+		convoItems []conversation.Item
+	)
+
+	// record appends msg to both outputs, numbering the item by how many
+	// items have been collected so far.
+	record := func(msg llm.ChatMessage) {
+		messages = append(messages, msg)
+		convoItems = append(convoItems, newConversationItem(conversationID, startingSeq+len(convoItems), msg))
+	}
+
+	switch typed := input.(type) {
+	case string:
+		record(llm.ChatMessage{Role: "user", Content: strings.TrimSpace(typed)})
+
+	case map[string]interface{}:
+		msg, err := mapToChatMessage(typed)
+		if err != nil {
+			return nil, nil, err
+		}
+		record(msg)
+
+	case []interface{}:
+		for _, entry := range typed {
+			msg, err := mapToChatMessage(entry)
+			if err != nil {
+				return nil, nil, err
+			}
+			record(msg)
+		}
+
+	default:
+		// Unknown shapes are passed on as their JSON text.
+		encoded, _ := json.Marshal(input)
+		record(llm.ChatMessage{Role: "user", Content: string(encoded)})
+	}
+
+	return messages, convoItems, nil
+}
+
+// convertMessagesToItems wraps each chat message in a conversation item,
+// numbering them consecutively from startingSeq.
+func (s *ServiceImpl) convertMessagesToItems(conversationID uint, startingSeq int, messages []llm.ChatMessage) []conversation.Item {
+	converted := make([]conversation.Item, 0, len(messages))
+	for offset, msg := range messages {
+		converted = append(converted, newConversationItem(conversationID, startingSeq+offset, msg))
+	}
+	return converted
+}
+
+// fetchAvailableTools asks the MCP client for its tools and converts each one
+// to the LLM tool-definition format. A listing failure is wrapped and returned.
+func (s *ServiceImpl) fetchAvailableTools(ctx context.Context) ([]llm.ToolDefinition, error) {
+	mcpTools, err := s.mcpClient.ListTools(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("list MCP tools: %w", err)
+	}
+
+	// The loop variable is named mcpTool so it does not shadow the tool package.
+	defs := make([]llm.ToolDefinition, len(mcpTools))
+	for i, mcpTool := range mcpTools {
+		defs[i] = mcpTool.ToLLMTool()
+	}
+	return defs, nil
+}
+
+// newConversationItem builds a completed conversation item from a chat
+// message. The stored sequence is sequence+1, an empty role defaults to
+// "user", and the content is normalised into a JSON object.
+func newConversationItem(conversationID uint, sequence int, msg llm.ChatMessage) conversation.Item {
+	item := conversation.Item{
+		ConversationID: conversationID,
+		Role:           conversation.RoleUser,
+		Status:         conversation.ItemStatusCompleted,
+		Content:        normalizeContent(msg.Content),
+		Sequence:       sequence + 1,
+		CreatedAt:      time.Now(),
+	}
+	if msg.Role != "" {
+		item.Role = conversation.ItemRole(msg.Role)
+	}
+	return item
+}
+
+// contentToLLM converts stored item content back into the value sent to the
+// LLM as message content. It is the inverse of normalizeContent:
+//
+//   - nil content yields nil;
+//   - content with a "text" key yields that text (covers "text" and "json");
+//   - content of type "list" with an "items" key yields the items, so
+//     multi-part content is replayed in its original array form rather than
+//     as the storage wrapper object;
+//   - any other object is returned unchanged.
+func contentToLLM(content map[string]interface{}) interface{} {
+	if content == nil {
+		return nil
+	}
+	if text, ok := content["text"]; ok {
+		return text
+	}
+	// Unwrap the {"type":"list","items":[...]} envelope written by
+	// normalizeContent for array content.
+	if kind, _ := content["type"].(string); kind == "list" {
+		if items, ok := content["items"]; ok {
+			return items
+		}
+	}
+	return content
+}
+
+// normalizeContent converts arbitrary chat-message content into the map shape
+// stored on conversation items.
+//
+//   - string                 -> {"type": "text", "text": <string>}
+//   - map[string]interface{} -> returned as-is
+//   - map[string]string      -> same keys/values, widened to interface{} values
+//   - []interface{}          -> {"type": "list", "items": <slice>}
+//   - anything else          -> {"type": "json", "text": <JSON encoding>}
+//
+// The map[string]string case exists because tool-result messages carry their
+// content in that type; without it they would fall into the JSON fallback and
+// be stored as a doubly-encoded string.
+func normalizeContent(content interface{}) map[string]interface{} {
+	switch v := content.(type) {
+	case string:
+		return map[string]interface{}{"type": "text", "text": v}
+	case map[string]interface{}:
+		return v
+	case map[string]string:
+		widened := make(map[string]interface{}, len(v))
+		for key, value := range v {
+			widened[key] = value
+		}
+		return widened
+	case []interface{}:
+		return map[string]interface{}{"type": "list", "items": v}
+	default:
+		// Best effort: a value that cannot be marshalled yields empty text.
+		encoded, _ := json.Marshal(v)
+		return map[string]interface{}{"type": "json", "text": string(encoded)}
+	}
+}
+
+// mapToChatMessage converts one decoded input object into a chat message.
+// The role defaults to "user" when missing, empty, or not a string. The body
+// is taken from "content", falling back to "text"; an error is returned when
+// the value is not an object or neither key holds a non-nil value.
+func mapToChatMessage(messageData interface{}) (llm.ChatMessage, error) {
+	fields, isObject := messageData.(map[string]interface{})
+	if !isObject {
+		return llm.ChatMessage{}, errors.New("input items must be objects with role/content")
+	}
+
+	body := fields["content"]
+	if body == nil {
+		if body = fields["text"]; body == nil {
+			return llm.ChatMessage{}, errors.New("input item missing content")
+		}
+	}
+
+	msg := llm.ChatMessage{Role: "user", Content: body}
+	if declared, isString := fields["role"].(string); isString && declared != "" {
+		msg.Role = declared
+	}
+	return msg, nil
+}
+
+// newPublicID returns "<prefix>_<uuid>" using a freshly generated UUID string.
+func newPublicID(prefix string) string {
+	return prefix + "_" + uuid.NewString()
+}
+
+// shouldRetryWithoutTools reports whether err's message (compared
+// case-insensitively) contains one of the phrases that trigger a retry of the
+// request without tool definitions. A nil error never triggers a retry.
+func shouldRetryWithoutTools(err error) bool {
+	if err == nil {
+		return false
+	}
+	lowered := strings.ToLower(err.Error())
+	for _, marker := range []string{
+		"failed to complete chat request",
+		"tools unsupported",
+	} {
+		if strings.Contains(lowered, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// ExecuteBackground processes a queued background task.
+// This method is called by workers from the worker pool.
+//
+// It loads the response identified by publicID (which must be in_progress),
+// rebuilds the message list from the stored conversation history plus the
+// response input, runs the tool orchestrator without streaming, persists the
+// outcome on the response, and fires a webhook notification from a separate
+// goroutine.
+//
+// The returned error is the load/validation/persistence failure when one
+// occurs; otherwise it is the orchestrator's error (nil on success). Failures
+// to store tool executions or conversation items are logged, not returned.
+func (s *ServiceImpl) ExecuteBackground(ctx context.Context, publicID string) error {
+	// Load the response record
+	resp, err := s.responses.FindByPublicID(ctx, publicID)
+	if err != nil {
+		return fmt.Errorf("failed to load response: %w", err)
+	}
+
+	// Verify it's in a processable state
+	if resp.Status != StatusInProgress {
+		return fmt.Errorf("response %s is not in_progress (current: %s)", publicID, resp.Status)
+	}
+
+	// Inject API key into context for LLM API calls
+	if resp.APIKey != nil && *resp.APIKey != "" {
+		ctx = llm.ContextWithAuthToken(ctx, *resp.APIKey)
+	}
+
+	// Load conversation for context. The public ID is what gets dereferenced
+	// for the lookup, so it must be checked as well as the internal ID.
+	if resp.ConversationID == nil || resp.ConversationPublicID == nil {
+		return errors.New("response has no conversation")
+	}
+	conv, err := s.conversations.FindByPublicID(ctx, *resp.ConversationPublicID)
+	if err != nil {
+		return fmt.Errorf("failed to load conversation: %w", err)
+	}
+
+	// Load conversation items (history)
+	existingItems, err := s.conversationItems.ListByConversationID(ctx, conv.ID)
+	if err != nil {
+		return fmt.Errorf("failed to load conversation items: %w", err)
+	}
+
+	// Build messages from history and current input
+	baseMessages, err := s.buildBaseMessages(resp.SystemPrompt, existingItems)
+	if err != nil {
+		return fmt.Errorf("build base messages: %w", err)
+	}
+
+	userMessages, convoItems, err := s.convertInputToMessages(conv.ID, len(existingItems), resp.Input)
+	if err != nil {
+		return fmt.Errorf("convert input: %w", err)
+	}
+	messages := append(baseMessages, userMessages...)
+	// Everything the orchestrator appends beyond this index is new output.
+	initialLength := len(messages)
+
+	// Load tool definitions
+	toolDefs, err := s.fetchAvailableTools(ctx)
+	if err != nil {
+		s.log.Warn().Err(err).Msg("Failed to load MCP tools, continuing without tools")
+		toolDefs = []llm.ToolDefinition{}
+	}
+
+	// Execute orchestration (no streaming in background mode)
+	execParams := tool.ExecuteParams{
+		Ctx:             ctx,
+		Model:           resp.Model,
+		Messages:        messages,
+		Temperature:     nil, // Use model defaults for background tasks
+		MaxTokens:       nil,
+		ToolDefinitions: toolDefs,
+		StreamObserver:  nil, // Background mode never streams
+	}
+
+	orchestratorResult, execErr := s.orchestrator.Execute(execParams)
+	if execErr != nil && shouldRetryWithoutTools(execErr) && len(toolDefs) > 0 {
+		s.log.Warn().Err(execErr).Str("response_id", resp.PublicID).Msg("llm provider rejected tool definitions, retrying without tools")
+		execParams.ToolDefinitions = nil
+		orchestratorResult, execErr = s.orchestrator.Execute(execParams)
+	}
+
+	// Update response status
+	now := time.Now()
+	if execErr != nil {
+		resp.Status = StatusFailed
+		resp.Error = &ErrorDetails{Message: execErr.Error()}
+		resp.CompletedAt = &now
+		resp.UpdatedAt = now
+	} else {
+		resp.Status = StatusCompleted
+		resp.Output = orchestratorResult.FinalMessage.Content
+		resp.Usage = orchestratorResult.Usage
+		resp.CompletedAt = &now
+		resp.UpdatedAt = now
+
+		// Record tool executions
+		if err := s.toolExecutions.RecordExecutions(ctx, resp.ID, orchestratorResult.Executions); err != nil {
+			s.log.Error().Err(err).Str("response_id", resp.PublicID).Msg("store tool executions failed")
+		}
+
+		// Record conversation items (skip initial messages, only store new ones)
+		newMessages := orchestratorResult.Messages[initialLength:]
+		newItems := append(convoItems, s.convertMessagesToItems(conv.ID, len(existingItems)+len(convoItems), newMessages)...)
+		if err := s.conversationItems.BulkInsert(ctx, newItems); err != nil {
+			s.log.Error().Err(err).Str("response_id", resp.PublicID).Msg("store conversation items failed")
+		}
+	}
+
+	// Persist final state
+	if err := s.responses.Update(ctx, resp); err != nil {
+		return fmt.Errorf("failed to update response: %w", err)
+	}
+
+	// Snapshot everything the webhook goroutine needs so it never reads resp
+	// after this method has returned.
+	failed := execErr != nil
+	responseID := resp.PublicID
+	metadata := resp.Metadata
+	output := resp.Output
+	completedAt := resp.CompletedAt
+	errorCode, errorMsg := "execution_failed", ""
+	if failed {
+		errorMsg = execErr.Error()
+		if resp.Error != nil {
+			// Keep the default code when the stored error carries none;
+			// the ErrorDetails built above only sets Message.
+			if resp.Error.Code != "" {
+				errorCode = resp.Error.Code
+			}
+			errorMsg = resp.Error.Message
+		}
+	}
+
+	// Send webhook notifications (async, don't block on webhook failures)
+	go func() {
+		webhookCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+
+		if failed {
+			if err := s.webhookService.NotifyFailed(webhookCtx, responseID, errorCode, errorMsg, metadata); err != nil {
+				s.log.Error().Err(err).Str("response_id", responseID).Msg("webhook notification failed")
+			}
+		} else {
+			if err := s.webhookService.NotifyCompleted(webhookCtx, responseID, output, metadata, completedAt); err != nil {
+				s.log.Error().Err(err).Str("response_id", responseID).Msg("webhook notification failed")
+			}
+		}
+	}()
+
+	return execErr
+}
diff --git a/services/response-api/internal/domain/tool/orchestrator.go b/services/response-api/internal/domain/tool/orchestrator.go
new file mode 100644
index 00000000..547820c3
--- /dev/null
+++ b/services/response-api/internal/domain/tool/orchestrator.go
@@ -0,0 +1,389 @@
+package tool
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+	"time"
+
+	"jan-server/services/response-api/internal/domain/llm"
+)
+
+var (
+	// ErrToolDepthExceeded is returned when the orchestrator hits the max recursion depth.
+	ErrToolDepthExceeded = errors.New("tool orchestration depth exceeded")
+)
+
+// Orchestrator coordinates LLM reasoning with MCP tool execution until a final answer is produced.
+type Orchestrator struct {
+	// llmProvider serves both the streaming and non-streaming chat completions.
+	llmProvider     llm.Provider
+	// mcpClient executes the tool calls requested by the model.
+	mcpClient       MCPClient
+	// maxDepth bounds the number of LLM round-trips Execute performs.
+	maxDepth        int
+	// toolCallTimeout is applied per tool call; values <= 0 mean the caller's
+	// context is used unchanged.
+	toolCallTimeout time.Duration
+}
+
+// NewOrchestrator returns an Orchestrator wired with the given LLM provider,
+// MCP client, round-trip limit, and per-tool-call timeout. The arguments are
+// stored as given; no validation is performed.
+func NewOrchestrator(llmProvider llm.Provider, mcpClient MCPClient, maxDepth int, toolCallTimeout time.Duration) *Orchestrator {
+	orchestrator := new(Orchestrator)
+	orchestrator.llmProvider = llmProvider
+	orchestrator.mcpClient = mcpClient
+	orchestrator.maxDepth = maxDepth
+	orchestrator.toolCallTimeout = toolCallTimeout
+	return orchestrator
+}
+
+// ExecuteParams contains the data needed to start the orchestration loop.
+type ExecuteParams struct {
+	// Ctx is passed to every LLM request and is the parent of each tool-call context.
+	Ctx             context.Context
+	// Model, Temperature, MaxTokens and ToolChoice are copied onto each chat completion request.
+	Model           string
+	// Messages is the starting conversation; Execute works on a copy of it.
+	Messages        []llm.ChatMessage
+	Temperature     *float64
+	MaxTokens       *int
+	ToolChoice      *llm.ToolChoice
+	// ToolDefinitions is sent as the request's Tools on every round-trip.
+	ToolDefinitions []llm.ToolDefinition
+	// StreamObserver, when non-nil, switches Execute to streaming completions
+	// and receives delta, tool-call and tool-result callbacks.
+	StreamObserver  StreamObserver
+}
+
+// ExecuteResult captures the final assistant message and tool execution records.
+type ExecuteResult struct {
+	// FinalMessage is the assistant message that requested no tool calls.
+	FinalMessage llm.ChatMessage
+	// Messages is the input messages followed by every assistant and tool
+	// message appended during the loop, FinalMessage included.
+	Messages     []llm.ChatMessage
+	// Usage comes from the last non-streaming completion; it is nil when
+	// Execute ran in streaming mode.
+	Usage        *llm.Usage
+	// Executions lists the tool calls in the order they were run.
+	Executions   []Execution
+}
+
+// Execute drains the orchestration loop until the assistant responds without requesting tools.
+//
+// Each iteration sends the accumulated messages to the LLM (streaming when
+// params.StreamObserver is non-nil), appends the assistant reply, and either
+// returns it as the final answer or runs every requested tool call and
+// appends one tool message per call before looping again.
+//
+// Errors: LLM failures and unparsable tool calls abort the loop and are
+// returned. A failing tool call does not abort; it is recorded as a failed
+// Execution and reported back to the model. ErrToolDepthExceeded is returned
+// when maxDepth iterations pass without a tool-free reply (including when
+// maxDepth is <= 0, in which case no request is made).
+func (o *Orchestrator) Execute(params ExecuteParams) (*ExecuteResult, error) {
+	// Copy so appends below never write into the caller's backing array.
+	messages := append([]llm.ChatMessage(nil), params.Messages...)
+	var executions []Execution
+
+	for depth := 0; depth < o.maxDepth; depth++ {
+		req := llm.ChatCompletionRequest{
+			Model:       params.Model,
+			Messages:    messages,
+			Tools:       params.ToolDefinitions,
+			ToolChoice:  params.ToolChoice,
+			Temperature: params.Temperature,
+			MaxTokens:   params.MaxTokens,
+			Stream:      false,
+		}
+		// Streaming is requested exactly when an observer is attached.
+		req.Stream = params.StreamObserver != nil
+
+		var choice llm.ChatCompletionChoice
+		// usage is scoped to this iteration and only set on the non-streaming
+		// path, so the result reports the last completion's usage (or nil).
+		var usage *llm.Usage
+
+		if params.StreamObserver != nil {
+			streamChoice, err := o.streamChatCompletion(params.Ctx, req, params.StreamObserver)
+			if err != nil {
+				return nil, err
+			}
+			choice = *streamChoice
+		} else {
+			resp, err := o.llmProvider.CreateChatCompletion(params.Ctx, req)
+			if err != nil {
+				return nil, err
+			}
+			if len(resp.Choices) == 0 {
+				return nil, errors.New("llm returned no choices")
+			}
+			choice = resp.Choices[0]
+			usage = resp.Usage
+		}
+
+		messages = append(messages, choice.Message)
+
+		// No tool calls means the assistant produced its final answer.
+		if len(choice.Message.ToolCalls) == 0 {
+			return &ExecuteResult{
+				FinalMessage: choice.Message,
+				Messages:     messages,
+				Usage:        usage,
+				Executions:   executions,
+			}, nil
+		}
+
+		for _, call := range choice.Message.ToolCalls {
+			parsedCall, err := ParseToolCall(call)
+			if err != nil {
+				return nil, fmt.Errorf("parse tool call: %w", err)
+			}
+
+			// ExecutionOrder is 1-based across the whole Execute invocation.
+			execution := Execution{
+				CallID:         parsedCall.ID,
+				ToolName:       parsedCall.Name,
+				Arguments:      parsedCall.Arguments,
+				Status:         ExecutionStatusRunning,
+				ExecutionOrder: len(executions) + 1,
+				CreatedAt:      time.Now(),
+				UpdatedAt:      time.Now(),
+			}
+
+			if params.StreamObserver != nil {
+				params.StreamObserver.OnToolCall(parsedCall)
+			}
+
+			// Derive a per-call deadline only when a timeout is configured.
+			callCtx := params.Ctx
+			var cancel context.CancelFunc
+			if o.toolCallTimeout > 0 {
+				callCtx, cancel = context.WithTimeout(callCtx, o.toolCallTimeout)
+			}
+
+			result, err := o.mcpClient.CallTool(callCtx, parsedCall.Name, parsedCall.Arguments)
+			// Cancel right after the call instead of deferring, so timers do
+			// not pile up until Execute returns.
+			if cancel != nil {
+				cancel()
+			}
+			if err != nil {
+				execution.Status = ExecutionStatusFailed
+				execution.ErrorMessage = err.Error()
+			} else {
+				execution.Status = ExecutionStatusCompleted
+				execution.Result = result
+				// A tool can report failure in-band without a transport error.
+				if result != nil && result.IsError {
+					execution.Status = ExecutionStatusFailed
+					execution.ErrorMessage = result.Error
+				}
+			}
+			execution.UpdatedAt = time.Now()
+			executions = append(executions, execution)
+
+			// execution.Result is nil here when CallTool returned an error.
+			if params.StreamObserver != nil {
+				params.StreamObserver.OnToolResult(parsedCall.ID, execution.Result)
+			}
+
+			// Feed the outcome (or the error text) back to the model.
+			messages = append(messages, toolResultToMessage(parsedCall.ID, execution.Result, execution.ErrorMessage))
+		}
+	}
+
+	return nil, ErrToolDepthExceeded
+}
+
+// streamChatCompletion opens a streaming completion, forwards each non-nil
+// delta to observer (when one is given), and folds all deltas into a single
+// choice. It returns an error if the stream cannot be opened, if receiving
+// fails with anything other than io.EOF, or if no choice was accumulated.
+func (o *Orchestrator) streamChatCompletion(ctx context.Context, req llm.ChatCompletionRequest, observer StreamObserver) (*llm.ChatCompletionChoice, error) {
+	stream, err := o.llmProvider.CreateChatCompletionStream(ctx, req)
+	if err != nil {
+		return nil, err
+	}
+	defer stream.Close()
+
+	collected := newStreamAccumulator()
+	for {
+		chunk, recvErr := stream.Recv()
+		if recvErr != nil {
+			if recvErr == io.EOF {
+				break
+			}
+			return nil, recvErr
+		}
+		if chunk != nil && observer != nil {
+			observer.OnDelta(*chunk)
+		}
+		collected.Apply(chunk)
+	}
+
+	if final := collected.Result(); final != nil {
+		return final, nil
+	}
+	return nil, errors.New("stream produced no choices")
+}
+
+// toolResultToMessage wraps a tool outcome in a "tool" role chat message that
+// references the originating call through ToolCallID.
+func toolResultToMessage(toolCallID string, result *Result, errorMessage string) llm.ChatMessage {
+	msg := llm.ChatMessage{Role: "tool", ToolCallID: &toolCallID}
+	msg.Content = buildContentFromResult(result, errorMessage)
+	return msg
+}
+
+// buildContentFromResult renders a tool outcome as a single text part
+// (a map[string]string with "type" and "text" keys):
+//   - nil result: the text is errorMessage as given
+//   - result flagged as error: errorMessage, or a generic notice when blank
+//   - otherwise: the result's text parts joined by newlines, or a placeholder
+//     when that yields nothing
+func buildContentFromResult(result *Result, errorMessage string) interface{} {
+	asTextPart := func(text string) interface{} {
+		return map[string]string{
+			"type": "text",
+			"text": text,
+		}
+	}
+
+	switch {
+	case result == nil:
+		return asTextPart(errorMessage)
+	case result.IsError:
+		return asTextPart(firstNonEmpty(errorMessage, "tool execution returned an error"))
+	}
+
+	var joined strings.Builder
+	for i := range result.Content {
+		part := result.Content[i]
+		if part.Type != "text" {
+			continue
+		}
+		// The separator depends on what has been written so far, not on the
+		// part's position.
+		if joined.Len() != 0 {
+			joined.WriteString("\n")
+		}
+		joined.WriteString(part.Text)
+	}
+
+	if joined.Len() == 0 {
+		return asTextPart("[tool execution completed]")
+	}
+	return asTextPart(joined.String())
+}
+
+// firstNonEmpty returns the first argument that is not blank once surrounding
+// whitespace is ignored. The value is returned untrimmed; "" is returned when
+// every argument is blank or none is given.
+func firstNonEmpty(values ...string) string {
+	for i := range values {
+		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
+			return candidate
+		}
+	}
+	return ""
+}
+
+// streamAccumulator folds streamed completion deltas into complete choices,
+// keeping one choiceAccumulator per choice index.
+type streamAccumulator struct {
+	choices map[int]*choiceAccumulator
+}
+
+// newStreamAccumulator returns an accumulator with an empty, ready-to-use
+// choice map.
+func newStreamAccumulator() *streamAccumulator {
+	acc := new(streamAccumulator)
+	acc.choices = map[int]*choiceAccumulator{}
+	return acc
+}
+
+// Apply merges every choice of delta into the accumulator registered for that
+// choice's index, creating the accumulator on first use. A nil delta is a no-op.
+func (a *streamAccumulator) Apply(delta *llm.ChatCompletionDelta) {
+	if delta != nil {
+		for i := range delta.Choices {
+			a.ensure(delta.Choices[i].Index).apply(delta.Choices[i])
+		}
+	}
+}
+
+// ensure returns the accumulator stored for index, first registering a new
+// one (role preset to "assistant", empty tool-call map) when none exists.
+func (a *streamAccumulator) ensure(index int) *choiceAccumulator {
+	existing, found := a.choices[index]
+	if !found {
+		existing = &choiceAccumulator{
+			role:      "assistant",
+			toolCalls: map[string]*toolCallAccumulator{},
+		}
+		a.choices[index] = existing
+	}
+	return existing
+}
+
+// Result builds the accumulated choice, or returns nil when no delta carried
+// any choice.
+//
+// Index 0 is used when present. If the stream only produced other indices,
+// the lowest one is used instead of dereferencing a missing entry.
+func (a *streamAccumulator) Result() *llm.ChatCompletionChoice {
+	if len(a.choices) == 0 {
+		return nil
+	}
+
+	index := 0
+	if _, ok := a.choices[0]; !ok {
+		first := true
+		for candidate := range a.choices {
+			if first || candidate < index {
+				index, first = candidate, false
+			}
+		}
+	}
+
+	choice := a.choices[index].build(index)
+	return &choice
+}
+
+// choiceAccumulator collects the streamed fragments belonging to one choice.
+type choiceAccumulator struct {
+	// role is overwritten whenever a delta carries a non-empty role.
+	role         string
+	// finishReason keeps the last non-empty finish reason seen.
+	finishReason string
+	// content concatenates the textual content fragments in arrival order.
+	content      strings.Builder
+	// toolCalls maps a tool-call ID to its partially assembled call.
+	toolCalls    map[string]*toolCallAccumulator
+	// toolOrder records tool-call IDs in the order they were first seen.
+	toolOrder    []string
+}
+
+// apply merges one streamed choice delta: a non-empty role and finish reason
+// overwrite the stored values, content is appended, and each tool-call
+// fragment is routed to addOrUpdateToolCall with its position in the delta.
+func (c *choiceAccumulator) apply(choice llm.ChatCompletionDeltaChoice) {
+	if role := choice.Delta.Role; role != "" {
+		c.role = role
+	}
+
+	if body := choice.Delta.Content; body != nil {
+		c.appendContent(body)
+	}
+
+	for position, fragment := range choice.Delta.ToolCalls {
+		c.addOrUpdateToolCall(position, fragment)
+	}
+
+	if reason := choice.FinishReason; reason != "" {
+		c.finishReason = reason
+	}
+}
+
+// appendContent appends the textual part of a content fragment to the buffer.
+// Strings are written verbatim, slices are walked recursively, maps
+// contribute their string "text" entry (if any), nil is ignored, and any
+// other value is written in its fmt.Sprint form.
+func (c *choiceAccumulator) appendContent(content interface{}) {
+	if content == nil {
+		return
+	}
+	switch piece := content.(type) {
+	case string:
+		c.content.WriteString(piece)
+	case map[string]interface{}:
+		text, isString := piece["text"].(string)
+		if isString {
+			c.content.WriteString(text)
+		}
+	case []interface{}:
+		for i := range piece {
+			c.appendContent(piece[i])
+		}
+	default:
+		c.content.WriteString(fmt.Sprint(piece))
+	}
+}
+
+// addOrUpdateToolCall merges one streamed tool-call fragment into the choice.
+//
+// Fragments are matched to a call by ID. A fragment with no ID is handled in
+// one of two ways:
+//   - no function name and at least one call already started: it is treated
+//     as a continuation of the most recently started call, so its argument
+//     bytes are appended there. Streaming providers typically send the ID and
+//     name only on a call's first fragment.
+//   - otherwise: it starts a new call under a synthetic "tool_<n>" ID, where n
+//     is the number of calls seen so far, so synthetic IDs never collide with
+//     each other.
+//
+// idx is the fragment's position inside its delta. It is kept for signature
+// compatibility and no longer takes part in ID synthesis.
+func (c *choiceAccumulator) addOrUpdateToolCall(idx int, call llm.ToolCall) {
+	id := call.ID
+	if id == "" {
+		if call.Function.Name == "" && len(c.toolOrder) > 0 {
+			id = c.toolOrder[len(c.toolOrder)-1]
+		} else {
+			id = fmt.Sprintf("tool_%d", len(c.toolOrder))
+		}
+	}
+
+	builder, ok := c.toolCalls[id]
+	if !ok {
+		builder = &toolCallAccumulator{}
+		builder.call.ID = id
+		c.toolCalls[id] = builder
+		c.toolOrder = append(c.toolOrder, id)
+	}
+
+	if call.Type != "" {
+		builder.call.Type = call.Type
+	}
+	if call.Function.Name != "" {
+		builder.call.Function.Name = call.Function.Name
+	}
+	if len(call.Function.Arguments) > 0 {
+		// Arguments arrive as raw JSON fragments; keep the running
+		// concatenation mirrored on the call itself.
+		builder.args.Write(call.Function.Arguments)
+		builder.call.Function.Arguments = json.RawMessage(builder.args.String())
+	}
+}
+
+func (c *choiceAccumulator) build(index int) llm.ChatCompletionChoice {
+	message := llm.ChatMessage{
+		Role: c.role,
+	}
+	if c.content.Len() > 0 {
+		message.Content = c.content.String()
+	}
+	if len(c.toolOrder) > 0 {
+		message.ToolCalls = make([]llm.ToolCall, 0, len(c.toolOrder))
+		for _, id := range c.toolOrder {
+			builder := c.toolCalls[id]
+			call := builder.call
+			if len(call.Function.Arguments) == 0 && builder.args.Len() > 0 {
+				call.Function.Arguments = json.RawMessage(builder.args.String())
+			}
+			message.ToolCalls = append(message.ToolCalls, call)
+		}
+	}
+
+	return llm.ChatCompletionChoice{
+		Index:        index,
+		Message:      message,
+		FinishReason: c.finishReason,
+	}
+}
+
+// toolCallAccumulator holds one tool call while its streamed fragments are
+// being merged: call carries the ID/type/name seen so far and args buffers
+// the concatenated argument bytes.
+type toolCallAccumulator struct {
+	call llm.ToolCall
+	args strings.Builder
+}
diff --git a/services/response-api/internal/domain/tool/types.go b/services/response-api/internal/domain/tool/types.go
new file mode 100644
index 00000000..bd5c236e
--- /dev/null
+++ b/services/response-api/internal/domain/tool/types.go
@@ -0,0 +1,115 @@
+package tool
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"jan-server/services/response-api/internal/domain/llm"
+)
+
+// ExecutionStatus represents the lifecycle of a tool execution attempt.
+type ExecutionStatus string
+
+// Lifecycle values. The orchestrator creates executions as running and then
+// moves them to completed or failed.
+// NOTE(review): pending is not assigned by the orchestrator code in this
+// change — presumably reserved for queued executions; confirm.
+const (
+	ExecutionStatusPending   ExecutionStatus = "pending"
+	ExecutionStatusRunning   ExecutionStatus = "running"
+	ExecutionStatusCompleted ExecutionStatus = "completed"
+	ExecutionStatusFailed    ExecutionStatus = "failed"
+)
+
+// Call encapsulates one tool call requested by the LLM.
+// Arguments holds the decoded JSON arguments and is nil when the call carried
+// none (see ParseToolCall).
+type Call struct {
+	ID        string                 `json:"id"`
+	Name      string                 `json:"name"`
+	Arguments map[string]interface{} `json:"arguments"`
+}
+
+// Result captures the outcome returned by the MCP tool runner.
+// IsError marks a failure reported by the tool itself (as opposed to an error
+// returned by MCPClient.CallTool); Error carries the accompanying message.
+type Result struct {
+	ToolName string       `json:"tool_name"`
+	Content  []MCPContent `json:"content"`
+	IsError  bool         `json:"is_error"`
+	Error    string       `json:"error,omitempty"`
+}
+
+// MCPContent represents values inside the MCP streaming payload.
+// Type discriminates the part; the orchestrator only forwards parts whose
+// Type is "text" (using Text) back to the model.
+type MCPContent struct {
+	Type     string                 `json:"type"`
+	Text     string                 `json:"text,omitempty"`
+	Resource map[string]interface{} `json:"resource,omitempty"`
+}
+
+// Execution links a requested tool call to its persisted record.
+//
+// The orchestrator fills CallID, ToolName, Arguments, Status, Result,
+// ErrorMessage, ExecutionOrder and the timestamps. It does not set ID or
+// ResponseID.
+type Execution struct {
+	ID             uint            `json:"id"`
+	ResponseID     uint            `json:"response_id"`
+	CallID         string          `json:"call_id"`
+	ToolName       string          `json:"tool_name"`
+	Arguments      map[string]any  `json:"arguments"`
+	// Result is nil when the tool call itself returned an error.
+	Result         *Result         `json:"result,omitempty"`
+	Status         ExecutionStatus `json:"status"`
+	ErrorMessage   string          `json:"error_message,omitempty"`
+	// ExecutionOrder is the 1-based position of the call within one orchestration run.
+	ExecutionOrder int             `json:"execution_order"`
+	CreatedAt      time.Time       `json:"created_at"`
+	UpdatedAt      time.Time       `json:"updated_at"`
+}
+
+// MCPClient abstracts calls to mcp-tools /v1/mcp endpoint.
+type MCPClient interface {
+	// ListTools returns the metadata of the tools that can be offered to the LLM.
+	ListTools(ctx context.Context) ([]MCPTool, error)
+	// CallTool runs the named tool with the decoded arguments. Callers treat a
+	// non-nil error and a Result with IsError set as two distinct failure modes.
+	CallTool(ctx context.Context, name string, args map[string]interface{}) (*Result, error)
+}
+
+// StreamObserver receives live updates during orchestration.
+type StreamObserver interface {
+	// OnDelta is called for each non-nil streamed completion delta.
+	OnDelta(delta llm.ChatCompletionDelta)
+	// OnToolCall is called before a parsed tool call is executed.
+	OnToolCall(call Call)
+	// OnToolResult is called after a tool call finishes; result is nil when
+	// the call returned an error.
+	OnToolResult(callID string, result *Result)
+}
+
+// MCPTool describes the tool metadata returned by mcp-tools.
+// InputSchema is handed to the LLM unchanged as the function's parameters
+// (see ToLLMTool).
+type MCPTool struct {
+	Name        string                 `json:"name"`
+	Description string                 `json:"description"`
+	InputSchema map[string]interface{} `json:"inputSchema"`
+}
+
+// ToLLMTool converts MCP metadata into OpenAI-compatible tool definition.
+func (t MCPTool) ToLLMTool() llm.ToolDefinition {
+	return llm.ToolDefinition{
+		Type: "function",
+		Function: llm.ToolFunctionSchema{
+			Name:        t.Name,
+			Description: t.Description,
+			Parameters:  t.InputSchema,
+		},
+	}
+}
+
+// ParseToolCall converts an LLM provided tool call into the domain Call struct.
+//
+// The raw arguments may be a JSON object, or a JSON string that itself
+// contains a JSON object (double-encoded). Absent arguments and the empty
+// JSON string ("") both mean "no arguments" and leave Call.Arguments nil.
+//
+// An error is returned when the arguments are neither an object nor a string
+// (the object-decoding error is reported), or when a non-empty string does
+// not contain a JSON object.
+func ParseToolCall(call llm.ToolCall) (Call, error) {
+	var args map[string]interface{}
+	if len(call.Function.Arguments) > 0 {
+		// First, try to unmarshal directly as JSON object
+		if err := json.Unmarshal(call.Function.Arguments, &args); err != nil {
+			// If that fails, the Arguments might be a JSON string (double-encoded)
+			// Try to unmarshal as string first, then parse that string as JSON
+			var argsStr string
+			if strErr := json.Unmarshal(call.Function.Arguments, &argsStr); strErr != nil {
+				// Neither direct object nor string worked, return original error
+				return Call{}, err
+			}
+			// An empty string is how some providers encode a parameterless
+			// call; there is nothing to parse in that case.
+			if argsStr != "" {
+				if parseErr := json.Unmarshal([]byte(argsStr), &args); parseErr != nil {
+					return Call{}, fmt.Errorf("parse arguments string: %w", parseErr)
+				}
+			}
+		}
+	}
+	return Call{
+		ID:        call.ID,
+		Name:      call.Function.Name,
+		Arguments: args,
+	}, nil
+}
diff --git a/services/response-api/internal/infrastructure/auth/auth.go b/services/response-api/internal/infrastructure/auth/auth.go
new file mode 100644
index 00000000..b7baac27
--- /dev/null
+++ b/services/response-api/internal/infrastructure/auth/auth.go
@@ -0,0 +1,150 @@
+package auth
+
+import (
+	"context"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/MicahParks/keyfunc/v2"
+	"github.com/gin-gonic/gin"
+	"github.com/golang-jwt/jwt/v5"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/response-api/internal/config"
+)
+
+// Validator validates JWTs using JWKS.
+// jwks is only populated by NewValidator when cfg.AuthEnabled is true; with
+// auth disabled it stays nil and Middleware lets every request through.
+type Validator struct {
+	cfg  *config.Config
+	log  zerolog.Logger
+	jwks *keyfunc.JWKS
+}
+
+// NewValidator builds a Validator. With auth disabled it returns immediately
+// without a key set; otherwise it fetches the JWKS from cfg.AuthJWKSURL
+// (hourly refresh, refresh on unknown key IDs, refresh errors logged) and
+// returns the fetch error if that fails.
+func NewValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*Validator, error) {
+	validator := &Validator{cfg: cfg, log: log}
+	if !cfg.AuthEnabled {
+		return validator, nil
+	}
+
+	onRefreshError := func(err error) {
+		log.Error().Err(err).Msg("jwks refresh error")
+	}
+	keySet, err := keyfunc.Get(cfg.AuthJWKSURL, keyfunc.Options{
+		Ctx:                 ctx,
+		RefreshErrorHandler: onRefreshError,
+		RefreshInterval:     time.Hour,
+		RefreshUnknownKID:   true,
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	validator.jwks = keySet
+	return validator, nil
+}
+
+// Middleware enforces JWT auth when enabled.
+//
+// With a nil validator or auth disabled, the returned handler is a
+// pass-through. Otherwise each request must carry a bearer token that:
+//   - parses and verifies against the JWKS with an RS256/RS384/RS512 signature,
+//   - has an accepted issuer (when cfg.AuthIssuer is non-blank),
+//   - matches the configured audience if the token has an "aud" claim.
+//
+// Failures abort the request with 401 and a JSON {"error": ...} body. On
+// success the parsed token is stored in the gin context under "auth_token".
+func (v *Validator) Middleware() gin.HandlerFunc {
+	if v == nil || !v.cfg.AuthEnabled {
+		return func(c *gin.Context) {
+			c.Next()
+		}
+	}
+
+	return func(c *gin.Context) {
+		tokenString := bearerToken(c.GetHeader("Authorization"))
+		if tokenString == "" {
+			abortUnauthorized(c, "missing bearer token")
+			return
+		}
+
+		// Signature, standard time claims and issuer are checked by the parser.
+		token, err := jwt.Parse(tokenString, v.jwks.Keyfunc,
+			jwt.WithIssuer(v.cfg.AuthIssuer),
+			jwt.WithValidMethods([]string{"RS256", "RS384", "RS512"}),
+		)
+		if err != nil || !token.Valid {
+			abortUnauthorized(c, "invalid token")
+			return
+		}
+
+		claims, ok := token.Claims.(jwt.MapClaims)
+		if !ok {
+			abortUnauthorized(c, "invalid token claims")
+			return
+		}
+
+		// NOTE(review): jwt.WithIssuer above presumably already rejects any
+		// token whose iss differs from cfg.AuthIssuer, which would make the
+		// two hard-coded issuers below unreachable — confirm the intent
+		// (and whether local/dev issuers should be accepted at all here).
+		issuerClaim, _ := claims["iss"].(string)
+		if issuer := strings.TrimSpace(v.cfg.AuthIssuer); issuer != "" {
+			allowed := map[string]struct{}{
+				issuer:                             {},
+				"http://localhost:8085/realms/jan": {},
+				"http://keycloak:8085/realms/jan":  {},
+			}
+			if _, exists := allowed[issuerClaim]; !exists {
+				abortUnauthorized(c, "invalid token issuer")
+				return
+			}
+		}
+
+		// The expected audience is read from cfg.Account.
+		// NOTE(review): a token without any "aud" claim skips this check
+		// entirely — confirm that this is intended.
+		if audience := strings.TrimSpace(v.cfg.Account); audience != "" {
+			audClaim, hasAud := claims["aud"]
+			if hasAud {
+				switch aud := audClaim.(type) {
+				case string:
+					if aud != audience {
+						abortUnauthorized(c, "invalid token audience")
+						return
+					}
+				case []any:
+					// Multi-valued audience: one matching entry is enough.
+					found := false
+					for _, entry := range aud {
+						if s, ok := entry.(string); ok && s == audience {
+							found = true
+							break
+						}
+					}
+					if !found {
+						abortUnauthorized(c, "invalid token audience")
+						return
+					}
+				default:
+					abortUnauthorized(c, "invalid token audience")
+					return
+				}
+			}
+		}
+
+		c.Set("auth_token", token)
+		c.Next()
+	}
+}
+
+// Ready reports whether requests can be authenticated: always true for a nil
+// validator or when auth is disabled, otherwise true once a JWKS is loaded.
+func (v *Validator) Ready() bool {
+	authDisabled := v == nil || !v.cfg.AuthEnabled
+	return authDisabled || v.jwks != nil
+}
+
+// bearerToken extracts the credential from an Authorization header value of
+// the form "Bearer <token>" (scheme matched case-insensitively). It returns
+// the whitespace-trimmed token, or "" when the header is empty, has no space
+// separator, or uses another scheme.
+func bearerToken(header string) string {
+	separator := strings.Index(header, " ")
+	if separator < 0 {
+		return ""
+	}
+	scheme, credential := header[:separator], header[separator+1:]
+	if !strings.EqualFold(scheme, "Bearer") {
+		return ""
+	}
+	return strings.TrimSpace(credential)
+}
+
+// abortUnauthorized stops the handler chain and answers with HTTP 401 and a
+// JSON body of the form {"error": message}.
+func abortUnauthorized(c *gin.Context, message string) {
+	body := gin.H{"error": message}
+	c.AbortWithStatusJSON(http.StatusUnauthorized, body)
+}
diff --git a/services/response-api/internal/infrastructure/database/database.go b/services/response-api/internal/infrastructure/database/database.go
new file mode 100644
index 00000000..9bfe5091
--- /dev/null
+++ b/services/response-api/internal/infrastructure/database/database.go
@@ -0,0 +1,115 @@
+package database
+
+import (
+	"database/sql"
+	"errors"
+	"fmt"
+	"net/url"
+	"strings"
+	"time"
+
+	_ "github.com/lib/pq"
+	"gorm.io/driver/postgres"
+	"gorm.io/gorm"
+	gormlogger "gorm.io/gorm/logger"
+	"gorm.io/gorm/schema"
+)
+
+// Config controls GORM/PostgreSQL connectivity.
+type Config struct {
+	// DSN is required; Connect fails when it is empty.
+	DSN             string
+	// Pool settings are only applied when greater than zero.
+	MaxIdleConns    int
+	MaxOpenConns    int
+	ConnMaxLifetime time.Duration
+	// LogLevel of 0 (unset) is replaced by gormlogger.Warn in Connect.
+	LogLevel        gormlogger.LogLevel
+}
+
+// Connect initializes a GORM connection using the provided config.
+//
+// Unless the DSN already mentions search_path, "search_path=response_api" is
+// added to it: as a query parameter for URL-style DSNs ("scheme://...") and
+// as a space-separated setting for keyword/value DSNs ("host=... dbname=...").
+// The target database is created if missing, the connection is opened with
+// prepared statements and singular table names, and any positive pool
+// settings from cfg are applied.
+//
+// Errors are returned for an empty DSN and for failures while ensuring the
+// database exists, opening the connection, or retrieving the underlying
+// *sql.DB.
+func Connect(cfg Config) (*gorm.DB, error) {
+	if cfg.DSN == "" {
+		return nil, fmt.Errorf("database DSN is empty")
+	}
+
+	// Add search_path to DSN if not present
+	dsn := cfg.DSN
+	if !strings.Contains(dsn, "search_path") {
+		switch {
+		case !strings.Contains(dsn, "://"):
+			// Keyword/value form: settings are whitespace separated, so a
+			// "?..." suffix would be glued onto the last value.
+			dsn += " search_path=response_api"
+		case strings.Contains(dsn, "?"):
+			dsn += "&search_path=response_api"
+		default:
+			dsn += "?search_path=response_api"
+		}
+	}
+
+	if err := ensureDatabaseExists(dsn); err != nil {
+		return nil, fmt.Errorf("ensure database: %w", err)
+	}
+
+	if cfg.LogLevel == 0 {
+		cfg.LogLevel = gormlogger.Warn
+	}
+
+	db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
+		PrepareStmt: true,
+		NamingStrategy: schema.NamingStrategy{
+			SingularTable: true,
+		},
+		Logger: gormlogger.Default.LogMode(cfg.LogLevel),
+	})
+	if err != nil {
+		return nil, fmt.Errorf("connect database: %w", err)
+	}
+
+	sqlDB, err := db.DB()
+	if err != nil {
+		return nil, fmt.Errorf("retrieve sql db: %w", err)
+	}
+
+	if cfg.MaxIdleConns > 0 {
+		sqlDB.SetMaxIdleConns(cfg.MaxIdleConns)
+	}
+	if cfg.MaxOpenConns > 0 {
+		sqlDB.SetMaxOpenConns(cfg.MaxOpenConns)
+	}
+	if cfg.ConnMaxLifetime > 0 {
+		sqlDB.SetConnMaxLifetime(cfg.ConnMaxLifetime)
+	}
+
+	return db, nil
+}
+
+// ensureDatabaseExists creates the database named in a URL-style DSN when it
+// does not exist yet.
+//
+// It connects to the "postgres" maintenance database on the same server,
+// looks the target up in pg_database, and issues CREATE DATABASE if it is
+// missing. DSNs that are not postgres:// or postgresql:// URLs (for example
+// keyword/value strings) are skipped, as are DSNs with no database name or
+// that already point at "postgres".
+func ensureDatabaseExists(dsn string) error {
+	u, err := url.Parse(dsn)
+	if err != nil {
+		return nil // non-URL formats are ignored
+	}
+	// url.Parse accepts most keyword/value DSNs as a bare path without
+	// reporting an error, so the scheme has to be checked explicitly.
+	if u.Scheme != "postgres" && u.Scheme != "postgresql" {
+		return nil
+	}
+
+	dbName := strings.TrimPrefix(u.Path, "/")
+	if dbName == "" || dbName == "postgres" {
+		return nil
+	}
+
+	// Same credentials and options, but pointed at the maintenance database.
+	adminURL := *u
+	adminURL.Path = "/postgres"
+
+	sqlDB, err := sql.Open("postgres", adminURL.String())
+	if err != nil {
+		return err
+	}
+	defer sqlDB.Close()
+
+	var exists bool
+	err = sqlDB.QueryRow("SELECT EXISTS (SELECT 1 FROM pg_database WHERE datname = $1)", dbName).Scan(&exists)
+	if err != nil && !errors.Is(err, sql.ErrNoRows) {
+		return err
+	}
+	if exists {
+		return nil
+	}
+
+	// CREATE DATABASE cannot take a bind parameter; quote the identifier instead.
+	_, err = sqlDB.Exec("CREATE DATABASE " + pqQuoteIdentifier(dbName))
+	return err
+}
+
+func pqQuoteIdentifier(ident string) string {
+	return `"` + strings.ReplaceAll(ident, `"`, `""`) + `"`
+}
diff --git a/services/response-api/internal/infrastructure/database/entities/conversation.go b/services/response-api/internal/infrastructure/database/entities/conversation.go
new file mode 100644
index 00000000..fe96babb
--- /dev/null
+++ b/services/response-api/internal/infrastructure/database/entities/conversation.go
@@ -0,0 +1,22 @@
+package entities
+
+import (
+	"time"
+
+	"gorm.io/datatypes"
+)
+
+// Conversation stores metadata for threaded chats.
+// PublicID carries a unique index and is limited to 64 characters; Metadata
+// is stored in a jsonb column.
+type Conversation struct {
+	ID        uint           `gorm:"primaryKey"`
+	PublicID  string         `gorm:"uniqueIndex;size:64"`
+	UserID    string         `gorm:"size:64"`
+	Metadata  datatypes.JSON `gorm:"type:jsonb"`
+	CreatedAt time.Time
+	UpdatedAt time.Time
+}
+
+// TableName specifies the table name for Conversation.
+// It returns the plural "conversations" explicitly.
+func (Conversation) TableName() string {
+	return "conversations"
+}
diff --git a/services/response-api/internal/infrastructure/database/entities/conversation_item.go b/services/response-api/internal/infrastructure/database/entities/conversation_item.go
new file mode 100644
index 00000000..7f264be8
--- /dev/null
+++ b/services/response-api/internal/infrastructure/database/entities/conversation_item.go
@@ -0,0 +1,23 @@
+package entities
+
+import (
+	"time"
+
+	"gorm.io/datatypes"
+)
+
+// ConversationItem stores each message for a conversation.
+// ConversationID and Sequence are indexed; Content is stored in a jsonb column.
+// NOTE(review): Sequence presumably orders the items within one conversation —
+// confirm against the repository queries.
+type ConversationItem struct {
+	ID             uint           `gorm:"primaryKey"`
+	ConversationID uint           `gorm:"index"`
+	Role           string         `gorm:"size:32"`
+	Status         string         `gorm:"size:32"`
+	Content        datatypes.JSON `gorm:"type:jsonb"`
+	Sequence       int            `gorm:"index"`
+	CreatedAt      time.Time
+}
+
+// TableName specifies the table name for ConversationItem.
+// It returns the plural "conversation_items" explicitly.
+func (ConversationItem) TableName() string {
+	return "conversation_items"
+}
diff --git a/services/response-api/internal/infrastructure/database/entities/response.go b/services/response-api/internal/infrastructure/database/entities/response.go
new file mode 100644
index 00000000..eb1f9e0b
--- /dev/null
+++ b/services/response-api/internal/infrastructure/database/entities/response.go
@@ -0,0 +1,51 @@
+package entities
+
+import (
+	"time"
+
+	"gorm.io/datatypes"
+	"gorm.io/gorm"
+)
+
+// TableName specifies the table name for Response.
+// It returns the plural "responses" explicitly.
+func (Response) TableName() string {
+	return "responses"
+}
+
+// Response represents the persisted response record.
+//
+// Input, Output, Metadata, Usage and Error are stored in jsonb columns.
+// ConversationID/Conversation are optional (pointer-typed), as are the
+// lifecycle timestamps from QueuedAt to FailedAt.
+type Response struct {
+	ID                 uint           `gorm:"primaryKey"`
+	PublicID           string         `gorm:"uniqueIndex;size:64"`
+	UserID             string         `gorm:"size:64"`
+	Model              string         `gorm:"size:128"`
+	SystemPrompt       *string        `gorm:"type:text"`
+	Input              datatypes.JSON `gorm:"type:jsonb"`
+	Output             datatypes.JSON `gorm:"type:jsonb"`
+	Status             string         `gorm:"size:32;index:idx_status"`
+	Stream             bool
+	Background         bool           `gorm:"default:false"`
+	Store              bool           `gorm:"default:false"`
+	// NOTE(review): the credential is kept in a plain text column as far as
+	// this entity shows — confirm whether it is encrypted or cleared elsewhere.
+	APIKey             *string        `gorm:"type:text"` // Store API key (X-API-Key or Bearer token) for background tasks
+	Metadata           datatypes.JSON `gorm:"type:jsonb"`
+	Usage              datatypes.JSON `gorm:"type:jsonb"`
+	Error              datatypes.JSON `gorm:"type:jsonb"`
+	ConversationID     *uint
+	Conversation       *Conversation
+	PreviousResponseID *string `gorm:"size:64"`
+	Object             string  `gorm:"size:32"`
+	CreatedAt          time.Time
+	UpdatedAt          time.Time
+	QueuedAt           *time.Time
+	StartedAt          *time.Time
+	CompletedAt        *time.Time
+	CancelledAt        *time.Time
+	FailedAt           *time.Time
+}
+
+// BeforeCreate ensures defaults.
+// Object is set to "response" when it is empty; the hook never fails.
+func (r *Response) BeforeCreate(tx *gorm.DB) error {
+	if r.Object == "" {
+		r.Object = "response"
+	}
+	return nil
+}
diff --git a/services/response-api/internal/infrastructure/database/entities/tool_execution.go b/services/response-api/internal/infrastructure/database/entities/tool_execution.go
new file mode 100644
index 00000000..aa70f1a8
--- /dev/null
+++ b/services/response-api/internal/infrastructure/database/entities/tool_execution.go
@@ -0,0 +1,27 @@
+package entities
+
+import (
+	"time"
+
+	"gorm.io/datatypes"
+)
+
+// ToolExecution persists each invocation performed via MCP tools.
+// ResponseID is indexed; Arguments and Result are stored in jsonb columns.
+// The fields mirror the tool package's Execution record, with the structured
+// values held as JSON.
+type ToolExecution struct {
+	ID             uint           `gorm:"primaryKey"`
+	ResponseID     uint           `gorm:"index"`
+	CallID         string         `gorm:"size:64"`
+	ToolName       string         `gorm:"size:128"`
+	Arguments      datatypes.JSON `gorm:"type:jsonb"`
+	Result         datatypes.JSON `gorm:"type:jsonb"`
+	Status         string         `gorm:"size:32"`
+	ErrorMessage   string         `gorm:"type:text"`
+	ExecutionOrder int
+	CreatedAt      time.Time
+	UpdatedAt      time.Time
+}
+
+// TableName specifies the table name for ToolExecution.
+// It returns the plural "tool_executions" explicitly.
+func (ToolExecution) TableName() string {
+	return "tool_executions"
+}
diff --git a/services/response-api/internal/infrastructure/database/migrate.go b/services/response-api/internal/infrastructure/database/migrate.go
new file mode 100644
index 00000000..e04c92f2
--- /dev/null
+++ b/services/response-api/internal/infrastructure/database/migrate.go
@@ -0,0 +1,120 @@
+package database
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io/fs"
+
+	"github.com/golang-migrate/migrate/v4"
+	"github.com/golang-migrate/migrate/v4/database/postgres"
+	iofs "github.com/golang-migrate/migrate/v4/source/iofs"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+
+	"jan-server/services/response-api/migrations"
+)
+
+// AutoMigrate applies all pending SQL migrations bundled with the service.
+//
+// The embedded migration files are listed in the log, the response_api schema
+// is created when missing, and golang-migrate then runs every pending
+// migration over a dedicated connection taken from db's pool. A database left
+// in the dirty state is forced back to its recorded version before migrating.
+//
+// ctx is passed to the schema statement, to the connection acquisition and to
+// the driver initialisation. The result is named so that the deferred cleanup
+// can report a failed Close when the migration itself succeeded; an earlier
+// error is never overwritten by a Close error.
+func AutoMigrate(ctx context.Context, db *gorm.DB, log zerolog.Logger) (err error) {
+	// List migration files
+	log.Info().Msg("Scanning migration files...")
+	entries, err := fs.ReadDir(migrations.FS, ".")
+	if err != nil {
+		return fmt.Errorf("read migration directory: %w", err)
+	}
+
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			log.Info().Str("file", entry.Name()).Msg("Found migration file")
+		}
+	}
+
+	sqlDB, err := db.DB()
+	if err != nil {
+		return fmt.Errorf("retrieve sql db: %w", err)
+	}
+
+	// Ensure response_api schema exists before running migrations. A failure
+	// is only logged: the driver initialisation below fails if the schema is
+	// genuinely unusable.
+	if schemaErr := db.WithContext(ctx).Exec("CREATE SCHEMA IF NOT EXISTS response_api").Error; schemaErr != nil {
+		log.Warn().Err(schemaErr).Msg("Failed to create response_api schema, may already exist")
+	} else {
+		log.Info().Msg("Created response_api schema")
+	}
+
+	// Migrations run on one dedicated connection rather than the shared pool.
+	conn, err := sqlDB.Conn(ctx)
+	if err != nil {
+		return fmt.Errorf("acquire dedicated connection: %w", err)
+	}
+
+	driver, err := postgres.WithConnection(ctx, conn, &postgres.Config{
+		MigrationsTable: "schema_migrations",
+		SchemaName:      "response_api",
+	})
+	if err != nil {
+		// The driver never took ownership of conn, so release it here.
+		_ = conn.Close()
+		return fmt.Errorf("initialize postgres driver: %w", err)
+	}
+	defer func() {
+		// err is the named result: a Close failure is surfaced only when
+		// nothing else went wrong.
+		if closeErr := driver.Close(); err == nil && closeErr != nil {
+			err = fmt.Errorf("close migration connection: %w", closeErr)
+		}
+	}()
+
+	source, err := iofs.New(migrations.FS, ".")
+	if err != nil {
+		return fmt.Errorf("load migrations: %w", err)
+	}
+	defer func() {
+		if closeErr := source.Close(); err == nil && closeErr != nil {
+			err = fmt.Errorf("close migration source: %w", closeErr)
+		}
+	}()
+
+	migrator, err := migrate.NewWithInstance("iofs", source, "postgres", driver)
+	if err != nil {
+		return fmt.Errorf("create migrator: %w", err)
+	}
+
+	// Check current version and dirty state. A lookup failure is not fatal;
+	// it is logged and the migration is still attempted.
+	version, dirty, versionErr := migrator.Version()
+	switch {
+	case versionErr == nil:
+		log.Info().Uint("version", version).Bool("dirty", dirty).Msg("Current migration state")
+	case errors.Is(versionErr, migrate.ErrNilVersion):
+		log.Info().Msg("No migrations have been applied yet")
+	default:
+		log.Warn().Err(versionErr).Msg("Error getting migration version")
+	}
+
+	// If database is dirty, force the version to allow re-running
+	if dirty {
+		log.Warn().Uint("version", version).Msg("Database is in dirty state, forcing version...")
+		// Force to the current version to clear dirty state
+		if forceErr := migrator.Force(int(version)); forceErr != nil {
+			return fmt.Errorf("force version %d to clear dirty state: %w", version, forceErr)
+		}
+		log.Info().Msg("Dirty state cleared")
+	}
+
+	log.Info().Msg("Applying migrations...")
+	switch upErr := migrator.Up(); {
+	case upErr == nil:
+		log.Info().Msg("Migrations applied successfully")
+	case errors.Is(upErr, migrate.ErrNoChange):
+		log.Info().Msg("No new migrations to apply")
+	default:
+		log.Error().Err(upErr).Msg("Failed to apply migrations")
+		return fmt.Errorf("apply migrations: %w", upErr)
+	}
+
+	// Get final version
+	if finalVersion, _, finalErr := migrator.Version(); finalErr == nil {
+		log.Info().Uint("version", finalVersion).Msg("Current migration version")
+	}
+
+	log.Info().Msg("database schema up to date")
+	return nil
+}
diff --git a/services/response-api/internal/infrastructure/llmprovider/client.go b/services/response-api/internal/infrastructure/llmprovider/client.go
new file mode 100644
index 00000000..422634e5
--- /dev/null
+++ b/services/response-api/internal/infrastructure/llmprovider/client.go
@@ -0,0 +1,208 @@
+package llmprovider
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/go-resty/resty/v2"
+
+	"jan-server/services/response-api/internal/domain/llm"
+)
+
+// Client implements the llm.Provider interface.
+type Client struct {
+	// httpClient is the Resty client used for non-streaming requests.
+	httpClient *resty.Client
+	// baseURL is the llm-api root; the streaming call prepends it to the
+	// endpoint path when it builds its own net/http request.
+	baseURL    string
+}
+
+// NewClient builds a Client whose Resty client targets baseURL, sends a JSON
+// Content-Type header by default and applies a 900 second timeout.
+func NewClient(baseURL string) *Client {
+	rc := resty.New()
+	rc.SetBaseURL(baseURL)
+	rc.SetHeader("Content-Type", "application/json")
+	rc.SetTimeout(900 * time.Second)
+
+	return &Client{httpClient: rc, baseURL: baseURL}
+}
+
+// CreateChatCompletion calls llm-api /v1/chat/completions.
+func (c *Client) CreateChatCompletion(ctx context.Context, req llm.ChatCompletionRequest) (*llm.ChatCompletionResponse, error) {
+	// Convert to API-compatible format with string content
+	apiReq := convertToAPIRequest(req)
+
+	var completion llm.ChatCompletionResponse
+	request := c.httpClient.R().
+		SetContext(ctx).
+		SetBody(apiReq).
+		SetResult(&completion)
+
+	if token := llm.AuthTokenFromContext(ctx); token != "" {
+		// If token starts with "Bearer ", use Authorization header
+		// Otherwise, treat as X-API-Key
+		if strings.HasPrefix(token, "Bearer ") {
+			request.SetHeader("Authorization", token)
+		} else {
+			request.SetHeader("X-API-Key", token)
+		}
+	}
+
+	resp, err := request.Post("/v1/chat/completions")
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.IsError() {
+		return nil, fmt.Errorf("llm api error: %s", resp.String())
+	}
+	return &completion, nil
+}
+
+// CreateChatCompletionStream calls llm-api /v1/chat/completions with streaming enabled.
+func (c *Client) CreateChatCompletionStream(ctx context.Context, req llm.ChatCompletionRequest) (llm.Stream, error) {
+	req.Stream = true
+
+	body, err := json.Marshal(req)
+	if err != nil {
+		return nil, fmt.Errorf("marshal request: %w", err)
+	}
+
+	httpReq, err := http.NewRequestWithContext(ctx, "POST", c.baseURL+"/v1/chat/completions", bytes.NewReader(body))
+	if err != nil {
+		return nil, fmt.Errorf("create request: %w", err)
+	}
+
+	httpReq.Header.Set("Content-Type", "application/json")
+	httpReq.Header.Set("Accept", "text/event-stream")
+	if token := llm.AuthTokenFromContext(ctx); token != "" {
+		// If token starts with "Bearer ", use Authorization header
+		// Otherwise, treat as X-API-Key
+		if strings.HasPrefix(token, "Bearer ") {
+			httpReq.Header.Set("Authorization", token)
+		} else {
+			httpReq.Header.Set("X-API-Key", token)
+		}
+	}
+
+	httpClient := &http.Client{Timeout: 900 * time.Second}
+	resp, err := httpClient.Do(httpReq)
+	if err != nil {
+		return nil, fmt.Errorf("execute request: %w", err)
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		resp.Body.Close()
+		return nil, fmt.Errorf("llm api error: %d %s", resp.StatusCode, string(body))
+	}
+
+	return &sseStream{
+		resp:   resp,
+		reader: bufio.NewReader(resp.Body),
+	}, nil
+}
+
+// Compile-time assertion that *Client satisfies llm.Provider.
+var _ llm.Provider = (*Client)(nil)
+
+// convertToAPIRequest flattens the domain request into the generic JSON shape
+// sent to the LLM API. Every message's content is rendered through
+// GetContentAsString, so it is always a string on the wire. Optional fields
+// (tool calls, tool call ID, tools, tool choice, temperature, max tokens) are
+// emitted only when they are set.
+func convertToAPIRequest(req llm.ChatCompletionRequest) map[string]interface{} {
+	messages := make([]map[string]interface{}, 0, len(req.Messages))
+	for _, msg := range req.Messages {
+		entry := map[string]interface{}{
+			"role":    msg.Role,
+			"content": msg.GetContentAsString(),
+		}
+		if len(msg.ToolCalls) > 0 {
+			entry["tool_calls"] = msg.ToolCalls
+		}
+		if msg.ToolCallID != nil {
+			entry["tool_call_id"] = *msg.ToolCallID
+		}
+		messages = append(messages, entry)
+	}
+
+	payload := map[string]interface{}{
+		"model":    req.Model,
+		"messages": messages,
+		"stream":   req.Stream,
+	}
+
+	// Optional request-level settings.
+	if len(req.Tools) > 0 {
+		payload["tools"] = req.Tools
+	}
+	if req.ToolChoice != nil {
+		payload["tool_choice"] = req.ToolChoice
+	}
+	if req.Temperature != nil {
+		payload["temperature"] = *req.Temperature
+	}
+	if req.MaxTokens != nil {
+		payload["max_tokens"] = *req.MaxTokens
+	}
+
+	return payload
+}
+
+// sseStream implements llm.Stream backed by http.Response body with SSE parsing.
+type sseStream struct {
+	// resp is kept so that Close can release the response body.
+	resp   *http.Response
+	// reader buffers resp.Body for the line-by-line reads done by Recv.
+	reader *bufio.Reader
+}
+
+// Recv returns the next delta decoded from the event stream.
+//
+// Only "data:" lines are considered; blank lines, comment lines and other
+// fields are skipped. The space after "data:" is optional. A "[DONE]" payload
+// or the end of the body ends the stream with io.EOF. Payloads that are not
+// valid JSON for a ChatCompletionDelta are skipped rather than reported.
+// A final line that is not newline-terminated is still processed before
+// io.EOF is returned.
+func (s *sseStream) Recv() (*llm.ChatCompletionDelta, error) {
+	for {
+		line, readErr := s.reader.ReadString('\n')
+		if readErr != nil && readErr != io.EOF {
+			return nil, fmt.Errorf("read line: %w", readErr)
+		}
+		// ReadString can return data together with io.EOF when the body ends
+		// without a trailing newline, so the line is inspected before EOF is
+		// honoured.
+		atEOF := readErr == io.EOF
+
+		line = strings.TrimSpace(line)
+		if strings.HasPrefix(line, "data:") {
+			data := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
+
+			// Check for stream termination
+			if data == "[DONE]" {
+				return nil, io.EOF
+			}
+
+			if data != "" {
+				var delta llm.ChatCompletionDelta
+				if err := json.Unmarshal([]byte(data), &delta); err == nil {
+					return &delta, nil
+				}
+				// Malformed chunk: deliberately skipped, keep reading.
+			}
+		}
+
+		if atEOF {
+			return nil, io.EOF
+		}
+	}
+}
+
+// Close releases the underlying HTTP response body. It is a no-op when the
+// stream holds no response or the response has no body.
+func (s *sseStream) Close() error {
+	if s.resp == nil || s.resp.Body == nil {
+		return nil
+	}
+	return s.resp.Body.Close()
+}
diff --git a/services/response-api/internal/infrastructure/logger/logger.go b/services/response-api/internal/infrastructure/logger/logger.go
new file mode 100644
index 00000000..510a4772
--- /dev/null
+++ b/services/response-api/internal/infrastructure/logger/logger.go
@@ -0,0 +1,40 @@
+package logger
+
+import (
+	"os"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+
+	"jan-server/services/response-api/internal/config"
+)
+
+// New builds the service logger: console-formatted output on stdout with
+// RFC3339 timestamps, tagged with the service name and environment from cfg,
+// and filtered at the level named by cfg.LogLevel.
+func New(cfg *config.Config) zerolog.Logger {
+	console := zerolog.ConsoleWriter{Out: os.Stdout, TimeFormat: time.RFC3339}
+
+	fields := log.Output(console).
+		With().
+		Timestamp().
+		Str("service", cfg.ServiceName).
+		Str("environment", cfg.Environment)
+
+	return fields.Logger().Level(parseLevel(cfg.LogLevel))
+}
+
+// parseLevel maps a case-insensitive level name onto a zerolog level.
+// An empty name, or one zerolog cannot parse, yields InfoLevel.
+func parseLevel(levelString string) zerolog.Level {
+	if levelString != "" {
+		if parsed, err := zerolog.ParseLevel(strings.ToLower(levelString)); err == nil {
+			return parsed
+		}
+	}
+	return zerolog.InfoLevel
+}
diff --git a/services/response-api/internal/infrastructure/mcp/client.go b/services/response-api/internal/infrastructure/mcp/client.go
new file mode 100644
index 00000000..ac8443ff
--- /dev/null
+++ b/services/response-api/internal/infrastructure/mcp/client.go
@@ -0,0 +1,120 @@
+package mcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/go-resty/resty/v2"
+
+	"jan-server/services/response-api/internal/domain/tool"
+)
+
+// Client implements tool.MCPClient.
+// It talks JSON-RPC 2.0 over HTTP POST to the "/v1/mcp" endpoint.
+type Client struct {
+	// httpClient is the Resty client preconfigured with the MCP base URL.
+	httpClient *resty.Client
+}
+
+// NewClient returns an MCP client that sends JSON requests to baseURL.
+func NewClient(baseURL string) *Client {
+	rc := resty.New()
+	rc.SetBaseURL(baseURL)
+	rc.SetHeader("Content-Type", "application/json")
+
+	return &Client{httpClient: rc}
+}
+
+// ListTools asks the MCP server for its tool catalogue through the JSON-RPC
+// method "tools/list".
+//
+// Transport errors and JSON-RPC errors are returned as they are; an HTTP error
+// status is reported with the response body.
+func (c *Client) ListTools(ctx context.Context) ([]tool.MCPTool, error) {
+	var envelope rpcResponse
+
+	call := c.httpClient.R()
+	call.SetContext(ctx)
+	call.SetBody(map[string]interface{}{
+		"jsonrpc": "2.0",
+		"method":  "tools/list",
+		"params":  map[string]interface{}{},
+		"id":      1,
+	})
+	call.SetResult(&envelope)
+
+	httpResp, err := call.Post("/v1/mcp")
+	switch {
+	case err != nil:
+		return nil, err
+	case httpResp.IsError():
+		return nil, fmt.Errorf("mcp list tools error: %s", httpResp.String())
+	case envelope.Error != nil:
+		return nil, envelope.Error
+	}
+
+	// The JSON-RPC result carries the tools under the "tools" key.
+	var decoded struct {
+		Tools []tool.MCPTool `json:"tools"`
+	}
+	if err := json.Unmarshal(envelope.Result, &decoded); err != nil {
+		return nil, err
+	}
+	return decoded.Tools, nil
+}
+
+// CallTool runs the named tool with args through the JSON-RPC method
+// "tools/call" and returns the content and error flags reported by the server.
+//
+// The tool name doubles as the JSON-RPC request id. Transport errors and
+// JSON-RPC errors are returned as they are; an HTTP error status is reported
+// with the response body.
+func (c *Client) CallTool(ctx context.Context, name string, args map[string]interface{}) (*tool.Result, error) {
+	params := map[string]interface{}{
+		"name":      name,
+		"arguments": args,
+	}
+
+	var envelope rpcResponse
+
+	call := c.httpClient.R()
+	call.SetContext(ctx)
+	call.SetBody(map[string]interface{}{
+		"jsonrpc": "2.0",
+		"method":  "tools/call",
+		"params":  params,
+		"id":      name,
+	})
+	call.SetResult(&envelope)
+
+	httpResp, err := call.Post("/v1/mcp")
+	switch {
+	case err != nil:
+		return nil, err
+	case httpResp.IsError():
+		return nil, fmt.Errorf("mcp call error: %s", httpResp.String())
+	case envelope.Error != nil:
+		return nil, envelope.Error
+	}
+
+	var outcome struct {
+		Content []tool.MCPContent `json:"content"`
+		IsError bool              `json:"isError"`
+		Error   string            `json:"error"`
+	}
+	if err := json.Unmarshal(envelope.Result, &outcome); err != nil {
+		return nil, err
+	}
+
+	res := &tool.Result{ToolName: name}
+	res.Content = outcome.Content
+	res.IsError = outcome.IsError
+	res.Error = outcome.Error
+	return res, nil
+}
+
+// rpcResponse is the JSON-RPC response envelope decoded from the MCP endpoint.
+type rpcResponse struct {
+	JSONRPC string          `json:"jsonrpc"`
+	// Result is kept raw so each method can decode its own result shape.
+	Result  json.RawMessage `json:"result"`
+	// Error is non-nil when the server reported a JSON-RPC error.
+	Error   *rpcError       `json:"error"`
+	// ID is untyped because requests in this file use both numeric and
+	// string ids.
+	ID      interface{}     `json:"id"`
+}
+
+// rpcError is the "error" member of a JSON-RPC response. It satisfies the
+// error interface through its Error method.
+type rpcError struct {
+	Code    int    `json:"code"`
+	Message string `json:"message"`
+}
+
+func (r *rpcError) Error() string {
+	return fmt.Sprintf("mcp error (%d): %s", r.Code, r.Message)
+}
diff --git a/services/response-api/internal/infrastructure/observability/observability.go b/services/response-api/internal/infrastructure/observability/observability.go
new file mode 100644
index 00000000..d309e509
--- /dev/null
+++ b/services/response-api/internal/infrastructure/observability/observability.go
@@ -0,0 +1,56 @@
+package observability
+
+import (
+	"context"
+
+	"github.com/rs/zerolog"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
+
+	"jan-server/services/response-api/internal/config"
+)
+
+// Shutdown is a function that releases telemetry resources.
+// Setup always returns a non-nil Shutdown on success; when tracing is
+// disabled it is a no-op that returns nil.
+type Shutdown func(ctx context.Context) error
+
+// Setup configures OpenTelemetry tracing if enabled.
+//
+// Tracing is skipped, and a no-op Shutdown returned, when cfg.EnableTracing is
+// false or cfg.OTLPEndpoint is empty. Otherwise an OTLP/HTTP exporter is
+// created for the endpoint (over an insecure, non-TLS connection), a tracer
+// provider that samples every span and batches exports is installed as the
+// global provider, and the returned Shutdown shuts that provider down.
+//
+// An error is returned when the exporter or the resource cannot be built; in
+// the latter case the already-created exporter is shut down first so that it
+// is not leaked.
+func Setup(ctx context.Context, cfg *config.Config, log zerolog.Logger) (Shutdown, error) {
+	if !cfg.EnableTracing || cfg.OTLPEndpoint == "" {
+		log.Info().Msg("Tracing disabled")
+		return func(context.Context) error { return nil }, nil
+	}
+
+	exporter, err := otlptracehttp.New(ctx,
+		otlptracehttp.WithEndpoint(cfg.OTLPEndpoint),
+		otlptracehttp.WithInsecure(),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceName(cfg.ServiceName),
+			semconv.DeploymentEnvironment(cfg.Environment),
+		),
+	)
+	if err != nil {
+		// No tracer provider owns the exporter yet, so release it here; the
+		// resource error is the one worth reporting.
+		_ = exporter.Shutdown(ctx)
+		return nil, err
+	}
+
+	tp := sdktrace.NewTracerProvider(
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+		sdktrace.WithBatcher(exporter),
+		sdktrace.WithResource(res),
+	)
+	otel.SetTracerProvider(tp)
+
+	log.Info().Str("endpoint", cfg.OTLPEndpoint).Msg("Tracing enabled")
+
+	return func(ctx context.Context) error {
+		return tp.Shutdown(ctx)
+	}, nil
+}
diff --git a/services/response-api/internal/infrastructure/queue/postgres_queue.go b/services/response-api/internal/infrastructure/queue/postgres_queue.go
new file mode 100644
index 00000000..3e010a8b
--- /dev/null
+++ b/services/response-api/internal/infrastructure/queue/postgres_queue.go
@@ -0,0 +1,145 @@
+package queue
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+
+	"jan-server/services/response-api/internal/infrastructure/database/entities"
+)
+
+// PostgresQueue implements TaskQueue using the responses table.
+// A task is a response row with background = true; its status column tracks
+// the task's progress (queued, in_progress, completed, failed).
+type PostgresQueue struct {
+	db  *gorm.DB
+	// log is tagged with component=postgres-queue by NewPostgresQueue.
+	log zerolog.Logger
+}
+
+// NewPostgresQueue returns a task queue backed by db. The queue's logger is
+// derived from log with a "component" field set to "postgres-queue".
+func NewPostgresQueue(db *gorm.DB, log zerolog.Logger) *PostgresQueue {
+	scoped := log.With().Str("component", "postgres-queue").Logger()
+
+	queue := new(PostgresQueue)
+	queue.db = db
+	queue.log = scoped
+	return queue
+}
+
+// Enqueue is not used directly - tasks are created via service.createAsync
+func (q *PostgresQueue) Enqueue(ctx context.Context, task *Task) error {
+	return fmt.Errorf("enqueue should not be called directly - use service.createAsync")
+}
+
+// Dequeue fetches the next queued task using FOR UPDATE SKIP LOCKED.
+//
+// The oldest background response (by queued_at) whose status is "queued" is
+// selected. It returns (nil, nil) when no such row exists, and a wrapped error
+// when the query fails. When a row has no queued_at value, the task's QueuedAt
+// falls back to the row's CreatedAt instead of dereferencing a nil pointer.
+//
+// NOTE(review): the SELECT is not wrapped in an explicit transaction here, so
+// unless q.db is already transactional the row lock presumably ends with the
+// statement — confirm that callers do not rely on it to prevent two workers
+// from picking up the same task.
+func (q *PostgresQueue) Dequeue(ctx context.Context) (*Task, error) {
+	var entity entities.Response
+
+	err := q.db.WithContext(ctx).
+		Raw("SELECT * FROM responses WHERE status = ? AND background = ? ORDER BY queued_at ASC LIMIT 1 FOR UPDATE SKIP LOCKED", "queued", true).
+		Scan(&entity).Error
+
+	if err != nil {
+		if err == gorm.ErrRecordNotFound {
+			return nil, nil // No tasks available
+		}
+		return nil, fmt.Errorf("dequeue task: %w", err)
+	}
+
+	// Check if no rows were returned (entity.ID will be 0)
+	if entity.ID == 0 {
+		return nil, nil // No tasks available
+	}
+
+	// QueuedAt is a nullable column; guard the dereference.
+	queuedAt := entity.CreatedAt
+	if entity.QueuedAt != nil {
+		queuedAt = *entity.QueuedAt
+	}
+
+	task := &Task{
+		ResponseID: fmt.Sprintf("%d", entity.ID),
+		PublicID:   entity.PublicID,
+		UserID:     entity.UserID,
+		Model:      entity.Model,
+		QueuedAt:   queuedAt,
+	}
+
+	return task, nil
+}
+
+// MarkProcessing moves the response identified by publicID to "in_progress"
+// and stamps started_at and updated_at with the current time. It fails when
+// the update errors or when no row matched publicID.
+func (q *PostgresQueue) MarkProcessing(ctx context.Context, publicID string) error {
+	startedAt := time.Now()
+	changes := map[string]interface{}{
+		"status":     "in_progress",
+		"started_at": startedAt,
+		"updated_at": startedAt,
+	}
+
+	tx := q.db.WithContext(ctx).
+		Model(&entities.Response{}).
+		Where("public_id = ?", publicID).
+		Updates(changes)
+
+	switch {
+	case tx.Error != nil:
+		return fmt.Errorf("mark processing: %w", tx.Error)
+	case tx.RowsAffected == 0:
+		return fmt.Errorf("response not found: %s", publicID)
+	}
+	return nil
+}
+
+// MarkCompleted moves the response identified by publicID to "completed" and
+// stamps completed_at and updated_at with the current time. Only a database
+// error is reported; an update that matches no row still returns nil.
+func (q *PostgresQueue) MarkCompleted(ctx context.Context, publicID string) error {
+	finishedAt := time.Now()
+	changes := map[string]interface{}{
+		"status":       "completed",
+		"completed_at": finishedAt,
+		"updated_at":   finishedAt,
+	}
+
+	tx := q.db.WithContext(ctx).
+		Model(&entities.Response{}).
+		Where("public_id = ?", publicID).
+		Updates(changes)
+	if tx.Error != nil {
+		return fmt.Errorf("mark completed: %w", tx.Error)
+	}
+	return nil
+}
+
+// MarkFailed updates the response status to failed.
+//
+// The row identified by publicID gets status "failed", an error payload with
+// code "task_execution_failed" and taskErr's message, and failed_at/updated_at
+// set to the current time. A nil taskErr is tolerated and recorded with the
+// message "unknown error" instead of causing a nil-pointer panic. Only a
+// database error is reported; an update that matches no row still returns nil.
+func (q *PostgresQueue) MarkFailed(ctx context.Context, publicID string, taskErr error) error {
+	now := time.Now()
+
+	// Callers may reach this path without a concrete error value.
+	message := "unknown error"
+	if taskErr != nil {
+		message = taskErr.Error()
+	}
+	errorJSON := map[string]interface{}{
+		"code":    "task_execution_failed",
+		"message": message,
+	}
+
+	result := q.db.WithContext(ctx).
+		Model(&entities.Response{}).
+		Where("public_id = ?", publicID).
+		Updates(map[string]interface{}{
+			"status":     "failed",
+			"error":      errorJSON,
+			"failed_at":  now,
+			"updated_at": now,
+		})
+
+	if result.Error != nil {
+		return fmt.Errorf("mark failed: %w", result.Error)
+	}
+
+	return nil
+}
+
+// GetQueueDepth counts the background responses whose status is "queued".
+// A failed count query is returned wrapped, together with a depth of 0.
+func (q *PostgresQueue) GetQueueDepth(ctx context.Context) (int64, error) {
+	var depth int64
+
+	pending := q.db.WithContext(ctx).
+		Model(&entities.Response{}).
+		Where("status = ?", "queued").
+		Where("background = ?", true)
+
+	if err := pending.Count(&depth).Error; err != nil {
+		return 0, fmt.Errorf("get queue depth: %w", err)
+	}
+	return depth, nil
+}
diff --git a/services/response-api/internal/infrastructure/queue/queue.go b/services/response-api/internal/infrastructure/queue/queue.go
new file mode 100644
index 00000000..57930afa
--- /dev/null
+++ b/services/response-api/internal/infrastructure/queue/queue.go
@@ -0,0 +1,36 @@
+package queue
+
+import (
+	"context"
+	"time"
+)
+
+// Task represents a background task to be processed.
+type Task struct {
+	// ResponseID is the numeric row ID of the response, rendered as a decimal
+	// string (this is how PostgresQueue.Dequeue fills it).
+	ResponseID string
+	// PublicID is the response's public identifier; PostgresQueue matches it
+	// against the public_id column when changing status.
+	PublicID   string
+	UserID     string
+	Model      string
+	// QueuedAt is the time the task was put on the queue.
+	QueuedAt   time.Time
+}
+
+// TaskQueue defines the interface for task queue operations.
+//
+// In the PostgresQueue implementation of this package, the taskID arguments
+// of the Mark* methods are matched against the response's public_id column.
+type TaskQueue interface {
+	// Enqueue adds a task to the queue.
+	// (PostgresQueue does not support this and always returns an error.)
+	Enqueue(ctx context.Context, task *Task) error
+
+	// Dequeue fetches the next available task using SELECT FOR UPDATE SKIP LOCKED.
+	// PostgresQueue returns (nil, nil) when no task is available.
+	Dequeue(ctx context.Context) (*Task, error)
+
+	// MarkProcessing updates task status to in_progress
+	MarkProcessing(ctx context.Context, taskID string) error
+
+	// MarkCompleted updates task status to completed
+	MarkCompleted(ctx context.Context, taskID string) error
+
+	// MarkFailed updates task status to failed, recording err as the cause
+	MarkFailed(ctx context.Context, taskID string, err error) error
+
+	// GetQueueDepth returns the number of queued tasks
+	GetQueueDepth(ctx context.Context) (int64, error)
+}
diff --git a/services/response-api/internal/infrastructure/repository/conversation/helpers.go b/services/response-api/internal/infrastructure/repository/conversation/helpers.go
new file mode 100644
index 00000000..504e0fc3
--- /dev/null
+++ b/services/response-api/internal/infrastructure/repository/conversation/helpers.go
@@ -0,0 +1,15 @@
+package conversation
+
+import (
+	"encoding/json"
+
+	"gorm.io/datatypes"
+)
+
+// marshalJSON encodes value for storage in a JSON column. A nil value is
+// stored as the JSON literal null; any other value goes through json.Marshal,
+// whose error is returned alongside the encoded bytes.
+func marshalJSON(value interface{}) (datatypes.JSON, error) {
+	if value != nil {
+		encoded, err := json.Marshal(value)
+		return datatypes.JSON(encoded), err
+	}
+	return datatypes.JSON([]byte("null")), nil
+}
diff --git a/services/response-api/internal/infrastructure/repository/conversation/item_repository.go b/services/response-api/internal/infrastructure/repository/conversation/item_repository.go
new file mode 100644
index 00000000..8df587a2
--- /dev/null
+++ b/services/response-api/internal/infrastructure/repository/conversation/item_repository.go
@@ -0,0 +1,100 @@
+package conversation
+
+import (
+	"context"
+	"encoding/json"
+
+	"gorm.io/gorm"
+
+	domain "jan-server/services/response-api/internal/domain/conversation"
+	"jan-server/services/response-api/internal/infrastructure/database/entities"
+	"jan-server/services/response-api/internal/utils/platformerrors"
+)
+
+// ItemRepository persists conversation items.
+// It maps domain.Item values to entities.ConversationItem rows and back.
+type ItemRepository struct {
+	db *gorm.DB
+}
+
+// NewItemRepository returns an ItemRepository that operates on db.
+func NewItemRepository(db *gorm.DB) *ItemRepository {
+	repo := new(ItemRepository)
+	repo.db = db
+	return repo
+}
+
+// BulkInsert writes all items with a single Create call, keeping the order of
+// the input slice. An empty slice is a no-op.
+//
+// Each item's content is JSON-encoded first; an encoding failure aborts the
+// whole insert with an internal error, and a failed Create is reported as a
+// database error.
+func (r *ItemRepository) BulkInsert(ctx context.Context, items []domain.Item) error {
+	if len(items) == 0 {
+		return nil
+	}
+
+	rows := make([]entities.ConversationItem, len(items))
+	for i := range items {
+		src := &items[i]
+
+		encoded, err := marshalJSON(src.Content)
+		if err != nil {
+			return platformerrors.NewError(
+				ctx,
+				platformerrors.LayerRepository,
+				platformerrors.ErrorTypeInternal,
+				"failed to marshal conversation item",
+				err,
+				"6k7j8i9h-0f1a-2b3c-4d5e-6f7a8b9c0d1e",
+			)
+		}
+
+		rows[i] = entities.ConversationItem{
+			ConversationID: src.ConversationID,
+			Role:           string(src.Role),
+			Status:         string(src.Status),
+			Content:        encoded,
+			Sequence:       src.Sequence,
+			CreatedAt:      src.CreatedAt,
+		}
+	}
+
+	insertErr := r.db.WithContext(ctx).Create(&rows).Error
+	if insertErr == nil {
+		return nil
+	}
+	return platformerrors.NewError(
+		ctx,
+		platformerrors.LayerRepository,
+		platformerrors.ErrorTypeDatabaseError,
+		"failed to bulk insert conversation items",
+		insertErr,
+		"7l8k9j0i-1a2b-3c4d-5e6f-7a8b9c0d1e2f",
+	)
+}
+
+// ListByConversationID loads every item of the conversation, ordered by
+// ascending sequence, and converts the rows back into domain items.
+//
+// Stored content is decoded into a map on a best-effort basis: a decode
+// failure is deliberately ignored and leaves that item's Content nil.
+func (r *ItemRepository) ListByConversationID(ctx context.Context, conversationID uint) ([]domain.Item, error) {
+	var rows []entities.ConversationItem
+
+	query := r.db.WithContext(ctx).
+		Where("conversation_id = ?", conversationID).
+		Order("sequence ASC")
+	if err := query.Find(&rows).Error; err != nil {
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to list conversation items",
+			err,
+			"8m9l0k1j-2b3c-4d5e-6f7a-8b9c0d1e2f3a",
+		)
+	}
+
+	items := make([]domain.Item, len(rows))
+	for i := range rows {
+		row := &rows[i]
+
+		var content map[string]interface{}
+		if len(row.Content) > 0 {
+			_ = json.Unmarshal(row.Content, &content)
+		}
+
+		items[i] = domain.Item{
+			ID:             row.ID,
+			ConversationID: row.ConversationID,
+			Role:           domain.ItemRole(row.Role),
+			Status:         domain.ItemStatus(row.Status),
+			Content:        content,
+			Sequence:       row.Sequence,
+			CreatedAt:      row.CreatedAt,
+		}
+	}
+	return items, nil
+}
diff --git a/services/response-api/internal/infrastructure/repository/conversation/postgres_repository.go b/services/response-api/internal/infrastructure/repository/conversation/postgres_repository.go
new file mode 100644
index 00000000..e5a40870
--- /dev/null
+++ b/services/response-api/internal/infrastructure/repository/conversation/postgres_repository.go
@@ -0,0 +1,109 @@
+package conversation
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	"gorm.io/gorm"
+
+	domain "jan-server/services/response-api/internal/domain/conversation"
+	"jan-server/services/response-api/internal/infrastructure/database/entities"
+	"jan-server/services/response-api/internal/utils/platformerrors"
+)
+
+// Repository persists conversation metadata.
+// It maps domain.Conversation values to entities.Conversation rows and back.
+type Repository struct {
+	db *gorm.DB
+}
+
+// NewRepository returns a conversation Repository that operates on db.
+func NewRepository(db *gorm.DB) *Repository {
+	repo := new(Repository)
+	repo.db = db
+	return repo
+}
+
+// Create stores conv as a new conversation row. On success the generated ID
+// and the created/updated timestamps are copied back into conv.
+//
+// Metadata that cannot be JSON-encoded yields an internal error; a failed
+// insert yields a database error.
+func (r *Repository) Create(ctx context.Context, conv *domain.Conversation) error {
+	encoded, err := marshalJSON(conv.Metadata)
+	if err != nil {
+		return platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeInternal,
+			"failed to marshal conversation metadata",
+			err,
+			"1f2e3d4c-5a6b-7c8d-9e0f-1a2b3c4d5e6f",
+		)
+	}
+
+	row := entities.Conversation{
+		PublicID: conv.PublicID,
+		UserID:   conv.UserID,
+		Metadata: encoded,
+	}
+	if dbErr := r.db.WithContext(ctx).Create(&row).Error; dbErr != nil {
+		return platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to create conversation",
+			dbErr,
+			"2g3f4e5d-6b7c-8d9e-0f1a-2b3c4d5e6f7a",
+		)
+	}
+
+	// Hand the database-assigned values back to the caller.
+	conv.ID, conv.CreatedAt, conv.UpdatedAt = row.ID, row.CreatedAt, row.UpdatedAt
+	return nil
+}
+
+// FindByPublicID loads the conversation whose public_id equals publicID.
+//
+// A missing row is reported as a not-found error, any other query failure as a
+// database error, and stored metadata that cannot be decoded as an internal
+// error.
+func (r *Repository) FindByPublicID(ctx context.Context, publicID string) (*domain.Conversation, error) {
+	var row entities.Conversation
+
+	lookupErr := r.db.WithContext(ctx).Where("public_id = ?", publicID).First(&row).Error
+	switch {
+	case lookupErr == nil:
+		// Found; continue with decoding below.
+	case errors.Is(lookupErr, gorm.ErrRecordNotFound):
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeNotFound,
+			fmt.Sprintf("conversation not found: %s", publicID),
+			nil,
+			"3h4g5f6e-7c8d-9e0f-1a2b-3c4d5e6f7a8b",
+		)
+	default:
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to fetch conversation",
+			lookupErr,
+			"4i5h6g7f-8d9e-0f1a-2b3c-4d5e6f7a8b9c",
+		)
+	}
+
+	// Empty metadata is left as a nil map rather than decoded.
+	var metadata map[string]interface{}
+	if len(row.Metadata) > 0 {
+		if decodeErr := json.Unmarshal(row.Metadata, &metadata); decodeErr != nil {
+			return nil, platformerrors.NewError(
+				ctx,
+				platformerrors.LayerRepository,
+				platformerrors.ErrorTypeInternal,
+				"failed to unmarshal conversation metadata",
+				decodeErr,
+				"5j6i7h8g-9e0f-1a2b-3c4d-5e6f7a8b9c0d",
+			)
+		}
+	}
+
+	found := &domain.Conversation{
+		ID:        row.ID,
+		PublicID:  row.PublicID,
+		UserID:    row.UserID,
+		Metadata:  metadata,
+		CreatedAt: row.CreatedAt,
+		UpdatedAt: row.UpdatedAt,
+	}
+	return found, nil
+}
diff --git a/services/response-api/internal/infrastructure/repository/response/postgres_repository.go b/services/response-api/internal/infrastructure/repository/response/postgres_repository.go
new file mode 100644
index 00000000..4d65f272
--- /dev/null
+++ b/services/response-api/internal/infrastructure/repository/response/postgres_repository.go
@@ -0,0 +1,269 @@
+package response
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"time"
+
+	"gorm.io/datatypes"
+	"gorm.io/gorm"
+	"jan-server/services/response-api/internal/domain/llm"
+	domain "jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/domain/tool"
+	"jan-server/services/response-api/internal/infrastructure/database/entities"
+	"jan-server/services/response-api/internal/utils/platformerrors"
+)
+
+// PostgresRepository provides gorm-backed persistence for responses and their tool executions.
+type PostgresRepository struct {
+	db *gorm.DB // shared handle; each method derives a context-bound session via WithContext
+}
+
+// NewPostgresRepository constructs the repository around db; the handle is stored as given, without validation.
+func NewPostgresRepository(db *gorm.DB) *PostgresRepository {
+	return &PostgresRepository{db: db}
+}
+
+// Create inserts resp as a new row and then refreshes resp from that row, so values
+// present on the row after the insert (ID and timestamps, which mapToEntity leaves
+// unset) become visible to the caller. Mapping failures surface as ErrorTypeInternal
+// and insert failures as ErrorTypeDatabaseError.
+func (r *PostgresRepository) Create(ctx context.Context, resp *domain.Response) error {
+	row, mapErr := mapToEntity(resp)
+	if mapErr != nil {
+		return platformerrors.NewError(
+			ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeInternal,
+			"failed to map response to entity", mapErr,
+			"5a6b7c8d-9e0f-4a1b-2c3d-4e5f6a7b8c9d",
+		)
+	}
+
+	insert := r.db.WithContext(ctx).Create(row)
+	if insert.Error != nil {
+		return platformerrors.NewError(
+			ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError,
+			"failed to create response", insert.Error,
+			"6b7c8d9e-0f1a-4b2c-3d4e-5f6a7b8c9d0e",
+		)
+	}
+
+	// Copy the persisted row back onto resp; a decode failure here is returned unwrapped.
+	return mapFromEntity(row, resp)
+}
+
+// Update persists changes to a response (status/output/etc) onto the row whose primary
+// key is resp.ID. resp itself is not refreshed from the database afterwards.
+// NOTE(review): gorm's struct-based Updates writes only non-zero fields, so a field reset
+// to its zero value (e.g. a bool back to false) would not be persisted here — confirm
+// that no caller relies on clearing a column through this method.
+func (r *PostgresRepository) Update(ctx context.Context, resp *domain.Response) error {
+	row, mapErr := mapToEntity(resp)
+	if mapErr != nil {
+		return platformerrors.NewError(
+			ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeInternal,
+			"failed to map response to entity for update", mapErr,
+			"7c8d9e0f-1a2b-4c3d-4e5f-6a7b8c9d0e1f",
+		)
+	}
+	row.ID = resp.ID
+
+	target := &entities.Response{ID: resp.ID}
+	result := r.db.WithContext(ctx).Model(target).Updates(row)
+	if result.Error != nil {
+		return platformerrors.NewError(
+			ctx, platformerrors.LayerRepository, platformerrors.ErrorTypeDatabaseError,
+			"failed to update response", result.Error,
+			"8d9e0f1a-2b3c-4d5e-6f7a-8b9c0d1e2f3a",
+		)
+	}
+	return nil
+}
+
+// FindByPublicID fetches a response (Conversation preloaded) and hydrates the domain model.
+// Missing rows map to ErrorTypeNotFound, other query failures to ErrorTypeDatabaseError.
+func (r *PostgresRepository) FindByPublicID(ctx context.Context, publicID string) (*domain.Response, error) {
+	var entity entities.Response
+	query := r.db.WithContext(ctx).Preload("Conversation").Where("public_id = ?", publicID)
+	if err := query.First(&entity).Error; err != nil {
+		// errors.Is rather than ==, so a wrapped ErrRecordNotFound is still classified as not-found.
+		if errors.Is(err, gorm.ErrRecordNotFound) {
+			return nil, platformerrors.NewError(
+				ctx,
+				platformerrors.LayerRepository,
+				platformerrors.ErrorTypeNotFound,
+				"response not found",
+				err,
+				"9e0f1a2b-3c4d-5e6f-7a8b-9c0d1e2f3a4b",
+			)
+		}
+		return nil, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to find response by public id",
+			err,
+			"0f1a2b3c-4d5e-6f7a-8b9c-0d1e2f3a4b5c",
+		)
+	}
+
+	resp := &domain.Response{}
+	if err := mapFromEntity(&entity, resp); err != nil {
+		return nil, err
+	}
+	return resp, nil
+}
+
+// MarkCancelled flags resp as cancelled, stamps CancelledAt with the current time and
+// persists the change through Update. resp is mutated even when the update fails.
+func (r *PostgresRepository) MarkCancelled(ctx context.Context, resp *domain.Response) error {
+	cancelledAt := time.Now()
+	resp.Status, resp.CancelledAt = domain.StatusCancelled, &cancelledAt
+	return r.Update(ctx, resp)
+}
+
+// RecordExecutions persists tool execution snapshot rows for responseID in a single
+// Create call. An empty executions slice is a no-op. Marshal and insert errors are
+// returned as plain (non-platform) errors.
+func (r *PostgresRepository) RecordExecutions(ctx context.Context, responseID uint, executions []tool.Execution) error {
+	if len(executions) == 0 {
+		return nil
+	}
+
+	// One row per execution, in input order.
+	rows := make([]entities.ToolExecution, len(executions))
+	for i, exec := range executions {
+		argsJSON, err := json.Marshal(exec.Arguments)
+		if err != nil {
+			return fmt.Errorf("marshal tool arguments: %w", err)
+		}
+		// Result stays nil when the execution carries none.
+		var resultJSON datatypes.JSON
+		if exec.Result != nil {
+			if resultJSON, err = json.Marshal(exec.Result); err != nil {
+				return fmt.Errorf("marshal tool result: %w", err)
+			}
+		}
+		rows[i] = entities.ToolExecution{
+			ResponseID: responseID, CallID: exec.CallID, ToolName: exec.ToolName,
+			Arguments: argsJSON, Result: resultJSON,
+			Status: string(exec.Status), ErrorMessage: exec.ErrorMessage,
+			ExecutionOrder: exec.ExecutionOrder,
+		}
+	}
+
+	return r.db.WithContext(ctx).Create(&rows).Error
+}
+
+func mapToEntity(resp *domain.Response) (*entities.Response, error) {
+	input, err := marshalJSON(resp.Input)
+	if err != nil {
+		return nil, fmt.Errorf("marshal response input: %w", err)
+	}
+	output, err := marshalJSON(resp.Output)
+	if err != nil {
+		return nil, fmt.Errorf("marshal response output: %w", err)
+	}
+	metadata, err := marshalJSON(resp.Metadata)
+	if err != nil {
+		return nil, fmt.Errorf("marshal metadata: %w", err)
+	}
+	usage, err := marshalJSON(resp.Usage)
+	if err != nil {
+		return nil, fmt.Errorf("marshal usage: %w", err)
+	}
+	errJSON, err := marshalJSON(resp.Error)
+	if err != nil {
+		return nil, fmt.Errorf("marshal error: %w", err)
+	}
+
+	return &entities.Response{
+		PublicID:           resp.PublicID,
+		UserID:             resp.UserID,
+		Model:              resp.Model,
+		SystemPrompt:       resp.SystemPrompt,
+		Input:              input,
+		Output:             output,
+		Status:             string(resp.Status),
+		Stream:             resp.Stream,
+		Background:         resp.Background,
+		Store:              resp.Store,
+		APIKey:             resp.APIKey,
+		Metadata:           metadata,
+		Usage:              usage,
+		Error:              errJSON,
+		ConversationID:     resp.ConversationID,
+		PreviousResponseID: resp.PreviousResponseID,
+		Object:             resp.Object,
+		QueuedAt:           resp.QueuedAt,
+		StartedAt:          resp.StartedAt,
+		CompletedAt:        resp.CompletedAt,
+		CancelledAt:        resp.CancelledAt,
+		FailedAt:           resp.FailedAt,
+	}, nil
+}
+
+// mapFromEntity copies a database row onto resp. Input must decode; Output and Metadata
+// are decoded only when the column is non-empty. Usage and Error are best-effort: a
+// decode failure leaves the field untouched instead of failing the mapping. A stored
+// JSON null — which marshalJSON writes for absent values — counts as "no value" for
+// Usage and Error, so they are left as-is (nil on a fresh resp) rather than being
+// replaced by pointers to zero structs. ConversationPublicID is taken from the
+// preloaded Conversation only when resp does not already carry one.
+func mapFromEntity(entity *entities.Response, resp *domain.Response) error {
+	resp.ID, resp.PublicID, resp.UserID = entity.ID, entity.PublicID, entity.UserID
+	resp.Model, resp.SystemPrompt = entity.Model, entity.SystemPrompt
+	resp.Status = domain.Status(entity.Status)
+	resp.Stream, resp.Background, resp.Store = entity.Stream, entity.Background, entity.Store
+	resp.APIKey = entity.APIKey
+	resp.ConversationID, resp.PreviousResponseID = entity.ConversationID, entity.PreviousResponseID
+	resp.CreatedAt, resp.UpdatedAt = entity.CreatedAt, entity.UpdatedAt
+	resp.QueuedAt, resp.StartedAt = entity.QueuedAt, entity.StartedAt
+	resp.CompletedAt, resp.CancelledAt, resp.FailedAt = entity.CompletedAt, entity.CancelledAt, entity.FailedAt
+	resp.Object = entity.Object
+
+	if err := json.Unmarshal(entity.Input, &resp.Input); err != nil {
+		return fmt.Errorf("unmarshal input: %w", err)
+	}
+	if len(entity.Output) > 0 {
+		if err := json.Unmarshal(entity.Output, &resp.Output); err != nil {
+			return fmt.Errorf("unmarshal output: %w", err)
+		}
+	}
+	if len(entity.Metadata) > 0 {
+		if err := json.Unmarshal(entity.Metadata, &resp.Metadata); err != nil {
+			return fmt.Errorf("unmarshal metadata: %w", err)
+		}
+	}
+
+	// hasValue reports whether a JSON column holds something other than empty bytes or JSON null.
+	hasValue := func(raw datatypes.JSON) bool { return len(raw) > 0 && string(raw) != "null" }
+	if hasValue(entity.Usage) {
+		var usage llm.Usage
+		if err := json.Unmarshal(entity.Usage, &usage); err == nil {
+			resp.Usage = &usage
+		}
+	}
+	if hasValue(entity.Error) {
+		var errDetails domain.ErrorDetails
+		if err := json.Unmarshal(entity.Error, &errDetails); err == nil {
+			resp.Error = &errDetails
+		}
+	}
+
+	if resp.ConversationPublicID == nil && entity.Conversation != nil {
+		resp.ConversationPublicID = &entity.Conversation.PublicID
+	}
+
+	return nil
+}
+
+// marshalJSON encodes value for a JSON column; an untyped nil is stored as the literal null.
+func marshalJSON(value interface{}) (datatypes.JSON, error) {
+	if value == nil {
+		return datatypes.JSON("null"), nil
+	}
+	return json.Marshal(value)
+}
diff --git a/services/response-api/internal/interfaces/httpserver/handlers/provider.go b/services/response-api/internal/interfaces/httpserver/handlers/provider.go
new file mode 100644
index 00000000..eb707451
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/handlers/provider.go
@@ -0,0 +1,19 @@
+package handlers
+
+import (
+	"github.com/rs/zerolog"
+
+	domain "jan-server/services/response-api/internal/domain/response"
+)
+
+// Provider groups the HTTP handlers so they can be handed to route registration as one value.
+type Provider struct {
+	Response *ResponseHandler // handler behind the response endpoints
+}
+
+// NewProvider builds the handler provider; its response handler gets the given service and logger.
+func NewProvider(responseService domain.Service, log zerolog.Logger) *Provider {
+	provider := new(Provider)
+	provider.Response = NewResponseHandler(responseService, log)
+	return provider
+}
diff --git a/services/response-api/internal/interfaces/httpserver/handlers/response_handler.go b/services/response-api/internal/interfaces/httpserver/handlers/response_handler.go
new file mode 100644
index 00000000..c9fa631a
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/handlers/response_handler.go
@@ -0,0 +1,361 @@
+package handlers
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+	"sync"
+
+	"github.com/gin-gonic/gin"
+	"github.com/golang-jwt/jwt/v5"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/response-api/internal/domain/llm"
+	"jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/domain/tool"
+	"jan-server/services/response-api/internal/interfaces/httpserver/requests"
+	"jan-server/services/response-api/internal/interfaces/httpserver/responses"
+)
+
+// ResponseHandler exposes HTTP entrypoints for the Responses API.
+type ResponseHandler struct {
+	service response.Service // domain service every endpoint delegates to
+	log     zerolog.Logger   // logger tagged with handler=response by NewResponseHandler
+}
+
+// NewResponseHandler constructs the handler around service. The stored logger is
+// derived from log with a handler=response field attached; log is received by value,
+// so the caller's logger is not modified.
+func NewResponseHandler(service response.Service, log zerolog.Logger) *ResponseHandler {
+	scoped := log.With().Str("handler", "response").Logger()
+	return &ResponseHandler{service: service, log: scoped}
+}
+
+// Create handles POST /v1/responses
+// @Summary Create a response
+// @Description Creates a response and orchestrates MCP tool calls when required.
+// @Tags Responses
+// @Accept json
+// @Produce json
+// @Param request body requests.CreateResponseRequest true "Create request"
+// @Success 200 {object} responses.ResponsePayload
+// @Failure 400 {object} map[string]string
+// @Router /v1/responses [post]
+func (h *ResponseHandler) Create(c *gin.Context) {
+	var req requests.CreateResponseRequest
+	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": bindErr.Error()})
+		return
+	}
+
+	// Acting user: the request's "user" field wins, then the JWT subject, then "guest".
+	// NOTE(review): a client-supplied user overrides the authenticated subject — confirm this is intended.
+	userID := req.User
+	if userID == "" {
+		if userID = extractSubject(c); userID == "" {
+			userID = "guest"
+		}
+	}
+
+	// Optional boolean flags count as false when omitted.
+	isSet := func(flag *bool) bool { return flag != nil && *flag }
+	stream, background, store := isSet(req.Stream), isSet(req.Background), isSet(req.Store)
+
+	// Validate background mode constraints
+	if background && !store {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "background mode requires store=true"})
+		return
+	}
+
+	// Credential: X-API-Key if present, otherwise the raw Authorization header value.
+	apiKey := strings.TrimSpace(c.GetHeader("X-API-Key"))
+	if apiKey == "" {
+		apiKey = strings.TrimSpace(c.GetHeader("Authorization"))
+	}
+
+	params := response.CreateParams{
+		UserID:             userID,
+		Model:              req.Model,
+		Input:              req.Input,
+		SystemPrompt:       req.SystemPrompt,
+		Temperature:        req.Temperature,
+		MaxTokens:          req.MaxTokens,
+		Stream:             stream,
+		Background:         background,
+		Store:              store,
+		ToolChoice:         mapToolChoice(req.ToolChoice),
+		Tools:              mapTools(req.Tools),
+		PreviousResponseID: req.PreviousResponseID,
+		ConversationID:     req.Conversation,
+		Metadata:           req.Metadata,
+	}
+	if apiKey != "" {
+		params.APIKey = &apiKey // left nil when the request carried no credential
+	}
+
+	// Attach the credential to the request context before calling the service.
+	ctx := llm.ContextWithAuthToken(c.Request.Context(), apiKey)
+	c.Request = c.Request.WithContext(ctx)
+
+	if stream {
+		h.streamResponse(c, params)
+		return
+	}
+
+	resp, err := h.service.Create(ctx, params)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, responses.FromDomain(resp))
+}
+
+// Get handles GET /v1/responses/:id
+// @Summary Get a response by ID
+// @Tags Responses
+// @Produce json
+// @Param response_id path string true "Response ID"
+// @Success 200 {object} responses.ResponsePayload
+// @Failure 404 {object} map[string]string
+// @Router /v1/responses/{response_id} [get]
+func (h *ResponseHandler) Get(c *gin.Context) {
+	resp, err := h.service.GetByPublicID(c.Request.Context(), c.Param("response_id"))
+	if err == nil {
+		c.JSON(http.StatusOK, responses.FromDomain(resp))
+		return
+	}
+	// Every lookup failure is answered with 404, whatever its cause.
+	c.JSON(http.StatusNotFound, gin.H{"error": err.Error()})
+}
+
+// Cancel handles POST /v1/responses/:id/cancel
+// @Summary Cancel a response
+// @Tags Responses
+// @Produce json
+// @Param response_id path string true "Response ID"
+// @Success 200 {object} responses.ResponsePayload
+// @Router /v1/responses/{response_id}/cancel [post]
+func (h *ResponseHandler) Cancel(c *gin.Context) {
+	resp, err := h.service.Cancel(c.Request.Context(), c.Param("response_id"))
+	if err == nil {
+		c.JSON(http.StatusOK, responses.FromDomain(resp))
+		return
+	}
+	// Every cancellation failure is answered with 400, whatever its cause.
+	c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+}
+
+// Delete handles DELETE /v1/responses/:id
+// @Summary Delete/Cancel a response
+// @Tags Responses
+// @Produce json
+// @Param response_id path string true "Response ID"
+// @Success 200 {object} responses.ResponsePayload
+// @Router /v1/responses/{response_id} [delete]
+func (h *ResponseHandler) Delete(c *gin.Context) {
+	h.Cancel(c) // DELETE is an alias for cancel: same service call, same response body
+}
+
+// ListInputItems handles GET /v1/responses/:id/input_items
+// @Summary List conversation input items
+// @Tags Responses
+// @Produce json
+// @Param response_id path string true "Response ID"
+// @Success 200 {array} response.ConversationItem
+// @Router /v1/responses/{response_id}/input_items [get]
+func (h *ResponseHandler) ListInputItems(c *gin.Context) {
+	responseID := c.Param("response_id")
+	items, err := h.service.ListConversationItems(c.Request.Context(), responseID)
+	if err == nil {
+		// The items are wrapped in a "data" envelope rather than returned as a bare array.
+		c.JSON(http.StatusOK, gin.H{"data": items})
+		return
+	}
+	c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
+}
+
+// streamResponse serves a create request as Server-Sent Events. It installs an SSE
+// observer as params.StreamObserver, runs the service call, and finishes the stream
+// with either a response.completed or a response.error event.
+func (h *ResponseHandler) streamResponse(c *gin.Context, params response.CreateParams) {
+	writer := c.Writer
+	flusher, ok := writer.(http.Flusher)
+	if !ok {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "streaming not supported"})
+		return
+	}
+
+	// SSE headers, set once before the first event is written.
+	c.Header("Content-Type", "text/event-stream")
+	c.Header("Cache-Control", "no-cache")
+	c.Header("Connection", "keep-alive")
+
+	observer := newSSEObserver(writer, flusher, h.log)
+	params.StreamObserver = observer
+	resp, err := h.service.Create(c.Request.Context(), params)
+	if err != nil {
+		observer.SendError(err)
+		// NOTE(review): SendError has already written to the body, so this status likely never reaches the client — confirm.
+		c.Status(http.StatusInternalServerError)
+		return
+	}
+	observer.SendCompleted(resp)
+}
+
+// extractSubject returns the "sub" claim of the *jwt.Token stored under the "auth_token"
+// context key, or "" when the token, its map claims, or a string "sub" is unavailable.
+func extractSubject(c *gin.Context) string {
+	stored, found := c.Get("auth_token")
+	if !found {
+		return ""
+	}
+	token, isToken := stored.(*jwt.Token)
+	if !isToken {
+		return ""
+	}
+	// Failed assertions yield a nil map / empty string, which fall through to "".
+	claims, _ := token.Claims.(jwt.MapClaims)
+	subject, _ := claims["sub"].(string)
+	return subject
+}
+
+// mapTools converts HTTP tool definitions into their llm equivalents, preserving order.
+// An empty or nil input yields nil.
+func mapTools(tools []requests.ToolDefinition) []llm.ToolDefinition {
+	if len(tools) == 0 {
+		return nil
+	}
+	mapped := make([]llm.ToolDefinition, len(tools))
+	for i, def := range tools {
+		mapped[i].Type = def.Type
+		mapped[i].Function = llm.ToolFunctionSchema{
+			Name:        def.Function.Name,
+			Description: def.Function.Description,
+			Parameters:  def.Function.Parameters,
+		}
+	}
+	return mapped
+}
+
+// mapToolChoice converts the request's tool choice into the llm form, copying the
+// type and the selected function name. A nil choice maps to nil.
+func mapToolChoice(choice *requests.ToolChoice) *llm.ToolChoice {
+	if choice == nil {
+		return nil
+	}
+
+	// Function is an anonymous struct, so its single field is assigned directly
+	// instead of spelling the struct type out in a literal.
+	mapped := &llm.ToolChoice{Type: choice.Type}
+	mapped.Function.Name = choice.Function.Name
+	return mapped
+}
+
+type sseObserver struct { // streams response lifecycle events to one client as Server-Sent Events
+	writer     http.ResponseWriter // destination of the event stream
+	flusher    http.Flusher        // flushed after every event
+	log        zerolog.Logger      // receives payload marshalling errors
+	mu         sync.Mutex          // held by sendEvent for the whole write so events are not interleaved
+	responseID string              // public ID recorded by OnResponseCreated, echoed in later events
+}
+
+// newSSEObserver builds an observer that writes events to w and flushes through
+// flusher. The response ID starts empty; OnResponseCreated fills it in.
+// Marshalling failures are reported on log.
+func newSSEObserver(w http.ResponseWriter, flusher http.Flusher, log zerolog.Logger) *sseObserver {
+	observer := &sseObserver{writer: w, flusher: flusher, log: log}
+	return observer
+}
+
+func (o *sseObserver) OnResponseCreated(resp *response.Response) {
+	o.responseID = resp.PublicID // remembered (without taking o.mu) so later events can carry the ID
+	o.sendEvent("response.created", responses.FromDomain(resp)) // announces the new response with its full payload
+}
+
+// OnDelta forwards the text carried by a streaming delta as a
+// response.output_text.delta event tagged with the current response ID.
+// Deltas that carry no text are dropped without emitting anything.
+func (o *sseObserver) OnDelta(delta llm.ChatCompletionDelta) {
+	if text := extractDeltaText(delta); text != "" {
+		o.sendEvent("response.output_text.delta", map[string]interface{}{
+			"id":    o.responseID,
+			"delta": text,
+		})
+	}
+}
+
+// OnToolCall emits a response.tool_call event carrying the call and the response ID.
+func (o *sseObserver) OnToolCall(call tool.Call) {
+	o.sendEvent("response.tool_call", map[string]interface{}{
+		"id":   o.responseID,
+		"call": call,
+	})
+}
+
+// OnToolResult emits a response.tool_result event pairing callID with its result.
+func (o *sseObserver) OnToolResult(callID string, result *tool.Result) {
+	o.sendEvent("response.tool_result", map[string]interface{}{
+		"id":      o.responseID,
+		"call_id": callID,
+		"result":  result,
+	})
+}
+
+func (o *sseObserver) SendCompleted(resp *response.Response) {
+	o.sendEvent("response.completed", responses.FromDomain(resp)) // closing event: the finished response payload
+}
+
+// SendError emits a response.error event whose only field is the error's message.
+func (o *sseObserver) SendError(err error) {
+	payload := map[string]string{"message": err.Error()}
+	o.sendEvent("response.error", payload)
+}
+
+// sendEvent writes one SSE frame ("event:" line, "data:" line with the JSON payload, blank
+// line) under the mutex and flushes it. Unmarshallable payloads are logged and dropped.
+func (o *sseObserver) sendEvent(name string, payload interface{}) {
+	o.mu.Lock()
+	defer o.mu.Unlock()
+	encoded, marshalErr := json.Marshal(payload)
+	if marshalErr != nil {
+		o.log.Error().Err(marshalErr).Msg("marshal SSE payload")
+		return
+	}
+	fmt.Fprintf(o.writer, "event: %s\n", name)
+	fmt.Fprintf(o.writer, "data: %s\n\n", encoded)
+	o.flusher.Flush()
+}
+
+// extractDeltaText returns the first non-empty text found among the delta's choices,
+// scanning them in order, or "" when no choice carries text.
+func extractDeltaText(delta llm.ChatCompletionDelta) string {
+	for _, choice := range delta.Choices {
+		// nil content normalizes to "" as well, so it is skipped by the same test.
+		if text := normalizeContent(choice.Delta.Content); text != "" {
+			return text
+		}
+	}
+	return ""
+}
+
+// normalizeContent flattens message content to plain text: a string is returned as-is, a list
+// is the concatenation of its normalized items, a map gives its string "text"; else "".
+func normalizeContent(content interface{}) string {
+	if s, ok := content.(string); ok {
+		return s
+	}
+	if parts, ok := content.([]interface{}); ok {
+		var sb strings.Builder
+		for _, part := range parts {
+			sb.WriteString(normalizeContent(part))
+		}
+		return sb.String()
+	}
+	obj, _ := content.(map[string]interface{})
+	text, _ := obj["text"].(string)
+	return text
+}
+
+var _ response.StreamObserver = (*sseObserver)(nil) // compile-time check that *sseObserver implements response.StreamObserver
diff --git a/services/response-api/internal/interfaces/httpserver/http_server.go b/services/response-api/internal/interfaces/httpserver/http_server.go
new file mode 100644
index 00000000..3966a3b4
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/http_server.go
@@ -0,0 +1,124 @@
+package httpserver
+
+import (
+	"context"
+	"errors"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+	swaggerFiles "github.com/swaggo/files"
+	ginSwagger "github.com/swaggo/gin-swagger"
+
+	responseapidocs "jan-server/services/response-api/docs/swagger"
+	"jan-server/services/response-api/internal/config"
+	domain "jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/infrastructure/auth"
+	"jan-server/services/response-api/internal/interfaces/httpserver/handlers"
+	"jan-server/services/response-api/internal/interfaces/httpserver/routes"
+)
+
+// HTTPServer wraps the gin engine with graceful shutdown helpers.
+type HTTPServer struct {
+	cfg         *config.Config     // supplies the listen address and shutdown timeout used by Run
+	engine      *gin.Engine        // router with the middleware and routes installed by New
+	log         zerolog.Logger     // server lifecycle logging
+	handlerProv *handlers.Provider // handlers wired to the response service
+	routeProv   *routes.Provider   // registrar that attached the API routes to engine
+	auth        *auth.Validator    // optional; when nil, New installs no auth middleware
+}
+
+// New constructs the HTTP server with default middleware and routes; public routes are registered before the auth middleware is attached.
+func New(cfg *config.Config, log zerolog.Logger, responseService domain.Service, authValidator *auth.Validator) *HTTPServer {
+	if cfg.Environment == "production" {
+		gin.SetMode(gin.ReleaseMode)
+	}
+	responseapidocs.SwaggerInfo.BasePath = "/"
+
+	engine := gin.New()
+	engine.Use(gin.Recovery())
+	engine.Use(gin.Logger())
+
+	handlerProvider := handlers.NewProvider(responseService, log)
+	routeProvider := routes.NewProvider(handlerProvider)
+
+	// Register public routes (health checks, swagger) first, before the auth middleware is attached below
+	registerPublicRoutes(engine, cfg, authValidator)
+
+	// Apply authentication middleware before protected routes; with a nil validator none is installed
+	if authValidator != nil {
+		engine.Use(authValidator.Middleware())
+	}
+
+	// Register protected API routes (the /v1 group)
+	routeProvider.Register(engine)
+
+	return &HTTPServer{
+		cfg:         cfg,
+		engine:      engine,
+		log:         log,
+		handlerProv: handlerProvider,
+		routeProv:   routeProvider,
+		auth:        authValidator,
+	}
+}
+
+// Run starts the HTTP listener and handles graceful shutdown via context cancellation.
+// It blocks until the listener stops by itself (its error, if any, is returned) or
+// ctx is cancelled, in which case the server is shut down with a deadline of
+// cfg.ShutdownTimeout and the outcome of that shutdown is returned.
+func (s *HTTPServer) Run(ctx context.Context) error {
+	srv := &http.Server{Addr: s.cfg.Addr(), Handler: s.engine}
+
+	// Buffered so the serving goroutine can always deliver its result and exit,
+	// even when Run has already moved on to the shutdown path.
+	serveErr := make(chan error, 1)
+	go func() {
+		s.log.Info().Str("addr", s.cfg.Addr()).Msg("HTTP server listening")
+		listenErr := srv.ListenAndServe()
+		if listenErr == nil || errors.Is(listenErr, http.ErrServerClosed) {
+			serveErr <- nil
+			return
+		}
+		s.log.Error().Err(listenErr).Msg("HTTP server error")
+		serveErr <- listenErr
+	}()
+
+	select {
+	case err := <-serveErr:
+		return err
+	case <-ctx.Done():
+		s.log.Info().Msg("Context cancelled, shutting down HTTP server")
+	}
+
+	// A fresh context is used because ctx is already cancelled at this point.
+	shutdownCtx, cancel := context.WithTimeout(context.Background(), s.cfg.ShutdownTimeout)
+	defer cancel()
+	return srv.Shutdown(shutdownCtx)
+}
+
+// registerPublicRoutes installs the unauthenticated endpoints: a service banner at
+// "/", "/healthz" and "/readyz" which always answer 200, an auth readiness check at
+// "/health/auth", and the Swagger handler under "/swagger".
+func registerPublicRoutes(engine *gin.Engine, cfg *config.Config, authValidator *auth.Validator) {
+	// writeStatus answers with {"status": <status>} and the given HTTP code.
+	writeStatus := func(c *gin.Context, code int, status string) {
+		c.JSON(code, gin.H{"status": status})
+	}
+
+	engine.GET("/", func(c *gin.Context) {
+		c.JSON(http.StatusOK, gin.H{"service": cfg.ServiceName, "status": "ok"})
+	})
+	engine.GET("/healthz", func(c *gin.Context) { writeStatus(c, http.StatusOK, "healthy") })
+	engine.GET("/readyz", func(c *gin.Context) { writeStatus(c, http.StatusOK, "ready") })
+
+	// Without a validator there is nothing to wait for, so the check reports ready.
+	engine.GET("/health/auth", func(c *gin.Context) {
+		if authValidator != nil && !authValidator.Ready() {
+			writeStatus(c, http.StatusServiceUnavailable, "initializing")
+			return
+		}
+		writeStatus(c, http.StatusOK, "ready")
+	})
+	engine.GET("/swagger/*any", ginSwagger.WrapHandler(swaggerFiles.Handler))
+}
diff --git a/services/response-api/internal/interfaces/httpserver/middlewares/middlewares.go b/services/response-api/internal/interfaces/httpserver/middlewares/middlewares.go
new file mode 100644
index 00000000..44cc7bfe
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/middlewares/middlewares.go
@@ -0,0 +1,28 @@
+package middlewares
+
+import (
+	"github.com/gin-gonic/gin"
+)
+
+// CORS answers cross-origin requests: any origin is allowed, together with the listed
+// methods and headers. X-API-Key is included because the responses handler reads its
+// credential from that header. OPTIONS preflights end here with 204.
+func CORS() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		c.Writer.Header().Set("Access-Control-Allow-Origin", "*")
+		c.Writer.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
+		c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key")
+		if c.Request.Method == "OPTIONS" {
+			c.AbortWithStatus(204) // no body; remaining handlers are skipped
+			return
+		}
+		c.Next()
+	}
+}
+
+// RequestLogger is a pass-through placeholder: it only calls c.Next() and does not log anything yet.
+func RequestLogger() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		c.Next()
+	}
+}
diff --git a/services/response-api/internal/interfaces/httpserver/requests/requests.go b/services/response-api/internal/interfaces/httpserver/requests/requests.go
new file mode 100644
index 00000000..7143edd1
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/requests/requests.go
@@ -0,0 +1,40 @@
+package requests
+
+// ToolFunctionDefinition describes a function passed to OpenAI compatible APIs.
+type ToolFunctionDefinition struct {
+	Name        string                 `json:"name"`                  // function name
+	Description string                 `json:"description,omitempty"` // optional; omitted from JSON when empty
+	Parameters  map[string]interface{} `json:"parameters"`            // free-form schema object — presumably JSON Schema; confirm
+}
+
+// ToolDefinition describes a tool in the HTTP contract.
+type ToolDefinition struct {
+	Type     string                 `json:"type"`     // tool kind; not validated in this package
+	Function ToolFunctionDefinition `json:"function"` // function exposed by the tool
+}
+
+// ToolChoice allows callers to force or disable tools.
+type ToolChoice struct {
+	Type     string `json:"type"` // choice mode; accepted values are not validated in this package
+	Function struct {
+		Name string `json:"name"` // name of the function the choice refers to
+	} `json:"function"`
+}
+
+// CreateResponseRequest models POST /v1/responses input; pointer fields distinguish "omitted" from an explicit zero value.
+type CreateResponseRequest struct {
+	Model              string                 `json:"model" binding:"required"`       // required
+	Input              interface{}            `json:"input" binding:"required"`       // required; any JSON value
+	SystemPrompt       *string                `json:"system_prompt,omitempty"`        // optional
+	MaxTokens          *int                   `json:"max_tokens,omitempty"`           // optional
+	Temperature        *float64               `json:"temperature,omitempty"`          // optional
+	Tools              []ToolDefinition       `json:"tools,omitempty"`                // optional tool definitions
+	ToolChoice         *ToolChoice            `json:"tool_choice,omitempty"`          // optional tool selection
+	Stream             *bool                  `json:"stream,omitempty"`               // the handler treats omitted as false
+	Background         *bool                  `json:"background,omitempty"`           // the handler rejects true unless store is true
+	Store              *bool                  `json:"store,omitempty"`                // the handler treats omitted as false
+	PreviousResponseID *string                `json:"previous_response_id,omitempty"` // optional reference to an earlier response
+	Conversation       *string                `json:"conversation,omitempty"`         // optional conversation identifier
+	Metadata           map[string]interface{} `json:"metadata,omitempty"`             // free-form key/values
+	User               string                 `json:"user,omitempty"`                 // caller-supplied user ID; the handler falls back to the JWT subject, then "guest"
+}
diff --git a/services/response-api/internal/interfaces/httpserver/responses/responses.go b/services/response-api/internal/interfaces/httpserver/responses/responses.go
new file mode 100644
index 00000000..98fbfc3f
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/responses/responses.go
@@ -0,0 +1,109 @@
+package responses
+
+import (
+	"errors"
+	"net/http"
+
+	"jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+)
+
+// ErrorResponse is the JSON error body; HandleError and HandleNewError fill Error and Message with the same text.
+type ErrorResponse struct {
+	Code          string `json:"code"`                 // UUID from PlatformError
+	Error         string `json:"error"`                // caller-supplied message
+	Message       string `json:"message,omitempty"`    // same text as Error when built by the helpers in this file
+	ErrorInstance error  `json:"-"`                    // underlying error; never serialized
+	RequestID     string `json:"request_id,omitempty"` // request ID reported by the PlatformError, when there is one
+}
+
+// HandleError aborts the request with a JSON ErrorResponse derived from err.
+// When err is (or wraps) a *platformerrors.PlatformError, the HTTP status comes from
+// its error type and the response carries its UUID and request ID; any other error
+// is answered with 500. In both cases the client-visible text is the caller-supplied
+// message — err's own text is kept only in ErrorInstance, which is not serialized.
+func HandleError(reqCtx *gin.Context, err error, message string) {
+	// Start from the generic 500 answer and refine it if err is a platform error.
+	status := http.StatusInternalServerError
+	payload := ErrorResponse{
+		Error:         message,
+		Message:       message,
+		ErrorInstance: err,
+	}
+
+	var platformErr *platformerrors.PlatformError
+	if errors.As(err, &platformErr) {
+		status = platformerrors.ErrorTypeToHTTPStatus(platformErr.GetErrorType())
+		payload.Code = platformErr.GetUUID()
+		payload.ErrorInstance = platformErr
+		payload.RequestID = platformErr.GetRequestID()
+	}
+
+	// Write the JSON body and abort the handler chain in one step.
+	reqCtx.AbortWithStatusJSON(status, payload)
+}
+
+// HandleNewError creates a new typed error at the route layer and answers the request
+// with it: the status follows the new error's type and Code is the new error's UUID.
+func HandleNewError(reqCtx *gin.Context, errorType platformerrors.ErrorType, message string, uuid string) {
+	routeErr := platformerrors.NewError(
+		reqCtx.Request.Context(), platformerrors.LayerRoute, errorType, message, nil, uuid,
+	)
+	reqCtx.AbortWithStatusJSON(
+		platformerrors.ErrorTypeToHTTPStatus(routeErr.GetErrorType()),
+		ErrorResponse{
+			Code:          routeErr.GetUUID(),
+			Error:         message,
+			Message:       message,
+			ErrorInstance: routeErr,
+			RequestID:     routeErr.GetRequestID(),
+		},
+	)
+}
+
+// ResponsePayload is the JSON body returned to clients for a response; FromDomain builds it from the domain model.
+type ResponsePayload struct {
+	ID                 string                 `json:"id"`
+	Object             string                 `json:"object"`
+	Created            int64                  `json:"created"`
+	CreatedAt          int64                  `json:"created_at"` // Same as Created, for compatibility
+	Model              string                 `json:"model"`
+	Status             string                 `json:"status"`
+	Input              interface{}            `json:"input"`
+	Output             interface{}            `json:"output,omitempty"`
+	Usage              interface{}            `json:"usage,omitempty"`
+	Metadata           map[string]interface{} `json:"metadata,omitempty"`
+	ConversationID     *string                `json:"conversation_id,omitempty"`
+	PreviousResponseID *string                `json:"previous_response_id,omitempty"`
+	SystemPrompt       *string                `json:"system_prompt,omitempty"`
+	Stream             bool                   `json:"stream"`
+	Background         bool                   `json:"background"`
+	Store              bool                   `json:"store"`
+	Error              interface{}            `json:"error,omitempty"`
+}
+
+// FromDomain maps the domain response to DTO. Created and CreatedAt both carry the
+// creation time as Unix seconds. Usage and Error are copied only when set: storing a
+// nil pointer in an interface{} field yields a non-nil interface, which defeats
+// omitempty and would serialize as "usage": null / "error": null.
+func FromDomain(r *response.Response) ResponsePayload {
+	createdUnix := r.CreatedAt.Unix()
+	payload := ResponsePayload{
+		ID: r.PublicID, Object: r.Object, Model: r.Model, Status: string(r.Status),
+		Created: createdUnix, CreatedAt: createdUnix, // Duplicate for compatibility
+		Input: r.Input, Output: r.Output, Metadata: r.Metadata,
+		ConversationID: r.ConversationPublicID, PreviousResponseID: r.PreviousResponseID,
+		SystemPrompt: r.SystemPrompt,
+		Stream: r.Stream, Background: r.Background, Store: r.Store,
+	}
+
+	if r.Usage != nil {
+		payload.Usage = r.Usage
+	}
+	if r.Error != nil {
+		payload.Error = r.Error
+	}
+	return payload
+}
diff --git a/services/response-api/internal/interfaces/httpserver/routes/provider.go b/services/response-api/internal/interfaces/httpserver/routes/provider.go
new file mode 100644
index 00000000..69398ddd
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/routes/provider.go
@@ -0,0 +1,25 @@
+package routes
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/response-api/internal/interfaces/httpserver/handlers"
+	v1 "jan-server/services/response-api/internal/interfaces/httpserver/routes/v1"
+)
+
+// Provider coordinates all route registrations.
+type Provider struct {
+	V1 *v1.Routes // registrar for the /v1 route group
+}
+
+// NewProvider builds the route provider; its v1 routes are bound to handlerProvider.
+func NewProvider(handlerProvider *handlers.Provider) *Provider {
+	provider := new(Provider)
+	provider.V1 = v1.NewRoutes(handlerProvider)
+	return provider
+}
+
+// Register attaches all available routes to the gin engine (currently only the v1 group).
+func (p *Provider) Register(engine *gin.Engine) {
+	p.V1.Register(engine)
+}
diff --git a/services/response-api/internal/interfaces/httpserver/routes/v1/responses_routes.go b/services/response-api/internal/interfaces/httpserver/routes/v1/responses_routes.go
new file mode 100644
index 00000000..016dd669
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/routes/v1/responses_routes.go
@@ -0,0 +1,15 @@
+package v1
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/response-api/internal/interfaces/httpserver/handlers"
+)
+
+// registerResponseRoutes mounts the response endpoints on router, relative to
+// whatever prefix router already carries. Routes are registered in the order
+// listed in the table.
+func registerResponseRoutes(router gin.IRoutes, handler *handlers.ResponseHandler) {
+	endpoints := []struct {
+		method string
+		path   string
+		handle gin.HandlerFunc
+	}{
+		{"POST", "/responses", handler.Create},
+		{"GET", "/responses/:response_id", handler.Get},
+		{"DELETE", "/responses/:response_id", handler.Delete},
+		{"POST", "/responses/:response_id/cancel", handler.Cancel},
+		{"GET", "/responses/:response_id/input_items", handler.ListInputItems},
+	}
+
+	for _, ep := range endpoints {
+		router.Handle(ep.method, ep.path, ep.handle)
+	}
+}
diff --git a/services/response-api/internal/interfaces/httpserver/routes/v1/routes.go b/services/response-api/internal/interfaces/httpserver/routes/v1/routes.go
new file mode 100644
index 00000000..3227e873
--- /dev/null
+++ b/services/response-api/internal/interfaces/httpserver/routes/v1/routes.go
@@ -0,0 +1,25 @@
+package v1
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/response-api/internal/interfaces/httpserver/handlers"
+)
+
+// Routes registers the v1 endpoints using the handlers it was built with.
+type Routes struct {
+	handlers *handlers.Provider
+}
+
+// NewRoutes returns a v1 registrar backed by handlerProvider.
+func NewRoutes(handlerProvider *handlers.Provider) *Routes {
+	routes := new(Routes)
+	routes.handlers = handlerProvider
+	return routes
+}
+
+// Register creates the /v1 route group on engine and mounts the response
+// endpoints inside it.
+func (r *Routes) Register(engine *gin.Engine) {
+	v1Group := engine.Group("/v1")
+	responseHandler := r.handlers.Response
+	registerResponseRoutes(v1Group, responseHandler)
+}
diff --git a/services/response-api/internal/utils/platformerrors/errors.go b/services/response-api/internal/utils/platformerrors/errors.go
new file mode 100644
index 00000000..d47c0709
--- /dev/null
+++ b/services/response-api/internal/utils/platformerrors/errors.go
@@ -0,0 +1,210 @@
+package platformerrors
+
+import (
+	"context"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+)
+
+// getRequestIDFromContext returns the string stored in ctx under the
+// "requestID" key, or "" when the key is absent or holds a non-string value.
+func getRequestIDFromContext(ctx context.Context) string {
+	// A failed type assertion leaves id at its zero value, the empty string.
+	id, _ := ctx.Value("requestID").(string)
+	return id
+}
+
+// ErrorType represents the category of error.
+// ErrorTypeToHTTPStatus translates each category into an HTTP status code.
+type ErrorType string
+
+// Error categories. Per ErrorTypeToHTTPStatus: NotFound -> 404,
+// Validation -> 400, Conflict -> 409, Unauthorized -> 401, Forbidden -> 403,
+// NotImplemented -> 501, External -> 502; TooManyRecords, DatabaseError,
+// Internal and any unrecognised value -> 500.
+const (
+	ErrorTypeNotFound       ErrorType = "NOT_FOUND"
+	ErrorTypeTooManyRecords ErrorType = "TOO_MANY_RECORDS"
+	ErrorTypeValidation     ErrorType = "VALIDATION"
+	ErrorTypeConflict       ErrorType = "CONFLICT"
+	ErrorTypeUnauthorized   ErrorType = "UNAUTHORIZED"
+	ErrorTypeForbidden      ErrorType = "FORBIDDEN"
+	ErrorTypeInternal       ErrorType = "INTERNAL"
+	ErrorTypeExternal       ErrorType = "EXTERNAL"
+	ErrorTypeDatabaseError  ErrorType = "DATABASE_ERROR"
+	ErrorTypeNotImplemented ErrorType = "NOT_IMPLEMENTED"
+)
+
+// Layer represents the application layer where the error occurred.
+// It appears as the first bracketed segment of PlatformError.Error() and as
+// the "layer" field written by LogError.
+type Layer string
+
+// Known layers; the string values are what ends up in messages and logs.
+const (
+	LayerRepository     Layer = "repository"
+	LayerDomain         Layer = "domain"
+	LayerHandler        Layer = "handler"
+	LayerRoute          Layer = "route"
+	LayerInfrastructure Layer = "infrastructure"
+	LayerCommon         Layer = "common"
+)
+
+// PlatformError represents an error with context and metadata.
+// It implements error and supports errors.Is / errors.As through Unwrap.
+type PlatformError struct {
+	// UUID identifies this error; NewErrorWithContext uses the caller's
+	// customUUID, or a fallback value when that is empty.
+	UUID string
+	// Type is the category used for HTTP status mapping and IsErrorType.
+	Type ErrorType
+	// Message is the human-readable description, also used as the log message.
+	Message string
+	// Err is the wrapped cause; it may be nil.
+	Err error
+	// Context holds extra key/value pairs that LogError emits as log fields.
+	Context map[string]any
+	// RequestID is read from the "requestID" context value at construction;
+	// it is empty when the context carries none.
+	RequestID string
+	// Layer is the application layer that created the error.
+	Layer Layer
+	// Timestamp is the UTC time at which the error was constructed.
+	Timestamp time.Time
+}
+
+// Error implements the error interface. The text has the form
+// "[layer][type][uuid] message", followed by ": cause" when a wrapped error
+// is present.
+func (e *PlatformError) Error() string {
+	if e.Err == nil {
+		return fmt.Sprintf("[%s][%s][%s] %s", e.Layer, e.Type, e.UUID, e.Message)
+	}
+	return fmt.Sprintf("[%s][%s][%s] %s: %v", e.Layer, e.Type, e.UUID, e.Message, e.Err)
+}
+
+// Unwrap returns the underlying error (nil when there is none), which lets
+// errors.Is and errors.As look through a PlatformError.
+func (e *PlatformError) Unwrap() error {
+	return e.Err
+}
+
+// GetErrorType returns the error's category.
+func (e *PlatformError) GetErrorType() ErrorType {
+	return e.Type
+}
+
+// GetRequestID returns the request ID captured when the error was created;
+// it is empty if the context carried none.
+func (e *PlatformError) GetRequestID() string {
+	return e.RequestID
+}
+
+// GetUUID returns the identifier assigned to this error.
+func (e *PlatformError) GetUUID() string {
+	return e.UUID
+}
+
+// NewError creates a new PlatformError with the specified parameters.
+// It is NewErrorWithContext without extra context fields: err may be nil, and
+// an empty customUUID makes NewErrorWithContext fall back to its own value.
+func NewError(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string) *PlatformError {
+	return NewErrorWithContext(ctx, layer, errorType, message, err, customUUID, nil)
+}
+
+// NewErrorWithContext creates a new PlatformError with additional context fields
+func NewErrorWithContext(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string, contextFields map[string]any) *PlatformError {
+	requestID := getRequestIDFromContext(ctx)
+
+	errorUUID := customUUID
+	if errorUUID == "" {
+		errorUUID = "auto-generated-uuid"
+	}
+
+	errorContext := make(map[string]any)
+	for k, v := range contextFields {
+		errorContext[k] = v
+	}
+
+	platformError := &PlatformError{
+		UUID:      errorUUID,
+		Type:      errorType,
+		Message:   message,
+		Err:       err,
+		RequestID: requestID,
+		Layer:     layer,
+		Timestamp: time.Now().UTC(),
+		Context:   errorContext,
+	}
+
+	return platformError
+}
+
+// AsError wraps err with layer context and returns a nil *PlatformError when
+// err is nil.
+//
+// If err already contains a PlatformError, the new error keeps that error's
+// type and UUID, prefixes its message with message, and wraps the
+// PlatformError found in the chain. Otherwise the type is NOT_FOUND for
+// gorm.ErrRecordNotFound and INTERNAL for everything else.
+func AsError(ctx context.Context, layer Layer, err error, message string) *PlatformError {
+	if err == nil {
+		return nil
+	}
+
+	var inner *PlatformError
+	switch {
+	case errors.As(err, &inner):
+		combined := fmt.Sprintf("%s: %s", message, inner.Message)
+		return NewError(ctx, layer, inner.Type, combined, inner, inner.UUID)
+	case errors.Is(err, gorm.ErrRecordNotFound):
+		return NewError(ctx, layer, ErrorTypeNotFound, message, err, "")
+	default:
+		return NewError(ctx, layer, ErrorTypeInternal, message, err, "")
+	}
+}
+
+// ErrorTypeToHTTPStatus maps error types to HTTP status codes. Categories
+// without a dedicated status (TOO_MANY_RECORDS, DATABASE_ERROR, INTERNAL and
+// any unknown value) map to 500 Internal Server Error.
+func ErrorTypeToHTTPStatus(errorType ErrorType) int {
+	switch errorType {
+	case ErrorTypeNotFound:
+		return http.StatusNotFound
+	case ErrorTypeValidation:
+		return http.StatusBadRequest
+	case ErrorTypeConflict:
+		return http.StatusConflict
+	case ErrorTypeUnauthorized:
+		return http.StatusUnauthorized
+	case ErrorTypeForbidden:
+		return http.StatusForbidden
+	case ErrorTypeNotImplemented:
+		return http.StatusNotImplemented
+	case ErrorTypeExternal:
+		return http.StatusBadGateway
+	}
+	return http.StatusInternalServerError
+}
+
+// IsErrorType reports whether err is, or wraps, a PlatformError whose type
+// equals errorType. A nil err and errors without a PlatformError in their
+// chain yield false.
+func IsErrorType(err error, errorType ErrorType) bool {
+	if err == nil {
+		return false
+	}
+
+	var found *PlatformError
+	return errors.As(err, &found) && found.Type == errorType
+}
+
+// LogError writes err to logger at error level, using err.Message as the log
+// message. It always emits error_uuid, error_type, layer and timestamp_utc,
+// adds request_id when one is set, every entry of err.Context as its own
+// field, and the wrapped cause when present. A nil err is ignored.
+func LogError(logger zerolog.Logger, err *PlatformError) {
+	if err == nil {
+		return
+	}
+
+	entry := logger.Error()
+	entry = entry.Str("error_uuid", err.UUID)
+	entry = entry.Str("error_type", string(err.Type))
+	entry = entry.Str("layer", string(err.Layer))
+	entry = entry.Time("timestamp_utc", err.Timestamp)
+
+	if id := err.RequestID; id != "" {
+		entry = entry.Str("request_id", id)
+	}
+
+	for field, value := range err.Context {
+		entry = entry.Interface(field, value)
+	}
+
+	if cause := err.Err; cause != nil {
+		entry = entry.Err(cause)
+	}
+
+	entry.Msg(err.Message)
+}
diff --git a/services/response-api/internal/webhook/http.go b/services/response-api/internal/webhook/http.go
new file mode 100644
index 00000000..a440124b
--- /dev/null
+++ b/services/response-api/internal/webhook/http.go
@@ -0,0 +1,139 @@
+package webhook
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog"
+)
+
+// HTTPService implements webhook notifications via HTTP POST.
+type HTTPService struct {
+	// httpClient performs the deliveries; NewHTTPService gives it a 10s timeout.
+	httpClient *http.Client
+	// log is tagged with component=webhook by NewHTTPService.
+	log zerolog.Logger
+	// maxRetries is the total number of delivery attempts sendWebhook makes.
+	maxRetries int
+	// retryDelay is the pause between two consecutive attempts.
+	retryDelay time.Duration
+}
+
+// NewHTTPService returns a webhook service with fixed defaults: a 10 second
+// HTTP client timeout, 3 delivery attempts and 2 seconds between attempts.
+// The logger is tagged with component=webhook.
+func NewHTTPService(log zerolog.Logger) *HTTPService {
+	client := &http.Client{Timeout: 10 * time.Second}
+	componentLog := log.With().Str("component", "webhook").Logger()
+
+	svc := new(HTTPService)
+	svc.httpClient = client
+	svc.log = componentLog
+	svc.maxRetries = 3
+	svc.retryDelay = 2 * time.Second
+	return svc
+}
+
+// NotifyCompleted sends a webhook notification when a response completes.
+func (s *HTTPService) NotifyCompleted(ctx context.Context, responseID string, output interface{}, metadata map[string]interface{}, completedAt *time.Time) error {
+	webhookURL := extractWebhookURL(metadata)
+	if webhookURL == "" {
+		s.log.Debug().Str("response_id", responseID).Msg("no webhook URL configured, skipping notification")
+		return nil
+	}
+
+	payload := WebhookPayload{
+		ID:          responseID,
+		Event:       "response.completed",
+		Status:      "completed",
+		Output:      output,
+		Metadata:    metadata,
+		CompletedAt: formatTime(completedAt),
+	}
+
+	return s.sendWebhook(ctx, webhookURL, payload, responseID)
+}
+
+// NotifyFailed sends a webhook notification when a response fails.
+func (s *HTTPService) NotifyFailed(ctx context.Context, responseID string, errorCode string, errorMessage string, metadata map[string]interface{}) error {
+	webhookURL := extractWebhookURL(metadata)
+	if webhookURL == "" {
+		s.log.Debug().Str("response_id", responseID).Msg("no webhook URL configured, skipping notification")
+		return nil
+	}
+
+	payload := WebhookPayload{
+		ID:       responseID,
+		Event:    "response.failed",
+		Status:   "failed",
+		Error:    &ErrorDetails{Code: errorCode, Message: errorMessage},
+		Metadata: metadata,
+	}
+
+	return s.sendWebhook(ctx, webhookURL, payload, responseID)
+}
+
+// sendWebhook POSTs payload as JSON to url, making up to s.maxRetries attempts.
+//
+// An attempt succeeds on any 2xx status. Transport errors and non-2xx
+// statuses are logged and retried after s.retryDelay. The function returns nil
+// on the first success, the last attempt's error once attempts are exhausted,
+// or an error wrapping ctx.Err() if ctx ends while waiting to retry.
+//
+// Each response body is closed as soon as its status has been read, so bodies
+// do not pile up across retries, and the retry wait observes ctx so a
+// cancelled caller is not blocked for the full delay.
+func (s *HTTPService) sendWebhook(ctx context.Context, url string, payload WebhookPayload, responseID string) error {
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return fmt.Errorf("marshal webhook payload: %w", err)
+	}
+
+	var lastErr error
+	for attempt := 1; attempt <= s.maxRetries; attempt++ {
+		// A fresh request (and body reader) is needed for every attempt.
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
+		if err != nil {
+			return fmt.Errorf("create webhook request: %w", err)
+		}
+
+		req.Header.Set("Content-Type", "application/json")
+		req.Header.Set("User-Agent", "jan-response-api/1.0")
+		req.Header.Set("X-Jan-Event", payload.Event)
+		req.Header.Set("X-Jan-Response-ID", responseID)
+
+		resp, err := s.httpClient.Do(req)
+		if err != nil {
+			lastErr = fmt.Errorf("send webhook (attempt %d/%d): %w", attempt, s.maxRetries, err)
+			s.log.Warn().Err(err).Str("url", url).Int("attempt", attempt).Msg("webhook delivery failed")
+		} else {
+			status := resp.StatusCode
+			// Only the status is needed; close right away instead of deferring
+			// inside the loop, which would hold every body open until return.
+			resp.Body.Close()
+
+			if status >= 200 && status < 300 {
+				s.log.Info().Str("url", url).Int("status", status).Str("response_id", responseID).Msg("webhook delivered successfully")
+				return nil
+			}
+
+			lastErr = fmt.Errorf("webhook returned status %d (attempt %d/%d)", status, attempt, s.maxRetries)
+			s.log.Warn().Int("status", status).Str("url", url).Int("attempt", attempt).Msg("webhook delivery failed")
+		}
+
+		if attempt == s.maxRetries {
+			break
+		}
+
+		// Pause before the next attempt, but stop at once if ctx ends.
+		timer := time.NewTimer(s.retryDelay)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			return fmt.Errorf("webhook delivery cancelled after attempt %d/%d: %w", attempt, s.maxRetries, ctx.Err())
+		case <-timer.C:
+		}
+	}
+
+	return lastErr
+}
+
+// extractWebhookURL returns the webhook target stored in metadata. It checks
+// "webhook_url" first and "webhookUrl" second, and returns the first one whose
+// value is a string, even an empty one. It returns "" when neither key holds
+// a string or metadata is nil.
+func extractWebhookURL(metadata map[string]interface{}) string {
+	// Indexing a nil map is safe and simply finds nothing.
+	for _, key := range [...]string{"webhook_url", "webhookUrl"} {
+		if candidate, isString := metadata[key].(string); isString {
+			return candidate
+		}
+	}
+	return ""
+}
+
+// formatTime renders t as an RFC 3339 string and returns a pointer to it, or
+// nil when t is nil.
+func formatTime(t *time.Time) *string {
+	if t != nil {
+		rendered := t.Format(time.RFC3339)
+		return &rendered
+	}
+	return nil
+}
diff --git a/services/response-api/internal/webhook/webhook.go b/services/response-api/internal/webhook/webhook.go
new file mode 100644
index 00000000..4b871d23
--- /dev/null
+++ b/services/response-api/internal/webhook/webhook.go
@@ -0,0 +1,32 @@
+package webhook
+
+import (
+	"context"
+	"time"
+)
+
+// Service handles webhook notifications for response events.
+// In both methods the target URL is not a parameter; the HTTP implementation
+// in this package reads it from metadata.
+type Service interface {
+	// NotifyCompleted sends a webhook notification when a response completes.
+	NotifyCompleted(ctx context.Context, responseID string, output interface{}, metadata map[string]interface{}, completedAt *time.Time) error
+
+	// NotifyFailed sends a webhook notification when a response fails.
+	NotifyFailed(ctx context.Context, responseID string, errorCode string, errorMessage string, metadata map[string]interface{}) error
+}
+
+// ErrorDetails contains machine readable error info.
+type ErrorDetails struct {
+	Code    string `json:"code"`
+	Message string `json:"message"`
+}
+
+// WebhookPayload is the structure sent to webhook URLs.
+// Output, Error, Metadata and CompletedAt are omitted from the JSON when
+// unset (omitempty).
+type WebhookPayload struct {
+	ID          string                 `json:"id"`
+	Event       string                 `json:"event"` // "response.completed" or "response.failed"
+	Status      string                 `json:"status"`
+	Output      interface{}            `json:"output,omitempty"`
+	Error       *ErrorDetails          `json:"error,omitempty"`
+	Metadata    map[string]interface{} `json:"metadata,omitempty"`
+	CompletedAt *string                `json:"completed_at,omitempty"`
+}
diff --git a/services/response-api/internal/worker/pool.go b/services/response-api/internal/worker/pool.go
new file mode 100644
index 00000000..7c20a51d
--- /dev/null
+++ b/services/response-api/internal/worker/pool.go
@@ -0,0 +1,104 @@
+package worker
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"github.com/rs/zerolog"
+
+	"jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/infrastructure/queue"
+)
+
+// Pool manages multiple background workers.
+// Start creates workerCount workers that share the same queue and response
+// service; Stop signals them and waits for their goroutines to return.
+type Pool struct {
+	workers         []*Worker
+	queue           queue.TaskQueue
+	responseService response.Service
+	workerCount     int
+	taskTimeout     time.Duration
+	log             zerolog.Logger
+	// wg tracks the goroutines launched by Start so that Stop can wait for them.
+	wg sync.WaitGroup
+	// stopChan is allocated by NewPool but not used by Start, Stop or
+	// GetQueueDepth; workers are stopped through their own Stop method.
+	stopChan chan struct{}
+}
+
+// Config contains worker pool configuration.
+type Config struct {
+	// WorkerCount is the number of workers Start launches.
+	WorkerCount int
+	// TaskTimeout is handed to every worker as its per-task timeout.
+	TaskTimeout time.Duration
+}
+
+// NewPool builds a worker pool from its dependencies and cfg. No workers are
+// created or started until Start is called. The logger is tagged with
+// component=worker-pool.
+func NewPool(
+	queue queue.TaskQueue,
+	responseService response.Service,
+	cfg Config,
+	log zerolog.Logger,
+) *Pool {
+	pool := new(Pool)
+	pool.queue = queue
+	pool.responseService = responseService
+	pool.workerCount = cfg.WorkerCount
+	pool.taskTimeout = cfg.TaskTimeout
+	pool.log = log.With().Str("component", "worker-pool").Logger()
+	pool.stopChan = make(chan struct{})
+	return pool
+}
+
+// Start creates workerCount workers (numbered from 1) and runs each in its
+// own goroutine with ctx. It does not block waiting for the workers and, in
+// its current form, always returns nil.
+func (p *Pool) Start(ctx context.Context) error {
+	p.log.Info().Int("worker_count", p.workerCount).Msg("starting worker pool")
+
+	p.workers = make([]*Worker, p.workerCount)
+	for slot := range p.workers {
+		w := NewWorker(slot+1, p.queue, p.responseService, p.taskTimeout, p.log)
+		p.workers[slot] = w
+
+		// Register with the wait group before launching so Stop can wait.
+		p.wg.Add(1)
+		go func() {
+			defer p.wg.Done()
+			w.Start(ctx)
+		}()
+	}
+
+	p.log.Info().Msg("worker pool started")
+
+	return nil
+}
+
+// Stop signals every worker to stop and then waits up to 30 seconds for
+// their goroutines to return. On timeout it logs a warning and returns
+// without waiting any longer.
+func (p *Pool) Stop() {
+	p.log.Info().Msg("stopping worker pool")
+
+	// Ask each worker to leave its loop.
+	for i := range p.workers {
+		p.workers[i].Stop()
+	}
+
+	// Turn the blocking Wait into a channel so it can be raced with a timer.
+	finished := make(chan struct{})
+	go func() {
+		defer close(finished)
+		p.wg.Wait()
+	}()
+
+	deadline := time.After(30 * time.Second)
+	select {
+	case <-finished:
+		p.log.Info().Msg("all workers stopped gracefully")
+	case <-deadline:
+		p.log.Warn().Msg("worker pool shutdown timed out")
+	}
+}
+
+// GetQueueDepth returns the current queue depth by delegating to the
+// underlying task queue; any error from the queue is passed through.
+func (p *Pool) GetQueueDepth(ctx context.Context) (int64, error) {
+	return p.queue.GetQueueDepth(ctx)
+}
diff --git a/services/response-api/internal/worker/worker.go b/services/response-api/internal/worker/worker.go
new file mode 100644
index 00000000..6bde0c98
--- /dev/null
+++ b/services/response-api/internal/worker/worker.go
@@ -0,0 +1,120 @@
+package worker
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/rs/zerolog"
+
+	"jan-server/services/response-api/internal/domain/response"
+	"jan-server/services/response-api/internal/infrastructure/queue"
+)
+
+// Worker processes background tasks from the queue.
+type Worker struct {
+	// id is the worker's number, also attached to its log lines as worker_id.
+	id              int
+	queue           queue.TaskQueue
+	responseService response.Service
+	// taskTimeout bounds the execution of a single task.
+	taskTimeout time.Duration
+	log         zerolog.Logger
+	// stopChan is closed by Stop to make the Start loop return.
+	stopChan chan struct{}
+}
+
+// NewWorker builds a worker with the given id and dependencies. The worker
+// is idle until Start is called. Its logger is tagged with worker_id and
+// component=worker.
+func NewWorker(
+	id int,
+	queue queue.TaskQueue,
+	responseService response.Service,
+	taskTimeout time.Duration,
+	log zerolog.Logger,
+) *Worker {
+	workerLog := log.With().Int("worker_id", id).Str("component", "worker").Logger()
+
+	w := new(Worker)
+	w.id = id
+	w.queue = queue
+	w.responseService = responseService
+	w.taskTimeout = taskTimeout
+	w.log = workerLog
+	w.stopChan = make(chan struct{})
+	return w
+}
+
+// Start begins processing tasks from the queue.
+//
+// It blocks until ctx is done or Stop is called. While running it polls on a
+// 2 second ticker and handles at most one task per tick; tasks run
+// synchronously inside this loop, so a stop request is only noticed between
+// tasks.
+func (w *Worker) Start(ctx context.Context) {
+	w.log.Info().Msg("worker started")
+
+	ticker := time.NewTicker(2 * time.Second) // Poll every 2 seconds
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			w.log.Info().Msg("worker stopped by context")
+			return
+		case <-w.stopChan:
+			w.log.Info().Msg("worker stopped")
+			return
+		case <-ticker.C:
+			// Runs to completion before the select is evaluated again.
+			w.processNextTask(ctx)
+		}
+	}
+}
+
+// Stop signals the worker to leave its Start loop by closing stopChan. A task
+// that is already running is not interrupted by this call.
+//
+// Repeated sequential calls are harmless: closing an already-closed channel
+// would panic, so the channel is closed only while it is still open. The
+// check and the close are not atomic, so Stop must not be called from several
+// goroutines at the same time.
+func (w *Worker) Stop() {
+	select {
+	case <-w.stopChan:
+		// Already closed by an earlier Stop call; nothing to do.
+	default:
+		close(w.stopChan)
+	}
+}
+
+// processNextTask dequeues at most one task and runs it to completion.
+//
+// Steps: dequeue, mark the task as processing, execute it under a timeout of
+// w.taskTimeout, and mark it failed on the queue if execution returns an
+// error. Every failure is logged and the function returns without retrying.
+// Nothing is marked on the queue on success.
+// NOTE(review): presumably the response service records completion itself -
+// confirm.
+func (w *Worker) processNextTask(ctx context.Context) {
+	// Dequeue next task
+	task, err := w.queue.Dequeue(ctx)
+	if err != nil {
+		w.log.Error().Err(err).Msg("failed to dequeue task")
+		return
+	}
+
+	if task == nil {
+		// No tasks available
+		return
+	}
+
+	w.log.Info().
+		Str("response_id", task.PublicID).
+		Str("user_id", task.UserID).
+		Str("model", task.Model).
+		Msg("processing background task")
+
+	// Mark as processing
+	if err := w.queue.MarkProcessing(ctx, task.PublicID); err != nil {
+		w.log.Error().Err(err).Str("response_id", task.PublicID).Msg("failed to mark processing")
+		return
+	}
+
+	// Execute task with timeout
+	taskCtx, cancel := context.WithTimeout(ctx, w.taskTimeout)
+	defer cancel()
+
+	// Execute the background task using the service's ExecuteBackground method
+	if err := w.executeTask(taskCtx, task.PublicID); err != nil {
+		w.log.Error().Err(err).Str("response_id", task.PublicID).Msg("task execution failed")
+		// MarkFailed uses the parent ctx, not taskCtx, so it still has a live
+		// context after a task timeout.
+		// NOTE(review): if the parent ctx itself was cancelled (shutdown),
+		// this call likely fails too and the task may stay marked as
+		// processing - confirm the queue's behaviour.
+		if markErr := w.queue.MarkFailed(ctx, task.PublicID, err); markErr != nil {
+			w.log.Error().Err(markErr).Str("response_id", task.PublicID).Msg("failed to mark task as failed")
+		}
+		return
+	}
+
+	w.log.Info().Str("response_id", task.PublicID).Msg("task completed successfully")
+}
+
+func (w *Worker) executeTask(ctx context.Context, publicID string) error {
+	// Check if service has ExecuteBackground method
+	type backgroundExecutor interface {
+		ExecuteBackground(ctx context.Context, publicID string) error
+	}
+
+	if executor, ok := w.responseService.(backgroundExecutor); ok {
+		return executor.ExecuteBackground(ctx, publicID)
+	}
+
+	return fmt.Errorf("response service does not support background execution")
+}
diff --git a/services/response-api/migrations/000001_init_schema.down.sql b/services/response-api/migrations/000001_init_schema.down.sql
new file mode 100644
index 00000000..cbba9c3a
--- /dev/null
+++ b/services/response-api/migrations/000001_init_schema.down.sql
@@ -0,0 +1,8 @@
+-- Reverts 000001_init_schema.up.sql.
+-- Drop tables in reverse order (respecting foreign key constraints).
+-- IF EXISTS makes each statement a no-op when the object is already gone.
+DROP TABLE IF EXISTS response_api.tool_executions CASCADE;
+DROP TABLE IF EXISTS response_api.conversation_items CASCADE;
+DROP TABLE IF EXISTS response_api.responses CASCADE;
+DROP TABLE IF EXISTS response_api.conversations CASCADE;
+
+-- Drop schema (CASCADE also removes anything still left inside it)
+DROP SCHEMA IF EXISTS response_api CASCADE;
diff --git a/services/response-api/migrations/000001_init_schema.up.sql b/services/response-api/migrations/000001_init_schema.up.sql
new file mode 100644
index 00000000..d0f092cf
--- /dev/null
+++ b/services/response-api/migrations/000001_init_schema.up.sql
@@ -0,0 +1,95 @@
+-- Create schema
+CREATE SCHEMA IF NOT EXISTS response_api;
+
+-- Set search path to response_api schema
+-- Every object below is also schema-qualified, so the statements do not rely
+-- on this setting.
+-- NOTE(review): SET (without LOCAL) lasts for the rest of the session -
+-- confirm this does not affect the migration tool's own bookkeeping tables.
+SET search_path TO response_api;
+
+-- ============================================================================
+-- CONVERSATIONS
+-- ============================================================================
+-- id is the internal key used by foreign keys; public_id is a separate
+-- unique identifier.
+CREATE TABLE response_api.conversations (
+    id SERIAL PRIMARY KEY,
+    public_id VARCHAR(64) NOT NULL UNIQUE,
+    user_id VARCHAR(64),
+    metadata JSONB,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX idx_conversations_user_id ON response_api.conversations(user_id);
+CREATE INDEX idx_conversations_created_at ON response_api.conversations(created_at);
+
+-- ============================================================================
+-- RESPONSES
+-- ============================================================================
+-- Notes:
+--   * conversation_id is nullable and has no ON DELETE action, so deleting a
+--     conversation that still has responses is rejected.
+--   * previous_response_id is a plain VARCHAR with no foreign key.
+--     NOTE(review): presumably it holds another response's public_id - confirm.
+--   * NOTE(review): api_key is stored as TEXT; confirm whether the value is
+--     encrypted or hashed before it is written.
+CREATE TABLE response_api.responses (
+    id SERIAL PRIMARY KEY,
+    public_id VARCHAR(64) NOT NULL UNIQUE,
+    user_id VARCHAR(64),
+    model VARCHAR(128),
+    system_prompt TEXT,
+    input JSONB,
+    output JSONB,
+    status VARCHAR(32) NOT NULL,
+    stream BOOLEAN NOT NULL DEFAULT false,
+    background BOOLEAN NOT NULL DEFAULT false,
+    store BOOLEAN NOT NULL DEFAULT false,
+    api_key TEXT,
+    metadata JSONB,
+    usage JSONB,
+    error JSONB,
+    conversation_id INTEGER REFERENCES response_api.conversations(id),
+    previous_response_id VARCHAR(64),
+    object VARCHAR(32) NOT NULL DEFAULT 'response',
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    queued_at TIMESTAMPTZ,
+    started_at TIMESTAMPTZ,
+    completed_at TIMESTAMPTZ,
+    cancelled_at TIMESTAMPTZ,
+    failed_at TIMESTAMPTZ
+);
+
+CREATE INDEX idx_responses_user_id ON response_api.responses(user_id);
+CREATE INDEX idx_responses_status ON response_api.responses(status);
+CREATE INDEX idx_responses_conversation_id ON response_api.responses(conversation_id);
+CREATE INDEX idx_responses_created_at ON response_api.responses(created_at);
+-- Partial indexes: the first covers only background rows, the second only
+-- rows whose status is 'queued'.
+CREATE INDEX idx_responses_background_status ON response_api.responses(background, status) WHERE background = true;
+CREATE INDEX idx_responses_queued_at ON response_api.responses(queued_at) WHERE status = 'queued';
+
+-- ============================================================================
+-- CONVERSATION ITEMS
+-- ============================================================================
+-- Items are deleted together with their conversation (ON DELETE CASCADE).
+-- The (conversation_id, sequence) index is not UNIQUE, so the database does
+-- not prevent duplicate sequence numbers within a conversation.
+CREATE TABLE response_api.conversation_items (
+    id SERIAL PRIMARY KEY,
+    conversation_id INTEGER NOT NULL REFERENCES response_api.conversations(id) ON DELETE CASCADE,
+    role VARCHAR(32) NOT NULL,
+    status VARCHAR(32),
+    content JSONB,
+    sequence INTEGER NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- NOTE(review): the single-column index is a prefix of the composite one
+-- below and may be redundant.
+CREATE INDEX idx_conversation_items_conversation_id ON response_api.conversation_items(conversation_id);
+CREATE INDEX idx_conversation_items_sequence ON response_api.conversation_items(conversation_id, sequence);
+
+-- ============================================================================
+-- TOOL EXECUTIONS
+-- ============================================================================
+-- Rows reference responses.id (the internal key) and are deleted together
+-- with their response (ON DELETE CASCADE).
+CREATE TABLE response_api.tool_executions (
+    id SERIAL PRIMARY KEY,
+    response_id INTEGER NOT NULL REFERENCES response_api.responses(id) ON DELETE CASCADE,
+    call_id VARCHAR(64),
+    tool_name VARCHAR(128) NOT NULL,
+    arguments JSONB,
+    result JSONB,
+    status VARCHAR(32) NOT NULL,
+    error_message TEXT,
+    execution_order INTEGER,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+-- NOTE(review): the response_id index is a prefix of the
+-- (response_id, execution_order) index and may be redundant.
+CREATE INDEX idx_tool_executions_response_id ON response_api.tool_executions(response_id);
+CREATE INDEX idx_tool_executions_execution_order ON response_api.tool_executions(response_id, execution_order);
+CREATE INDEX idx_tool_executions_status ON response_api.tool_executions(status);
diff --git a/services/response-api/migrations/migrations.go b/services/response-api/migrations/migrations.go
new file mode 100644
index 00000000..91cca1c3
--- /dev/null
+++ b/services/response-api/migrations/migrations.go
@@ -0,0 +1,6 @@
+package migrations
+
+import "embed"
+
+// FS holds every *.sql file of this directory, embedded into the binary at
+// build time.
+//
+//go:embed *.sql
+var FS embed.FS
diff --git a/services/template-api/Dockerfile b/services/template-api/Dockerfile
new file mode 100644
index 00000000..19cd187f
--- /dev/null
+++ b/services/template-api/Dockerfile
@@ -0,0 +1,13 @@
+# Go toolchain version used by the build stage; override with
+# --build-arg GO_VERSION=<version>.
+ARG GO_VERSION=1.25
+
+# Build stage: compile the server for linux/amd64 with cgo disabled.
+FROM golang:${GO_VERSION} AS build
+WORKDIR /src
+# Copy the module files first so the dependency download is cached until
+# go.mod or go.sum change.
+COPY go.mod go.sum ./
+RUN go mod download
+COPY . .
+RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /out/template-api ./cmd/server
+
+# Runtime stage: distroless image containing only the compiled binary.
+FROM gcr.io/distroless/base-debian12
+COPY --from=build /out/template-api /template-api
+EXPOSE 8185
+ENTRYPOINT ["/template-api"]
diff --git a/services/template-api/Makefile b/services/template-api/Makefile
new file mode 100644
index 00000000..a958a76c
--- /dev/null
+++ b/services/template-api/Makefile
@@ -0,0 +1,28 @@
+# Developer tasks for the template-api service.
+# SERVICE is the service name; BIN is where `make build` writes the binary.
+SERVICE := template-api
+BIN := bin/$(SERVICE)
+
+.PHONY: run build test tidy wire swagger clean
+
+# Run the server from source.
+run:
+	@echo "Starting $(SERVICE)..."
+	go run ./cmd/server
+
+# Compile the server into $(BIN).
+build:
+	go build -o $(BIN) ./cmd/server
+
+# Run all Go tests in this module.
+test:
+	go test ./...
+
+# Synchronise go.mod and go.sum with the imports in the source.
+tidy:
+	go mod tidy
+
+# Regenerate the Wire dependency-injection code; fails with an install hint
+# when the wire CLI is not on PATH.
+wire:
+	@if ! command -v wire >/dev/null 2>&1; then echo "wire not installed (go install github.com/google/wire/cmd/wire@latest)"; exit 1; fi
+	wire ./cmd/server
+
+# Regenerate the Swagger docs into docs/swagger; fails with an install hint
+# when the swag CLI is not on PATH.
+swagger:
+	@if ! command -v swag >/dev/null 2>&1; then echo "swag CLI not installed (go install github.com/swaggo/swag/cmd/swag@latest)"; exit 1; fi
+	swag init -g cmd/server/server.go -o docs/swagger
+
+# Remove the built binary.
+clean:
+	rm -rf $(BIN)
diff --git a/services/template-api/README.md b/services/template-api/README.md
new file mode 100644
index 00000000..981222ca
--- /dev/null
+++ b/services/template-api/README.md
@@ -0,0 +1,38 @@
+# template-api
+
+`template-api` is a Go microservice skeleton that mirrors the production layout used by Jan services. Copy this directory when creating a new backend to inherit:
+
+- Environment-driven config loader (`internal/config`).
+- Structured logging via Zerolog.
+- Optional OpenTelemetry tracing.
+- PostgreSQL access via GORM with auto-migrations and seed helpers.
+- Missing databases are auto-created when using standard `postgres://` URLs.
+- Optional Keycloak JWT auth (toggle `AUTH_ENABLED` and set issuer/audience/JWKS).
+- Gin-powered HTTP server with health endpoints.
+- Wire-ready dependency injection entrypoint.
+- Makefile, Dockerfile, and example environment file for local dev.
+
+## Quick start
+
+```bash
+cd services/template-api
+go mod tidy
+make run
+curl http://localhost:8185/healthz
+# Optional:
+make wire      # regenerate dependency injection
+make swagger   # regenerate OpenAPI docs
+```
+
+Set configuration values via exported environment variables or by editing the repo-level `.env` generated by `make env-create`. See `docs/guides/services-template.md` for detailed migration steps.
+
+## Database
+
+- Point `DB_POSTGRESQL_WRITE_DSN` at your PostgreSQL instance (default assumes `postgres:postgres@localhost:5432/template_api`).
+- On startup the service runs GORM auto-migrations for the `samples` table and seeds a single row, which powers the `/v1/sample` endpoint.
+
+## Authentication
+
+- Set `AUTH_ENABLED=true` to require Bearer tokens issued by Keycloak (or any OIDC provider).
+- Provide `AUTH_ISSUER`, `ACCOUNT` (the expected token audience), and `AUTH_JWKS_URL`; the server caches JWKS keys and rejects requests whose token is missing or invalid.
+- Leave `AUTH_ENABLED=false` for open endpoints during local development.
diff --git a/services/template-api/bin/template-api b/services/template-api/bin/template-api
new file mode 100644
index 00000000..a96cedc4
Binary files /dev/null and b/services/template-api/bin/template-api differ
diff --git a/services/template-api/cmd/server/server.go b/services/template-api/cmd/server/server.go
new file mode 100644
index 00000000..114844fa
--- /dev/null
+++ b/services/template-api/cmd/server/server.go
@@ -0,0 +1,111 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"syscall"
+
+	"github.com/joho/godotenv"
+	"github.com/rs/zerolog"
+	gormlogger "gorm.io/gorm/logger"
+
+	"jan-server/services/template-api/internal/config"
+	domain "jan-server/services/template-api/internal/domain/sample"
+	"jan-server/services/template-api/internal/infrastructure/auth"
+	"jan-server/services/template-api/internal/infrastructure/database"
+	"jan-server/services/template-api/internal/infrastructure/logger"
+	"jan-server/services/template-api/internal/infrastructure/observability"
+	repo "jan-server/services/template-api/internal/infrastructure/repository/sample"
+	"jan-server/services/template-api/internal/interfaces/httpserver"
+)
+
+// @title Template API
+// @version 1.0
+// @description Reference Go microservice skeleton for Jan Server
+// @BasePath /
+type Application struct {
+	// httpServer is the server that Start runs.
+	httpServer *httpserver.HTTPServer
+	// log is the application-wide logger.
+	log zerolog.Logger
+}
+
+// NewApplication bundles the HTTP server and logger into an Application.
+func NewApplication(httpServer *httpserver.HTTPServer, log zerolog.Logger) *Application {
+	app := new(Application)
+	app.httpServer = httpServer
+	app.log = log
+	return app
+}
+
+// Start runs the HTTP server with ctx and returns whatever the server's Run
+// method returns.
+func (a *Application) Start(ctx context.Context) error {
+	server := a.httpServer
+	return server.Run(ctx)
+}
+
+// main is the process entry point. All work happens in runServer so that its
+// deferred cleanup has finished before the process exits with the returned
+// code.
+func main() {
+	os.Exit(runServer())
+}
+
+// runServer loads configuration, wires the dependencies by hand and serves
+// HTTP until the signal-aware context is cancelled (SIGINT or SIGTERM). It
+// returns the process exit code: 0 after a clean shutdown, 1 after a startup
+// or runtime failure. A configuration load failure still panics.
+//
+// Failures are logged at fatal level through WithLevel, which unlike Fatal
+// does not call os.Exit, so the deferred telemetry shutdown and signal
+// cleanup still run on the error paths.
+func runServer() int {
+	loadEnvFiles()
+
+	cfg, err := config.Load()
+	if err != nil {
+		panic(err)
+	}
+
+	log := logger.New(cfg)
+
+	// fail writes a fatal-level log entry and yields the failure exit code.
+	fail := func(cause error, msg string) int {
+		log.WithLevel(zerolog.FatalLevel).Err(cause).Msg(msg)
+		return 1
+	}
+
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer stop()
+
+	shutdownTelemetry, err := observability.Setup(ctx, cfg, log)
+	if err != nil {
+		return fail(err, "initialize observability")
+	}
+	defer func() {
+		// Use a fresh context: ctx is already cancelled during shutdown.
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), cfg.ShutdownTimeout)
+		defer cancel()
+		if err := shutdownTelemetry(shutdownCtx); err != nil {
+			log.Error().Err(err).Msg("shutdown telemetry")
+		}
+	}()
+
+	db, err := database.Connect(database.Config{
+		DSN:             cfg.DatabaseURL,
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        gormlogger.Warn,
+	})
+	if err != nil {
+		return fail(err, "connect database")
+	}
+
+	if err := database.AutoMigrate(ctx, db, log); err != nil {
+		return fail(err, "migrate database")
+	}
+
+	authValidator, err := auth.NewValidator(ctx, cfg, log)
+	if err != nil {
+		return fail(err, "initialize auth validator")
+	}
+
+	sampleRepository := repo.NewPostgresRepository(db)
+	sampleService := domain.NewService(sampleRepository, log)
+
+	httpServer := httpserver.New(cfg, log, sampleService, authValidator)
+	app := NewApplication(httpServer, log)
+
+	if err := app.Start(ctx); err != nil {
+		return fail(err, "application stopped with error")
+	}
+
+	log.Info().Msg("application exited cleanly")
+	return 0
+}
+
+// loadEnvFiles loads ".env" and then "../.env" when they exist. Values are
+// applied with godotenv.Overload, so they replace variables that are already
+// set, and the second file wins over the first. A file that cannot be loaded
+// only produces a warning on stderr.
+func loadEnvFiles() {
+	for _, candidate := range [...]string{".env", "../.env"} {
+		if _, statErr := os.Stat(candidate); statErr != nil {
+			continue
+		}
+
+		loadErr := godotenv.Overload(candidate)
+		if loadErr == nil {
+			continue
+		}
+		fmt.Fprintf(os.Stderr, "warning: failed to load %s: %v\n", candidate, loadErr)
+	}
+}
diff --git a/services/template-api/cmd/server/wire.go b/services/template-api/cmd/server/wire.go
new file mode 100644
index 00000000..2c28bc32
--- /dev/null
+++ b/services/template-api/cmd/server/wire.go
@@ -0,0 +1,66 @@
+//go:build wireinject
+
+package main
+
+import (
+	"context"
+
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+	gormlogger "gorm.io/gorm/logger"
+
+	"jan-server/services/template-api/internal/config"
+	domain "jan-server/services/template-api/internal/domain/sample"
+	"jan-server/services/template-api/internal/infrastructure/auth"
+	"jan-server/services/template-api/internal/infrastructure/database"
+	"jan-server/services/template-api/internal/infrastructure/logger"
+	repo "jan-server/services/template-api/internal/infrastructure/repository/sample"
+	"jan-server/services/template-api/internal/interfaces/httpserver"
+)
+
+// sampleSet wires the sample feature: repository, its domain.Repository binding, and service.
+var sampleSet = wire.NewSet(repo.NewPostgresRepository,
+	wire.Bind(new(domain.Repository), new(*repo.PostgresRepository)),
+	domain.NewService,
+)
+
+// BuildApplication is the Wire injector for the template service: it only lists
+// providers, and Wire generates the real constructor from it (see wire_gen.go).
+func BuildApplication(ctx context.Context) (*Application, error) {
+	panic(wire.Build(
+		config.Load,
+		logger.New,
+		newDatabaseConfig,
+		newGormDB,
+		newAuthValidator,
+		sampleSet,
+		httpserver.New,
+		NewApplication,
+	))
+}
+
+func newDatabaseConfig(cfg *config.Config) database.Config {
+	return database.Config{
+		DSN:             cfg.DatabaseURL,
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        gormlogger.Warn,
+	}
+}
+
+// newGormDB opens the database connection and runs AutoMigrate before returning it.
+func newGormDB(ctx context.Context, cfg database.Config, log zerolog.Logger) (*gorm.DB, error) {
+	conn, err := database.Connect(cfg)
+	if err != nil {
+		return nil, err
+	} else if err = database.AutoMigrate(ctx, conn, log); err != nil {
+		return nil, err
+	}
+	return conn, nil
+}
+
+func newAuthValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*auth.Validator, error) {
+	return auth.NewValidator(ctx, cfg, log) // plain pass-through; listed as a provider in wire.Build
+}
diff --git a/services/template-api/cmd/server/wire_gen.go b/services/template-api/cmd/server/wire_gen.go
new file mode 100644
index 00000000..88686ad3
--- /dev/null
+++ b/services/template-api/cmd/server/wire_gen.go
@@ -0,0 +1,76 @@
+// Code generated by Wire. DO NOT EDIT.
+
+//go:generate go run -mod=mod github.com/google/wire/cmd/wire
+//go:build !wireinject
+// +build !wireinject
+
+package main
+
+import (
+	"context"
+	"github.com/google/wire"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+	logger2 "gorm.io/gorm/logger"
+	"jan-server/services/template-api/internal/config"
+	sample2 "jan-server/services/template-api/internal/domain/sample"
+	"jan-server/services/template-api/internal/infrastructure/auth"
+	"jan-server/services/template-api/internal/infrastructure/database"
+	"jan-server/services/template-api/internal/infrastructure/logger"
+	"jan-server/services/template-api/internal/infrastructure/repository/sample"
+	"jan-server/services/template-api/internal/interfaces/httpserver"
+)
+
+// Injectors from wire.go:
+
+// BuildApplication demonstrates how to assemble the template service with Wire.
+func BuildApplication(ctx context.Context) (*Application, error) {
+	configConfig, err := config.Load() // loaded first: the logger, DB config, validator and server all take it
+	if err != nil {
+		return nil, err
+	}
+	zerologLogger := logger.New(configConfig)
+	databaseConfig := newDatabaseConfig(configConfig)
+	db, err := newGormDB(ctx, databaseConfig, zerologLogger) // connects and runs AutoMigrate
+	if err != nil {
+		return nil, err
+	}
+	postgresRepository := sample.NewPostgresRepository(db)
+	service := sample2.NewService(postgresRepository, zerologLogger)
+	validator, err := newAuthValidator(ctx, configConfig, zerologLogger)
+	if err != nil {
+		return nil, err // any provider error aborts assembly and is returned unchanged
+	}
+	httpServer := httpserver.New(configConfig, zerologLogger, service, validator)
+	application := NewApplication(httpServer, zerologLogger)
+	return application, nil
+}
+
+// wire.go:
+
+var sampleSet = wire.NewSet(sample.NewPostgresRepository, wire.Bind(new(sample2.Repository), new(*sample.PostgresRepository)), sample2.NewService) // provider set carried over from wire.go
+
+func newDatabaseConfig(cfg *config.Config) database.Config {
+	return database.Config{
+		DSN:             cfg.DatabaseURL,
+		MaxIdleConns:    cfg.DBMaxIdleConns,
+		MaxOpenConns:    cfg.DBMaxOpenConns,
+		ConnMaxLifetime: cfg.DBConnLifetime,
+		LogLevel:        logger2.Warn, // gorm.io/gorm/logger level, fixed rather than read from cfg
+	}
+}
+
+func newGormDB(ctx context.Context, cfg database.Config, log zerolog.Logger) (*gorm.DB, error) {
+	db, err := database.Connect(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := database.AutoMigrate(ctx, db, log); err != nil { // migrations run before the handle is returned
+		return nil, err
+	}
+	return db, nil
+}
+
+func newAuthValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*auth.Validator, error) {
+	return auth.NewValidator(ctx, cfg, log) // plain pass-through to auth.NewValidator
+}
diff --git a/services/template-api/doc.go b/services/template-api/doc.go
new file mode 100644
index 00000000..a63b7e83
--- /dev/null
+++ b/services/template-api/doc.go
@@ -0,0 +1,2 @@
+// Package templateapi provides module-level docs to satisfy tooling.
+package templateapi
diff --git a/services/template-api/docs/swagger/docs.go b/services/template-api/docs/swagger/docs.go
new file mode 100644
index 00000000..1d475142
--- /dev/null
+++ b/services/template-api/docs/swagger/docs.go
@@ -0,0 +1,22 @@
+// Package swagger provides API documentation
+package swagger
+
+// SwaggerInfo is the exported Swagger metadata of the Template API. Because it
+// is a pointer to a struct, code that imports this package can overwrite single
+// fields such as Host or BasePath in place. Host starts out empty and Schemes
+// is an empty, non-nil slice.
+var SwaggerInfo = &struct {
+	Version, Host, BasePath string
+	Schemes                 []string
+	Title, Description      string
+}{
+	Version:     "1.0",
+	Host:        "",
+	BasePath:    "/",
+	Schemes:     []string{},
+	Title:       "Template API",
+	Description: "Template API Service",
+}
+
+// Placeholder for swagger documentation
+// Run 'swag init' to generate complete API documentation
diff --git a/services/template-api/go.mod b/services/template-api/go.mod
new file mode 100644
index 00000000..aef64b3b
--- /dev/null
+++ b/services/template-api/go.mod
@@ -0,0 +1,83 @@
+module jan-server/services/template-api
+
+go 1.25.0
+
+require (
+	github.com/MicahParks/keyfunc/v2 v2.1.0
+	github.com/caarlos0/env/v10 v10.0.0
+	github.com/gin-gonic/gin v1.10.0
+	github.com/golang-jwt/jwt/v5 v5.3.0
+	github.com/google/uuid v1.4.0
+	github.com/google/wire v0.7.0
+	github.com/joho/godotenv v1.5.1
+	github.com/lib/pq v1.10.9
+	github.com/rs/zerolog v1.31.0
+	github.com/swaggo/files v1.0.1
+	github.com/swaggo/gin-swagger v1.6.0
+	go.opentelemetry.io/otel v1.24.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0
+	go.opentelemetry.io/otel/sdk v1.24.0
+	gorm.io/driver/postgres v1.5.7
+	gorm.io/gorm v1.26.0
+)
+
+require (
+	github.com/KyleBanks/depth v1.2.1 // indirect
+	github.com/PuerkitoBio/purell v1.1.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
+	github.com/bytedance/sonic v1.11.6 // indirect
+	github.com/bytedance/sonic/loader v0.1.1 // indirect
+	github.com/cenkalti/backoff/v4 v4.2.1 // indirect
+	github.com/cloudwego/base64x v0.1.4 // indirect
+	github.com/cloudwego/iasm v0.2.0 // indirect
+	github.com/gabriel-vasile/mimetype v1.4.3 // indirect
+	github.com/gin-contrib/sse v0.1.0 // indirect
+	github.com/go-logr/logr v1.4.1 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-openapi/jsonpointer v0.19.5 // indirect
+	github.com/go-openapi/jsonreference v0.19.6 // indirect
+	github.com/go-openapi/spec v0.20.4 // indirect
+	github.com/go-openapi/swag v0.19.15 // indirect
+	github.com/go-playground/locales v0.14.1 // indirect
+	github.com/go-playground/universal-translator v0.18.1 // indirect
+	github.com/go-playground/validator/v10 v10.20.0 // indirect
+	github.com/goccy/go-json v0.10.2 // indirect
+	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect
+	github.com/jackc/pgpassfile v1.0.0 // indirect
+	github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
+	github.com/jackc/pgx/v5 v5.7.6 // indirect
+	github.com/jackc/puddle/v2 v2.2.2 // indirect
+	github.com/jinzhu/inflection v1.0.0 // indirect
+	github.com/jinzhu/now v1.1.5 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.7 // indirect
+	github.com/leodido/go-urn v1.4.0 // indirect
+	github.com/mailru/easyjson v0.7.6 // indirect
+	github.com/mattn/go-colorable v0.1.13 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
+	github.com/modern-go/reflect2 v1.0.2 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/swaggo/swag v1.16.4 // indirect
+	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
+	github.com/ugorji/go/codec v1.2.12 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 // indirect
+	go.opentelemetry.io/otel/metric v1.24.0 // indirect
+	go.opentelemetry.io/otel/trace v1.24.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.1.0 // indirect
+	golang.org/x/arch v0.8.0 // indirect
+	golang.org/x/crypto v0.45.0 // indirect
+	golang.org/x/net v0.47.0 // indirect
+	golang.org/x/sync v0.18.0 // indirect
+	golang.org/x/sys v0.38.0 // indirect
+	golang.org/x/text v0.31.0 // indirect
+	golang.org/x/tools v0.38.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20240102182953-50ed04b92917 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 // indirect
+	google.golang.org/grpc v1.61.1 // indirect
+	google.golang.org/protobuf v1.34.1 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)
diff --git a/services/template-api/go.sum b/services/template-api/go.sum
new file mode 100644
index 00000000..07cc1a61
--- /dev/null
+++ b/services/template-api/go.sum
@@ -0,0 +1,251 @@
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
+github.com/MicahParks/keyfunc/v2 v2.1.0 h1:6ZXKb9Rp6qp1bDbJefnG7cTH8yMN1IC/4nf+GVjO99k=
+github.com/MicahParks/keyfunc/v2 v2.1.0/go.mod h1:rW42fi+xgLJ2FRRXAfNx9ZA8WpD4OeE/yHVMteCkw9k=
+github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI=
+github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
+github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
+github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
+github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
+github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
+github.com/caarlos0/env/v10 v10.0.0 h1:yIHUBZGsyqCnpTkbjk8asUlx6RFhhEs+h7TOBdgdzXA=
+github.com/caarlos0/env/v10 v10.0.0/go.mod h1:ZfulV76NvVPw3tm591U4SwL3Xx9ldzBP9aGxzeN7G18=
+github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
+github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
+github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
+github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
+github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
+github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
+github.com/gin-contrib/gzip v0.0.6 h1:NjcunTcGAj5CO1gn4N8jHOSIeRFHIbn51z6K+xaN4d4=
+github.com/gin-contrib/gzip v0.0.6/go.mod h1:QOJlmV2xmayAjkNS2Y8NQsMneuRShOU/kjovCXNuzzk=
+github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
+github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
+github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
+github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ=
+github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
+github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
+github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs=
+github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns=
+github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M=
+github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I=
+github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
+github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
+github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
+github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
+github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
+github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
+github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
+github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
+github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
+github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
+github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
+github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
+github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
+github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
+github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
+github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
+github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/wire v0.7.0 h1:JxUKI6+CVBgCO2WToKy/nQk0sS+amI9z9EjVmdaocj4=
+github.com/google/wire v0.7.0/go.mod h1:n6YbUQD9cPKTnHXEBN2DXlOp/mVADhVErcMFb0v3J18=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 h1:Wqo399gCIufwto+VfwCSvsnfGpF/w5E9CNxSwbpD6No=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0/go.mod h1:qmOFXW2epJhM0qSnUUYpldc7gVz2KMQwJ/QYCDIa7XU=
+github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
+github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
+github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
+github.com/jackc/pgx/v5 v5.7.6 h1:rWQc5FwZSPX58r1OQmkuaNicxdmExaEz5A2DO2hUuTk=
+github.com/jackc/pgx/v5 v5.7.6/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
+github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
+github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
+github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E=
+github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc=
+github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ=
+github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8=
+github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
+github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
+github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
+github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
+github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
+github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
+github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
+github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
+github.com/mailru/easyjson v0.7.6 h1:8yTIVnZgCoiM1TgqoeTl+LfU5Jg6/xL3QhGQnimLYnA=
+github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
+github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
+github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
+github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
+github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
+github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
+github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A=
+github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
+github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg=
+github.com/swaggo/gin-swagger v1.6.0 h1:y8sxvQ3E20/RCyrXeFfg60r6H0Z+SwpTjMYsMm+zy8M=
+github.com/swaggo/gin-swagger v1.6.0/go.mod h1:BG00cCEy294xtVpyIAHG6+e2Qzj/xKlRdOqDkvq0uzo=
+github.com/swaggo/swag v1.16.4 h1:clWJtd9LStiG3VeijiCfOVODP6VpHtKdQy9ELFG3s1A=
+github.com/swaggo/swag v1.16.4/go.mod h1:VBsHJRsDvfYvqoiMKnsdwhNV9LEMHgEDZcyVYX0sxPg=
+github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
+github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
+github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
+github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo=
+go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0 h1:t6wl9SPayj+c7lEIFgm4ooDBZVb01IhLB4InpomhRw8=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.24.0/go.mod h1:iSDOcsnSA5INXzZtwaBPrKp/lWu/V14Dd+llD0oI2EA=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0 h1:Xw8U6u2f8DK2XAkGRFV7BBLENgnTGX9i4rQRxJf+/vs=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0/go.mod h1:6KW1Fm6R/s6Z3PGXwSJN2K4eT6wQB3vXX6CVnYX9NmM=
+go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI=
+go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco=
+go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw=
+go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg=
+go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI=
+go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU=
+go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI=
+go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY=
+golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
+golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
+golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q=
+golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
+golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
+golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
+golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
+golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
+golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/genproto v0.0.0-20231212172506-995d672761c0 h1:YJ5pD9rF8o9Qtta0Cmy9rdBwkSjrTCT6XTiUQVOtIos=
+google.golang.org/genproto v0.0.0-20231212172506-995d672761c0/go.mod h1:l/k7rMz0vFTBPy+tFSGvXEd3z+BcoG1k7EHbqm+YBsY=
+google.golang.org/genproto/googleapis/api v0.0.0-20240102182953-50ed04b92917 h1:rcS6EyEaoCO52hQDupoSfrxI3R6C2Tq741is7X8OvnM=
+google.golang.org/genproto/googleapis/api v0.0.0-20240102182953-50ed04b92917/go.mod h1:CmlNWB9lSezaYELKS5Ym1r44VrrbPUa7JTvw+6MbpJ0=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917 h1:6G8oQ016D88m1xAKljMlBOOGWDZkes4kMhgGFlf8WcQ=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240102182953-50ed04b92917/go.mod h1:xtjpI3tXFPP051KaWnhvxkiubL/6dJ18vLVf7q2pTOU=
+google.golang.org/grpc v1.61.1 h1:kLAiWrZs7YeDM6MumDe7m3y4aM6wacLzM1Y/wiLP9XY=
+google.golang.org/grpc v1.61.1/go.mod h1:VUbo7IFqmF1QtCAstipjG0GIoq49KvMe9+h1jFLBNJs=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
+google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gorm.io/driver/postgres v1.5.7 h1:8ptbNJTDbEmhdr62uReG5BGkdQyeasu/FZHxI0IMGnM=
+gorm.io/driver/postgres v1.5.7/go.mod h1:3e019WlBaYI5o5LIdNV+LyxCMNtLOQETBXL2h4chKpA=
+gorm.io/gorm v1.26.0 h1:9lqQVPG5aNNS6AyHdRiwScAVnXHg/L/Srzx55G5fOgs=
+gorm.io/gorm v1.26.0/go.mod h1:8Z33v652h4//uMA76KjeDH8mJXPm1QNCYrMeatR0DOE=
+nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/services/template-api/internal/config/config.go b/services/template-api/internal/config/config.go
new file mode 100644
index 00000000..e314d5c1
--- /dev/null
+++ b/services/template-api/internal/config/config.go
@@ -0,0 +1,77 @@
+package config
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/caarlos0/env/v10"
+)
+
+// Config holds the environment driven configuration for the template service.
+//
+// NOTE: This service uses the traditional env-based approach for demonstration.
+// For new services, consider using the central configuration system at pkg/config
+// which provides:
+//   - Type-safe configuration with validation
+//   - YAML defaults with env var overrides
+//   - Automatic documentation generation
+//   - Kubernetes values generation
+//   - Configuration provenance tracking
+//
+// See docs/configuration/ for migration guide and examples.
+type Config struct {
+	ServiceName     string        `env:"SERVICE_NAME" envDefault:"template-api"`
+	Environment     string        `env:"ENVIRONMENT" envDefault:"development"`
+	HTTPPort        int           `env:"HTTP_PORT" envDefault:"8185"`
+	LogLevel        string        `env:"LOG_LEVEL" envDefault:"info"`
+	EnableTracing   bool          `env:"ENABLE_TRACING" envDefault:"false"`
+	OTLPEndpoint    string        `env:"OTEL_EXPORTER_OTLP_ENDPOINT" envDefault:""`
+	ShutdownTimeout time.Duration `env:"SHUTDOWN_TIMEOUT" envDefault:"10s"`
+	DatabaseURL     string        `env:"DB_POSTGRESQL_WRITE_DSN" envDefault:"postgres://postgres:postgres@localhost:5432/template_api?sslmode=disable"`
+	DBMaxIdleConns  int           `env:"DB_MAX_IDLE_CONNS" envDefault:"5"`
+	DBMaxOpenConns  int           `env:"DB_MAX_OPEN_CONNS" envDefault:"15"`
+	DBConnLifetime  time.Duration `env:"DB_CONN_MAX_LIFETIME" envDefault:"30m"`
+	AuthEnabled     bool          `env:"AUTH_ENABLED" envDefault:"false"`
+	AuthIssuer      string        `env:"AUTH_ISSUER"`   // expected token issuer; required when AuthEnabled
+	Account         string        `env:"ACCOUNT"`       // expected token audience; required when AuthEnabled
+	AuthJWKSURL     string        `env:"AUTH_JWKS_URL"` // JWKS endpoint for signing keys; required when AuthEnabled
+}
+
+// Load parses process environment variables into Config and validates them.
+//
+// Value resolution order (highest to lowest priority):
+//  1. Environment variables present in the process
+//  2. Default values from the envDefault struct tags
+//
+// Load itself does not read .env files; values from such a file must already
+// be exported into the process environment, for example via:
+//   - Docker Compose (docker-compose.yml env_file or environment)
+//   - Kubernetes ConfigMaps/Secrets
+//   - System environment variables
+func Load() (*Config, error) {
+	cfg := &Config{}
+	if err := env.Parse(cfg); err != nil {
+		return nil, fmt.Errorf("parse env config: %w", err)
+	}
+
+	// With auth enabled, issuer, ACCOUNT and JWKS URL must all be non-blank.
+	if cfg.AuthEnabled {
+		if strings.TrimSpace(cfg.AuthIssuer) == "" {
+			return nil, fmt.Errorf("AUTH_ISSUER is required when AUTH_ENABLED is true")
+		}
+		if strings.TrimSpace(cfg.Account) == "" {
+			return nil, fmt.Errorf("ACCOUNT is required when AUTH_ENABLED is true")
+		}
+		if strings.TrimSpace(cfg.AuthJWKSURL) == "" {
+			return nil, fmt.Errorf("AUTH_JWKS_URL is required when AUTH_ENABLED is true")
+		}
+	}
+
+	return cfg, nil
+}
+
+// Addr returns the HTTP listen address as ":<HTTPPort>" (port only, no host part).
+func (c *Config) Addr() string {
+	return fmt.Sprintf(":%d", c.HTTPPort)
+}
diff --git a/services/template-api/internal/domain/sample/model.go b/services/template-api/internal/domain/sample/model.go
new file mode 100644
index 00000000..e39af362
--- /dev/null
+++ b/services/template-api/internal/domain/sample/model.go
@@ -0,0 +1,7 @@
+package sample
+
+// Sample represents a domain entity returned by the sample use case.
+type Sample struct {
+	ID      string `json:"id"`      // identifier assigned by the backing repository
+	Message string `json:"message"` // text payload carried by the sample
+}
diff --git a/services/template-api/internal/domain/sample/repository.go b/services/template-api/internal/domain/sample/repository.go
new file mode 100644
index 00000000..a4f21749
--- /dev/null
+++ b/services/template-api/internal/domain/sample/repository.go
@@ -0,0 +1,8 @@
+package sample
+
+import "context"
+
+// Repository exposes data access for Sample entities.
+type Repository interface {
+	FetchLatest(ctx context.Context) (Sample, error) // newest Sample, or an error when none can be returned
+}
diff --git a/services/template-api/internal/domain/sample/service.go b/services/template-api/internal/domain/sample/service.go
new file mode 100644
index 00000000..d4e0988d
--- /dev/null
+++ b/services/template-api/internal/domain/sample/service.go
@@ -0,0 +1,34 @@
+package sample
+
+import (
+	"context"
+
+	"github.com/rs/zerolog"
+)
+
+// Service describes the business logic surface for sample operations.
+type Service interface {
+	GetSample(ctx context.Context) (Sample, error)
+}
+
+type service struct {
+	repo Repository
+	log  zerolog.Logger
+}
+
+// NewService builds a Service backed by repo; the logger it keeps tags every
+// event with component=sample-service.
+func NewService(repo Repository, log zerolog.Logger) Service {
+	componentLog := log.With().Str("component", "sample-service").Logger()
+
+	return &service{repo: repo, log: componentLog}
+}
+
+func (s *service) GetSample(ctx context.Context) (Sample, error) {
+	latest, fetchErr := s.repo.FetchLatest(ctx)
+	if fetchErr == nil {
+		return latest, nil
+	}
+	s.log.Error().Err(fetchErr).Msg("fetch latest sample") // log, then propagate unchanged
+	return Sample{}, fetchErr
+}
diff --git a/services/template-api/internal/infrastructure/auth/auth.go b/services/template-api/internal/infrastructure/auth/auth.go
new file mode 100644
index 00000000..913ade97
--- /dev/null
+++ b/services/template-api/internal/infrastructure/auth/auth.go
@@ -0,0 +1,96 @@
+package auth
+
+import (
+	"context"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/MicahParks/keyfunc/v2"
+	"github.com/gin-gonic/gin"
+	"github.com/golang-jwt/jwt/v5"
+	"github.com/rs/zerolog"
+
+	"jan-server/services/template-api/internal/config"
+)
+
+// Validator validates JWTs using JWKS.
+type Validator struct {
+	cfg  *config.Config
+	log  zerolog.Logger
+	jwks *keyfunc.JWKS // left nil by NewValidator when auth is disabled
+}
+
+// NewValidator returns a pass-through Validator when auth is disabled; otherwise
+// it fetches the JWKS from cfg.AuthJWKSURL and refreshes it until ctx is done.
+func NewValidator(ctx context.Context, cfg *config.Config, log zerolog.Logger) (*Validator, error) {
+	if !cfg.AuthEnabled {
+		return &Validator{cfg: cfg, log: log}, nil
+	}
+
+	options := keyfunc.Options{
+		Ctx:               ctx,
+		RefreshInterval:   time.Hour,
+		// Unknown key IDs trigger a refresh; the rate limit keeps tokens carrying
+		// random "kid" values from forcing a JWKS download on every request.
+		RefreshUnknownKID: true,
+		RefreshRateLimit:  5 * time.Minute,
+		RefreshErrorHandler: func(err error) {
+			log.Error().Err(err).Msg("jwks refresh error")
+		},
+	}
+
+	jwks, err := keyfunc.Get(cfg.AuthJWKSURL, options)
+	if err != nil {
+		return nil, err
+	}
+
+	return &Validator{cfg: cfg, log: log, jwks: jwks}, nil
+}
+
+// Middleware enforces JWT auth when enabled; otherwise it is a pass-through.
+func (v *Validator) Middleware() gin.HandlerFunc {
+	if v == nil || !v.cfg.AuthEnabled {
+		return func(c *gin.Context) {
+			c.Next()
+		}
+	}
+
+	return func(c *gin.Context) {
+		tokenString := bearerToken(c.GetHeader("Authorization"))
+		if tokenString == "" {
+			abortUnauthorized(c, "missing bearer token")
+			return
+		}
+
+		token, err := jwt.Parse(tokenString, v.jwks.Keyfunc,
+			jwt.WithAudience(v.cfg.Account), // ACCOUNT doubles as the expected audience
+			jwt.WithIssuer(v.cfg.AuthIssuer),
+			jwt.WithValidMethods([]string{"RS256", "RS384", "RS512"}), // RSA signatures only
+		)
+		if err != nil || !token.Valid {
+			abortUnauthorized(c, "invalid token") // parse/validation details are not sent to the client
+			return
+		}
+
+		c.Set("auth_token", token) // parsed token is stored on the gin context under this key
+		c.Next()
+	}
+}
+
+// bearerToken extracts the credential from an Authorization header value of
+// the form "Bearer <token>" (scheme matched case-insensitively). It returns
+// "" when the header is empty, has no space separator, or uses another scheme.
+func bearerToken(header string) string {
+	scheme, credential, found := strings.Cut(header, " ")
+	if !found || !strings.EqualFold(scheme, "Bearer") {
+		return ""
+	}
+	return strings.TrimSpace(credential)
+}
+
+// abortUnauthorized stops the handler chain with a 401 and a JSON error body.
+func abortUnauthorized(c *gin.Context, message string) {
+	body := gin.H{"error": message}
+	c.AbortWithStatusJSON(http.StatusUnauthorized, body)
+}
diff --git a/services/template-api/internal/infrastructure/database/database.go b/services/template-api/internal/infrastructure/database/database.go
new file mode 100644
index 00000000..9be433e9
--- /dev/null
+++ b/services/template-api/internal/infrastructure/database/database.go
@@ -0,0 +1,105 @@
+package database
+
+import (
+	"database/sql"
+	"errors"
+	"fmt"
+	"net/url"
+	"strings"
+	"time"
+
+	_ "github.com/lib/pq"
+	"gorm.io/driver/postgres"
+	"gorm.io/gorm"
+	gormlogger "gorm.io/gorm/logger"
+	"gorm.io/gorm/schema"
+)
+
+// Config controls GORM/PostgreSQL connectivity.
+type Config struct {
+	DSN             string              // required; Connect fails when empty
+	MaxIdleConns    int                 // applied only when > 0
+	MaxOpenConns    int                 // applied only when > 0
+	ConnMaxLifetime time.Duration       // applied only when > 0
+	LogLevel        gormlogger.LogLevel // zero value is replaced by gormlogger.Warn
+}
+
+// Connect opens a GORM PostgreSQL connection for cfg, first ensuring the target database exists.
+func Connect(cfg Config) (*gorm.DB, error) {
+	if cfg.DSN == "" {
+		return nil, fmt.Errorf("database DSN is empty")
+	}
+
+	if err := ensureDatabaseExists(cfg.DSN); err != nil {
+		return nil, fmt.Errorf("ensure database: %w", err)
+	}
+
+	if cfg.LogLevel == 0 { // unset: default to warnings
+		cfg.LogLevel = gormlogger.Warn
+	}
+
+	db, err := gorm.Open(postgres.Open(cfg.DSN), &gorm.Config{
+		PrepareStmt: true,
+		NamingStrategy: schema.NamingStrategy{
+			SingularTable: true,
+		},
+		Logger: gormlogger.Default.LogMode(cfg.LogLevel),
+	})
+	if err != nil {
+		return nil, fmt.Errorf("connect database: %w", err)
+	}
+
+	sqlDB, err := db.DB() // underlying *sql.DB, needed for the pool settings below
+	if err != nil {
+		return nil, fmt.Errorf("retrieve sql db: %w", err)
+	}
+
+	if cfg.MaxIdleConns > 0 { // non-positive values keep the pool default
+		sqlDB.SetMaxIdleConns(cfg.MaxIdleConns)
+	}
+	if cfg.MaxOpenConns > 0 {
+		sqlDB.SetMaxOpenConns(cfg.MaxOpenConns)
+	}
+	if cfg.ConnMaxLifetime > 0 {
+		sqlDB.SetConnMaxLifetime(cfg.ConnMaxLifetime)
+	}
+
+	return db, nil
+}
+
+// ensureDatabaseExists creates the database named in a postgres:// or postgresql://
+// URL DSN if it is missing; any other DSN form is left for the driver to handle.
+func ensureDatabaseExists(dsn string) error {
+	parsedURL, err := url.Parse(dsn)
+	// url.Parse accepts keyword/value DSNs as a bare path, so also check the scheme.
+	if err != nil || (parsedURL.Scheme != "postgres" && parsedURL.Scheme != "postgresql") {
+		return nil
+	}
+	dbName := strings.TrimPrefix(parsedURL.Path, "/")
+	if dbName == "" || dbName == "postgres" {
+		return nil
+	}
+
+	adminURL := *parsedURL
+	adminURL.Path = "/postgres"
+	sqlDB, err := sql.Open("postgres", adminURL.String())
+	if err != nil {
+		return err
+	}
+	defer sqlDB.Close()
+	var exists bool
+	err = sqlDB.QueryRow("SELECT EXISTS (SELECT 1 FROM pg_database WHERE datname = $1)", dbName).Scan(&exists)
+	if err != nil && !errors.Is(err, sql.ErrNoRows) {
+		return err
+	}
+	if exists {
+		return nil
+	}
+	_, err = sqlDB.Exec("CREATE DATABASE " + pqQuoteIdentifier(dbName))
+	return err
+}
+
+// pqQuoteIdentifier double-quotes ident, doubling any embedded double quotes.
+func pqQuoteIdentifier(ident string) string {
+	return `"` + strings.ReplaceAll(ident, `"`, `""`) + `"`
+}
diff --git a/services/template-api/internal/infrastructure/database/entities/sample.go b/services/template-api/internal/infrastructure/database/entities/sample.go
new file mode 100644
index 00000000..1aa7ce42
--- /dev/null
+++ b/services/template-api/internal/infrastructure/database/entities/sample.go
@@ -0,0 +1,15 @@
+package entities
+
+import "time"
+
+// Sample models the persisted representation of the sample domain entity.
+type Sample struct {
+	ID        string    `gorm:"type:uuid;primaryKey"`
+	Message   string    `gorm:"type:text;not null"`
+	CreatedAt time.Time `gorm:"autoCreateTime"`
+	UpdatedAt time.Time `gorm:"autoUpdateTime"`
+}
+
+func (Sample) TableName() string {
+	return "samples" // explicit table name for this entity
+}
diff --git a/services/template-api/internal/infrastructure/database/migrate.go b/services/template-api/internal/infrastructure/database/migrate.go
new file mode 100644
index 00000000..4a1883aa
--- /dev/null
+++ b/services/template-api/internal/infrastructure/database/migrate.go
@@ -0,0 +1,38 @@
+package database
+
+import (
+	"context"
+
+	"github.com/google/uuid"
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+
+	"jan-server/services/template-api/internal/infrastructure/database/entities"
+)
+
+// AutoMigrate applies database schema changes and seeds baseline rows.
+// Every statement is bound to ctx, so cancellation also stops the seeding.
+func AutoMigrate(ctx context.Context, db *gorm.DB, log zerolog.Logger) error {
+	tx := db.WithContext(ctx)
+	if err := tx.AutoMigrate(&entities.Sample{}); err != nil {
+		return err
+	}
+	var count int64
+	if err := tx.Model(&entities.Sample{}).Count(&count).Error; err != nil {
+		return err
+	}
+	if count != 0 {
+		log.Debug().Int64("rows", count).Msg("sample table already seeded")
+		return nil
+	}
+
+	defaultSample := entities.Sample{
+		ID:      uuid.NewString(),
+		Message: "Hello from PostgreSQL! Replace this with your own repository logic.",
+	}
+	if err := tx.Create(&defaultSample).Error; err != nil {
+		return err
+	}
+	log.Info().Str("sample_id", defaultSample.ID).Msg("seeded default sample row")
+	return nil
+}
diff --git a/services/template-api/internal/infrastructure/logger/logger.go b/services/template-api/internal/infrastructure/logger/logger.go
new file mode 100644
index 00000000..23f7100d
--- /dev/null
+++ b/services/template-api/internal/infrastructure/logger/logger.go
@@ -0,0 +1,40 @@
+package logger
+
+import (
+	"os"
+	"strings"
+	"time"
+
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+
+	"jan-server/services/template-api/internal/config"
+)
+
+// New creates a zerolog.Logger configured for the template service.
+func New(cfg *config.Config) zerolog.Logger {
+	level := parseLevel(cfg.LogLevel) // empty or unrecognised names fall back to info
+	output := zerolog.ConsoleWriter{
+		Out:        os.Stdout,
+		TimeFormat: time.RFC3339,
+	}
+	// NOTE(review): log.Output derives from zerolog's global logger, which may already add a timestamp — confirm Timestamp() is not redundant.
+	base := log.Output(output).
+		With().
+		Timestamp().
+		Str("service", cfg.ServiceName).
+		Str("environment", cfg.Environment).
+		Logger().
+		Level(level)
+	return base
+}
+
+// parseLevel maps a level name (any letter case) to a zerolog.Level,
+// falling back to info when the name is empty or cannot be parsed.
+func parseLevel(levelString string) zerolog.Level {
+	if levelString != "" {
+		if parsed, err := zerolog.ParseLevel(strings.ToLower(levelString)); err == nil {
+			return parsed
+		}
+	}
+	return zerolog.InfoLevel
+}
diff --git a/services/template-api/internal/infrastructure/observability/observability.go b/services/template-api/internal/infrastructure/observability/observability.go
new file mode 100644
index 00000000..f18d27ea
--- /dev/null
+++ b/services/template-api/internal/infrastructure/observability/observability.go
@@ -0,0 +1,56 @@
+package observability
+
+import (
+	"context"
+
+	"github.com/rs/zerolog"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
+
+	"jan-server/services/template-api/internal/config"
+)
+
+// Shutdown is a function that releases telemetry resources.
+type Shutdown func(ctx context.Context) error
+
+// Setup configures OpenTelemetry tracing if enabled and returns its shutdown
+// function; with tracing off or no endpoint set, the shutdown is a no-op.
+func Setup(ctx context.Context, cfg *config.Config, log zerolog.Logger) (Shutdown, error) {
+	if !cfg.EnableTracing || cfg.OTLPEndpoint == "" {
+		log.Info().Msg("Tracing disabled")
+		return func(context.Context) error { return nil }, nil
+	}
+
+	// Resource first: a failure here must not leave a created exporter un-shut-down.
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceName(cfg.ServiceName),
+			semconv.DeploymentEnvironment(cfg.Environment),
+		),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	// NOTE(review): WithEndpoint is documented to take host[:port] without scheme or path — confirm the configured value.
+	exporter, err := otlptracehttp.New(ctx,
+		otlptracehttp.WithEndpoint(cfg.OTLPEndpoint),
+		otlptracehttp.WithInsecure(),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	tp := sdktrace.NewTracerProvider(
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+		sdktrace.WithBatcher(exporter),
+		sdktrace.WithResource(res),
+	)
+	otel.SetTracerProvider(tp)
+	log.Info().Str("endpoint", cfg.OTLPEndpoint).Msg("Tracing enabled")
+
+	return tp.Shutdown, nil
+}
diff --git a/services/template-api/internal/infrastructure/repository/sample/inmemory_repository.go b/services/template-api/internal/infrastructure/repository/sample/inmemory_repository.go
new file mode 100644
index 00000000..9ba804e3
--- /dev/null
+++ b/services/template-api/internal/infrastructure/repository/sample/inmemory_repository.go
@@ -0,0 +1,43 @@
+package sample
+
+import (
+	"context"
+	"errors"
+	"sync"
+
+	domain "jan-server/services/template-api/internal/domain/sample"
+)
+
+// InMemoryRepository is a thread-safe repository useful for demos/tests.
+type InMemoryRepository struct {
+	mu      sync.RWMutex
+	entries []domain.Sample
+}
+
+// NewInMemoryRepository returns a repository pre-populated with two demo
+// entries, "sample-1" and "sample-2" (the latter being the latest).
+func NewInMemoryRepository() *InMemoryRepository {
+	seed := []domain.Sample{
+		{ID: "sample-1", Message: "Hello from the repository layer"},
+		{ID: "sample-2", Message: "Customize this implementation for real data sources"},
+	}
+	return &InMemoryRepository{entries: seed}
+}
+
+// FetchLatest returns the entry appended last, or an error if none exist.
+func (r *InMemoryRepository) FetchLatest(ctx context.Context) (domain.Sample, error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	if last := len(r.entries) - 1; last >= 0 {
+		return r.entries[last], nil
+	}
+	return domain.Sample{}, errors.New("no sample data available")
+}
+
+// Store appends sample under the write lock, making it the entry FetchLatest returns (optional helper).
+func (r *InMemoryRepository) Store(ctx context.Context, sample domain.Sample) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.entries = append(r.entries, sample)
+}
diff --git a/services/template-api/internal/infrastructure/repository/sample/postgres_repository.go b/services/template-api/internal/infrastructure/repository/sample/postgres_repository.go
new file mode 100644
index 00000000..0e0a4026
--- /dev/null
+++ b/services/template-api/internal/infrastructure/repository/sample/postgres_repository.go
@@ -0,0 +1,53 @@
+package sample
+
+import (
+	"context"
+	"errors"
+
+	"gorm.io/gorm"
+
+	domain "jan-server/services/template-api/internal/domain/sample"
+	"jan-server/services/template-api/internal/infrastructure/database/entities"
+	"jan-server/services/template-api/internal/utils/platformerrors"
+)
+
+// PostgresRepository persists samples via PostgreSQL using GORM.
+type PostgresRepository struct {
+	db *gorm.DB
+}
+
+// NewPostgresRepository creates a repository backed by the provided DB.
+func NewPostgresRepository(db *gorm.DB) *PostgresRepository {
+	return &PostgresRepository{db: db}
+}
+
+// FetchLatest returns the most recently created sample row (newest created_at).
+// A missing row is reported as a NOT_FOUND platform error; any other failure
+// as a DATABASE_ERROR platform error. Both wrap the original GORM error.
+func (r *PostgresRepository) FetchLatest(ctx context.Context) (domain.Sample, error) {
+	var record entities.Sample
+	err := r.db.WithContext(ctx).Order("created_at DESC").First(&record).Error
+
+	switch {
+	case err == nil:
+		return domain.Sample{ID: record.ID, Message: record.Message}, nil
+	case errors.Is(err, gorm.ErrRecordNotFound):
+		return domain.Sample{}, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeNotFound,
+			"no sample records found",
+			err,
+			"3e4f5a6b-7c8d-4e9f-0a1b-2c3d4e5f6a7b",
+		)
+	default:
+		return domain.Sample{}, platformerrors.NewError(
+			ctx,
+			platformerrors.LayerRepository,
+			platformerrors.ErrorTypeDatabaseError,
+			"failed to fetch latest sample",
+			err,
+			"4f5a6b7c-8d9e-4f0a-1b2c-3d4e5f6a7b8c",
+		)
+	}
+}
diff --git a/services/template-api/internal/interfaces/httpserver/handlers/provider.go b/services/template-api/internal/interfaces/httpserver/handlers/provider.go
new file mode 100644
index 00000000..e1741a52
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/handlers/provider.go
@@ -0,0 +1,17 @@
+package handlers
+
+import (
+	domain "jan-server/services/template-api/internal/domain/sample"
+)
+
+// Provider wires all HTTP handlers for dependency injection.
+type Provider struct {
+	Sample *SampleHandler
+}
+
+// NewProvider constructs the handler provider with domain services.
+func NewProvider(sampleService domain.Service) *Provider {
+	return &Provider{
+		Sample: NewSampleHandler(sampleService),
+	}
+}
diff --git a/services/template-api/internal/interfaces/httpserver/handlers/sample_handler.go b/services/template-api/internal/interfaces/httpserver/handlers/sample_handler.go
new file mode 100644
index 00000000..32efc17e
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/handlers/sample_handler.go
@@ -0,0 +1,24 @@
+package handlers
+
+import (
+	"context"
+
+	domain "jan-server/services/template-api/internal/domain/sample"
+)
+
+// SampleHandler invokes domain logic for sample use cases.
+type SampleHandler struct {
+	service domain.Service
+}
+
+// NewSampleHandler wires dependencies for sample routes.
+func NewSampleHandler(service domain.Service) *SampleHandler {
+	return &SampleHandler{
+		service: service,
+	}
+}
+
+// GetSample delegates to the domain Service and returns its result and error unchanged.
+func (h *SampleHandler) GetSample(ctx context.Context) (domain.Sample, error) {
+	return h.service.GetSample(ctx)
+}
diff --git a/services/template-api/internal/interfaces/httpserver/http_server.go b/services/template-api/internal/interfaces/httpserver/http_server.go
new file mode 100644
index 00000000..1eba23b8
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/http_server.go
@@ -0,0 +1,109 @@
+package httpserver
+
+import (
+	"context"
+	"errors"
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+	"github.com/rs/zerolog"
+	swaggerFiles "github.com/swaggo/files"
+	ginSwagger "github.com/swaggo/gin-swagger"
+
+	templateapidocs "jan-server/services/template-api/docs/swagger"
+	"jan-server/services/template-api/internal/config"
+	domain "jan-server/services/template-api/internal/domain/sample"
+	"jan-server/services/template-api/internal/infrastructure/auth"
+	"jan-server/services/template-api/internal/interfaces/httpserver/handlers"
+	"jan-server/services/template-api/internal/interfaces/httpserver/routes"
+)
+
+// HTTPServer wraps the gin engine with graceful shutdown helpers.
+type HTTPServer struct {
+	cfg         *config.Config
+	engine      *gin.Engine
+	log         zerolog.Logger
+	handlerProv *handlers.Provider
+	routeProv   *routes.Provider
+}
+
+// New constructs the HTTP server with default middleware and routes.
+func New(cfg *config.Config, log zerolog.Logger, sampleService domain.Service, authValidator *auth.Validator) *HTTPServer {
+	if cfg.Environment == "production" {
+		gin.SetMode(gin.ReleaseMode)
+	}
+	templateapidocs.SwaggerInfo.BasePath = "/"
+
+	engine := gin.New()
+	engine.Use(gin.Recovery())
+	engine.Use(gin.Logger())
+	if authValidator != nil {
+		engine.Use(authValidator.Middleware()) // NOTE(review): added before route registration, so it presumably also covers /, /healthz, /readyz and /swagger — confirm
+	}
+	handlerProvider := handlers.NewProvider(sampleService)
+	routeProvider := routes.NewProvider(handlerProvider)
+	registerCoreRoutes(engine, cfg, routeProvider)
+
+	return &HTTPServer{
+		cfg:         cfg,
+		engine:      engine,
+		log:         log,
+		handlerProv: handlerProvider,
+		routeProv:   routeProvider,
+	}
+}
+
+// Run starts the HTTP listener and blocks until it fails or ctx is cancelled;
+// on cancellation it shuts down gracefully within cfg.ShutdownTimeout.
+func (s *HTTPServer) Run(ctx context.Context) error {
+	addr := s.cfg.Addr()
+	server := &http.Server{
+		Addr:    addr,
+		Handler: s.engine,
+	}
+
+	// Buffered so the serving goroutine can always deliver its result and exit.
+	errCh := make(chan error, 1)
+	go func() {
+		s.log.Info().Str("addr", addr).Msg("HTTP server listening")
+		serveErr := server.ListenAndServe()
+		if serveErr == nil || errors.Is(serveErr, http.ErrServerClosed) {
+			errCh <- nil
+			return
+		}
+		s.log.Error().Err(serveErr).Msg("HTTP server error")
+		errCh <- serveErr
+	}()
+
+	select {
+	case err := <-errCh:
+		return err
+	case <-ctx.Done():
+		s.log.Info().Msg("Context cancelled, shutting down HTTP server")
+	}
+
+	shutdownCtx, cancel := context.WithTimeout(context.Background(), s.cfg.ShutdownTimeout)
+	defer cancel()
+	return server.Shutdown(shutdownCtx)
+}
+
+func registerCoreRoutes(engine *gin.Engine, cfg *config.Config, routeProvider *routes.Provider) { // root, health, swagger, then provider routes
+	engine.GET("/", func(c *gin.Context) {
+		c.JSON(http.StatusOK, gin.H{
+			"service": cfg.ServiceName,
+			"status":  "ok",
+		})
+	})
+
+	engine.GET("/healthz", func(c *gin.Context) { // static response; no checks are run
+		c.JSON(http.StatusOK, gin.H{"status": "healthy"})
+	})
+
+	engine.GET("/readyz", func(c *gin.Context) { // always reports ready; no dependency checks here
+		c.JSON(http.StatusOK, gin.H{"status": "ready"})
+	})
+
+	engine.GET("/swagger/*any", ginSwagger.WrapHandler(swaggerFiles.Handler))
+
+	routeProvider.Register(engine)
+}
diff --git a/services/template-api/internal/interfaces/httpserver/middlewares/middlewares.go b/services/template-api/internal/interfaces/httpserver/middlewares/middlewares.go
new file mode 100644
index 00000000..44cc7bfe
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/middlewares/middlewares.go
@@ -0,0 +1,28 @@
+package middlewares
+
+import (
+	"github.com/gin-gonic/gin"
+)
+
+// CORS returns middleware that allows any origin and answers OPTIONS (preflight) requests with 204.
+func CORS() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		c.Writer.Header().Set("Access-Control-Allow-Origin", "*") // wildcard: every origin is accepted
+		c.Writer.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS")
+		c.Writer.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
+
+		if c.Request.Method == "OPTIONS" {
+			c.AbortWithStatus(204) // 204 No Content; the rest of the handler chain is skipped
+			return
+		}
+
+		c.Next()
+	}
+}
+
+// RequestLogger is currently a pass-through placeholder: it only calls c.Next() and logs nothing.
+func RequestLogger() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		c.Next()
+	}
+}
diff --git a/services/template-api/internal/interfaces/httpserver/requests/requests.go b/services/template-api/internal/interfaces/httpserver/requests/requests.go
new file mode 100644
index 00000000..8391faf8
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/requests/requests.go
@@ -0,0 +1,4 @@
+package requests
+
+// Placeholder for future request DTOs
+// Add request structures as the API evolves
diff --git a/services/template-api/internal/interfaces/httpserver/responses/responses.go b/services/template-api/internal/interfaces/httpserver/responses/responses.go
new file mode 100644
index 00000000..559faa71
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/responses/responses.go
@@ -0,0 +1,63 @@
+package responses
+
+import (
+	"errors"
+	"net/http"
+
+	"jan-server/services/template-api/internal/utils/platformerrors"
+
+	"github.com/gin-gonic/gin"
+)
+
+// ErrorResponse represents an error response with platform error details
+type ErrorResponse struct {
+	Code          string `json:"code"` // UUID from PlatformError
+	Error         string `json:"error"`
+	Message       string `json:"message,omitempty"`
+	ErrorInstance error  `json:"-"`
+	RequestID     string `json:"request_id,omitempty"`
+}
+
+// HandleError aborts with a JSON ErrorResponse: platform errors use the status mapped from their type, others a 500.
+func HandleError(reqCtx *gin.Context, err error, message string) {
+	var domainErr *platformerrors.PlatformError
+	if errors.As(err, &domainErr) {
+		statusCode := platformerrors.ErrorTypeToHTTPStatus(domainErr.GetErrorType())
+
+		errResp := ErrorResponse{
+			Code:          domainErr.GetUUID(),
+			Error:         message,
+			Message:       message,
+			ErrorInstance: domainErr, // tagged json:"-", so it is never serialised to the client
+			RequestID:     domainErr.GetRequestID(),
+		}
+
+		reqCtx.AbortWithStatusJSON(statusCode, errResp)
+		return
+	}
+	// Non-platform errors: no code or request ID is set, and the status is always 500
+	errResp := ErrorResponse{
+		Error:         message,
+		Message:       message,
+		ErrorInstance: err,
+	}
+	reqCtx.AbortWithStatusJSON(http.StatusInternalServerError, errResp)
+}
+
+// HandleNewError creates a new typed error at the route layer and handles it
+func HandleNewError(reqCtx *gin.Context, errorType platformerrors.ErrorType, message string, uuid string) {
+	ctx := reqCtx.Request.Context()
+	err := platformerrors.NewError(ctx, platformerrors.LayerRoute, errorType, message, nil, uuid)
+
+	statusCode := platformerrors.ErrorTypeToHTTPStatus(err.GetErrorType())
+
+	errResp := ErrorResponse{
+		Code:          err.GetUUID(),
+		Error:         message,
+		Message:       message,
+		ErrorInstance: err,
+		RequestID:     err.GetRequestID(),
+	}
+
+	reqCtx.AbortWithStatusJSON(statusCode, errResp)
+}
diff --git a/services/template-api/internal/interfaces/httpserver/routes/provider.go b/services/template-api/internal/interfaces/httpserver/routes/provider.go
new file mode 100644
index 00000000..aaee23e6
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/routes/provider.go
@@ -0,0 +1,25 @@
+package routes
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/template-api/internal/interfaces/httpserver/handlers"
+	v1 "jan-server/services/template-api/internal/interfaces/httpserver/routes/v1"
+)
+
+// Provider coordinates all route registrations.
+type Provider struct {
+	V1 *v1.Routes
+}
+
+// NewProvider constructs the route provider.
+func NewProvider(handlerProvider *handlers.Provider) *Provider {
+	return &Provider{
+		V1: v1.NewRoutes(handlerProvider),
+	}
+}
+
+// Register attaches all available routes to the gin engine.
+func (p *Provider) Register(engine *gin.Engine) {
+	p.V1.Register(engine)
+}
diff --git a/services/template-api/internal/interfaces/httpserver/routes/v1/routes.go b/services/template-api/internal/interfaces/httpserver/routes/v1/routes.go
new file mode 100644
index 00000000..6ad643ee
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/routes/v1/routes.go
@@ -0,0 +1,25 @@
+package v1
+
+import (
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/template-api/internal/interfaces/httpserver/handlers"
+)
+
+// Routes encapsulates versioned route registration.
+type Routes struct {
+	handlers *handlers.Provider
+}
+
+// NewRoutes builds the v1 route registrar.
+func NewRoutes(handlerProvider *handlers.Provider) *Routes {
+	return &Routes{
+		handlers: handlerProvider,
+	}
+}
+
+// Register attaches all v1 routes under /v1 prefix.
+func (r *Routes) Register(engine *gin.Engine) {
+	group := engine.Group("/v1")
+	registerSampleRoutes(group, r.handlers.Sample)
+}
diff --git a/services/template-api/internal/interfaces/httpserver/routes/v1/sample_routes.go b/services/template-api/internal/interfaces/httpserver/routes/v1/sample_routes.go
new file mode 100644
index 00000000..66a42244
--- /dev/null
+++ b/services/template-api/internal/interfaces/httpserver/routes/v1/sample_routes.go
@@ -0,0 +1,41 @@
+package v1
+
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+
+	"jan-server/services/template-api/internal/interfaces/httpserver/handlers"
+)
+
+type sampleResponse struct {
+	ID      string `json:"id" example:"sample-2"`
+	Message string `json:"message" example:"Customize this implementation for real data sources"`
+}
+
+type errorResponse struct {
+	Error string `json:"error"`
+}
+
+func registerSampleRoutes(router gin.IRoutes, handler *handlers.SampleHandler) {
+	router.GET("/sample", getSample(handler))
+}
+
+// getSample godoc
+// @Summary      Fetch sample payload
+// @Description  Demonstrates route -> handler -> domain -> repository wiring.
+// @Tags         sample
+// @Produce      json
+// @Success      200  {object}  sampleResponse
+// @Failure      500  {object}  errorResponse
+// @Router       /v1/sample [get]
+func getSample(handler *handlers.SampleHandler) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		result, err := handler.GetSample(c.Request.Context())
+		if err != nil {
+			// NOTE(review): every failure becomes a 500 carrying the raw error text; consider mapping platform error types (e.g. NOT_FOUND) via responses.HandleError.
+			c.JSON(http.StatusInternalServerError, errorResponse{Error: err.Error()})
+			return
+		}
+		c.JSON(http.StatusOK, result)
+	}
+}
diff --git a/services/template-api/internal/utils/platformerrors/errors.go b/services/template-api/internal/utils/platformerrors/errors.go
new file mode 100644
index 00000000..d47c0709
--- /dev/null
+++ b/services/template-api/internal/utils/platformerrors/errors.go
@@ -0,0 +1,210 @@
+package platformerrors
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/rs/zerolog"
+	"gorm.io/gorm"
+)
+
+// getRequestIDFromContext returns the string stored in ctx under the "requestID" key, or "" when it is missing or not a string.
+func getRequestIDFromContext(ctx context.Context) string {
+	requestID, isString := ctx.Value("requestID").(string)
+	if !isString {
+		return ""
+	}
+	return requestID
+}
+
+// ErrorType is the string category stored in PlatformError.Type; ErrorTypeToHTTPStatus maps each value to an HTTP status code.
+type ErrorType string
+
+const (
+	ErrorTypeNotFound       ErrorType = "NOT_FOUND"
+	ErrorTypeTooManyRecords ErrorType = "TOO_MANY_RECORDS"
+	ErrorTypeValidation     ErrorType = "VALIDATION"
+	ErrorTypeConflict       ErrorType = "CONFLICT"
+	ErrorTypeUnauthorized   ErrorType = "UNAUTHORIZED"
+	ErrorTypeForbidden      ErrorType = "FORBIDDEN"
+	ErrorTypeInternal       ErrorType = "INTERNAL"
+	ErrorTypeExternal       ErrorType = "EXTERNAL"
+	ErrorTypeDatabaseError  ErrorType = "DATABASE_ERROR"
+	ErrorTypeNotImplemented ErrorType = "NOT_IMPLEMENTED"
+)
+
+// Layer names the application layer that raised an error; it is the first bracketed field of PlatformError.Error() and the "layer" field written by LogError.
+type Layer string
+
+const (
+	LayerRepository     Layer = "repository"
+	LayerDomain         Layer = "domain"
+	LayerHandler        Layer = "handler"
+	LayerRoute          Layer = "route"
+	LayerInfrastructure Layer = "infrastructure"
+	LayerCommon         Layer = "common"
+)
+
+// PlatformError is a typed, layer-tagged error that can wrap a cause and carry structured fields for logging.
+type PlatformError struct {
+	UUID      string         // identifier for this error; LogError writes it as "error_uuid"
+	Type      ErrorType      // category; see ErrorTypeToHTTPStatus for the HTTP mapping
+	Message   string         // human-readable description; used as the log message by LogError
+	Err       error          // wrapped cause returned by Unwrap; nil when there is none
+	Context   map[string]any // extra key/value pairs; LogError emits each one as a log field
+	RequestID string         // request ID read from the context by NewErrorWithContext; "" if none
+	Layer     Layer          // application layer that created the error
+	Timestamp time.Time      // creation time, set in UTC by NewErrorWithContext
+}
+
+// Error renders "[layer][type][uuid] message" and appends ": <cause>" when a wrapped error is present.
+func (e *PlatformError) Error() string {
+	if e.Err == nil {
+		return fmt.Sprintf("[%s][%s][%s] %s", e.Layer, e.Type, e.UUID, e.Message)
+	}
+	return fmt.Sprintf("[%s][%s][%s] %s: %v", e.Layer, e.Type, e.UUID, e.Message, e.Err)
+}
+
+// Unwrap returns the wrapped cause (e.Err, possibly nil) so errors.Is and errors.As can look through a PlatformError.
+func (e *PlatformError) Unwrap() error {
+	return e.Err
+}
+
+// GetErrorType returns the ErrorType stored in e.Type.
+func (e *PlatformError) GetErrorType() ErrorType {
+	return e.Type
+}
+
+// GetRequestID returns e.RequestID, which is empty when no request ID was captured at creation.
+func (e *PlatformError) GetRequestID() string {
+	return e.RequestID
+}
+
+// GetUUID returns e.UUID, the identifier that LogError writes as "error_uuid".
+func (e *PlatformError) GetUUID() string {
+	return e.UUID
+}
+
+// NewError creates a PlatformError without extra context fields by calling NewErrorWithContext with a nil field map.
+func NewError(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string) *PlatformError {
+	return NewErrorWithContext(ctx, layer, errorType, message, err, customUUID, nil)
+}
+
+// NewErrorWithContext creates a PlatformError carrying the request ID from ctx, a UTC creation time and a copy of contextFields.
+// An empty customUUID is replaced by a time-derived ID ("err-<unix nanoseconds>") so separate errors stay distinguishable in logs.
+func NewErrorWithContext(ctx context.Context, layer Layer, errorType ErrorType, message string, err error, customUUID string, contextFields map[string]any) *PlatformError {
+	createdAt := time.Now().UTC()
+	errorUUID := customUUID
+	if errorUUID == "" {
+		// Best-effort uniqueness without adding a UUID dependency; this is not an RFC 4122 UUID.
+		errorUUID = fmt.Sprintf("err-%d", createdAt.UnixNano())
+	}
+
+	// Copy the caller's map so the error never aliases it; a nil map yields an empty, non-nil Context.
+	errorContext := make(map[string]any, len(contextFields))
+	for k, v := range contextFields {
+		errorContext[k] = v
+	}
+
+	return &PlatformError{
+		UUID:      errorUUID,
+		Type:      errorType,
+		Message:   message,
+		Err:       err,
+		Context:   errorContext,
+		RequestID: getRequestIDFromContext(ctx),
+		Layer:     layer,
+		Timestamp: createdAt,
+	}
+}
+
+// AsError wraps err as a PlatformError raised at layer. An existing PlatformError keeps its Type and UUID and has message prefixed;
+// any other error becomes NOT_FOUND for gorm.ErrRecordNotFound, otherwise INTERNAL. A nil err or nil *PlatformError yields nil.
+func AsError(ctx context.Context, layer Layer, err error, message string) *PlatformError {
+	// A nil *PlatformError boxed in a non-nil error (easy to produce from this function's own result) must not be dereferenced.
+	if boxed, ok := err.(*PlatformError); err == nil || (ok && boxed == nil) {
+		return nil
+	}
+
+	var inner *PlatformError
+	if errors.As(err, &inner) {
+		return NewError(ctx, layer, inner.Type, fmt.Sprintf("%s: %s", message, inner.Message), inner, inner.UUID)
+	}
+
+	if errors.Is(err, gorm.ErrRecordNotFound) {
+		return NewError(ctx, layer, ErrorTypeNotFound, message, err, "")
+	}
+	return NewError(ctx, layer, ErrorTypeInternal, message, err, "")
+}
+
+// ErrorTypeToHTTPStatus returns the HTTP status code associated with errorType.
+// Client-facing categories get their specific 4xx/5xx code; everything else,
+// including unrecognised values, falls back to 500 Internal Server Error.
+func ErrorTypeToHTTPStatus(errorType ErrorType) int {
+	status := http.StatusInternalServerError
+
+	switch errorType {
+	case ErrorTypeNotFound:
+		status = http.StatusNotFound
+	case ErrorTypeValidation:
+		status = http.StatusBadRequest
+	case ErrorTypeConflict:
+		status = http.StatusConflict
+	case ErrorTypeUnauthorized:
+		status = http.StatusUnauthorized
+	case ErrorTypeForbidden:
+		status = http.StatusForbidden
+	case ErrorTypeNotImplemented:
+		status = http.StatusNotImplemented
+	case ErrorTypeExternal:
+		status = http.StatusBadGateway
+	case ErrorTypeTooManyRecords, ErrorTypeDatabaseError, ErrorTypeInternal:
+		// Listed for completeness: these keep the 500 default.
+	}
+
+	return status
+}
+
+// IsErrorType reports whether the first PlatformError found in err's chain has Type equal to errorType.
+// It returns false for a nil err, for a chain without a PlatformError, and for a nil *PlatformError
+// boxed in a non-nil error (errors.As matches that value, so it must be checked before use).
+func IsErrorType(err error, errorType ErrorType) bool {
+	if err == nil {
+		return false
+	}
+	var matched *PlatformError
+	if !errors.As(err, &matched) || matched == nil {
+		return false
+	}
+	return matched.Type == errorType
+}
+
+// LogError writes err to logger at error level with its UUID, type, layer and timestamp as structured fields,
+// plus the request ID (when set), every Context entry and the wrapped cause (when present). A nil err is ignored.
+func LogError(logger zerolog.Logger, err *PlatformError) {
+	if err == nil {
+		return
+	}
+
+	entry := logger.Error()
+	entry = entry.Str("error_uuid", err.UUID)
+	entry = entry.Str("error_type", string(err.Type))
+	entry = entry.Str("layer", string(err.Layer))
+	entry = entry.Time("timestamp_utc", err.Timestamp)
+
+	if err.RequestID != "" {
+		entry = entry.Str("request_id", err.RequestID)
+	}
+	for key, value := range err.Context {
+		entry = entry.Interface(key, value)
+	}
+
+	if cause := err.Err; cause != nil {
+		entry = entry.Err(cause)
+	}
+
+	entry.Msg(err.Message)
+}
diff --git a/tests/.env.example b/tests/.env.example
deleted file mode 100644
index 4813e206..00000000
--- a/tests/.env.example
+++ /dev/null
@@ -1,47 +0,0 @@
-# Load Test Environment Configuration
-# Copy this file to .env and customize as needed
-
-# ===========================================
-# Target Service Configuration
-# ===========================================
-BASE=https://api-dev.jan.ai
-MODEL=jan-v1-4b
-
-# ===========================================
-# Test Configuration
-# ===========================================
-# Test duration in minutes
-DURATION_MIN=5
-
-# Requests per second for non-streaming tests
-NONSTREAM_RPS=2
-
-# Requests per second for streaming tests  
-STREAM_RPS=1
-
-# ===========================================
-# Prometheus Remote Write Configuration  
-# ===========================================
-# k6 Prometheus remote write endpoint (follows k6 official docs)
-# Example: https://prometheus.example.com/api/v1/write
-K6_PROMETHEUS_RW_SERVER_URL=
-
-# Basic auth credentials for Prometheus endpoint (optional)
-K6_PROMETHEUS_RW_USERNAME=
-K6_PROMETHEUS_RW_PASSWORD=
-
-# Trend metrics to export (default: p(99))
-# Available stats: count,sum,min,max,avg,med,p(X) where X is percentile
-K6_PROMETHEUS_RW_TREND_STATS=p(95),p(99),min,max
-
-# How often to push metrics to Prometheus (default: 5s)
-K6_PROMETHEUS_RW_PUSH_INTERVAL=5s
-
-# ===========================================
-# Advanced Configuration (Optional)
-# ===========================================
-# Custom tags for metrics (comma-separated)
-# CUSTOM_TAGS=environment:dev,team:backend
-
-# Results retention days
-# RESULTS_RETENTION_DAYS=30
diff --git a/tests/.gitignore b/tests/.gitignore
deleted file mode 100644
index 5436a254..00000000
--- a/tests/.gitignore
+++ /dev/null
@@ -1,17 +0,0 @@
-# Environment configuration
-.env
-!.env.example
-
-# Test results
-results/
-*.json
-!grafana-dashboard.json
-*.log
-
-# Temporary files
-*.tmp
-*.temp
-
-# OS files
-.DS_Store
-Thumbs.db
diff --git a/tests/Dockerfile b/tests/Dockerfile
deleted file mode 100644
index 5286d060..00000000
--- a/tests/Dockerfile
+++ /dev/null
@@ -1,24 +0,0 @@
-# Simple Docker image to run k6-based load tests for Jan Server
-# Supports two modes:
-# 1) Mount your local tests directory to /tests to use local files and persist results
-# 2) Use the baked-in copy of the tests (copied at build time)
-
-FROM grafana/k6:latest
-
-USER root
-RUN apk update \
-    && apk add --no-cache bash jq ca-certificates \
-    && rm -rf /var/cache/apk/*
-
-# Baked-in tests (fallback when no volume is mounted)
-WORKDIR /app
-COPY run-loadtest.sh /app/run-loadtest.sh
-COPY src /app/src
-RUN chmod +x /app/run-loadtest.sh
-
-# Lightweight entrypoint that prefers mounted /tests when available
-COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
-RUN chmod +x /usr/local/bin/docker-entrypoint.sh
-
-ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
-CMD [""]
diff --git a/tests/HOW_TO_CREATE_NEW_TEST_SCENARIOS.md b/tests/HOW_TO_CREATE_NEW_TEST_SCENARIOS.md
deleted file mode 100644
index 02e95d15..00000000
--- a/tests/HOW_TO_CREATE_NEW_TEST_SCENARIOS.md
+++ /dev/null
@@ -1,670 +0,0 @@
-# How to Create New Test Scenarios
-
-This guide explains how to create new K6 test scenarios for the Jan Server API.
-
-## 🎯 Overview
-
-Our test framework supports three main test types:
-1. **Standard Completion Tests** - Basic API functionality
-2. **Conversation Management Tests** - Conversation lifecycle
-3. **Response API Tests** - Response endpoint testing
-
-## 📁 File Structure
-
-```
-tests/
-├── src/
-│   ├── test-completion-standard.js      # Basic completion flows
-│   ├── test-completion-conversation.js  # Conversation management
-│   ├── test-responses.js               # Response API testing
-│   └── your-new-test.js                 # Your new test scenario
-├── grafana/                             # Monitoring setup
-├── results/                             # Test results
-└── run-loadtest.sh                      # Test runner
-```
-
-## 🚀 Creating a New Test
-
-### Step 1: Copy Template
-
-Start by copying an existing test file:
-
-```bash
-cp src/test-completion-standard.js src/test-your-scenario.js
-```
-
-### Step 2: Update Test Configuration
-
-```javascript
-// ====== Test Configuration ======
-const TEST_ID = `test-your-scenario-${Date.now()}`;
-const TEST_CASE = 'your-scenario';
-
-// ====== Custom metrics ======
-const yourMetric = new Trend('your_metric_ms', true);
-const errors = new Counter('your_errors');
-const successes = new Counter('your_successes');
-
-// ====== Options ======
-export const options = {
-  iterations: 1,
-  vus: 1,
-  thresholds: {
-    'http_req_failed': ['rate<0.05'],
-    'your_metric_ms': ['p(95)<10000'],
-  },
-  discardResponseBodies: false,
-  tags: {
-    testid: TEST_ID,
-    test_case: TEST_CASE,
-  },
-};
-```
-
-### Step 3: Implement Test Functions
-
-```javascript
-// ====== Test Functions ======
-function yourTestFunction() {
-  console.log('[YOUR TEST] Starting your test...');
-  
-  const startTime = Date.now();
-  
-  // Your test logic here
-  const res = http.post(`${BASE}/v1/your-endpoint`, {
-    // Your payload
-  }, {
-    headers: buildHeaders()
-  });
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  yourMetric.add(duration);
-  
-  const ok = check(res, {
-    'your test status 200': (r) => r.status === 200,
-    'your test has data': (r) => r.body && r.body.length > 0
-  });
-  
-  if (ok) {
-    successes.add(1);
-    console.log('[YOUR TEST] ✅ Success!');
-  } else {
-    errors.add(1);
-    console.log('[YOUR TEST] ❌ Failed!');
-  }
-}
-```
-
-### Step 4: Create Main Test Function
-
-```javascript
-export default function() {
-  console.log('========================================');
-  console.log('   YOUR SCENARIO TESTS');
-  console.log('========================================');
-  console.log(`Base URL: ${BASE}`);
-  console.log(`Model: ${MODEL}`);
-  console.log(`Debug Mode: ${DEBUG ? 'ENABLED' : 'DISABLED'}`);
-  console.log();
-  
-  // Authentication
-  guestLogin();
-  refreshToken();
-  
-  // Your test steps
-  yourTestFunction();
-  
-  console.log();
-  console.log('========================================');
-  console.log('            TEST SUMMARY');
-  console.log('========================================');
-  console.log('✅ Your test completed!');
-  console.log('========================================');
-}
-```
-
-## 🔧 Helper Functions
-
-### Authentication Helpers
-
-**Complete Guest Authentication Pattern:**
-
-```javascript
-// Global state for authentication
-let accessToken = '';
-let refreshToken = '';
-
-function guestLogin() {
-  console.log('[GUEST LOGIN] Starting guest login...');
-  
-  const headers = buildHeaders();
-  const res = http.post(`${BASE}/v1/auth/guest-login`, {}, { headers });
-  
-  debugResponse(res);
-  
-  const ok = check(res, {
-    'guest login status 200': (r) => r.status === 200,
-    'guest login has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    const body = JSON.parse(res.body);
-    accessToken = body.access_token;
-    __ENV.ACCESS_TOKEN = accessToken;
-    
-    // Extract refresh token from Set-Cookie header
-    const setCookieHeader = res.headers['Set-Cookie'];
-    if (setCookieHeader) {
-      const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-      if (refreshTokenMatch) {
-        refreshToken = refreshTokenMatch[1];
-      }
-    }
-    
-    console.log('[GUEST LOGIN] ✅ Success!');
-  } else {
-    console.log('[GUEST LOGIN] ❌ Failed!');
-  }
-}
-
-function refreshToken() {
-  console.log('[REFRESH TOKEN] Refreshing access token...');
-  
-  const headers = {
-    'Content-Type': 'application/json',
-    'Cookie': `jan_refresh_token=${refreshToken}`,
-    'Authorization': `Bearer ${accessToken}`
-  };
-  
-  const res = http.get(`${BASE}/v1/auth/refresh-token`, { headers });
-  
-  debugResponse(res);
-  
-  const ok = check(res, {
-    'refresh token status 200': (r) => r.status === 200,
-    'refresh token has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    const body = JSON.parse(res.body);
-    accessToken = body.access_token;
-    __ENV.ACCESS_TOKEN = accessToken;
-    
-    // Update refresh token from new Set-Cookie header
-    const setCookieHeader = res.headers['Set-Cookie'];
-    if (setCookieHeader) {
-      const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-      if (refreshTokenMatch) {
-        refreshToken = refreshTokenMatch[1];
-      }
-    }
-    
-    console.log('[REFRESH TOKEN] ✅ Success!');
-  } else {
-    console.log('[REFRESH TOKEN] ❌ Failed!');
-  }
-}
-```
-
-**Key Points:**
-- **No API keys needed**: All tests use guest authentication automatically
-- **Token refresh**: Always refresh tokens before requests to prevent timeouts
-- **Cookie handling**: Extract refresh tokens from Set-Cookie headers
-- **Global state**: Store tokens in global variables for reuse
-
-### Utility Functions
-
-```javascript
-function buildHeaders() {
-  const headers = {
-    'Content-Type': 'application/json'
-  };
-  
-  if (__ENV.ACCESS_TOKEN) {
-    headers['Authorization'] = `Bearer ${__ENV.ACCESS_TOKEN}`;
-  }
-  
-  return headers;
-}
-
-function debugResponse(response) {
-  if (DEBUG) {
-    console.log('[DEBUG] ====== RESPONSE ======');
-    console.log(`[DEBUG] Status: ${response.status}`);
-    console.log(`[DEBUG] Headers:`, response.headers);
-    console.log(`[DEBUG] Body:`, response.body);
-    console.log('[DEBUG] =====================');
-  }
-}
-```
-
-## 📊 Metrics and Monitoring
-
-### Custom Metrics
-
-```javascript
-// ====== Custom metrics ======
-const yourMetric = new Trend('your_metric_ms', true);
-const yourCounter = new Counter('your_counter');
-const yourGauge = new Gauge('your_gauge');
-
-// Usage
-yourMetric.add(duration);
-yourCounter.add(1);
-yourGauge.add(value);
-```
-
-### Thresholds
-
-```javascript
-thresholds: {
-  'http_req_failed': ['rate<0.05'],           // Error rate < 5%
-  'your_metric_ms': ['p(95)<10000'],          // 95th percentile < 10s
-  'http_req_duration': ['p(99)<15000'],       // 99th percentile < 15s
-}
-```
-
-### Tags for Filtering
-
-```javascript
-tags: {
-  testid: TEST_ID,           // Unique test identifier
-  test_case: TEST_CASE,      // Test category
-  scenario: 'your-scenario', // Specific scenario
-  method: 'POST',            // HTTP method
-  status: '200'              // Response status
-}
-```
-
-## 🧪 Test Patterns
-
-### API Endpoint Testing
-
-```javascript
-function testApiEndpoint() {
-  const startTime = Date.now();
-  
-  const res = http.post(`${BASE}/v1/your-endpoint`, {
-    // Your payload
-  }, {
-    headers: buildHeaders()
-  });
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  
-  const ok = check(res, {
-    'endpoint status 200': (r) => r.status === 200,
-    'endpoint has expected data': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.expected_field !== undefined;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  return { ok, duration, response: res };
-}
-```
-
-### Streaming Response Testing
-
-```javascript
-function testStreamingEndpoint() {
-  const startTime = Date.now();
-  
-  const res = http.post(`${BASE}/v1/streaming-endpoint`, {
-    stream: true
-  }, {
-    headers: buildHeaders()
-  });
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  
-  // Check for streaming response
-  const isStreaming = res.headers['Content-Type'] && 
-                     res.headers['Content-Type'].includes('text/event-stream');
-  
-  // Check for completion signal
-  const hasCompletionSignal = res.body.includes('data: [DONE]');
-  
-  const ok = check(res, {
-    'streaming status 200': (r) => r.status === 200,
-    'streaming content type': (r) => isStreaming,
-    'streaming completion signal': (r) => hasCompletionSignal
-  });
-  
-  return { ok, duration, response: res };
-}
-```
-
-### Error Handling
-
-```javascript
-function testWithErrorHandling() {
-  try {
-    const res = http.post(`${BASE}/v1/endpoint`, payload, { headers });
-    
-    if (res.status >= 400) {
-      console.log(`[ERROR] HTTP ${res.status}: ${res.body}`);
-      return false;
-    }
-    
-    return check(res, {
-      'successful response': (r) => r.status === 200
-    });
-    
-  } catch (error) {
-    console.log(`[ERROR] Request failed: ${error.message}`);
-    return false;
-  }
-}
-```
-
-## 🔄 Integration with Test Runner
-
-### Update run-loadtest.sh
-
-Add your test to the test runner:
-
-```bash
-# In run-loadtest.sh
-case "$TEST_CASE" in
-  "test-completion-standard")
-    TEST_FILE="src/test-completion-standard.js"
-    ;;
-  "test-completion-conversation")
-    TEST_FILE="src/test-completion-conversation.js"
-    ;;
-  "test-responses")
-    TEST_FILE="src/test-responses.js"
-    ;;
-  "test-your-scenario")                    # Add your test
-    TEST_FILE="src/test-your-scenario.js"
-    ;;
-  *)
-    echo "Unknown test case: $TEST_CASE"
-    exit 1
-    ;;
-esac
-```
-
-### Run Your Test
-
-```bash
-# Using test runner
-./run-loadtest.sh test-your-scenario
-
-# Direct execution
-k6 run src/test-your-scenario.js
-```
-
-## 📈 Best Practices
-
-### 1. Test Structure
-- **Single Responsibility**: Each test should focus on one scenario
-- **Clear Naming**: Use descriptive names for functions and variables
-- **Consistent Format**: Follow existing test patterns
-- **Auto-Detection**: Tests are automatically detected by scanning `src/*.js` files
-
-### 2. Error Handling
-- **Graceful Failures**: Handle errors without crashing the test
-- **Meaningful Messages**: Provide clear error descriptions
-- **Debug Information**: Include relevant debugging data
-
-### 3. Performance
-- **Reasonable Thresholds**: Set achievable performance targets
-- **Resource Management**: Don't overwhelm the API
-- **Monitoring**: Include relevant metrics
-
-### 4. Documentation
-- **Comments**: Explain complex logic
-- **README**: Document your test's purpose
-- **Examples**: Provide usage examples
-
-## 🎯 Threshold Guidelines
-
-### Standard Response Times
-- **Guest login**: `p(95)<2000ms` (2 seconds)
-- **Token refresh**: `p(95)<2000ms` (2 seconds)
-- **Regular responses**: `p(95)<10000ms` (10 seconds)
-- **Streaming responses**: `p(95)<15000ms` (15 seconds)
-
-### Tool Call Response Times
-- **Tool call responses**: `p(95)<300000ms` (5 minutes)
-- **Tool call streaming**: `p(95)<300000ms` (5 minutes)
-
-Tool calls require extended timeouts because they may involve external API calls and complex processing.
-
-### Example Threshold Configuration
-```javascript
-thresholds: {
-  'http_req_failed': ['rate<0.05'],           // Error rate < 5%
-  'guest_login_time_ms': ['p(95)<2000'],      // Guest login < 2s
-  'refresh_token_time_ms': ['p(95)<2000'],    // Token refresh < 2s
-  'completion_time_ms': ['p(95)<10000'],      // Regular completion < 10s
-  'streaming_time_ms': ['p(95)<15000'],       // Streaming < 15s
-  'response_time_with_tools_ms': ['p(95)<300000'], // Tool calls < 5min
-}
-```
-
-## 🔄 Auto-Detection System
-
-The framework automatically:
-- Scans `src/*.js` files for test scripts
-- Extracts test case names from filenames
-- Makes them available in CLI and reports
-- Validates file existence before running
-- **No manual registration required** - just add your `.js` file and it will be available immediately
-
-### Example: Adding a Health Check Test
-```bash
-# Copy the example template
-cp src/health-check.js.example src/health-check.js
-
-# Edit the file as needed
-# The test is now automatically available:
-k6 run src/health-check.js
-./run-loadtest.sh health-check
-```
-
-## 🧪 Testing Your New Test
-
-### 1. Basic Validation
-```bash
-# Test syntax
-k6 run --dry-run src/test-your-scenario.js
-
-# Test execution
-k6 run src/test-your-scenario.js
-```
-
-### 2. Debug Mode
-```bash
-# Enable debug logging
-DEBUG=true k6 run src/test-your-scenario.js
-```
-
-### 3. Performance Testing
-```bash
-# Load test
-DURATION_MIN=5 NONSTREAM_RPS=5 k6 run src/test-your-scenario.js
-```
-
-### 4. Integration Testing
-```bash
-# Use test runner
-./run-loadtest.sh test-your-scenario
-```
-
-## 📝 Example: Complete Test File
-
-```javascript
-import http from 'k6/http';
-import { check } from 'k6';
-import { Trend, Counter } from 'k6/metrics';
-
-// ====== Test Configuration ======
-const TEST_ID = `test-example-${Date.now()}`;
-const TEST_CASE = 'example';
-
-// ====== Environment Variables ======
-const BASE = __ENV.BASE || 'https://api-dev.jan.ai';
-const MODEL = __ENV.MODEL || 'jan-v1-4b';
-const DEBUG = __ENV.DEBUG === 'true';
-
-// ====== Custom metrics ======
-const exampleTime = new Trend('example_time_ms', true);
-const errors = new Counter('example_errors');
-const successes = new Counter('example_successes');
-
-// ====== Options ======
-export const options = {
-  iterations: 1,
-  vus: 1,
-  thresholds: {
-    'http_req_failed': ['rate<0.05'],
-    'example_time_ms': ['p(95)<5000'],
-  },
-  discardResponseBodies: false,
-  tags: {
-    testid: TEST_ID,
-    test_case: TEST_CASE,
-  },
-};
-
-// ====== Helper Functions ======
-function buildHeaders() {
-  const headers = { 'Content-Type': 'application/json' };
-  if (__ENV.ACCESS_TOKEN) {
-    headers['Authorization'] = `Bearer ${__ENV.ACCESS_TOKEN}`;
-  }
-  return headers;
-}
-
-function debugResponse(response) {
-  if (DEBUG) {
-    console.log('[DEBUG] ====== RESPONSE ======');
-    console.log(`[DEBUG] Status: ${response.status}`);
-    console.log(`[DEBUG] Body:`, response.body);
-    console.log('[DEBUG] =====================');
-  }
-}
-
-// ====== Test Functions ======
-function guestLogin() {
-  console.log('[GUEST LOGIN] Starting guest login...');
-  
-  const headers = buildHeaders();
-  const res = http.post(`${BASE}/v1/auth/guest-login`, {}, { headers });
-  
-  debugResponse(res);
-  
-  const ok = check(res, {
-    'guest login status 200': (r) => r.status === 200,
-    'guest login has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    const body = JSON.parse(res.body);
-    __ENV.ACCESS_TOKEN = body.access_token;
-    console.log('[GUEST LOGIN] ✅ Success!');
-  } else {
-    console.log('[GUEST LOGIN] ❌ Failed!');
-  }
-}
-
-function testExampleEndpoint() {
-  console.log('[EXAMPLE] Testing example endpoint...');
-  
-  const startTime = Date.now();
-  
-  const res = http.post(`${BASE}/v1/example`, {
-    message: 'Hello, world!'
-  }, {
-    headers: buildHeaders()
-  });
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  exampleTime.add(duration);
-  
-  debugResponse(res);
-  
-  const ok = check(res, {
-    'example status 200': (r) => r.status === 200,
-    'example has response': (r) => r.body && r.body.length > 0
-  });
-  
-  if (ok) {
-    successes.add(1);
-    console.log('[EXAMPLE] ✅ Success!');
-  } else {
-    errors.add(1);
-    console.log('[EXAMPLE] ❌ Failed!');
-  }
-}
-
-// ====== Main Test Function ======
-export default function() {
-  console.log('========================================');
-  console.log('   EXAMPLE TESTS');
-  console.log('========================================');
-  console.log(`Base URL: ${BASE}`);
-  console.log(`Model: ${MODEL}`);
-  console.log(`Debug Mode: ${DEBUG ? 'ENABLED' : 'DISABLED'}`);
-  console.log();
-  
-  // Test steps
-  guestLogin();
-  testExampleEndpoint();
-  
-  console.log();
-  console.log('========================================');
-  console.log('            TEST SUMMARY');
-  console.log('========================================');
-  console.log('✅ Example tests completed!');
-  console.log('========================================');
-}
-```
-
-## 🚀 Next Steps
-
-1. **Create your test file** using the template above
-2. **Implement your test logic** following the patterns
-3. **Add to test runner** for easy execution
-4. **Test thoroughly** with different configurations
-5. **Document your test** in the README
-6. **Share with team** for review and integration
-
-## 📚 Additional Resources
-
-- **K6 Documentation**: [k6.io/docs](https://k6.io/docs)
-- **Existing Tests**: Study `src/test-*.js` files
-- **Test Runner**: See `run-loadtest.sh`
-- **Monitoring**: See `grafana/README.md`
diff --git a/tests/HOW_TO_RUN_TESTS_LOCALLY.md b/tests/HOW_TO_RUN_TESTS_LOCALLY.md
deleted file mode 100644
index 8ad39ada..00000000
--- a/tests/HOW_TO_RUN_TESTS_LOCALLY.md
+++ /dev/null
@@ -1,384 +0,0 @@
-# How to Run Load Tests Locally
-
-This guide explains how to run K6 load tests locally on your machine.
-
-## 🚀 Quick Start
-
-### Prerequisites
-
-- **K6 installed** (see installation section below)
-- **Docker** (optional, for Grafana monitoring)
-- **Internet connection** (tests run against live APIs)
-
-### Basic Test Run
-
-```bash
-# Run a single test
-k6 run src/test-completion-standard.js
-
-# Run with specific environment variables
-BASE=https://api-dev.jan.ai MODEL=jan-v1-4b k6 run src/test-completion-standard.js
-```
-
-### Using Test Runner Scripts
-
-```bash
-# Linux/Mac
-./run-loadtest.sh test-completion-standard
-
-# Windows
-.\run-loadtest.bat test-completion-standard
-```
-
-## 📋 Available Test Scenarios
-
-### 1. Standard Completion Tests
-```bash
-k6 run src/test-completion-standard.js
-```
-**What it tests:**
-- Guest authentication
-- Token refresh
-- Model listing
-- Non-streaming completions
-- Streaming completions
-
-### 2. Conversation Management Tests
-```bash
-k6 run src/test-completion-conversation.js
-```
-**What it tests:**
-- Guest authentication
-- Conversation creation
-- Message addition (non-streaming)
-- Message addition (streaming)
-- Conversation listing
-- Conversation items retrieval
-
-### 3. Response API Tests
-```bash
-k6 run src/test-responses.js
-```
-**What it tests:**
-- Guest authentication
-- Non-streaming responses (without tools)
-- Non-streaming responses (with tools)
-- Streaming responses (without tools)
-- Streaming responses (with tools)
-
-## ⚙️ Configuration
-
-### Environment Variables
-
-Create a `.env` file in the `tests` directory:
-
-```bash
-# API Configuration
-BASE=https://api-dev.jan.ai
-MODEL=jan-v1-4b
-
-# Cloudflare Configuration (Required)
-LOADTEST_TOKEN=your_cloudflare_token
-
-# Test Configuration
-DEBUG=true
-DURATION_MIN=1
-NONSTREAM_RPS=2
-STREAM_RPS=1
-SINGLE_RUN=true
-
-# Optional: API Keys (not required for guest auth)
-# API_KEY=your_api_key
-```
-
-### Test Parameters
-
-| Variable | Description | Default | Example |
-|----------|-------------|---------|---------|
-| `BASE` | API base URL | `https://api-dev.jan.ai` | `https://api-stag.jan.ai` |
-| `MODEL` | LLM model to test | `jan-v1-4b` | `gpt-oss-20b` |
-| `LOADTEST_TOKEN` | Cloudflare load test token (required) | - | `cf_1234567890abcdef` |
-| `DEBUG` | Enable debug logging | `false` | `true` |
-| `DURATION_MIN` | Test duration in minutes | `1` | `5` |
-| `NONSTREAM_RPS` | Non-streaming requests per second | `2` | `5` |
-| `STREAM_RPS` | Streaming requests per second | `1` | `3` |
-| `SINGLE_RUN` | Run once instead of load test | `false` | `true` |
-
-### Custom Environment Variables
-
-You can add test-specific variables to `.env`:
-
-```bash
-# Health check specific
-HEALTH_RPS=10
-HEALTH_TIMEOUT=30
-
-# Your custom test specific  
-YOUR_TEST_PARAM=value
-```
-
-Then access in your k6 script:
-
-```javascript
-const HEALTH_RPS = Number(__ENV.HEALTH_RPS || 5);
-const YOUR_PARAM = __ENV.YOUR_TEST_PARAM || 'default';
-```
-
-**Important Notes:**
-- **LOADTEST_TOKEN is required!** This token is necessary for Cloudflare API access
-- Tests handle authentication automatically via guest login
-- Refresh tokens are managed automatically to prevent timeouts
-
-## 🔧 Installation
-
-### Install K6
-
-**macOS (Homebrew):**
-```bash
-brew install k6
-```
-
-**Ubuntu/Debian:**
-```bash
-sudo apt-get update && sudo apt-get install -y gnupg ca-certificates
-curl -fsSL https://dl.k6.io/key.gpg | sudo gpg --dearmor -o /usr/share/keyrings/k6-archive-keyring.gpg
-echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
-sudo apt-get update && sudo apt-get install -y k6
-```
-
-**Windows:**
-Download from [k6.io/docs/get-started/installation](https://k6.io/docs/get-started/installation)
-
-**Docker (Alternative):**
-```bash
-docker run --rm -i grafana/k6 run - <src/test-completion-standard.js
-```
-
-### Docker Testing with Custom Image
-
-**Building the Docker Image:**
-```bash
-# From the tests directory
-docker build -t janai/k6-tests:local .
-```
-
-**Running Tests with Docker:**
-```bash
-# Run all tests
-docker run --rm -it -e BASE=https://api-stag.jan.ai -e MODEL=jan-v1-4b -e DEBUG=true janai/k6-tests:local
-
-# Run specific test
-docker run --rm -it -e BASE=https://api-stag.jan.ai -e MODEL=jan-v1-4b -e DEBUG=true janai/k6-tests:local test-responses
-
-# Run with volume mount (for local development)
-docker run --rm -it -e BASE=https://api-stag.jan.ai -e MODEL=jan-v1-4b -e DEBUG=true -v "${PWD}":/tests janai/k6-tests:local
-```
-
-**Windows PowerShell Commands:**
-```powershell
-# Build
-docker build -t janai/k6-tests:local .
-
-# Run all tests
-docker run --rm -it -e BASE=https://api-stag.jan.ai -e MODEL=jan-v1-4b -e DEBUG=true janai/k6-tests:local
-
-# Run specific test
-docker run --rm -it -e BASE=https://api-stag.jan.ai -e MODEL=jan-v1-4b -e DEBUG=true janai/k6-tests:local test-responses
-```
-
-**Docker Features:**
-- **Alpine Linux base**: Lightweight and secure
-- **Bash support**: Full bash scripting capabilities
-- **jq included**: JSON parsing for metrics
-- **Auto-detection**: Automatically finds and runs test scripts
-- **Volume mounting**: Use local files with `-v` flag
-- **Environment variables**: Full support for all test configuration
-
-### Verify Installation
-
-```bash
-k6 version
-```
-
-## 📊 Output Formats
-
-### JSON Output
-```bash
-k6 run --out json=results/test-results.json src/test-completion-standard.js
-```
-
-### Console Output
-```bash
-k6 run src/test-completion-standard.js
-```
-
-### With Grafana Monitoring
-
-```bash
-# Start Grafana monitoring with Prometheus
-./setup-monitoring.sh
-
-# Run test with metrics automatically sent to Grafana
-./run-test-with-monitoring.sh test-completion-standard
-```
-
-**Access Dashboards:**
-- **Grafana**: http://localhost:3000 (admin/admin)
-- **Prometheus**: http://localhost:9090
-
-### Custom Metrics
-```bash
-k6 run --out experimental-prometheus-rw src/test-completion-standard.js
-```
-
-## 🐛 Troubleshooting
-
-### Common Issues
-
-**1. "No connection could be made"**
-- Check your internet connection
-- Verify the `BASE` URL is correct
-- Ensure the API server is running
-
-**2. "Authentication failed"**
-- Tests use guest authentication (no API key required)
-- **Check LOADTEST_TOKEN**: Ensure it's set correctly for Cloudflare access
-- Check if the API server supports guest login
-- Verify the API endpoint is accessible
-
-**3. "Model not found"**
-- Check available models: `curl $BASE/v1/models`
-- Update the `MODEL` environment variable
-- Ensure the model is available on the server
-
-**4. "Test timeout"**
-- Increase timeout in test configuration
-- Check API response times
-- Reduce load (`NONSTREAM_RPS`, `STREAM_RPS`)
-
-### Debug Mode
-
-Enable debug logging to see detailed request/response information:
-
-```bash
-DEBUG=true k6 run src/test-completion-standard.js
-```
-
-### Verbose Output
-
-```bash
-k6 run --verbose src/test-completion-standard.js
-```
-
-## 📈 Performance Testing
-
-### Load Test Configuration
-
-```bash
-# High load test
-DURATION_MIN=5 NONSTREAM_RPS=10 STREAM_RPS=5 k6 run src/test-completion-standard.js
-
-# Stress test
-DURATION_MIN=10 NONSTREAM_RPS=20 STREAM_RPS=10 k6 run src/test-completion-standard.js
-```
-
-### Thresholds
-
-Tests include performance thresholds:
-- HTTP error rate < 5%
-- Response times < 10 seconds
-- Authentication time < 2 seconds
-
-### Custom Thresholds
-
-Modify thresholds in test files:
-```javascript
-thresholds: {
-  'http_req_failed': ['rate<0.05'],
-  'http_req_duration': ['p(95)<10000'],
-  'completion_time_ms': ['p(95)<10000'],
-}
-```
-
-## 🔄 Continuous Testing
-
-### Run All Tests
-
-```bash
-# Run all test scenarios
-./run-loadtest.sh test-completion-standard
-./run-loadtest.sh test-completion-conversation  
-./run-loadtest.sh test-responses
-```
-
-### Automated Testing
-
-Create a test script:
-```bash
-#!/bin/bash
-# run-all-tests.sh
-
-echo "Running all load tests..."
-
-./run-loadtest.sh test-completion-standard
-./run-loadtest.sh test-completion-conversation
-./run-loadtest.sh test-responses
-
-echo "All tests completed!"
-```
-
-## 📝 Results Analysis
-
-### Understanding Output
-
-**Thresholds:**
-- ✅ Green checkmark = Test passed
-- ❌ Red X = Test failed
-
-**Metrics:**
-- `http_req_duration`: Response time statistics
-- `http_req_failed`: Error rate
-- `completion_time_ms`: Custom completion timing
-- `checks`: Test validation results
-
-### Saving Results
-
-```bash
-# Save to JSON file
-k6 run --out json=results/my-test.json src/test-completion-standard.js
-
-# Save to CSV
-k6 run --out csv=results/my-test.csv src/test-completion-standard.js
-```
-
-## 🌐 Different Environments
-
-### Development
-```bash
-BASE=https://api-dev.jan.ai k6 run src/test-completion-standard.js
-```
-
-### Staging
-```bash
-BASE=https://api-stag.jan.ai k6 run src/test-completion-standard.js
-```
-
-### Production
-```bash
-BASE=https://api.jan.ai k6 run src/test-completion-standard.js
-```
-
-## 📚 Additional Resources
-
-- **K6 Documentation**: [k6.io/docs](https://k6.io/docs)
-- **Test Scripts**: See `src/` directory
-- **Adding New Tests**: See `HOW_TO_CREATE_NEW_TEST_SCENARIOS.md`
-- **Monitoring Setup**: See `grafana/README.md`
-
-## 🆘 Getting Help
-
-1. **Check logs**: Enable `DEBUG=true` for detailed output
-2. **Verify setup**: Run `k6 version` and check prerequisites
-3. **Test connectivity**: Try `curl $BASE/v1/models`
-4. **Review documentation**: Check test-specific README files
-5. **Check issues**: Look at test thresholds and error messages
diff --git a/tests/README.md b/tests/README.md
deleted file mode 100644
index 21206c98..00000000
--- a/tests/README.md
+++ /dev/null
@@ -1,256 +0,0 @@
-# Jan Server Load Tests
-
-Comprehensive K6 load testing framework for the Jan Server API, including authentication, completions, conversations, and response endpoints.
-
-## 🚀 Quick Start
-
-### Run Tests Locally
-```bash
-# Basic test run
-k6 run src/test-completion-standard.js
-
-# Using test runner
-./run-loadtest.sh test-completion-standard
-```
-
-### With Monitoring
-```bash
-# Start Grafana monitoring with Prometheus
-./setup-monitoring.sh
-
-# Run test with metrics automatically sent to Grafana
-./run-test-with-monitoring.sh test-completion-standard
-```
-
-## 📚 Documentation
-
-- **[HOW_TO_RUN_TESTS_LOCALLY.md](HOW_TO_RUN_TESTS_LOCALLY.md)** - Complete guide for running tests locally
-- **[HOW_TO_CREATE_NEW_TEST_SCENARIOS.md](HOW_TO_CREATE_NEW_TEST_SCENARIOS.md)** - Guide for creating new test scenarios
-- **[grafana/README.md](grafana/README.md)** - Grafana monitoring setup and usage
-
-## 🧪 Test Scenarios
-
-### 1. Standard Completion Tests (`test-completion-standard.js`)
-- Guest authentication with token refresh
-- Model listing and validation
-- Non-streaming chat completions
-- Streaming chat completions
-
-### 2. Conversation Management Tests (`test-completion-conversation.js`)
-- Conversation creation and management
-- Message addition (non-streaming and streaming)
-- Conversation listing and retrieval
-- Message persistence validation
-
-### 3. Response API Tests (`test-responses.js`)
-- Non-streaming responses (with/without tools)
-- Streaming responses (with/without tools)
-- Tool call handling and validation
-
-## ⚙️ Configuration
-
-### Environment Variables
-```bash
-# API Configuration
-BASE=https://api-dev.jan.ai
-MODEL=jan-v1-4b
-
-# Cloudflare Configuration (Required)
-LOADTEST_TOKEN=your_cloudflare_token
-
-# Test Configuration
-DEBUG=true
-DURATION_MIN=1
-NONSTREAM_RPS=2
-STREAM_RPS=1
-SINGLE_RUN=true
-```
-
-### Test Parameters
-| Variable | Description | Default |
-|----------|-------------|---------|
-| `BASE` | API base URL | `https://api-dev.jan.ai` |
-| `MODEL` | LLM model to test | `jan-v1-4b` |
-| `LOADTEST_TOKEN` | Cloudflare load test token (required) | - |
-| `DEBUG` | Enable debug logging | `false` |
-| `DURATION_MIN` | Test duration (minutes) | `1` |
-| `NONSTREAM_RPS` | Non-streaming RPS | `2` |
-| `STREAM_RPS` | Streaming RPS | `1` |
-| `SINGLE_RUN` | Run once vs load test | `false` |
-
-## 📊 Monitoring
-
-### Grafana Dashboard
-- **Status**: ✅ **Working with Prometheus integration**
-- **Location**: `grafana/grafana-dashboard.json`
-- **Setup**: `./setup-monitoring.sh`
-- **Access**: http://localhost:3000 (admin/admin)
-- **Metrics**: Automatically sent to Prometheus and displayed in Grafana
-
-### Available Metrics
-- HTTP performance metrics (response time, throughput, error rates)
-- Custom completion timing metrics
-- Test segmentation by Test ID and Test Case
-- Real-time monitoring with 5s refresh
-
-## 🔧 Installation
-
-### K6 Installation
-
-**macOS:**
-```bash
-brew install k6
-```
-
-**Ubuntu/Debian:**
-```bash
-sudo apt-get update && sudo apt-get install -y gnupg ca-certificates
-curl -fsSL https://dl.k6.io/key.gpg | sudo gpg --dearmor -o /usr/share/keyrings/k6-archive-keyring.gpg
-echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
-sudo apt-get update && sudo apt-get install -y k6
-```
-
-**Windows:**
-Download from [k6.io/docs/get-started/installation](https://k6.io/docs/get-started/installation)
-
-### Docker (Alternative)
-   ```bash
-docker run --rm -i grafana/k6 run - <src/test-completion-standard.js
-   ```
-
-## 🏃‍♂️ Running Tests
-
-### Basic Execution
-   ```bash
-# Single test
-k6 run src/test-completion-standard.js
-
-# All tests
-./run-loadtest.sh test-completion-standard
-./run-loadtest.sh test-completion-conversation
-./run-loadtest.sh test-responses
-```
-
-### With Custom Configuration
-```bash
-BASE=https://api-stag.jan.ai MODEL=gpt-oss-20b k6 run src/test-completion-standard.js
-```
-
-### Load Testing
-```bash
-DURATION_MIN=5 NONSTREAM_RPS=10 STREAM_RPS=5 ./run-loadtest.sh test-completion-standard
-```
-
-## 📈 Performance Thresholds
-
-Tests include built-in performance thresholds:
-- HTTP error rate < 5%
-- Response times < 10 seconds
-- Authentication time < 2 seconds
-- Custom completion timing thresholds
-
-## 🌐 Environment Support
-
-### Development
-```bash
-BASE=https://api-dev.jan.ai ./run-loadtest.sh test-completion-standard
-```
-
-### Staging
-```bash
-BASE=https://api-stag.jan.ai ./run-loadtest.sh test-completion-standard
-```
-
-### Production
-```bash
-BASE=https://api.jan.ai ./run-loadtest.sh test-completion-standard
-```
-
-## 📁 Project Structure
-
-```
-tests/
-├── src/                                    # Test scripts
-│   ├── test-completion-standard.js         # Basic completion flows
-│   ├── test-completion-conversation.js     # Conversation management
-│   └── test-responses.js                   # Response API testing
-├── grafana/                                # Monitoring setup
-│   ├── README.md                           # Grafana documentation
-│   ├── docker-compose.yml                  # Monitoring stack
-│   ├── grafana-dashboard.json              # Pre-built dashboard
-│   └── prometheus.yml                      # Prometheus config
-├── results/                                # Test results
-├── HOW_TO_RUN_TESTS_LOCALLY.md             # Local testing guide
-├── HOW_TO_CREATE_NEW_TEST_SCENARIOS.md     # New test creation guide
-├── setup-monitoring.sh                     # Monitoring setup script
-├── setup-monitoring.bat                    # Windows monitoring setup
-├── run-test-with-monitoring.sh             # Test runner with metrics
-├── run-test-with-monitoring.bat            # Windows test runner with metrics
-├── run-loadtest.sh                         # Test runner script
-└── README.md                               # This file
-```
-
-## 🔍 Troubleshooting
-
-### Common Issues
-1. **Connection errors**: Check internet connection and API URL
-2. **Authentication failures**: Tests use guest auth (no API key needed)
-3. **Model not found**: Verify model availability with `curl $BASE/v1/models`
-4. **Timeouts**: Reduce load or increase timeout thresholds
-
-### Debug Mode
-```bash
-DEBUG=true ./run-loadtest.sh test-completion-standard
-```
-
-### Verbose Output
-```bash
-k6 run --verbose src/test-completion-standard.js
-```
-
-## 📊 Results Analysis
-
-### Understanding Output
-- ✅ Green checkmark = Test passed
-- ❌ Red X = Test failed
-- Metrics show response times, error rates, and custom timing
-
-### Saving Results
-   ```bash
-# JSON output
-k6 run --out json=results/my-test.json src/test-completion-standard.js
-
-# CSV output
-k6 run --out csv=results/my-test.csv src/test-completion-standard.js
-```
-
-## 🤝 Contributing
-
-### Adding New Tests
-1. Follow the guide in `HOW_TO_CREATE_NEW_TEST_SCENARIOS.md`
-2. Use existing tests as templates
-3. Include proper error handling and metrics
-4. Update test runner scripts
-5. Document your test scenario
-
-### Best Practices
-- Single responsibility per test file
-- Clear naming and documentation
-- Reasonable performance thresholds
-- Comprehensive error handling
-- Consistent test structure
-
-## 📚 Additional Resources
-
-- **K6 Documentation**: [k6.io/docs](https://k6.io/docs)
-- **Local Testing Guide**: [HOW_TO_RUN_TESTS_LOCALLY.md](HOW_TO_RUN_TESTS_LOCALLY.md)
-- **New Test Creation**: [HOW_TO_CREATE_NEW_TEST_SCENARIOS.md](HOW_TO_CREATE_NEW_TEST_SCENARIOS.md)
-- **Monitoring Setup**: [grafana/README.md](grafana/README.md)
-
-## 🆘 Support
-
-1. **Check documentation**: Review the specific guides above
-2. **Enable debug mode**: Use `DEBUG=true` for detailed output
-3. **Verify setup**: Run `k6 version` and check prerequisites
-4. **Test connectivity**: Try `curl $BASE/v1/models`
-5. **Review logs**: Check test output for specific error messages
\ No newline at end of file
diff --git a/tests/automation/auth-postman-scripts.json b/tests/automation/auth-postman-scripts.json
new file mode 100644
index 00000000..fb6e47af
--- /dev/null
+++ b/tests/automation/auth-postman-scripts.json
@@ -0,0 +1,1555 @@
+{
+  "info": {
+    "name": "jan-server Auth & LLM API Flows",
+    "_postman_id": "e3b5366c-069a-4d2a-8491-ae9916bd3df1",
+    "description": "Automated tests for jan-server authentication flows: JWT (guest/registered), API keys (create/validate/revoke), and Kong gateway validation. All jan-server API requests go through the Kong gateway; Keycloak admin and token requests are sent directly to Keycloak.",
+    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+  },
+  "item": [
+    {
+      "name": "Health Checks",
+      "item": [
+        {
+          "name": "LLM API Health Check",
+          "request": {
+            "method": "GET",
+            "header": [],
+            "url": "{{kong_url}}/healthz"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('health status is 200', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('body reports ok', function () {",
+                  "    var data = pm.response.json();",
+                  "    pm.expect(data.status).to.eql('ok');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Setup",
+      "description": "Bootstrap guest and registered user credentials for downstream tests.",
+      "item": [
+        {
+          "name": "Seed Guest Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{}"
+            },
+            "url": "{{kong_url}}/auth/guest-login",
+            "description": "Provision a new guest and capture issued tokens."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('guest token issued', function () {",
+                  "    pm.response.to.have.status(201);",
+                  "    pm.expect(data).to.have.property('access_token');",
+                  "    pm.expect(data.access_token).to.be.a('string').and.not.empty;",
+                  "    pm.collectionVariables.set('guest_access_token', data.access_token);",
+                  "    pm.collectionVariables.set('guest_refresh_token', data.refresh_token || '');",
+                  "    pm.collectionVariables.set('guest_user_id', data.user_id || '');",
+                  "    pm.collectionVariables.set('guest_principal_id', data.principal_id || '');",
+                  "    pm.collectionVariables.set('guest_username', data.username || data.user_id || '');",
+                  "});",
+                  "pm.test('response includes expiry', function () {",
+                  "    pm.expect(data).to.have.property('expires_in');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Seed Obtain Keycloak Admin Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/x-www-form-urlencoded"
+              }
+            ],
+            "body": {
+              "mode": "urlencoded",
+              "urlencoded": [
+                {
+                  "key": "grant_type",
+                  "value": "password"
+                },
+                {
+                  "key": "client_id",
+                  "value": "admin-cli"
+                },
+                {
+                  "key": "username",
+                  "value": "{{keycloak_admin}}"
+                },
+                {
+                  "key": "password",
+                  "value": "{{keycloak_admin_password}}"
+                }
+              ]
+            },
+            "url": "{{keycloak_base_url}}/realms/master/protocol/openid-connect/token",
+            "description": "Retrieve a master realm admin token to manage users."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('admin token issued', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(data.access_token).to.be.a('string');",
+                  "    pm.collectionVariables.set('kc_admin_access_token', data.access_token);",
+                  "});",
+                  "pm.test('token type is bearer', function () {",
+                  "    pm.expect((data.token_type || '').toLowerCase()).to.eql('bearer');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Seed Create Test User",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{kc_admin_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"username\": \"{{test_user_username}}\",\n  \"email\": \"{{test_user_email}}\",\n  \"enabled\": true,\n  \"attributes\": {\n    \"guest\": [\"false\"],\n    \"pid\": [\"{{test_user_pid}}\"]\n  }\n}"
+            },
+            "url": "{{keycloak_base_url}}/admin/realms/{{realm}}/users",
+            "description": "Create a dedicated automation user in the jan realm."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const status = pm.response.code;",
+                  "const location = pm.response.headers.get('Location');",
+                  "pm.test('user created or already exists', function () {",
+                  "    pm.expect([201, 204, 409]).to.include(status);",
+                  "});",
+                  "if (status === 201 || status === 204) {",
+                  "    pm.test('location header exposes user id', function () {",
+                  "        pm.expect(location, 'Location header').to.be.a('string').and.not.empty;",
+                  "    });",
+                  "    if (location) {",
+                  "        const id = location.substring(location.lastIndexOf('/') + 1);",
+                  "        pm.collectionVariables.set('test_user_id', id);",
+                  "    } else {",
+                  "        pm.expect.fail('Keycloak response missing user Location header');",
+                  "    }",
+                  "} else if (status === 409) {",
+                  "    const existingId = pm.collectionVariables.get('test_user_id');",
+                  "    if (existingId) {",
+                  "        console.warn('Test user already exists, reusing cached id');",
+                  "    } else {",
+                  "        console.warn('Test user already exists, fetching user id');",
+                  "        const username = pm.collectionVariables.get('test_user_username');",
+                  "        const realm = pm.collectionVariables.get('realm');",
+                  "        const baseUrl = pm.collectionVariables.get('keycloak_base_url');",
+                  "        const token = pm.collectionVariables.get('kc_admin_access_token');",
+                  "        if (!username || !realm || !baseUrl || !token) {",
+                  "            pm.expect.fail('missing context to look up existing user');",
+                  "        } else {",
+                  "            const lookupUrl = `${baseUrl}/admin/realms/${realm}/users?username=${encodeURIComponent(username)}&exact=true`;",
+                  "            pm.sendRequest({",
+                  "                url: lookupUrl,",
+                  "                method: 'GET',",
+                  "                header: [{ key: 'Authorization', value: `Bearer ${token}` }]",
+                  "            }, function (err, res) {",
+                  "                if (err) {",
+                  "                    pm.expect.fail('failed to lookup existing user: ' + err.message);",
+                  "                    return;",
+                  "                }",
+                  "                if (!res || typeof res.json !== 'function') {",
+                  "                    pm.expect.fail('invalid lookup response');",
+                  "                    return;",
+                  "                }",
+                  "                const users = res.json();",
+                  "                if (Array.isArray(users) && users.length > 0 && users[0].id) {",
+                  "                    pm.collectionVariables.set('test_user_id', users[0].id);",
+                  "                } else {",
+                  "                    pm.expect.fail('existing user lookup returned empty result');",
+                  "                }",
+                  "            });",
+                  "        }",
+                  "    }",
+                  "} else {",
+                  "    pm.expect.fail('unexpected response status ' + status);",
+                  "}"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Seed Set Test User Password",
+          "request": {
+            "method": "PUT",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{kc_admin_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"type\": \"password\",\n  \"value\": \"{{test_user_password}}\",\n  \"temporary\": false\n}"
+            },
+            "url": "{{keycloak_base_url}}/admin/realms/{{realm}}/users/{{test_user_id}}/reset-password",
+            "description": "Assign a permanent password to the automation user."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('password set', function () {",
+                  "    pm.expect(pm.response.code).to.eql(204);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Seed Verify Test User Email",
+          "request": {
+            "method": "PUT",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{kc_admin_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"emailVerified\": true,\n  \"requiredActions\": []\n}"
+            },
+            "url": "{{keycloak_base_url}}/admin/realms/{{realm}}/users/{{test_user_id}}",
+            "description": "Update the test user to mark email as verified and clear required actions."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('user email verified', function () {",
+                  "    pm.expect(pm.response.code).to.eql(204);",
+                  "});",
+                  "",
+                  "// Wait 1 second for Keycloak sync delay",
+                  "setTimeout(function() {}, 1000);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Seed Obtain Registered User Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/x-www-form-urlencoded"
+              }
+            ],
+            "body": {
+              "mode": "urlencoded",
+              "urlencoded": [
+                {
+                  "key": "grant_type",
+                  "value": "password"
+                },
+                {
+                  "key": "client_id",
+                  "value": "{{client_id_public}}"
+                },
+                {
+                  "key": "username",
+                  "value": "{{test_user_email}}"
+                },
+                {
+                  "key": "password",
+                  "value": "{{test_user_password}}"
+                }
+              ]
+            },
+            "url": "{{keycloak_base_url}}/realms/{{realm}}/protocol/openid-connect/token",
+            "description": "Simulate a registered user authenticating via direct access grant."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('user token issued', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(data.access_token).to.be.a('string');",
+                  "    pm.collectionVariables.set('user_access_token', data.access_token);",
+                  "    pm.collectionVariables.set('user_refresh_token', data.refresh_token || '');",
+                  "});",
+                  "pm.test('token type is bearer', function () {",
+                  "    pm.expect((data.token_type || '').toLowerCase()).to.eql('bearer');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "LLM API - Guest Token",
+      "item": [
+        {
+          "name": "List Models (Guest Token)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Validate that the guest token grants read access to the model catalogue."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('models request succeeded', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('models payload is non-empty array', function () {",
+                  "    pm.expect(data).to.have.property('data');",
+                  "    pm.expect(data.data).to.be.an('array').that.is.not.empty;",
+                  "});",
+                  "pm.test('auth method header is jwt', function () {",
+                  "    pm.expect((pm.response.headers.get('X-Auth-Method') || '').toLowerCase()).to.eql('jwt');",
+                  "});",
+                  "",
+                  "// Save first model ID for subsequent tests",
+                  "if (data.data && data.data.length > 0) {",
+                  "    pm.environment.set('model_id', data.data[0].id);",
+                  "    pm.environment.set('model_id_encoded', encodeURIComponent(data.data[0].id));",
+                  "    console.log('Set model_id to:', data.data[0].id);",
+                  "}"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Get Model Details (Guest Token)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models/catalogs/{{model_id_encoded}}",
+            "description": "Fetch metadata for a specific model catalog."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('model catalog lookup succeeded', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('response contains model catalog data', function () {",
+                  "    pm.expect(data).to.have.property('id');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Create Chat Completion (Guest Token)",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Idempotency-Key",
+                "value": "{{collection_timestamp}}-guest-chat"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{model_id}}\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"Reply with the word ok.\"}\n  ],\n  \"max_tokens\": 16\n}"
+            },
+            "url": "{{kong_url}}/v1/chat/completions",
+            "description": "Run a non-streaming chat completion with the guest token."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('chat completion succeeded', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('chat completion returns choices', function () {",
+                  "    pm.expect(data).to.have.property('choices');",
+                  "    pm.expect(data.choices).to.be.an('array').that.is.not.empty;",
+                  "    const message = data.choices[0] && data.choices[0].message;",
+                  "    pm.expect(message).to.have.property('content');",
+                  "});",
+                  "pm.test('auth method header is jwt', function () {",
+                  "    pm.expect((pm.response.headers.get('X-Auth-Method') || '').toLowerCase()).to.eql('jwt');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "LLM API - User Token",
+      "item": [
+        {
+          "name": "List Models (Registered User)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{user_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Ensure a registered user token also has catalogue access."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('registered user can list models', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(data).to.have.property('data');",
+                  "});",
+                  "pm.test('principal headers returned', function () {",
+                  "    const principal = pm.response.headers.get('X-Principal-Id');",
+                  "    pm.expect(principal).to.be.a('string').and.not.empty;",
+                  "    pm.expect((pm.response.headers.get('X-Auth-Method') || '').toLowerCase()).to.eql('jwt');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Guest Login Flow",
+      "item": [
+        {
+          "name": "Request Guest Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{}"
+            },
+            "url": "{{kong_url}}/auth/guest-login",
+            "description": "Provision a new guest and capture issued tokens."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('guest token issued', function () {",
+                  "    pm.response.to.have.status(201);",
+                  "    pm.expect(data).to.have.property('access_token');",
+                  "    pm.expect(data.access_token).to.be.a('string').and.not.empty;",
+                  "    pm.collectionVariables.set('guest_access_token', data.access_token);",
+                  "    pm.collectionVariables.set('guest_refresh_token', data.refresh_token || '');",
+                  "    pm.collectionVariables.set('guest_user_id', data.user_id || '');",
+                  "    pm.collectionVariables.set('guest_principal_id', data.principal_id || '');",
+                  "    pm.collectionVariables.set('guest_username', data.username || data.user_id || '');",
+                  "});",
+                  "pm.test('response includes expiry', function () {",
+                  "    pm.expect(data).to.have.property('expires_in');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Upgrade Guest Account",
+          "event": [
+            {
+              "listen": "prerequest",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "let upgradeUsername = pm.collectionVariables.get('guest_upgrade_username');",
+                  "if (!upgradeUsername) {",
+                  "    const base = pm.collectionVariables.get('guest_username') || `guest-${Date.now()}`;",
+                  "    upgradeUsername = `${base}-upgraded`;",
+                  "    pm.collectionVariables.set('guest_upgrade_username', upgradeUsername);",
+                  "    pm.collectionVariables.set('guest_upgrade_email', `${upgradeUsername}@example.com`);",
+                  "}",
+                  "pm.variables.set('upgrade_username', upgradeUsername);",
+                  "pm.variables.set('upgrade_email', pm.collectionVariables.get('guest_upgrade_email'));"
+                ]
+              }
+            },
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('upgrade accepted', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('upgrade response confirms status', function () {",
+                  "    pm.expect(data.status).to.eql('upgraded');",
+                  "});"
+                ]
+              }
+            }
+          ],
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"username\": \"{{upgrade_username}}\",\n  \"email\": \"{{upgrade_email}}\",\n  \"full_name\": \"Guest Automation User\"\n}"
+            },
+            "url": "{{kong_url}}/auth/upgrade",
+            "description": "Upgrade the guest account to a named user."
+          }
+        }
+      ]
+    },
+    {
+      "name": "JWT Login Flow",
+      "item": [
+        {
+          "name": "Obtain Keycloak Admin Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/x-www-form-urlencoded"
+              }
+            ],
+            "body": {
+              "mode": "urlencoded",
+              "urlencoded": [
+                {
+                  "key": "grant_type",
+                  "value": "password"
+                },
+                {
+                  "key": "client_id",
+                  "value": "admin-cli"
+                },
+                {
+                  "key": "username",
+                  "value": "{{keycloak_admin}}"
+                },
+                {
+                  "key": "password",
+                  "value": "{{keycloak_admin_password}}"
+                }
+              ]
+            },
+            "url": "{{keycloak_base_url}}/realms/master/protocol/openid-connect/token",
+            "description": "Retrieve a master realm admin token to manage users."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('admin token issued', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(data.access_token).to.be.a('string');",
+                  "    pm.collectionVariables.set('kc_admin_access_token', data.access_token);",
+                  "});",
+                  "pm.test('token type is bearer', function () {",
+                  "    pm.expect((data.token_type || '').toLowerCase()).to.eql('bearer');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Create Test User",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{kc_admin_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"username\": \"{{test_user_username}}\",\n  \"email\": \"{{test_user_email}}\",\n  \"enabled\": true,\n  \"attributes\": {\n    \"guest\": [\"false\"],\n    \"pid\": [\"{{test_user_pid}}\"]\n  }\n}"
+            },
+            "url": "{{keycloak_base_url}}/admin/realms/{{realm}}/users",
+            "description": "Create a dedicated automation user in the jan realm."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const status = pm.response.code;",
+                  "const location = pm.response.headers.get('Location');",
+                  "pm.test('user created or already exists', function () {",
+                  "    pm.expect([201, 204, 409]).to.include(status);",
+                  "});",
+                  "if (status === 201 || status === 204) {",
+                  "    pm.test('location header exposes user id', function () {",
+                  "        pm.expect(location, 'Location header').to.be.a('string').and.not.empty;",
+                  "    });",
+                  "    if (location) {",
+                  "        const id = location.substring(location.lastIndexOf('/') + 1);",
+                  "        pm.collectionVariables.set('test_user_id', id);",
+                  "    } else {",
+                  "        pm.expect.fail('Keycloak response missing user Location header');",
+                  "    }",
+                  "} else if (status === 409) {",
+                  "    const existingId = pm.collectionVariables.get('test_user_id');",
+                  "    if (existingId) {",
+                  "        console.warn('Test user already exists, reusing cached id');",
+                  "    } else {",
+                  "        console.warn('Test user already exists, fetching user id');",
+                  "        const username = pm.collectionVariables.get('test_user_username');",
+                  "        const realm = pm.collectionVariables.get('realm');",
+                  "        const baseUrl = pm.collectionVariables.get('keycloak_base_url');",
+                  "        const token = pm.collectionVariables.get('kc_admin_access_token');",
+                  "        if (!username || !realm || !baseUrl || !token) {",
+                  "            pm.expect.fail('missing context to look up existing user');",
+                  "        } else {",
+                  "            const lookupUrl = `${baseUrl}/admin/realms/${realm}/users?username=${username}`;",
+                  "            pm.sendRequest({",
+                  "                url: lookupUrl,",
+                  "                method: 'GET',",
+                  "                header: [{ key: 'Authorization', value: `Bearer ${token}` }]",
+                  "            }, function (err, res) {",
+                  "                if (err) {",
+                  "                    pm.expect.fail('failed to lookup existing user: ' + err.message);",
+                  "                    return;",
+                  "                }",
+                  "                if (!res || typeof res.json !== 'function') {",
+                  "                    pm.expect.fail('invalid lookup response');",
+                  "                    return;",
+                  "                }",
+                  "                const users = res.json();",
+                  "                if (Array.isArray(users) && users.length > 0 && users[0].id) {",
+                  "                    pm.collectionVariables.set('test_user_id', users[0].id);",
+                  "                } else {",
+                  "                    pm.expect.fail('existing user lookup returned empty result');",
+                  "                }",
+                  "            });",
+                  "        }",
+                  "    }",
+                  "} else {",
+                  "    pm.expect.fail('unexpected response status ' + status);",
+                  "}"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Set Test User Password",
+          "request": {
+            "method": "PUT",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{kc_admin_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"type\": \"password\",\n  \"value\": \"{{test_user_password}}\",\n  \"temporary\": false\n}"
+            },
+            "url": "{{keycloak_base_url}}/admin/realms/{{realm}}/users/{{test_user_id}}/reset-password",
+            "description": "Assign a permanent password to the automation user."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('password set', function () {",
+                  "    pm.expect(pm.response.code).to.eql(204);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Obtain Registered User Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/x-www-form-urlencoded"
+              }
+            ],
+            "body": {
+              "mode": "urlencoded",
+              "urlencoded": [
+                {
+                  "key": "grant_type",
+                  "value": "password"
+                },
+                {
+                  "key": "client_id",
+                  "value": "{{client_id_public}}"
+                },
+                {
+                  "key": "username",
+                  "value": "{{test_user_email}}"
+                },
+                {
+                  "key": "password",
+                  "value": "{{test_user_password}}"
+                }
+              ]
+            },
+            "url": "{{keycloak_base_url}}/realms/{{realm}}/protocol/openid-connect/token",
+            "description": "Simulate a registered user authenticating via direct access grant."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('user token issued', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(data.access_token).to.be.a('string');",
+                  "    pm.collectionVariables.set('user_access_token', data.access_token);",
+                  "    pm.collectionVariables.set('user_refresh_token', data.refresh_token || '');",
+                  "});",
+                  "pm.test('token type is bearer', function () {",
+                  "    pm.expect((data.token_type || '').toLowerCase()).to.eql('bearer');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "OAuth PKCE Flow (Token-Based)",
+      "description": "Tests for OAuth2 Authorization Code flow with PKCE. Validates token-based authentication with code_challenge/code_verifier exchange.",
+      "item": [
+        {
+          "name": "Initiate OAuth Login (Get Authorization URL)",
+          "request": {
+            "method": "GET",
+            "header": [],
+            "url": {
+              "raw": "{{kong_url}}/auth/login?redirect_url=http://localhost:3000/auth/callback",
+              "host": ["{{kong_url}}"],
+              "path": ["auth", "login"],
+              "query": [
+                {
+                  "key": "redirect_url",
+                  "value": "http://localhost:3000/auth/callback",
+                  "description": "Frontend callback URL for token delivery"
+                }
+              ]
+            },
+            "description": "Initiates OAuth login flow with PKCE. Returns authorization_url with code_challenge parameter."
+          },
+          "event": [
+            {
+              "listen": "prerequest",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "// Clear any cookies from previous tests to ensure fresh state",
+                  "pm.cookies.jar().clear(pm.request.url.toString());"
+                ]
+              }
+            },
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('login initiation succeeds', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('response contains authorization_url', function () {",
+                  "    pm.expect(data).to.have.property('authorization_url');",
+                  "    pm.expect(data.authorization_url).to.be.a('string').and.not.empty;",
+                  "});",
+                  "pm.test('response contains state parameter', function () {",
+                  "    pm.expect(data).to.have.property('state');",
+                  "    pm.expect(data.state).to.be.a('string').and.not.empty;",
+                  "    pm.collectionVariables.set('oauth_state', data.state);",
+                  "});",
+                  "pm.test('authorization_url contains PKCE parameters', function () {",
+                  "    const authUrl = data.authorization_url;",
+                  "    pm.expect(authUrl).to.include('code_challenge=');",
+                  "    pm.expect(authUrl).to.include('code_challenge_method=S256');",
+                  "});",
+                  "pm.test('authorization_url contains required OAuth parameters', function () {",
+                  "    const authUrl = data.authorization_url;",
+                  "    pm.expect(authUrl).to.include('client_id=');",
+                  "    pm.expect(authUrl).to.include('redirect_uri=');",
+                  "    pm.expect(authUrl).to.include('response_type=code');",
+                  "    pm.expect(authUrl).to.include('state=');",
+                  "    pm.expect(authUrl).to.include('scope=openid');",
+                  "});",
+                  "// Store authorization URL for reference",
+                  "pm.collectionVariables.set('oauth_authorization_url', data.authorization_url);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Verify No Cookie-Based State Storage",
+          "request": {
+            "method": "GET",
+            "header": [],
+            "url": "{{kong_url}}/healthz",
+            "description": "Verifies that no authentication state is stored in cookies (token-based approach)."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('health check succeeds', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('no oauth_state cookie exists', function () {",
+                  "    const cookies = pm.cookies.all();",
+                  "    const stateCookie = cookies.find(c => c.name === 'oauth_state');",
+                  "    pm.expect(stateCookie).to.be.undefined;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        
+        
+        
+        {
+          "name": "Integration Test - Full OAuth Flow with Real Keycloak",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/x-www-form-urlencoded"
+              }
+            ],
+            "body": {
+              "mode": "urlencoded",
+              "urlencoded": [
+                {
+                  "key": "grant_type",
+                  "value": "password"
+                },
+                {
+                  "key": "client_id",
+                  "value": "{{client_id_public}}"
+                },
+                {
+                  "key": "username",
+                  "value": "{{test_user_email}}"
+                },
+                {
+                  "key": "password",
+                  "value": "{{test_user_password}}"
+                }
+              ]
+            },
+            "url": "{{keycloak_base_url}}/realms/{{realm}}/protocol/openid-connect/token",
+            "description": "Obtains a real Keycloak token via the direct access (password) grant to verify that realm-issued tokens work end to end. This stands in for the token a client receives once the PKCE code exchange completes."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('real keycloak token obtained', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(data.access_token).to.be.a('string').and.not.empty;",
+                  "});",
+                  "pm.test('token can be used with bearer authentication', function () {",
+                  "    pm.expect(data.token_type.toLowerCase()).to.eql('bearer');",
+                  "});",
+                  "// Store for API validation tests",
+                  "pm.collectionVariables.set('oauth_bearer_token', data.access_token);",
+                  "pm.collectionVariables.set('oauth_refresh_token', data.refresh_token || '');"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Test API Call with OAuth Bearer Token",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{oauth_bearer_token}}",
+                "description": "Access token stored in oauth_bearer_token by the preceding Keycloak token request"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Validates that Bearer tokens from OAuth flow work with API endpoints."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('bearer token authentication succeeds', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('models data returned', function () {",
+                  "    const data = pm.response.json();",
+                  "    pm.expect(data).to.have.property('data');",
+                  "    pm.expect(data.data).to.be.an('array');",
+                  "});",
+                  "pm.test('auth method is JWT', function () {",
+                  "    const authMethod = pm.response.headers.get('X-Auth-Method');",
+                  "    pm.expect((authMethod || '').toLowerCase()).to.eql('jwt');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Test Token Refresh Endpoint",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{oauth_refresh_token}}",
+                "description": "Refresh token in Authorization header"
+              }
+            ],
+            "url": "{{kong_url}}/auth/refresh-token",
+            "description": "Tests the token refresh endpoint for renewing access tokens."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const status = pm.response.code;",
+                  "if (status === 200) {",
+                  "    pm.test('token refresh succeeds', function () {",
+                  "        pm.response.to.have.status(200);",
+                  "    });",
+                  "    ",
+                  "    const data = pm.response.json();",
+                  "    pm.test('new tokens returned', function () {",
+                  "        pm.expect(data).to.have.property('access_token');",
+                  "        pm.expect(data.access_token).to.be.a('string').and.not.empty;",
+                  "    });",
+                  "    ",
+                  "    pm.test('refresh token included', function () {",
+                  "        pm.expect(data).to.have.property('refresh_token');",
+                  "    });",
+                  "    ",
+                  "    pm.test('expires_in included', function () {",
+                  "        pm.expect(data).to.have.property('expires_in');",
+                  "        pm.expect(data.expires_in).to.be.a('number').and.above(0);",
+                  "    });",
+                  "} else if (status === 500 || status === 400) {",
+                  "    // May fail if refresh token is invalid/expired",
+                  "    pm.test('refresh endpoint exists', function () {",
+                  "        pm.expect([400, 500]).to.include(status);",
+                  "    });",
+                  "    console.log('Token refresh failed (may be expected):', pm.response.json());",
+                  "} else {",
+                  "    pm.test('unexpected refresh response: ' + status, function () {",
+                  "        pm.expect.fail('Got status: ' + status);",
+                  "    });",
+                  "}"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "API Key Flow",
+      "item": [
+        {
+          "name": "Create API Key",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{user_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"name\": \"Automation Test Key\",\n  \"expires_in_days\": 90\n}"
+            },
+            "url": "{{kong_url}}/auth/api-keys",
+            "description": "Create a new API key for the authenticated user via Kong gateway."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const body = pm.response.json();",
+                  "pm.test('api key created', function () {",
+                  "    pm.response.to.have.status(201);",
+                  "    pm.expect(body).to.have.property('id');",
+                  "    pm.expect(body).to.have.property('key').that.is.a('string').and.not.empty;",
+                  "});",
+                  "pm.test('key has sk_ prefix', function () {",
+                  "    pm.expect(body.key).to.match(/^sk_/);",
+                  "});",
+                  "pm.test('key metadata returned', function () {",
+                  "    pm.expect(body).to.have.property('name', 'Automation Test Key');",
+                  "    pm.expect(body).to.have.property('prefix');",
+                  "    pm.expect(body).to.have.property('suffix');",
+                  "    pm.expect(body).to.have.property('expires_at');",
+                  "});",
+                  "pm.collectionVariables.set('api_key_id', body.id);",
+                  "pm.collectionVariables.set('api_key_secret', body.key);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "List API Keys",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{user_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/auth/api-keys",
+            "description": "List active API keys for the authenticated user."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const responseData = pm.response.json();",
+                  "pm.test('list returns items', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(responseData).to.have.property('items').that.is.an('array');",
+                  "});",
+                  "pm.test('created key appears in list', function () {",
+                  "    const keyId = pm.collectionVariables.get('api_key_id');",
+                  "    const found = responseData.items.find(k => k.id === keyId);",
+                  "    pm.expect(found, 'created key not found in list').to.exist;",
+                  "    pm.expect(found.name).to.eql('Automation Test Key');",
+                  "});",
+                  "pm.test('key secrets not exposed in list', function () {",
+                  "    responseData.items.forEach(k => {",
+                  "        pm.expect(k).to.not.have.property('key');",
+                  "        pm.expect(k).to.not.have.property('hash');",
+                  "    });",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Use API Key - List Models",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "X-API-Key",
+                "value": "{{api_key_secret}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Validate API key works to access protected endpoints via Kong."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('api key authenticates successfully', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('models data returned', function () {",
+                  "    pm.expect(data).to.have.property('data').that.is.an('array');",
+                  "});",
+                  "pm.test('auth method header is apikey', function () {",
+                  "    const authMethod = pm.response.headers.get('X-Auth-Method');",
+                  "    pm.expect((authMethod || '').toLowerCase()).to.eql('apikey');",
+                  "});",
+                  "pm.test('user context headers injected', function () {",
+                  "    pm.expect(pm.response.headers.get('X-User-ID')).to.exist;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Use API Key - Chat Completion",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "X-API-Key",
+                "value": "{{api_key_secret}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{model_id}}\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"Say OK.\"}\n  ],\n  \"max_tokens\": 10\n}"
+            },
+            "url": "{{kong_url}}/v1/chat/completions",
+            "description": "Test API key authentication for chat completion endpoint."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('chat completion with api key succeeds', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('completion returns choices', function () {",
+                  "    pm.expect(data).to.have.property('choices').that.is.an('array').and.not.empty;",
+                  "});",
+                  "pm.test('auth method is apikey', function () {",
+                  "    const authMethod = pm.response.headers.get('X-Auth-Method');",
+                  "    pm.expect((authMethod || '').toLowerCase()).to.eql('apikey');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Test Invalid API Key",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "X-API-Key",
+                "value": "sk_invalid_key_123456789"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Verify invalid API key is rejected by Kong."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('invalid api key rejected', function () {",
+                  "    pm.expect(pm.response.code).to.eql(401);",
+                  "});",
+                  "pm.test('error message indicates auth failure', function () {",
+                  "    const body = pm.response.json();",
+                  "    pm.expect(body).to.have.property('message');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Test No Authentication",
+          "request": {
+            "method": "GET",
+            "header": [],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Verify that a request with neither a JWT nor an API key is rejected."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('no auth credentials rejected', function () {",
+                  "    pm.expect(pm.response.code).to.eql(401);",
+                  "});",
+                  "pm.test('unauthorized message returned', function () {",
+                  "    const body = pm.response.json();",
+                  "    pm.expect(body).to.have.property('message');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Delete API Key",
+          "request": {
+            "method": "DELETE",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{user_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/auth/api-keys/{{api_key_id}}",
+            "description": "Revoke the previously created API key."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('api key revoked', function () {",
+                  "    pm.expect(pm.response.code).to.eql(204);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Verify Revoked Key Rejected",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "X-API-Key",
+                "value": "{{api_key_secret}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Confirm revoked API key no longer works."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('revoked api key rejected', function () {",
+                  "    pm.expect(pm.response.code).to.eql(401);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Teardown",
+      "item": [
+        {
+          "name": "Delete Test User",
+          "event": [
+            {
+              "listen": "prerequest",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const userId = pm.collectionVariables.get('test_user_id');",
+                  "if (!userId) {",
+                  "    console.warn('Skipping teardown because test_user_id is not set');",
+                  "    pm.execution.setNextRequest(null);",
+                  "}",
+                  "pm.variables.set('teardown_user_id', userId);"
+                ]
+              }
+            },
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('user removed or already absent', function () {",
+                  "    pm.expect([204, 404]).to.include(pm.response.code);",
+                  "});",
+                  "pm.collectionVariables.unset('test_user_id');",
+                  "pm.collectionVariables.unset('user_access_token');",
+                  "pm.collectionVariables.unset('user_refresh_token');"
+                ]
+              }
+            }
+          ],
+          "request": {
+            "method": "DELETE",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{kc_admin_access_token}}"
+              }
+            ],
+            "url": "{{keycloak_base_url}}/admin/realms/{{realm}}/users/{{teardown_user_id}}",
+            "description": "Remove the automation user created during the run."
+          }
+        }
+      ]
+    }
+  ],
+  "event": [
+    {
+      "listen": "prerequest",
+      "script": {
+        "type": "text/javascript",
+        "exec": [
+          "if (!pm.collectionVariables.get('collection_timestamp')) {",
+          "    pm.collectionVariables.set('collection_timestamp', new Date().toISOString());",
+          "}",
+          "if (!pm.collectionVariables.get('test_user_username')) {",
+          "    pm.collectionVariables.set('test_user_username', `automation-user-${Date.now()}`);",
+          "}",
+          "if (!pm.collectionVariables.get('test_user_password')) {",
+          "    pm.collectionVariables.set('test_user_password', `Passw0rd!${Math.floor(Math.random() * 10000)}`);",
+          "}",
+          "if (!pm.collectionVariables.get('test_user_email')) {",
+          "    pm.collectionVariables.set('test_user_email', `${pm.collectionVariables.get('test_user_username')}@example.com`);",
+          "}",
+          "if (!pm.collectionVariables.get('test_user_pid')) {",
+          "    pm.collectionVariables.set('test_user_pid', pm.collectionVariables.get('test_user_username'));",
+          "}",
+          "const modelId = pm.collectionVariables.get('model_id');",
+          "if (modelId) {",
+          "    pm.collectionVariables.set('model_id_encoded', encodeURIComponent(modelId));",
+          "}"
+        ]
+      }
+    },
+    {
+      "listen": "test",
+      "script": {
+        "type": "text/javascript",
+        "exec": [
+          "try {",
+          "const request = pm.request;",
+          "if (request) {",
+          "    const url = request.url && request.url.toString ? request.url.toString() : String(request.url || '');",
+          "    console.log('>>> Request:', request.method, url);",
+          "    if (request.body) {",
+          "        let bodyText = '';",
+          "        if (request.body.mode === 'raw') {",
+          "            bodyText = request.body.raw || '';",
+          "        } else if (typeof request.body.toJSON === 'function') {",
+          "            try {",
+          "                bodyText = JSON.stringify(request.body.toJSON(), null, 2);",
+          "            } catch (err) {",
+          "                bodyText = String(request.body);",
+          "            }",
+          "        }",
+          "        if (bodyText) {",
+          "            console.log('>>> Request Body:', bodyText);",
+          "        }",
+          "    }",
+          "}",
+          "if (pm.response) {",
+          "    console.log('<<< Response:', pm.response.code, pm.response.status);",
+          "    const raw = pm.response.text();",
+          "    if (raw) {",
+          "        console.log('<<< Response Body:', raw);",
+          "    }",
+          "}",
+          "} catch (err) {",
+          "    console.error('logger failed:', err && err.message ? err.message : err);",
+          "}"
+        ]
+      }
+    }
+  ],
+  "variable": [
+    {
+      "key": "kong_url",
+      "value": "http://localhost:8000",
+      "type": "string",
+      "description": "Kong Gateway URL. All API requests go through Kong for authentication validation."
+    },
+    {
+      "key": "keycloak_base_url",
+      "value": "http://localhost:8085",
+      "type": "string",
+      "description": "Base URL (scheme + host + port) for Keycloak."
+    },
+    {
+      "key": "realm",
+      "value": "jan",
+      "type": "string",
+      "description": "Keycloak realm used by jan-server."
+    },
+    {
+      "key": "client_id_public",
+      "value": "llm-api",
+      "type": "string",
+      "description": "Public client ID used for direct access grants."
+    },
+    {
+      "key": "keycloak_admin",
+      "value": "admin",
+      "type": "string",
+      "description": "Keycloak master realm admin username."
+    },
+    {
+      "key": "keycloak_admin_password",
+      "value": "admin",
+      "type": "string",
+      "description": "Keycloak master realm admin password."
+    }
+  ]
+}
diff --git a/tests/automation/conversations-postman-scripts.json b/tests/automation/conversations-postman-scripts.json
new file mode 100644
index 00000000..91fe65de
--- /dev/null
+++ b/tests/automation/conversations-postman-scripts.json
@@ -0,0 +1,1409 @@
+{
+  "info": {
+    "name": "jan-server Conversations API Flow",
+    "_postman_id": "c4e7f8g9-123b-4c5d-6e7f-8g9h0i1j2k3l",
+    "description": "Automated tests for conversation management, including creation, branching, message management, and the rating system.",
+    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+  },
+  "item": [
+    {
+      "name": "Health Check",
+      "item": [
+        {
+          "name": "LLM API Health Check",
+          "request": {
+            "method": "GET",
+            "header": [],
+            "url": "{{kong_url}}/healthz"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('health status is 200', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('body reports ok', function () {",
+                  "    var data = pm.response.json();",
+                  "    pm.expect(data.status).to.eql('ok');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Authentication",
+      "item": [
+        {
+          "name": "Request Guest Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{}"
+            },
+            "url": "{{kong_url}}/auth/guest-login",
+            "description": "Provision a new guest and capture issued tokens."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('guest token issued', function () {",
+                  "    pm.response.to.have.status(201);",
+                  "    pm.expect(data).to.have.property('access_token');",
+                  "    pm.expect(data.access_token).to.be.a('string').and.not.empty;",
+                  "    pm.collectionVariables.set('access_token', data.access_token);",
+                  "});",
+                  "pm.test('response includes expiry', function () {",
+                  "    pm.expect(data).to.have.property('expires_in');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Model Catalogue",
+      "description": "Capture a usable model id for the chat steps.",
+      "item": [
+        {
+          "name": "List Available Models",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Use the guest token to fetch available models and keep the first id handy."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const payload = pm.response.json();",
+                  "pm.test('models request succeeded', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(payload.data).to.be.an('array').that.is.not.empty;",
+                  "});",
+                  "const defaultModel = payload.data[0];",
+                  "pm.collectionVariables.set('model_id', defaultModel.id);",
+                  "console.log('Saved conversation model_id:', defaultModel.id);"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Project Management",
+      "description": "Comprehensive tests for project CRUD operations, including creation, listing, updates, and deletion.",
+      "item": [
+        {
+          "name": "Create Project - Marketing Campaign",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"name\": \"Marketing Campaign\",\n  \"instruction\": \"You are a marketing expert. Provide creative and data-driven marketing strategies. Always consider ROI and target audience demographics.\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project created successfully', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([200, 201]);",
+                  "});",
+                  "",
+                  "pm.test('Response contains project details', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.object).to.equal('project');",
+                  "    pm.expect(response.id).to.exist;",
+                  "    pm.expect(response.id).to.include('proj_');",
+                  "    pm.expect(response.name).to.equal('Marketing Campaign');",
+                  "    pm.expect(response.instruction).to.include('marketing expert');",
+                  "    pm.expect(response.created_at).to.exist;",
+                  "    pm.expect(response.updated_at).to.exist;",
+                  "    ",
+                  "    pm.collectionVariables.set('project_id_1', response.id);",
+                  "    console.log('Created project_id_1:', response.id);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Create Project - Technical Support",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"name\": \"Technical Support\",\n  \"instruction\": \"You are a technical support specialist. Provide clear, step-by-step troubleshooting guidance. Be patient and ask clarifying questions when needed.\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Second project created', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([200, 201]);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.name).to.equal('Technical Support');",
+                  "    pm.collectionVariables.set('project_id_2', response.id);",
+                  "    console.log('Created project_id_2:', response.id);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Create Project - Personal Assistant",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"name\": \"Personal Assistant\",\n  \"instruction\": \"You are a helpful personal assistant. Help with scheduling, reminders, and general life organization.\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Third project created', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([200, 201]);",
+                  "    const response = pm.response.json();",
+                  "    pm.collectionVariables.set('project_id_3', response.id);",
+                  "    console.log('Created project_id_3:', response.id);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Get Single Project",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": {
+              "raw": "{{kong_url}}/v1/projects/{{project_id_1}}",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects", "{{project_id_1}}"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project retrieved successfully', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "",
+                  "pm.test('Project details are correct', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.id).to.equal(pm.collectionVariables.get('project_id_1'));",
+                  "    pm.expect(response.name).to.equal('Marketing Campaign');",
+                  "    pm.expect(response.instruction).to.exist;",
+                  "    pm.expect(response.object).to.equal('project');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "List All Projects - Page 1",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": {
+              "raw": "{{kong_url}}/v1/projects?limit=2",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects"],
+              "query": [
+                {
+                  "key": "limit",
+                  "value": "2"
+                }
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Projects list retrieved', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "",
+                  "pm.test('Pagination works correctly', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.object).to.equal('list');",
+                  "    pm.expect(response.data).to.be.an('array');",
+                  "    pm.expect(response.data.length).to.be.at.most(2);",
+                  "    pm.expect(response.has_more).to.exist;",
+                  "    ",
+                  "    if (response.has_more) {",
+                  "        pm.expect(response.next_cursor).to.exist;",
+                  "        pm.collectionVariables.set('next_cursor', response.next_cursor);",
+                  "    }",
+                  "});",
+                  "",
+                  "pm.test('Projects have correct structure', function () {",
+                  "    const response = pm.response.json();",
+                  "    response.data.forEach(project => {",
+                  "        pm.expect(project.object).to.equal('project');",
+                  "        pm.expect(project.id).to.include('proj_');",
+                  "        pm.expect(project.name).to.exist;",
+                  "    });",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "List Projects - Page 2 (with cursor)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": {
+              "raw": "{{kong_url}}/v1/projects?limit=2&cursor={{next_cursor}}",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects"],
+              "query": [
+                {
+                  "key": "limit",
+                  "value": "2"
+                },
+                {
+                  "key": "cursor",
+                  "value": "{{next_cursor}}"
+                }
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Next page retrieved', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.data).to.be.an('array');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Update Project - Name",
+          "request": {
+            "method": "PATCH",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"name\": \"Marketing Campaign 2024\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects/{{project_id_1}}",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects", "{{project_id_1}}"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project name updated', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.name).to.equal('Marketing Campaign 2024');",
+                  "    pm.expect(response.updated_at).to.exist;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Update Project - Instruction",
+          "request": {
+            "method": "PATCH",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"instruction\": \"You are a marketing expert specializing in digital campaigns. Focus on social media, SEO, and content marketing strategies.\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects/{{project_id_1}}",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects", "{{project_id_1}}"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project instruction updated', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.instruction).to.include('digital campaigns');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Update Project - Mark as Favorite",
+          "request": {
+            "method": "PATCH",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"is_favorite\": true\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects/{{project_id_1}}",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects", "{{project_id_1}}"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project marked as favorite', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.is_favorite).to.equal(true);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Update Project - Archive",
+          "request": {
+            "method": "PATCH",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"is_archived\": true\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects/{{project_id_2}}",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects", "{{project_id_2}}"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project archived', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.is_archived).to.equal(true);",
+                  "    pm.expect(response.archived_at).to.exist;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Update Project - Unarchive",
+          "request": {
+            "method": "PATCH",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"is_archived\": false\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects/{{project_id_2}}",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects", "{{project_id_2}}"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project unarchived', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.is_archived).to.equal(false);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Validation - Create Project with Long Name",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"name\": \"This is a very long project name that exceeds the maximum allowed length of 120 characters and should be rejected by the validation logic in the backend\",\n  \"instruction\": \"Test instruction\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Long name rejected', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([400, 422]);",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.error).to.exist;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Validation - Create Project with Empty Name",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"name\": \"\",\n  \"instruction\": \"Test instruction\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/projects",
+              "host": ["{{kong_url}}"],
+              "path": ["v1", "projects"]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Empty name rejected', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([400, 422]);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Basic Conversation Flow",
+      "item": [
+        {
+          "name": "Step 3: Create Conversation",
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "exec": [
+                  "pm.test('[Step 3] Conversation created with title', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([200, 201]);",
+                  "});",
+                  "",
+                  "pm.test('[Step 3] Response contains conversation ID and title', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.id).to.exist;",
+                  "    pm.expect(response.id).to.include('conv_');",
+                  "    pm.expect(response.title).to.exist;",
+                  "    pm.expect(response.title).to.equal('Test Runner Conversation');",
+                  "    ",
+                  "    // Save conversation ID",
+                  "    pm.collectionVariables.set('conversationId1', response.id);",
+                  "    console.log('Saved conversationId1:', response.id);",
+                  "    console.log('Conversation title:', response.title);",
+                  "});",
+                  "",
+                  "console.log('Step 3 Complete: Conversation Created with Title');"
+                ],
+                "type": "text/javascript"
+              }
+            }
+          ],
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"title\": \"Test Runner Conversation\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/conversations",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "v1",
+                "conversations"
+              ]
+            }
+          }
+        },
+        {
+          "name": "Step 4: Verify Conversation Title",
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "exec": [
+                  "pm.test('[Step 4] Get conversation successful', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "",
+                  "pm.test('[Step 4] Conversation title is correct', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.id).to.equal(pm.collectionVariables.get('conversationId1'));",
+                  "    pm.expect(response.title).to.exist;",
+                  "    pm.expect(response.title).to.equal('Test Runner Conversation');",
+                  "    console.log('Verified title:', response.title);",
+                  "});",
+                  "",
+                  "console.log('Step 4 Complete: Title Verified');"
+                ],
+                "type": "text/javascript"
+              }
+            }
+          ],
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": {
+              "raw": "{{kong_url}}/v1/conversations/{{conversationId1}}",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "v1",
+                "conversations",
+                "{{conversationId1}}"
+              ]
+            }
+          }
+        },
+        {
+          "name": "Step 5: Start Chat with Conversation",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{model_id}}\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Hello! This is test message #1.\"\n    }\n  ],\n  \"conversation\": {\n    \"id\": \"{{conversationId1}}\"\n  },\n  \"stream\": false,\n  \"max_tokens\": 100\n}"
+            },
+            "url": "{{kong_url}}/v1/chat/completions"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('conversation created with response', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "    pm.expect(data).to.have.property('conversation');",
+                  "});",
+                  "pm.test('conversation has id and title', function () {",
+                  "    pm.expect(data.conversation).to.have.property('id');",
+                  "    pm.expect(data.conversation.id).to.be.a('string');",
+                  "    pm.collectionVariables.set('conversation_id', data.conversation.id);",
+                  "    if (data.conversation.title) {",
+                  "        pm.collectionVariables.set('conversation_title', data.conversation.title);",
+                  "    }",
+                  "});",
+                  "pm.test('response includes choices', function () {",
+                  "    pm.expect(data).to.have.property('choices');",
+                  "    pm.expect(data.choices).to.be.an('array').that.is.not.empty;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Step 6: Continue Conversation",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{model_id}}\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"Tell me a joke.\"}\n  ],\n  \"conversation\": {\n    \"id\": \"{{conversation_id}}\"\n  }\n}"
+            },
+            "url": "{{kong_url}}/v1/chat/completions"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('conversation continued successfully', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('same conversation id returned', function () {",
+                  "    pm.expect(data.conversation.id).to.eql(pm.collectionVariables.get('conversation_id'));",
+                  "});",
+                  "pm.test('assistant response provided', function () {",
+                  "    pm.expect(data.choices[0].message).to.have.property('role', 'assistant');",
+                  "    pm.expect(data.choices[0].message.content).to.be.a('string').and.not.empty;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Step 7: Get Conversation Details",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/conversations/{{conversation_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('conversation retrieved successfully', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('conversation has correct structure', function () {",
+                  "    pm.expect(data).to.have.property('id');",
+                  "    pm.expect(data).to.have.property('object', 'conversation');",
+                  "    pm.expect(data).to.have.property('title');",
+                  "    pm.expect(data).to.have.property('created_at');",
+                  "});",
+                  "pm.test('conversation ID matches', function () {",
+                  "    pm.expect(data.id).to.equal(pm.collectionVariables.get('conversation_id'));",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Step 8: List User Conversations",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/conversations?limit=10"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('conversations list retrieved', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('list contains conversations', function () {",
+                  "    pm.expect(data).to.have.property('data');",
+                  "    pm.expect(data.data).to.be.an('array');",
+                  "    pm.expect(data.data.length).to.be.at.least(1);",
+                  "});",
+                  "pm.test('pagination metadata present', function () {",
+                  "    pm.expect(data).to.have.property('total');",
+                  "    pm.expect(data.total).to.be.a('number');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Conversation-Project Relationships",
+      "description": "Tests for conversation-project operations: create in project, list by project, verify project membership, and list without a project filter",
+      "item": [
+        {
+          "name": "Step 9: Create Conversation in Project",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"title\": \"Project Conversation - Marketing\",\n  \"project_id\": \"{{project_id_1}}\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/conversations",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "v1",
+                "conversations"
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('[Step 9] Conversation created with project_id', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([200, 201]);",
+                  "});",
+                  "",
+                  "pm.test('[Step 9] Response contains conversation object', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.id).to.exist;",
+                  "    pm.expect(response.id).to.include('conv_');",
+                  "    pm.expect(response.title).to.equal('Project Conversation - Marketing');",
+                  "    ",
+                  "    // Save conversation ID for later tests",
+                  "    pm.collectionVariables.set('conversation_in_project_1', response.id);",
+                  "    console.log('Saved conversation_in_project_1:', response.id);",
+                  "});",
+                  "",
+                  "console.log('Step 9 Complete: Conversation Created in Project');"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Step 10: Create Second Conversation in Same Project",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"title\": \"Project Conversation - Strategy\",\n  \"project_id\": \"{{project_id_1}}\"\n}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/v1/conversations",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "v1",
+                "conversations"
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('[Step 10] Second conversation created in project', function () {",
+                  "    pm.expect(pm.response.code).to.be.oneOf([200, 201]);",
+                  "});",
+                  "",
+                  "pm.test('[Step 10] Second conversation has correct structure', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response.id).to.exist;",
+                  "    pm.expect(response.title).to.equal('Project Conversation - Strategy');",
+                  "    ",
+                  "    pm.collectionVariables.set('conversation_in_project_2', response.id);",
+                  "    console.log('Saved conversation_in_project_2:', response.id);",
+                  "});",
+                  "",
+                  "console.log('Step 10 Complete: Second Conversation Created in Project');"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Step 11: List Conversations by Project",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": {
+              "raw": "{{kong_url}}/v1/conversations?project_id={{project_id_1}}",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "v1",
+                "conversations"
+              ],
+              "query": [
+                {
+                  "key": "project_id",
+                  "value": "{{project_id_1}}"
+                }
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('[Step 11] Conversations filtered by project_id retrieved successfully', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "",
+                  "pm.test('[Step 11] Response contains conversation data array', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response).to.have.property('data');",
+                  "    pm.expect(response.data).to.be.an('array');",
+                  "    pm.expect(response.data.length).to.be.at.least(1);",
+                  "    console.log('Found ' + response.data.length + ' conversations in project');",
+                  "});",
+                  "",
+                  "console.log('Step 11 Complete: Conversations Filtered by Project');"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Step 12: Verify Conversations Still Belong to Project",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": {
+              "raw": "{{kong_url}}/v1/conversations?project_id={{project_id_1}}",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "v1",
+                "conversations"
+              ],
+              "query": [
+                {
+                  "key": "project_id",
+                  "value": "{{project_id_1}}"
+                }
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('[Step 12] Conversations still in project after creation', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "",
+                  "pm.test('[Step 12] Both project conversations are present', function () {",
+                  "    const response = pm.response.json();",
+                  "    const conv1Exists = response.data.some(conv => ",
+                  "        conv.id === pm.collectionVariables.get('conversation_in_project_1')",
+                  "    );",
+                  "    const conv2Exists = response.data.some(conv => ",
+                  "        conv.id === pm.collectionVariables.get('conversation_in_project_2')",
+                  "    );",
+                  "    pm.expect(conv1Exists).to.be.true;",
+                  "    pm.expect(conv2Exists).to.be.true;",
+                  "    console.log('Confirmed: Both conversations remain in project_id_1');",
+                  "});",
+                  "",
+                  "console.log('Step 12 Complete: Verified Conversations in Project');"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Step 13: List Conversations without Project Filter (Global + Project)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": {
+              "raw": "{{kong_url}}/v1/conversations?limit=100",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "v1",
+                "conversations"
+              ],
+              "query": [
+                {
+                  "key": "limit",
+                  "value": "100"
+                }
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('[Step 13] Global conversations list retrieved successfully', function () {",
+                  "    pm.response.to.have.status(200);",
+                  "});",
+                  "",
+                  "pm.test('[Step 13] All conversations (global and project-scoped) are returned', function () {",
+                  "    const response = pm.response.json();",
+                  "    pm.expect(response).to.have.property('data');",
+                  "    pm.expect(response.data).to.be.an('array');",
+                  "    pm.expect(response.data.length).to.be.at.least(3);",
+                  "    ",
+                  "    // Verify we have both project conversations",
+                  "    const conv1Exists = response.data.some(conv => ",
+                  "        conv.id === pm.collectionVariables.get('conversation_in_project_1')",
+                  "    );",
+                  "    const conv2Exists = response.data.some(conv => ",
+                  "        conv.id === pm.collectionVariables.get('conversation_in_project_2')",
+                  "    );",
+                  "    pm.expect(conv1Exists).to.be.true;",
+                  "    pm.expect(conv2Exists).to.be.true;",
+                  "    console.log('Confirmed: All conversations (global and project) returned in list');",
+                  "});",
+                  "",
+                  "console.log('Step 13 Complete: Verified Global Conversation List');"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Cleanup",
+      "item": [
+        {
+          "name": "Delete Conversation",
+          "request": {
+            "method": "DELETE",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/conversations/{{conversation_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('conversation deleted or not found', function () {",
+                  "    pm.expect([200, 204, 404]).to.include(pm.response.code);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Delete Project 1",
+          "request": {
+            "method": "DELETE",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/projects/{{project_id_1}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project 1 deleted (soft-delete)', function () {",
+                  "    pm.expect([200, 204]).to.include(pm.response.code);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Delete Project 2",
+          "request": {
+            "method": "DELETE",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/projects/{{project_id_2}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project 2 deleted', function () {",
+                  "    pm.expect([200, 204]).to.include(pm.response.code);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Delete Project 3",
+          "request": {
+            "method": "DELETE",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/projects/{{project_id_3}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Project 3 deleted', function () {",
+                  "    pm.expect([200, 204]).to.include(pm.response.code);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Verify Deleted Project Not Found",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/projects/{{project_id_1}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Deleted project returns 404', function () {",
+                  "    pm.response.to.have.status(404);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    }
+  ],
+  "variable": [
+    {
+      "key": "kong_url",
+      "value": "http://localhost:8000",
+      "type": "string"
+    },
+    {
+      "key": "model_id",
+      "value": "",
+      "type": "string"
+    },
+    {
+      "key": "access_token",
+      "value": "",
+      "type": "string",
+      "description": "Set this from auth flow or manually"
+    },
+    {
+      "key": "project_id_1",
+      "value": "",
+      "type": "string"
+    },
+    {
+      "key": "project_id_2",
+      "value": "",
+      "type": "string"
+    },
+    {
+      "key": "project_id_3",
+      "value": "",
+      "type": "string"
+    },
+    {
+      "key": "next_cursor",
+      "value": "",
+      "type": "string"
+    }
+  ]
+}
+
diff --git a/tests/automation/mcp-postman-scripts.json b/tests/automation/mcp-postman-scripts.json
new file mode 100644
index 00000000..ac58edba
--- /dev/null
+++ b/tests/automation/mcp-postman-scripts.json
@@ -0,0 +1,724 @@
+{
+  "info": {
+    "name": "MCP Tools & SearXNG Smoke",
+    "description": "Guest auth + MCP tools listing/search/scrape, vector-store file search (index/query), and SandboxFusion Python execution, plus direct SearXNG checks.",
+    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+  },
+  "variable": [
+    {
+      "key": "kong_url",
+      "value": "http://localhost:8080"
+    },
+    {
+      "key": "mcp_tools_url",
+      "value": "http://localhost:8091"
+    },
+    {
+      "key": "searxng_url",
+      "value": "http://localhost:8086"
+    },
+    {
+      "key": "guest_access_token",
+      "value": ""
+    }
+  ],
+  "item": [
+    {
+      "name": "Guest Auth",
+      "item": [
+        {
+          "name": "Request Guest Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{}"
+            },
+            "url": "{{kong_url}}/auth/guest-login"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const guestData = pm.response.json();",
+                  "pm.test('Guest token issued', function () {",
+                  "  pm.response.to.have.status(201);",
+                  "  pm.expect(guestData).to.have.property('access_token');",
+                  "  pm.collectionVariables.set('guest_access_token', guestData.access_token);",
+                  "});",
+                  "pm.test('Response includes expiry', function () {",
+                  "  pm.expect(guestData).to.have.property('expires_in');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "MCP Search Domain Filter",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/call\",\n  \"params\": {\n    \"name\": \"google_search\",\n    \"arguments\": {\n      \"q\": \"Example Domain\",\n      \"domain_allow_list\": [\"example.com\"]\n    }\n  },\n  \"id\": 4\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "const content = (payload && payload.result && payload.result.content) || [];",
+                  "const textBlock = content.find((block) => block.type === 'text');",
+                  "let structured = null;",
+                  "try {",
+                  "  structured = textBlock ? JSON.parse(textBlock.text) : null;",
+                  "} catch (err) {",
+                  "  console.error('Failed to parse structured search payload', err);",
+                  "}",
+                  "pm.test('Domain filter enforces example.com citations', function () {",
+                  "  pm.expect(structured).to.be.an('object');",
+                  "  pm.expect(structured.results).to.be.an('array');",
+                  "  pm.expect(structured.results.length).to.be.greaterThan(0);",
+                  "  structured.results.forEach((res) => pm.expect(res.source_url).to.include('example.com'));",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "MCP Search Offline Mode",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/call\",\n  \"params\": {\n    \"name\": \"google_search\",\n    \"arguments\": {\n      \"q\": \"offline mode smoke test\",\n      \"offline_mode\": true\n    }\n  },\n  \"id\": 5\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "const content = (payload && payload.result && payload.result.content) || [];",
+                  "const textBlock = content.find((block) => block.type === 'text');",
+                  "let structured = null;",
+                  "try {",
+                  "  structured = textBlock ? JSON.parse(textBlock.text) : null;",
+                  "} catch (err) {",
+                  "  console.error('Failed to parse structured search payload', err);",
+                  "}",
+                  "pm.test('Offline mode returns cache status and non-live flag', function () {",
+                  "  pm.expect(structured).to.be.an('object');",
+                  "  pm.expect(structured.cache_status).to.match(/offline|fallback/);",
+                  "  pm.expect(structured.live).to.eql(false);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "MCP Tools",
+      "item": [
+        {
+          "name": "List MCP Tools",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/list\",\n  \"id\": 1\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.variables.set('mcp_payload', JSON.stringify(payload || {}));",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Parsed MCP payload', function () {",
+                  "  pm.expect(payload).to.be.an('object');",
+                  "});",
+                  "const parsed = payload && payload.result && payload.result.tools ? payload.result.tools : [];",
+                  "pm.test('Tools list returned', function () {",
+                  "  pm.expect(parsed.length).to.be.greaterThan(0);",
+                  "});",
+                  "pm.collectionVariables.set('mcp_tool_count', parsed.length);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Serper Search via MCP",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/call\",\n  \"params\": {\n    \"name\": \"google_search\",\n    \"arguments\": {\n      \"q\": \"Model Context Protocol\"\n    }\n  },\n  \"id\": 2\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.variables.set('mcp_payload', JSON.stringify(payload || {}));",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Parsed MCP payload', function () {",
+                  "  pm.expect(payload).to.be.an('object');",
+                  "});",
+                  "const content = (payload && payload.result && payload.result.content) || [];",
+                  "const textBlock = content.find((block) => block.type === 'text');",
+                  "pm.test('Search payload contains structured text', function () {",
+                  "  pm.expect(textBlock).to.exist;",
+                  "  pm.expect(textBlock.text.length).to.be.greaterThan(0);",
+                  "});",
+                  "let structured = null;",
+                  "try {",
+                  "  structured = textBlock ? JSON.parse(textBlock.text) : null;",
+                  "} catch (err) {",
+                  "  console.error('Failed to parse structured search payload', err);",
+                  "}",
+                  "pm.test('Structured search payload has results and citations', function () {",
+                  "  pm.expect(structured).to.be.an('object');",
+                  "  pm.expect(structured.results).to.be.an('array');",
+                  "  pm.expect(structured.results.length).to.be.greaterThan(0);",
+                  "  pm.expect(structured.results[0]).to.have.property('source_url');",
+                  "  pm.expect(structured.results[0]).to.have.property('cache_status');",
+                  "  pm.expect(structured).to.have.property('citations');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Serper Scrape via MCP",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/call\",\n  \"params\": {\n    \"name\": \"scrape\",\n    \"arguments\": {\n      \"url\": \"https://example.com\"\n    }\n  },\n  \"id\": 3\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.variables.set('mcp_payload', JSON.stringify(payload || {}));",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Parsed MCP payload', function () {",
+                  "  pm.expect(payload).to.be.an('object');",
+                  "});",
+                  "const content = (payload && payload.result && payload.result.content) || [];",
+                  "const textBlock = content.find((block) => block.type === 'text');",
+                  "pm.test('Scrape payload contains structured text', function () {",
+                  "  pm.expect(textBlock).to.exist;",
+                  "  pm.expect(textBlock.text.length).to.be.greaterThan(0);",
+                  "});",
+                  "let structured = null;",
+                  "try {",
+                  "  structured = textBlock ? JSON.parse(textBlock.text) : null;",
+                  "} catch (err) {",
+                  "  console.error('Failed to parse structured scrape payload', err);",
+                  "}",
+                  "pm.test('Structured scrape payload exposes preview and cache status', function () {",
+                  "  pm.expect(structured).to.be.an('object');",
+                  "  pm.expect(structured).to.have.property('text');",
+                  "  pm.expect(structured).to.have.property('text_preview');",
+                  "  pm.expect(structured).to.have.property('cache_status');",
+                  "});",
+                  "pm.test('Scrape payload mentions example.com', function () {",
+                  "  pm.expect(structured && structured.text).to.include('Example Domain');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "File Search Index",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/call\",\n  \"params\": {\n    \"name\": \"file_search_index\",\n    \"arguments\": {\n      \"document_id\": \"postman-doc-1\",\n      \"text\": \"Postman MCP automation smoke guide covering Model Context Protocol flows.\",\n      \"metadata\": {\n        \"category\": \"kb\",\n        \"version\": \"1.0\"\n      },\n      \"tags\": [\"automation\", \"kb\"]\n    }\n  },\n  \"id\": 6\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "const content = (payload && payload.result && payload.result.content) || [];",
+                  "const textBlock = content.find((block) => block.type === 'text');",
+                  "let structured = null;",
+                  "try {",
+                  "  structured = textBlock ? JSON.parse(textBlock.text) : null;",
+                  "} catch (err) {",
+                  "  console.error('Failed to parse structured indexing payload', err);",
+                  "}",
+                  "pm.test('Vector store returned indexed status', function () {",
+                  "  pm.expect(structured).to.be.an('object');",
+                  "  pm.expect(structured.status || structured.Status).to.be.oneOf(['indexed', 'created']);",
+                  "});",
+                  "pm.collectionVariables.set('file_doc_id', 'postman-doc-1');"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "File Search Query",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/call\",\n  \"params\": {\n    \"name\": \"file_search_query\",\n    \"arguments\": {\n      \"query\": \"automation smoke\",\n      \"top_k\": 3,\n      \"document_ids\": [\"postman-doc-1\"]\n    }\n  },\n  \"id\": 7\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "const content = (payload && payload.result && payload.result.content) || [];",
+                  "const textBlock = content.find((block) => block.type === 'text');",
+                  "let structured = null;",
+                  "try {",
+                  "  structured = textBlock ? JSON.parse(textBlock.text) : null;",
+                  "} catch (err) {",
+                  "  console.error('Failed to parse structured search payload', err);",
+                  "}",
+                  "pm.test('Vector search returns our indexed document', function () {",
+                  "  pm.expect(structured).to.be.an('object');",
+                  "  pm.expect(structured.results).to.be.an('array').that.is.not.empty;",
+                  "  const first = structured.results[0];",
+                  "  pm.expect(first.document_id).to.eql('postman-doc-1');",
+                  "  pm.expect(first.text_preview).to.include('automation');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "SandboxFusion Python Exec",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/call\",\n  \"params\": {\n    \"name\": \"python_exec\",\n    \"arguments\": {\n      \"code\": \"print(\\\"hello from sandbox\\\")\",\n      \"language\": \"python\",\n      \"approved\": true\n    }\n  },\n  \"id\": 8\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const rawBody = pm.response.text();",
+                  "let payload = null;",
+                  "try {",
+                  "  payload = JSON.parse(rawBody);",
+                  "} catch (err) {",
+                  "  const lines = rawBody.split('\\n');",
+                  "  for (const line of lines) {",
+                  "    if (line.startsWith('data: ')) {",
+                  "      try {",
+                  "        payload = JSON.parse(line.substring(6));",
+                  "        break;",
+                  "      } catch (e) {",
+                  "        console.error('Failed to parse SSE payload', e);",
+                  "      }",
+                  "    }",
+                  "  }",
+                  "}",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "const content = (payload && payload.result && payload.result.content) || [];",
+                  "const textBlock = content.find((block) => block.type === 'text');",
+                  "let structured = null;",
+                  "try {",
+                  "  structured = textBlock ? JSON.parse(textBlock.text) : null;",
+                  "} catch (err) {",
+                  "  console.error('Failed to parse sandboxfusion payload', err);",
+                  "}",
+                  "pm.test('SandboxFusion returned stdout', function () {",
+                  "  pm.expect(structured).to.be.an('object');",
+                  "  pm.expect(structured.stdout || '').to.include('hello from sandbox');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "SearXNG",
+      "item": [
+        {
+          "name": "SearXNG HTML Search",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "X-Forwarded-For",
+                "value": "127.0.0.1"
+              },
+              {
+                "key": "X-Real-IP",
+                "value": "127.0.0.1"
+              },
+              {
+                "key": "User-Agent",
+                "value": "PostmanRuntime/7.36"
+              },
+              {
+                "key": "Accept",
+                "value": "text/html"
+              }
+            ],
+            "url": {
+              "raw": "{{searxng_url}}/search?q=Model+Context+Protocol",
+              "host": [
+                "{{searxng_url}}"
+              ],
+              "path": [
+                "search"
+              ],
+              "query": [
+                {
+                  "key": "q",
+                  "value": "Model Context Protocol"
+                }
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const searxHtml = pm.response.text();",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('SearXNG page contains SearXNG branding', function () {",
+                  "  pm.expect(searxHtml).to.include('SearXNG');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "SearXNG Text Scrape",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "X-Forwarded-For",
+                "value": "127.0.0.1"
+              },
+              {
+                "key": "X-Real-IP",
+                "value": "127.0.0.1"
+              },
+              {
+                "key": "User-Agent",
+                "value": "PostmanRuntime/7.36"
+              },
+              {
+                "key": "Accept",
+                "value": "text/html"
+              }
+            ],
+            "url": {
+              "raw": "{{searxng_url}}/search?q=https://example.com",
+              "host": [
+                "{{searxng_url}}"
+              ],
+              "path": [
+                "search"
+              ],
+              "query": [
+                {
+                  "key": "q",
+                  "value": "https://example.com"
+                }
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const searxText = pm.response.text();",
+                  "pm.test('Status code is 200', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Body contains text', function () {",
+                  "  pm.expect(searxText.length).to.be.greaterThan(0);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
+
diff --git a/tests/automation/media-postman-scripts.json b/tests/automation/media-postman-scripts.json
new file mode 100644
index 00000000..b71fd577
--- /dev/null
+++ b/tests/automation/media-postman-scripts.json
@@ -0,0 +1,855 @@
+{
+  "info": {
+    "name": "Media API Automation",
+    "description": "Comprehensive Postman collection for media-api endpoints including upload, deduplication, resolution, presigning, and download flows via Kong gateway. All requests go through kong_url.",
+    "_postman_id": "media-api-automation-collection",
+    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+  },
+  "item": [
+    {
+      "name": "Authentication",
+      "item": [
+        {
+          "name": "Request Guest Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{}"
+            },
+            "url": {
+              "raw": "{{kong_url}}/auth/guest-login",
+              "host": [
+                "{{kong_url}}"
+              ],
+              "path": [
+                "auth",
+                "guest-login"
+              ]
+            }
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var data = pm.response.json();",
+                  "pm.test('guest token issued', function () {",
+                  "    pm.response.to.have.status(201);",
+                  "    pm.expect(data.access_token).to.be.a('string').and.not.empty;",
+                  "});",
+                  "pm.collectionVariables.set('media_access_token', data.access_token);",
+                  "pm.collectionVariables.set('media_user_id', data.user_id || '');"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "Health Check",
+      "request": {
+        "method": "GET",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}",
+            "type": "text"
+          }
+        ],
+        "url": {
+          "raw": "{{kong_url}}/media/healthz",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "healthz"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('health status is 200', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "pm.test('body reports healthy/ok', function () {",
+              "    var data = pm.response.json();",
+              "    pm.expect(['ok', 'healthy']).to.include(data.status);",
+              "});"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Prepare Upload (Get Presigned URL)",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"mime_type\": \"image/png\",\n  \"user_id\": \"automation\"\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media/prepare-upload",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media",
+            "prepare-upload"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('prepare upload successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "var data = pm.response.json();",
+              "",
+              "pm.test('response contains jan_id', function () {",
+              "    pm.expect(data).to.have.property('id');",
+              "    pm.expect(data.id).to.match(/^jan_[a-z0-9]+$/);",
+              "});",
+              "",
+              "pm.test('response contains presigned upload URL', function () {",
+              "    pm.expect(data).to.have.property('upload_url');",
+              "    pm.expect(data.upload_url).to.be.a('string');",
+              "    pm.expect(data.upload_url).to.match(/https?:\\/\\//);",
+              "});",
+              "",
+              "pm.test('response contains mime type', function () {",
+              "    pm.expect(data).to.have.property('mime_type');",
+              "    pm.expect(data.mime_type).to.eql('image/png');",
+              "});",
+              "",
+              "pm.test('response contains expires_in', function () {",
+              "    pm.expect(data).to.have.property('expires_in');",
+              "    pm.expect(data.expires_in).to.be.a('number').above(0);",
+              "});",
+              "",
+              "// Store for client-side upload simulation",
+              "if (data.id && data.upload_url) {",
+              "    pm.collectionVariables.set('presigned_upload_id', data.id);",
+              "    pm.collectionVariables.set('presigned_upload_url', data.upload_url);",
+              "}"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Ingest Media (Remote URL)",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"source\": {\n    \"type\": \"remote_url\",\n    \"url\": \"https://www.jan.ai/_next/static/media/cute-robot-flying.1479447f.png\"\n  },\n  \"filename\": \"httpbin.png\",\n  \"user_id\": \"automation\"\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('upload successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "var data = pm.response.json();",
+              "",
+              "pm.test('response contains media id', function () {",
+              "    pm.expect(data).to.have.property('id');",
+              "    pm.expect(data.id).to.be.a('string');",
+              "    pm.expect(data.id).to.match(/^jan_[a-z0-9]+$/);",
+              "});",
+              "",
+              "pm.test('response contains mime type', function () {",
+              "    pm.expect(data).to.have.property('mime');",
+              "    pm.expect(data.mime).to.match(/^image\\//);",
+              "});",
+              "",
+              "pm.test('response contains bytes size', function () {",
+              "    pm.expect(data).to.have.property('bytes');",
+              "    pm.expect(data.bytes).to.be.above(0);",
+              "});",
+              "",
+              "pm.test('response indicates deduplication status', function () {",
+              "    pm.expect(data).to.have.property('deduped');",
+              "    pm.expect(data.deduped).to.be.a('boolean');",
+              "});",
+              "",
+              "pm.test('response contains presigned URL', function () {",
+              "    pm.expect(data).to.have.property('presigned_url');",
+              "    if (data.presigned_url) {",
+              "        pm.expect(data.presigned_url).to.be.a('string');",
+              "        pm.expect(data.presigned_url).to.match(/https?:\\/\\//);",
+              "    }",
+              "});",
+              "",
+              "// Store the media ID for subsequent tests",
+              "if (data.id) {",
+              "    pm.collectionVariables.set('latest_media_id', data.id);",
+              "    pm.collectionVariables.set('latest_media_mime', data.mime);",
+              "    pm.collectionVariables.set('latest_media_bytes', data.bytes);",
+              "    if (data.presigned_url) {",
+              "        pm.collectionVariables.set('latest_presigned_url', data.presigned_url);",
+              "    }",
+              "}"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Ingest Media (Data URL)",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"source\": {\n    \"type\": \"data_url\",\n    \"data_url\": \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==\"\n  },\n  \"filename\": \"test-pixel.png\",\n  \"user_id\": \"automation\"\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('upload successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "var data = pm.response.json();",
+              "",
+              "pm.test('response contains media id', function () {",
+              "    pm.expect(data).to.have.property('id');",
+              "    pm.expect(data.id).to.match(/^jan_[a-z0-9]+$/);",
+              "});",
+              "",
+              "pm.test('mime type is image', function () {",
+              "    pm.expect(data.mime).to.match(/^image\\//);",
+              "});",
+              "",
+              "pm.test('presigned URL provided', function () {",
+              "    pm.expect(data).to.have.property('presigned_url');",
+              "    if (data.presigned_url) {",
+              "        pm.expect(data.presigned_url).to.match(/https?:\\/\\//);",
+              "    }",
+              "});",
+              "",
+              "// Store for deduplication test and downstream flows",
+              "pm.collectionVariables.set('dataurl_media_id', data.id);",
+              "pm.collectionVariables.set('dataurl_deduped_first', data.deduped);",
+              "pm.collectionVariables.set('latest_media_id', data.id);",
+              "pm.collectionVariables.set('latest_media_mime', data.mime);",
+              "pm.collectionVariables.set('latest_media_bytes', data.bytes);",
+              "if (data.presigned_url) {",
+              "    pm.collectionVariables.set('latest_presigned_url', data.presigned_url);",
+              "}"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Test Deduplication (Upload Same Data URL)",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"source\": {\n    \"type\": \"data_url\",\n    \"data_url\": \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==\"\n  },\n  \"filename\": \"test-pixel-duplicate.png\",\n  \"user_id\": \"automation\"\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('upload successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "var data = pm.response.json();",
+              "var firstId = pm.collectionVariables.get('dataurl_media_id');",
+              "",
+              "pm.test('deduplication returns same id', function () {",
+              "    pm.expect(data.id).to.eql(firstId);",
+              "});",
+              "",
+              "pm.test('deduped flag is true', function () {",
+              "    pm.expect(data.deduped).to.be.true;",
+              "});"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Resolve Payload with jan_* Placeholder",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"payload\": {\n    \"messages\": [\n      {\n        \"role\": \"user\",\n        \"content\": [\n          {\n            \"type\": \"text\",\n            \"text\": \"What's in this image?\"\n          },\n          {\n            \"type\": \"image_url\",\n            \"image_url\": {\n              \"url\": \"data:image/png;{{latest_media_id}}\"\n            }\n          }\n        ]\n      }\n    ]\n  }\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media/resolve",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media",
+            "resolve"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('resolve successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "var data = pm.response.json();",
+              "",
+              "pm.test('payload is returned', function () {",
+              "    pm.expect(data).to.have.property('payload');",
+              "    pm.expect(data.payload).to.be.an('object');",
+              "});",
+              "",
+              "pm.test('jan_* placeholder replaced with signed URL', function () {",
+              "    var message = (data.payload.messages || [])[0];",
+              "    pm.expect(message).to.be.an('object');",
+              "    var imagePart = (message.content || []).find(function (part) {",
+              "        return part.type === 'image_url';",
+              "    });",
+              "    pm.expect(imagePart).to.be.an('object');",
+              "    pm.expect(imagePart.image_url).to.have.property('url');",
+              "    pm.expect(imagePart.image_url.url).to.match(/^https?:\\/\\//);",
+              "    pm.expect(imagePart.image_url.url).to.not.match(/^data:image/);",
+              "});",
+              "",
+              "pm.test('messages structure preserved', function () {",
+              "    pm.expect(data.payload.messages).to.be.an('array');",
+              "    pm.expect(data.payload.messages[0]).to.have.property('content');",
+              "});"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Proxy Download (Direct Stream)",
+      "request": {
+        "method": "GET",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media/{{latest_media_id}}",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media",
+            "{{latest_media_id}}"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('download successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "pm.test('content-type header is set', function () {",
+              "    var contentType = pm.response.headers.get('Content-Type');",
+              "    pm.expect(contentType).to.exist;",
+              "    // Could be direct binary stream or presigned URL response",
+              "    pm.expect(contentType).to.match(/(image\\/|application\\/json)/);",
+              "});",
+              "",
+              "// If PROXY_DOWNLOAD=false, we get JSON with presigned URL",
+              "if ((pm.response.headers.get('Content-Type') || '').includes('application/json')) {",
+              "    pm.test('presigned URL returned', function () {",
+              "        var data = pm.response.json();",
+              "        pm.expect(data).to.have.property('url');",
+              "        pm.expect(data.url).to.match(/https?:\\/\\//);",
+              "        pm.collectionVariables.set('latest_presigned_url', data.url);",
+              "    });",
+              "} else {",
+              "    pm.test('binary data received', function () {",
+              "        pm.expect(pm.response).to.have.property('stream');",
+              "    });",
+              "}"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Get Presigned URL (Dedicated Endpoint)",
+      "request": {
+        "method": "GET",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media/{{latest_media_id}}/presign",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media",
+            "{{latest_media_id}}",
+            "presign"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('presign successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "var data = pm.response.json();",
+              "",
+              "pm.test('response contains media id', function () {",
+              "    pm.expect(data).to.have.property('id');",
+              "    pm.expect(data.id).to.equal(pm.collectionVariables.get('latest_media_id'));",
+              "});",
+              "",
+              "pm.test('response contains presigned URL', function () {",
+              "    pm.expect(data).to.have.property('url');",
+              "    pm.expect(data.url).to.be.a('string');",
+              "    pm.expect(data.url).to.match(/https?:\\/\\//);",
+              "});",
+              "",
+              "pm.test('response contains expires_in', function () {",
+              "    pm.expect(data).to.have.property('expires_in');",
+              "    pm.expect(data.expires_in).to.be.a('number');",
+              "    pm.expect(data.expires_in).to.be.above(0);",
+              "});",
+              "",
+              "// Store the presigned URL for potential downstream use",
+              "if (data.url) {",
+              "    pm.collectionVariables.set('latest_presigned_url', data.url);",
+              "}"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Get Nonexistent Media (404 Test)",
+      "request": {
+        "method": "GET",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media/jan_nonexistent123",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media",
+            "jan_nonexistent123"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('returns 404 for nonexistent media', function () {",
+              "    pm.response.to.have.status(404);",
+              "});",
+              "",
+              "pm.test('error message provided', function () {",
+              "    var data = pm.response.json();",
+              "    pm.expect(data).to.have.property('error');",
+              "});"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Ingest Invalid Source Type (Error Test)",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"source\": {\n    \"type\": \"invalid_type\",\n    \"url\": \"https://example.com/image.png\"\n  },\n  \"user_id\": \"automation\"\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('returns 400 for invalid source type', function () {",
+              "    pm.response.to.have.status(400);",
+              "});",
+              "",
+              "pm.test('error message provided', function () {",
+              "    var data = pm.response.json();",
+              "    pm.expect(data).to.have.property('error');",
+              "    pm.expect(data.error).to.include('unknown source type');",
+              "});"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Ingest Without Auth Key (401 Test)",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"source\": {\n    \"type\": \"remote_url\",\n    \"url\": \"https://www.jan.ai/_next/static/media/cute-robot-flying.1479447f.png\"\n  },\n  \"user_id\": \"automation\"\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('returns 401 without auth key', function () {",
+              "    pm.response.to.have.status(401);",
+              "});"
+            ]
+          }
+        }
+      ],
+      "response": []
+    },
+    {
+      "name": "Resolve Payload Without Placeholders",
+      "request": {
+        "method": "POST",
+        "header": [
+          {
+            "key": "Authorization",
+            "value": "Bearer {{media_access_token}}"
+          },
+          {
+            "key": "Content-Type",
+            "value": "application/json"
+          },
+          {
+            "key": "X-Media-Service-Key",
+            "value": "{{media_service_key}}"
+          }
+        ],
+        "body": {
+          "mode": "raw",
+          "raw": "{\n  \"payload\": {\n    \"messages\": [\n      {\n        \"role\": \"user\",\n        \"content\": \"Hello, this message has no media placeholders.\"\n      }\n    ]\n  }\n}"
+        },
+        "url": {
+          "raw": "{{kong_url}}/media/v1/media/resolve",
+          "host": [
+            "{{kong_url}}/media"
+          ],
+          "path": [
+            "v1",
+            "media",
+            "resolve"
+          ]
+        }
+      },
+      "event": [
+        {
+          "listen": "test",
+          "script": {
+            "type": "text/javascript",
+            "exec": [
+              "pm.test('resolve successful', function () {",
+              "    pm.response.to.have.status(200);",
+              "});",
+              "",
+              "var data = pm.response.json();",
+              "",
+              "pm.test('payload returned unchanged', function () {",
+              "    pm.expect(data.payload).to.deep.equal({",
+              "        messages: [{",
+              "            role: 'user',",
+              "            content: 'Hello, this message has no media placeholders.'",
+              "        }]",
+              "    });",
+              "});"
+            ]
+          }
+        }
+      ],
+      "response": []
+    }
+  ],
+  "variable": [
+    {
+      "key": "kong_url",
+      "value": "http://localhost:8000"
+    },
+    {
+      "key": "media_service_key",
+      "value": "changeme-media-key"
+    },
+    {
+      "key": "latest_media_id",
+      "value": ""
+    },
+    {
+      "key": "latest_media_mime",
+      "value": ""
+    },
+    {
+      "key": "latest_media_bytes",
+      "value": ""
+    },
+    {
+      "key": "dataurl_media_id",
+      "value": ""
+    },
+    {
+      "key": "dataurl_deduped_first",
+      "value": ""
+    },
+    {
+      "key": "latest_presigned_url",
+      "value": ""
+    },
+    {
+      "key": "presigned_upload_id",
+      "value": ""
+    },
+    {
+      "key": "presigned_upload_url",
+      "value": ""
+    },
+    {
+      "key": "media_access_token",
+      "value": ""
+    },
+    {
+      "key": "media_user_id",
+      "value": ""
+    }
+  ]
+}
diff --git a/tests/automation/memory-postman-scripts.json b/tests/automation/memory-postman-scripts.json
new file mode 100644
index 00000000..60cfb592
--- /dev/null
+++ b/tests/automation/memory-postman-scripts.json
@@ -0,0 +1,1916 @@
+{
+    "info": {
+        "_postman_id": "memory-tools-complete-test",
+        "name": "Memory Tools - Complete API Tests",
+        "description": "Comprehensive test suite for memory-tools service.\n\nTests embedding functionality via memory-tools API endpoints rather than direct BGE-M3 calls.\n\nCoverage:\n- Memory CRUD operations (user/project/episodic)\n- Vector search and semantic similarity\n- Batch embedding performance\n- Memory extraction from conversations\n- Importance scoring and ranking\n- End-to-end workflows\n\nNote: Advanced features (summarization, LLM planning, sparse embeddings) are tested indirectly as they're internal-only. See INTERNAL_FEATURES.md for details.",
+        "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+    },
+    "variable": [
+        {
+            "key": "base_url",
+            "value": "http://localhost:8090",
+            "type": "string"
+        },
+        {
+            "key": "embedding_url",
+            "value": "http://localhost:8091",
+            "type": "string"
+        },
+        {
+            "key": "user_id",
+            "value": "user_test_001",
+            "type": "string"
+        },
+        {
+            "key": "project_id",
+            "value": "proj_test_001",
+            "type": "string"
+        },
+        {
+            "key": "conversation_id",
+            "value": "conv_test_001",
+            "type": "string"
+        },
+        {
+            "key": "user_memory_id",
+            "value": "",
+            "type": "string"
+        },
+        {
+            "key": "project_fact_id",
+            "value": "",
+            "type": "string"
+        },
+        {
+            "key": "llm_api_url",
+            "value": "http://localhost:8080",
+            "type": "string"
+        },
+        {
+            "key": "llm_api_token",
+            "value": "",
+            "type": "string"
+        },
+        {
+            "name": "LLM API - Memory integration (disabled by default)",
+                    "disabled": true,
+            "event": [
+                {
+                    "listen": "test",
+                    "script": {
+                        "exec": [
+                            "pm.test(\"Status code is 200\", function () {",
+                            "    pm.response.to.have.status(200);",
+                            "});",
+                            "",
+                            "pm.test(\"Response has content\", function () {",
+                            "    pm.expect(pm.response.json()).to.have.property('choices');",
+                            "});"
+                        ],
+                        "type": "text/javascript"
+                    }
+                }
+            ],
+            "request": {
+                "auth": {
+                    "type": "bearer",
+                    "bearer": [
+                        {
+                            "key": "token",
+                            "value": "{{llm_api_token}}",
+                            "type": "string"
+                        }
+                    ]
+                },
+                "method": "POST",
+                "header": [
+                    {
+                        "key": "Content-Type",
+                        "value": "application/json"
+                    }
+                ],
+                "body": {
+                    "mode": "raw",
+                    "raw": "{\n  \"model\": \"gpt-4o-mini\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"What do you know about my preferences?\"}\n  ],\n  \"conversation\": {\n    \"id\": \"{{conversation_id}}\"\n  }\n}"
+                },
+                "url": {
+                    "raw": "{{llm_api_url}}/v1/chat/completions",
+                    "host": [
+                        "{{llm_api_url}}"
+                    ],
+                    "path": [
+                        "v1",
+                        "chat",
+                        "completions"
+                    ]
+                },
+                "description": "Optional: exercise llm-api memory orchestration. Enable this request and provide `llm_api_token` when llm-api is running with memory-tools available."
+            }
+        }
+    ],
+    "item": [
+        {
+            "name": "1. Health Checks",
+            "item": [
+                {
+                    "name": "Memory Tools Health",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Response has correct structure\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('status');",
+                                    "    pm.expect(jsonData).to.have.property('service');",
+                                    "    pm.expect(jsonData.status).to.eql('healthy');",
+                                    "    pm.expect(jsonData.service).to.eql('memory-tools');",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "GET",
+                        "header": [],
+                        "url": {
+                            "raw": "{{base_url}}/healthz",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "healthz"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Test Embedding via Memory Tools",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Embedding service is working via memory-tools\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.dimension).to.eql(1024);",
+                                    "    pm.expect(jsonData.status).to.eql('ok');",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": ""
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/embed/test",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "embed",
+                                "test"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "2. Embedding Tests",
+            "item": [
+                {
+                    "name": "Test Single Embedding",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Embedding dimension is 1024\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('dimension');",
+                                    "    pm.expect(jsonData.dimension).to.eql(1024);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": ""
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/embed/test",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "embed",
+                                "test"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "3. User Memory - Upsert",
+            "item": [
+                {
+                    "name": "Upsert User Preference",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Response has success status\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "    pm.expect(jsonData).to.have.property('ids');",
+                                    "    pm.expect(jsonData.ids).to.be.an('array');",
+                                    "    pm.expect(jsonData.ids.length).to.be.greaterThan(0);",
+                                    "    ",
+                                    "    // Save ID for later tests",
+                                    "    pm.collectionVariables.set('user_memory_id', jsonData.ids[0]);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"items\": [\n    {\n      \"scope\": \"preference\",\n      \"key\": \"language_preference\",\n      \"text\": \"I prefer Python for backend development and TypeScript for frontend\",\n      \"importance\": \"high\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/user/upsert",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "user",
+                                "upsert"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Upsert User Profile",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Multiple items upserted\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.ids).to.be.an('array');",
+                                    "    pm.expect(jsonData.ids.length).to.eql(2);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"items\": [\n    {\n      \"scope\": \"profile\",\n      \"key\": \"role\",\n      \"text\": \"I am a senior software engineer with 8 years of experience\",\n      \"importance\": \"medium\"\n    },\n    {\n      \"scope\": \"skill\",\n      \"key\": \"expertise\",\n      \"text\": \"Expert in distributed systems, microservices, and cloud architecture\",\n      \"importance\": \"high\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/user/upsert",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "user",
+                                "upsert"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "4. Project Memory - Upsert",
+            "item": [
+                {
+                    "name": "Upsert Project Decision",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Project fact created\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "    pm.expect(jsonData.ids).to.be.an('array');",
+                                    "    ",
+                                    "    // Save ID for later tests",
+                                    "    pm.collectionVariables.set('project_fact_id', jsonData.ids[0]);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"project_id\": \"{{project_id}}\",\n  \"facts\": [\n    {\n      \"kind\": \"decision\",\n      \"title\": \"Database Technology\",\n      \"text\": \"We decided to use PostgreSQL with pgvector extension for vector similarity search\",\n      \"confidence\": 0.95\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/project/upsert",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "project",
+                                "upsert"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Upsert Multiple Project Facts",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Multiple facts created\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.ids.length).to.eql(3);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"project_id\": \"{{project_id}}\",\n  \"facts\": [\n    {\n      \"kind\": \"assumption\",\n      \"title\": \"Performance Target\",\n      \"text\": \"The system should handle 1000 requests per second\",\n      \"confidence\": 0.8\n    },\n    {\n      \"kind\": \"risk\",\n      \"title\": \"Scalability Concern\",\n      \"text\": \"Vector search performance may degrade with more than 1M items\",\n      \"confidence\": 0.7\n    },\n    {\n      \"kind\": \"fact\",\n      \"title\": \"Embedding Model\",\n      \"text\": \"Using BGE-M3 model for 1024-dimensional embeddings\",\n      \"confidence\": 1.0\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/project/upsert",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "project",
+                                "upsert"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "5. Memory Load (Search)",
+            "item": [
+                {
+                    "name": "Load User Memories",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Core memory returned\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('core_memory');",
+                                    "    pm.expect(jsonData.core_memory).to.be.an('array');",
+                                    "    pm.expect(jsonData.core_memory.length).to.be.greaterThan(0);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Memory items have similarity scores\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    if (jsonData.core_memory.length > 0) {",
+                                    "        pm.expect(jsonData.core_memory[0]).to.have.property('similarity');",
+                                    "        pm.expect(jsonData.core_memory[0].similarity).to.be.a('number');",
+                                    "        pm.expect(jsonData.core_memory[0].similarity).to.be.within(0, 1);",
+                                    "    }",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"query\": \"What programming languages do I prefer?\",\n  \"options\": {\n    \"max_user_items\": 10,\n    \"min_similarity\": 0.3\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Load Project Memories",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Semantic memory returned\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('semantic_memory');",
+                                    "    pm.expect(jsonData.semantic_memory).to.be.an('array');",
+                                    "    pm.expect(jsonData.semantic_memory.length).to.be.greaterThan(0);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Project facts have confidence scores\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    if (jsonData.semantic_memory.length > 0) {",
+                                    "        pm.expect(jsonData.semantic_memory[0]).to.have.property('confidence');",
+                                    "        pm.expect(jsonData.semantic_memory[0].confidence).to.be.within(0, 1);",
+                                    "    }",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"project_id\": \"{{project_id}}\",\n  \"query\": \"What database are we using?\",\n  \"options\": {\n    \"max_project_items\": 10,\n    \"min_similarity\": 0.3\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Load All Memory Types",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"All memory types present\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('core_memory');",
+                                    "    pm.expect(jsonData).to.have.property('semantic_memory');",
+                                    "    pm.expect(jsonData).to.have.property('episodic_memory');",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Vector search quality check\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    // Check if results are ranked by similarity",
+                                    "    if (jsonData.core_memory.length > 1) {",
+                                    "        for (let i = 0; i < jsonData.core_memory.length - 1; i++) {",
+                                    "            pm.expect(jsonData.core_memory[i].similarity).to.be.at.least(jsonData.core_memory[i + 1].similarity);",
+                                    "        }",
+                                    "    }",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"project_id\": \"{{project_id}}\",\n  \"conversation_id\": \"{{conversation_id}}\",\n  \"query\": \"Tell me about the project architecture and my skills\",\n  \"options\": {\n    \"max_user_items\": 20,\n    \"max_project_items\": 20,\n    \"max_episodic_items\": 20,\n    \"min_similarity\": 0.3\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "6. Memory Observe",
+            "item": [
+                {
+                    "name": "Observe Simple Conversation",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Observation successful\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"project_id\": \"{{project_id}}\",\n  \"conversation_id\": \"{{conversation_id}}\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"I prefer using Docker for containerization\",\n      \"created_at\": \"2025-11-20T10:00:00Z\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"Noted! I'll remember that you prefer Docker for containerization.\",\n      \"created_at\": \"2025-11-20T10:00:05Z\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/observe",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "observe"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Observe Multi-Turn Conversation",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Multi-turn observation successful\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"project_id\": \"{{project_id}}\",\n  \"conversation_id\": \"{{conversation_id}}\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"We should use Redis for caching\",\n      \"created_at\": \"2025-11-20T10:05:00Z\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"That's a good decision. Redis is excellent for caching. Should I remember this as a project decision?\",\n      \"created_at\": \"2025-11-20T10:05:05Z\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"Yes, remember that we decided to use Redis for caching with a TTL of 1 hour\",\n      \"created_at\": \"2025-11-20T10:05:10Z\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/observe",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "observe"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "7. Memory Stats",
+            "item": [
+                {
+                    "name": "Get User Memory Stats",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Stats include all counts\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('user_memory_count');",
+                                    "    pm.expect(jsonData).to.have.property('episodic_events_count');",
+                                    "    pm.expect(jsonData.user_memory_count).to.be.a('number');",
+                                    "    pm.expect(jsonData.user_memory_count).to.be.greaterThan(0);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "GET",
+                        "header": [],
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/stats?user_id={{user_id}}",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "stats"
+                            ],
+                            "query": [
+                                {
+                                    "key": "user_id",
+                                    "value": "{{user_id}}"
+                                }
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Get Project Memory Stats",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Project stats available\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('project_facts_count');",
+                                    "    pm.expect(jsonData.project_facts_count).to.be.greaterThan(0);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "GET",
+                        "header": [],
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/stats?user_id={{user_id}}&project_id={{project_id}}",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "stats"
+                            ],
+                            "query": [
+                                {
+                                    "key": "user_id",
+                                    "value": "{{user_id}}"
+                                },
+                                {
+                                    "key": "project_id",
+                                    "value": "{{project_id}}"
+                                }
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "8. Memory Export",
+            "item": [
+                {
+                    "name": "Export User Memory",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Export contains user data\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData).to.have.property('user_memory');",
+                                    "    pm.expect(jsonData).to.have.property('episodic_events');",
+                                    "    pm.expect(jsonData.user_memory).to.be.an('array');",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Export data is complete\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    if (jsonData.user_memory.length > 0) {",
+                                    "        pm.expect(jsonData.user_memory[0]).to.have.property('id');",
+                                    "        pm.expect(jsonData.user_memory[0]).to.have.property('text');",
+                                    "        pm.expect(jsonData.user_memory[0]).to.have.property('created_at');",
+                                    "    }",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "GET",
+                        "header": [],
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/export?user_id={{user_id}}",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "export"
+                            ],
+                            "query": [
+                                {
+                                    "key": "user_id",
+                                    "value": "{{user_id}}"
+                                }
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "9. Memory Delete",
+            "item": [
+                {
+                    "name": "Delete Specific Memory",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Delete successful\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "    pm.expect(jsonData).to.have.property('deleted_count');",
+                                    "    pm.expect(jsonData.deleted_count).to.be.greaterThan(0);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"ids\": [\"{{user_memory_id}}\"]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/delete",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "delete"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "10. Batch Embedding & Performance",
+            "item": [
+                {
+                    "name": "Test Batch User Memory Upsert with Embeddings",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Batch embedding successful\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "    pm.expect(jsonData.ids).to.be.an('array');",
+                                    "    pm.expect(jsonData.ids.length).to.eql(5);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Batch processing completed in under 5 seconds\", function () {",
+                                    "    pm.expect(pm.response.responseTime).to.be.below(5000);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"items\": [\n    {\n      \"scope\": \"preference\",\n      \"key\": \"lang_python\",\n      \"text\": \"Python is great for data science\",\n      \"importance\": \"high\"\n    },\n    {\n      \"scope\": \"preference\",\n      \"key\": \"lang_javascript\",\n      \"text\": \"JavaScript powers modern web development\",\n      \"importance\": \"high\"\n    },\n    {\n      \"scope\": \"preference\",\n      \"key\": \"lang_rust\",\n      \"text\": \"Rust offers memory safety without garbage collection\",\n      \"importance\": \"high\"\n    },\n    {\n      \"scope\": \"preference\",\n      \"key\": \"lang_go\",\n      \"text\": \"Go is excellent for concurrent programming\",\n      \"importance\": \"high\"\n    },\n    {\n      \"scope\": \"preference\",\n      \"key\": \"lang_typescript\",\n      \"text\": \"TypeScript adds type safety to JavaScript\",\n      \"importance\": \"high\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/user/upsert",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "user",
+                                "upsert"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Test Embedding Quality via Vector Search",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Vector search returns results\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.core_memory).to.be.an('array');",
+                                    "    pm.expect(jsonData.core_memory.length).to.be.greaterThan(0);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Embeddings enable semantic search\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    // Query about 'scripting' should find JavaScript/Python",
+                                    "    const hasRelevant = jsonData.core_memory.some(m => ",
+                                    "        m.text.includes('JavaScript') || m.text.includes('Python')",
+                                    "    );",
+                                    "    pm.expect(hasRelevant).to.be.true;",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"query\": \"What are good languages for scripting and web development?\",\n  \"options\": {\n    \"max_user_items\": 10,\n    \"min_similarity\": 0.3\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Test Embedding Dimension Consistency",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Embedding service reports ok status and dimension 1024\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.dimension).to.eql(1024);",
+                                    "    pm.expect(jsonData.status).to.eql('ok');",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": ""
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/embed/test",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "embed",
+                                "test"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "11. Memory Building & Extraction",
+            "item": [
+                {
+                    "name": "Extract User Preferences from Conversation",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "pm.test(\"Memory extraction successful\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "});",
+                                    "",
+                                    "// Verify memory was extracted and stored",
+                                    "setTimeout(() => {",
+                                    "    pm.sendRequest({",
+                                    "        url: pm.collectionVariables.get('base_url') + '/v1/memory/load',",
+                                    "        method: 'POST',",
+                                    "        header: {'Content-Type': 'application/json'},",
+                                    "        body: {",
+                                    "            mode: 'raw',",
+                                    "            raw: JSON.stringify({",
+                                    "                user_id: pm.collectionVariables.get('user_id'),",
+                                    "                query: 'programming preferences',",
+                                    "                options: { max_user_items: 10 }",
+                                    "            })",
+                                    "        }",
+                                    "    }, (err, res) => {",
+                                    "        pm.test('Memory was extracted from conversation', () => {",
+                                    "            pm.expect(res.json().core_memory.length).to.be.greaterThan(0);",
+                                    "        });",
+                                    "    });",
+                                    "}, 500);"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"project_id\": \"{{project_id}}\",\n  \"conversation_id\": \"conv_extraction_001\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"I prefer using Go for backend microservices because of its excellent concurrency support\",\n      \"created_at\": \"2025-11-21T10:00:00Z\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"That's a great choice! Go's goroutines make concurrent programming much easier. I'll remember your preference for Go in backend development.\",\n      \"created_at\": \"2025-11-21T10:00:05Z\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"Yes, and I like using React with TypeScript for frontend work\",\n      \"created_at\": \"2025-11-21T10:00:10Z\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/observe",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "observe"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Extract Project Decisions Automatically",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "",
+                                    "// Verify project decision was extracted",
+                                    "setTimeout(() => {",
+                                    "    pm.sendRequest({",
+                                    "        url: pm.collectionVariables.get('base_url') + '/v1/memory/load',",
+                                    "        method: 'POST',",
+                                    "        header: {'Content-Type': 'application/json'},",
+                                    "        body: {",
+                                    "            mode: 'raw',",
+                                    "            raw: JSON.stringify({",
+                                    "                user_id: pm.collectionVariables.get('user_id'),",
+                                    "                project_id: pm.collectionVariables.get('project_id'),",
+                                    "                query: 'database decisions',",
+                                    "                options: { max_project_items: 10 }",
+                                    "            })",
+                                    "        }",
+                                    "    }, (err, res) => {",
+                                    "        pm.test('Project decision was extracted', () => {",
+                                    "            const semanticMemory = res.json().semantic_memory;",
+                                    "            pm.expect(semanticMemory).to.be.an('array');",
+                                    "        });",
+                                    "    });",
+                                    "}, 500);"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"project_id\": \"{{project_id}}\",\n  \"conversation_id\": \"conv_decision_001\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"We should use MongoDB for the document storage layer\",\n      \"created_at\": \"2025-11-21T11:00:00Z\"\n    },\n    {\n      \"role\": \"assistant\",\n      \"content\": \"MongoDB is a good choice for document storage. Should I remember this as a project decision?\",\n      \"created_at\": \"2025-11-21T11:00:05Z\"\n    },\n    {\n      \"role\": \"user\",\n      \"content\": \"Yes, let's use MongoDB with replica sets for high availability\",\n      \"created_at\": \"2025-11-21T11:00:10Z\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/observe",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "observe"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Test Memory Importance Scoring",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"All items stored successfully\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.ids.length).to.eql(4);",
+                                    "});",
+                                    "",
+                                    "// Verify importance scoring in search results",
+                                    "setTimeout(() => {",
+                                    "    pm.sendRequest({",
+                                    "        url: pm.collectionVariables.get('base_url') + '/v1/memory/load',",
+                                    "        method: 'POST',",
+                                    "        header: {'Content-Type': 'application/json'},",
+                                    "        body: {",
+                                    "            mode: 'raw',",
+                                    "            raw: JSON.stringify({",
+                                    "                user_id: pm.collectionVariables.get('user_id'),",
+                                    "                query: 'security requirements',",
+                                    "                options: { max_user_items: 10 }",
+                                    "            })",
+                                    "        }",
+                                    "    }, (err, res) => {",
+                                    "        pm.test('Critical item is returned with score 5', () => {",
+                                    "            const memories = res.json().core_memory;",
+                                    "            const criticalItem = memories.find(m => m.text.includes('API keys'));",
+                                    "            pm.expect(criticalItem).to.exist;",
+                                    "            pm.expect(criticalItem.score).to.eql(5); // Critical importance",
+                                    "        });",
+                                    "    });",
+                                    "}, 500);"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"items\": [\n    {\n      \"scope\": \"preference\",\n      \"key\": \"security_critical\",\n      \"text\": \"Never store API keys in version control, always use environment variables\",\n      \"importance\": \"critical\"\n    },\n    {\n      \"scope\": \"preference\",\n      \"key\": \"code_style_high\",\n      \"text\": \"I prefer using ESLint with strict rules for code consistency\",\n      \"importance\": \"high\"\n    },\n    {\n      \"scope\": \"context\",\n      \"key\": \"temp_note\",\n      \"text\": \"Maybe consider adding unit tests later\",\n      \"importance\": \"low\"\n    },\n    {\n      \"scope\": \"preference\",\n      \"key\": \"testing_medium\",\n      \"text\": \"I recommend writing integration tests for critical paths\",\n      \"importance\": \"medium\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/user/upsert",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "user",
+                                "upsert"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Test Memory Deduplication",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Duplicate memory handled correctly\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    // Upsert should update existing, not create duplicate",
+                                    "    pm.expect(jsonData.status).to.eql('success');",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"items\": [\n    {\n      \"scope\": \"preference\",\n      \"key\": \"language_preference\",\n      \"text\": \"I prefer Python for backend development and TypeScript for frontend (updated)\",\n      \"importance\": \"high\"\n    }\n  ]\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/user/upsert",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "user",
+                                "upsert"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "12. Cross-lingual & Semantic Search",
+            "item": [
+                {
+                    "name": "Test Semantic Similarity (Synonyms)",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Semantic search finds related concepts\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    ",
+                                    "    // Query is about 'bug' but should find 'error' and 'issue' memories",
+                                    "    pm.expect(jsonData.core_memory).to.be.an('array');",
+                                    "    ",
+                                    "    // Check if similar concepts are retrieved",
+                                    "    if (jsonData.core_memory.length > 0) {",
+                                    "        pm.expect(jsonData.core_memory[0].similarity).to.be.above(0.3);",
+                                    "    }",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"query\": \"How do I report a bug?\",\n  \"options\": {\n    \"max_user_items\": 10,\n    \"min_similarity\": 0.3\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "Test Semantic Search with Different Phrasing",
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"Different phrasing retrieves same memories\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    ",
+                                    "    // Should find preferences about programming languages",
+                                    "    pm.expect(jsonData.core_memory).to.be.an('array');",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"query\": \"Which coding language should I choose?\",\n  \"options\": {\n    \"max_user_items\": 10,\n    \"min_similarity\": 0.3\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "13. End-to-End Workflow",
+            "item": [
+                {
+                    "name": "E2E: Store \u2192 Search \u2192 Verify",
+                    "event": [
+                        {
+                            "listen": "prerequest",
+                            "script": {
+                                "exec": [
+                                    "// Generate unique IDs for this test",
+                                    "pm.collectionVariables.set('e2e_user_id', 'e2e_user_' + Date.now());",
+                                    "pm.collectionVariables.set('e2e_project_id', 'e2e_proj_' + Date.now());"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        },
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"E2E workflow successful\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    pm.expect(jsonData.core_memory.length).to.be.greaterThan(0);",
+                                    "    ",
+                                    "    // Verify the stored memory is retrieved",
+                                    "    const found = jsonData.core_memory.some(item => ",
+                                    "        item.text.includes('Rust for systems programming')",
+                                    "    );",
+                                    "    pm.expect(found).to.be.true;",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{e2e_user_id}}\",\n  \"query\": \"What programming language do I prefer for systems programming?\",\n  \"options\": {\n    \"max_user_items\": 10\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "name": "E2E: Conversation \u2192 Extraction \u2192 Retrieval",
+                    "event": [
+                        {
+                            "listen": "prerequest",
+                            "script": {
+                                "exec": [
+                                    "// Step 1: Store a conversation",
+                                    "const userId = 'e2e_conv_' + Date.now();",
+                                    "const projectId = 'e2e_proj_conv_' + Date.now();",
+                                    "pm.collectionVariables.set('e2e_conv_user', userId);",
+                                    "pm.collectionVariables.set('e2e_conv_project', projectId);",
+                                    "",
+                                    "pm.sendRequest({",
+                                    "    url: pm.collectionVariables.get('base_url') + '/v1/memory/observe',",
+                                    "    method: 'POST',",
+                                    "    header: {'Content-Type': 'application/json'},",
+                                    "    body: {",
+                                    "        mode: 'raw',",
+                                    "        raw: JSON.stringify({",
+                                    "            user_id: userId,",
+                                    "            project_id: projectId,",
+                                    "            conversation_id: 'e2e_test_conv',",
+                                    "            messages: [",
+                                    "                {",
+                                    "                    role: 'user',",
+                                    "                    content: 'I prefer using Kubernetes for container orchestration',",
+                                    "                    created_at: new Date().toISOString()",
+                                    "                }",
+                                    "            ]",
+                                    "        })",
+                                    "    }",
+                                    "}, (err, res) => {",
+                                    "    console.log('Conversation stored:', res.code);",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        },
+                        {
+                            "listen": "test",
+                            "script": {
+                                "exec": [
+                                    "pm.test(\"E2E conversation extraction works\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "    var jsonData = pm.response.json();",
+                                    "    ",
+                                    "    // Verify extracted memory is retrieved",
+                                    "    const hasMemory = jsonData.core_memory.length > 0 || jsonData.episodic_memory.length > 0;",
+                                    "    pm.expect(hasMemory).to.be.true;",
+                                    "});"
+                                ],
+                                "type": "text/javascript"
+                            }
+                        }
+                    ],
+                    "request": {
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"user_id\": \"{{e2e_conv_user}}\",\n  \"project_id\": \"{{e2e_conv_project}}\",\n  \"query\": \"container orchestration preferences\",\n  \"options\": {\n    \"max_user_items\": 10,\n    \"max_episodic_items\": 10\n  }\n}"
+                        },
+                        "url": {
+                            "raw": "{{base_url}}/v1/memory/load",
+                            "host": [
+                                "{{base_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "memory",
+                                "load"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        {
+            "name": "LLM API - Memory integration (disabled by default)",
+            "item": [
+                {
+                    "name": "Scenario 1: Standard Response (Memory Disabled)",
+                    "disabled": true,
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "type": "text/javascript",
+                                "exec": [
+                                    "const token = pm.collectionVariables.get('llm_api_token') || pm.environment.get('llm_api_token');",
+                                    "if (!token) {",
+                                    "    console.log('Skipping LLM API scenario (no token)');",
+                                    "    pm.test('Skipped: llm_api_token not set', function () { pm.expect(true).to.be.true; });",
+                                    "    return;",
+                                    "}",
+                                    "",
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "pm.test(\"No memory sources returned\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    if (jsonData.choices && jsonData.choices.length > 0) {",
+                                    "        const memSources = jsonData.choices[0].message.memory_sources;",
+                                    "        pm.expect(memSources === undefined || memSources.length === 0).to.be.true;",
+                                    "    }",
+                                    "});"
+                                ]
+                            }
+                        }
+                    ],
+                    "request": {
+                        "auth": {
+                            "type": "bearer",
+                            "bearer": [
+                                {
+                                    "key": "token",
+                                    "value": "{{llm_api_token}}",
+                                    "type": "string"
+                                }
+                            ]
+                        },
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"model\": \"gpt-4o-mini\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"Check if K8s deployment is healthy\"}\n  ],\n  \"conversation\": {\"id\": \"{{conversation_id}}\"},\n  \"augment_with_memory\": false\n}"
+                        },
+                        "url": {
+                            "raw": "{{llm_api_url}}/v1/chat/completions",
+                            "host": [
+                                "{{llm_api_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "chat",
+                                "completions"
+                            ]
+                        },
+                        "description": "Baseline behavior with memory disabled."
+                    }
+                },
+                {
+                    "name": "Scenario 2: Tool-Augmented Response with Memory",
+                    "disabled": true,
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "type": "text/javascript",
+                                "exec": [
+                                    "const token = pm.collectionVariables.get('llm_api_token') || pm.environment.get('llm_api_token');",
+                                    "if (!token) {",
+                                    "    console.log('Skipping LLM API scenario (no token)');",
+                                    "    pm.test('Skipped: llm_api_token not set', function () { pm.expect(true).to.be.true; });",
+                                    "    return;",
+                                    "}",
+                                    "",
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "pm.test(\"Memory sources included when requested\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    if (jsonData.choices && jsonData.choices.length > 0) {",
+                                    "        pm.expect(jsonData.choices[0]).to.have.property('message');",
+                                    "    }",
+                                    "});"
+                                ]
+                            }
+                        }
+                    ],
+                    "request": {
+                        "auth": {
+                            "type": "bearer",
+                            "bearer": [
+                                {
+                                    "key": "token",
+                                    "value": "{{llm_api_token}}",
+                                    "type": "string"
+                                }
+                            ]
+                        },
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"model\": \"gpt-4o-mini\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"Check if K8s deployment is healthy\"}\n  ],\n  \"conversation\": {\"id\": \"{{conversation_id}}\"},\n  \"augment_with_memory\": true,\n  \"project_id\": \"proj_devops\"\n}"
+                        },
+                        "url": {
+                            "raw": "{{llm_api_url}}/v1/chat/completions",
+                            "host": [
+                                "{{llm_api_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "chat",
+                                "completions"
+                            ]
+                        },
+                        "description": "Memory-enabled request; expects augmented prompt and tool path."
+                    }
+                },
+                {
+                    "name": "Scenario 3: Project Memory Vector Search",
+                    "disabled": true,
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "type": "text/javascript",
+                                "exec": [
+                                    "const token = pm.collectionVariables.get('llm_api_token') || pm.environment.get('llm_api_token');",
+                                    "if (!token) {",
+                                    "    console.log('Skipping LLM API scenario (no token)');",
+                                    "    pm.test('Skipped: llm_api_token not set', function () { pm.expect(true).to.be.true; });",
+                                    "    return;",
+                                    "}",
+                                    "",
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});",
+                                    "pm.test(\"Memory sources present when requested\", function () {",
+                                    "    var jsonData = pm.response.json();",
+                                    "    if (jsonData.choices && jsonData.choices.length > 0) {",
+                                    "        var msg = jsonData.choices[0].message || {};",
+                                    "        pm.expect(msg).to.have.property('memory_sources');",
+                                    "    }",
+                                    "});"
+                                ]
+                            }
+                        }
+                    ],
+                    "request": {
+                        "auth": {
+                            "type": "bearer",
+                            "bearer": [
+                                {
+                                    "key": "token",
+                                    "value": "{{llm_api_token}}",
+                                    "type": "string"
+                                }
+                            ]
+                        },
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"model\": \"gpt-4o-mini\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"What's our database strategy?\"}\n  ],\n  \"conversation\": {\"id\": \"{{conversation_id}}\"},\n  \"augment_with_memory\": true,\n  \"project_id\": \"proj_backend\",\n  \"include_memory_sources\": true\n}"
+                        },
+                        "url": {
+                            "raw": "{{llm_api_url}}/v1/chat/completions",
+                            "host": [
+                                "{{llm_api_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "chat",
+                                "completions"
+                            ]
+                        },
+                        "description": "Exercises project fact retrieval with memory_sources in response."
+                    }
+                },
+                {
+                    "name": "Scenario 4: Graceful Degradation (Memory Down)",
+                    "disabled": true,
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "type": "text/javascript",
+                                "exec": [
+                                    "const token = pm.collectionVariables.get('llm_api_token') || pm.environment.get('llm_api_token');",
+                                    "if (!token) {",
+                                    "    console.log('Skipping LLM API scenario (no token)');",
+                                    "    pm.test('Skipped: llm_api_token not set', function () { pm.expect(true).to.be.true; });",
+                                    "    return;",
+                                    "}",
+                                    "",
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});"
+                                ]
+                            }
+                        }
+                    ],
+                    "request": {
+                        "auth": {
+                            "type": "bearer",
+                            "bearer": [
+                                {
+                                    "key": "token",
+                                    "value": "{{llm_api_token}}",
+                                    "type": "string"
+                                }
+                            ]
+                        },
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"model\": \"gpt-4o-mini\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"What's our tech stack?\"}\n  ],\n  \"conversation\": {\"id\": \"{{conversation_id}}\"},\n  \"augment_with_memory\": true,\n  \"project_id\": \"proj_backend\"\n}"
+                        },
+                        "url": {
+                            "raw": "{{llm_api_url}}/v1/chat/completions",
+                            "host": [
+                                "{{llm_api_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "chat",
+                                "completions"
+                            ]
+                        },
+                        "description": "Run with memory-tools stopped to confirm graceful fallback (status 200 without memory)."
+                    }
+                },
+                {
+                    "name": "Scenario 5: Auto-Promotion to Project Memory (manual verification)",
+                    "disabled": true,
+                    "event": [
+                        {
+                            "listen": "test",
+                            "script": {
+                                "type": "text/javascript",
+                                "exec": [
+                                    "const token = pm.collectionVariables.get('llm_api_token') || pm.environment.get('llm_api_token');",
+                                    "if (!token) {",
+                                    "    console.log('Skipping LLM API scenario (no token)');",
+                                    "    pm.test('Skipped: llm_api_token not set', function () { pm.expect(true).to.be.true; });",
+                                    "    return;",
+                                    "}",
+                                    "",
+                                    "pm.test(\"Status code is 200\", function () {",
+                                    "    pm.response.to.have.status(200);",
+                                    "});"
+                                ]
+                            }
+                        }
+                    ],
+                    "request": {
+                        "auth": {
+                            "type": "bearer",
+                            "bearer": [
+                                {
+                                    "key": "token",
+                                    "value": "{{llm_api_token}}",
+                                    "type": "string"
+                                }
+                            ]
+                        },
+                        "method": "POST",
+                        "header": [
+                            {
+                                "key": "Content-Type",
+                                "value": "application/json"
+                            }
+                        ],
+                        "body": {
+                            "mode": "raw",
+                            "raw": "{\n  \"model\": \"gpt-4o-mini\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"We've decided to use Docker Compose for local dev\"}\n  ],\n  \"conversation\": {\"id\": \"{{conversation_id}}\"},\n  \"augment_with_memory\": true,\n  \"project_id\": \"proj_infra\"\n}"
+                        },
+                        "url": {
+                            "raw": "{{llm_api_url}}/v1/chat/completions",
+                            "host": [
+                                "{{llm_api_url}}"
+                            ],
+                            "path": [
+                                "v1",
+                                "chat",
+                                "completions"
+                            ]
+                        },
+                        "description": "Kick off memory observation; verify later via /v1/memory/load for project facts."
+                    }
+                }
+            ]
+        }
+    ],
+    "event": [
+        {
+            "listen": "prerequest",
+            "script": {
+                "type": "text/javascript",
+                "exec": [
+                    "// Global pre-request script",
+                    "console.log('Running test against:', pm.collectionVariables.get('base_url'));"
+                ]
+            }
+        },
+        {
+            "listen": "test",
+            "script": {
+                "type": "text/javascript",
+                "exec": [
+                    "// Global test script",
+                    "pm.test(\"Response time is acceptable\", function () {",
+                    "    pm.expect(pm.response.responseTime).to.be.below(5000);",
+                    "});"
+                ]
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/tests/automation/responses-postman-scripts.json b/tests/automation/responses-postman-scripts.json
new file mode 100644
index 00000000..ace36c4c
--- /dev/null
+++ b/tests/automation/responses-postman-scripts.json
@@ -0,0 +1,1734 @@
+{
+  "info": {
+    "name": "Response API - Complete Test Suite",
+    "description": "Comprehensive test suite for Response API with MCP tool orchestration, background mode, and webhook notifications",
+    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+  },
+  "variable": [
+    {
+      "key": "kong_url",
+      "value": "http://localhost:8000",
+      "description": "Kong Gateway base URL"
+    },
+    {
+      "key": "guest_access_token",
+      "value": ""
+    },
+    {
+      "key": "response_id",
+      "value": ""
+    },
+    {
+      "key": "conversation_id",
+      "value": ""
+    },
+    {
+      "key": "default_model_id",
+      "value": ""
+    },
+    {
+      "key": "background_response_id",
+      "value": ""
+    },
+    {
+      "key": "long_task_response_id",
+      "value": ""
+    },
+    {
+      "key": "webhook_url",
+      "value": "http://host.docker.internal:9999/test",
+      "description": "Replace with your webhook endpoint"
+    }
+  ],
+  "item": [
+    {
+      "name": "1. Authentication",
+      "item": [
+        {
+          "name": "Request Guest Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{}"
+            },
+            "url": "{{kong_url}}/auth/guest-login"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const guestData = pm.response.json();",
+                  "pm.test('Guest token issued', function () {",
+                  "  pm.response.to.have.status(201);",
+                  "  pm.expect(guestData).to.have.property('access_token');",
+                  "  pm.collectionVariables.set('guest_access_token', guestData.access_token);",
+                  "});",
+                  "pm.test('Response includes expiry', function () {",
+                  "  pm.expect(guestData).to.have.property('expires_in');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "2. Model Catalogue",
+      "item": [
+        {
+          "name": "List Available Models",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Fetch models from LLM API and store the first id as default."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const payload = pm.response.json();",
+                  "pm.test('models request succeeded', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(payload.data).to.be.an('array').that.is.not.empty;",
+                  "});",
+                  "const firstModel = payload.data[0];",
+                  "pm.test('first model exposes id', function () {",
+                  "  pm.expect(firstModel).to.have.property('id');",
+                  "});",
+                  "pm.collectionVariables.set('default_model_id', firstModel.id);"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "2. Health & Service Checks",
+      "item": [
+      {
+        "name": "Response API Health",
+        "request": {
+          "method": "GET",
+          "header": [
+            {
+              "key": "Authorization",
+              "value": "Bearer {{guest_access_token}}"
+            }
+          ],
+          "url": "{{kong_url}}/responses/healthz"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Health check passed', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "MCP Tools Available",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/list\",\n  \"params\": {},\n  \"id\": 1\n}"
+            },
+            "url": "{{mcp_tools_url}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('MCP tools listed', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  const body = pm.response.text();",
+                  "  pm.expect(body).to.include('google_search');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "LLM API Chat Completion Smoke",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"messages\": [\n    {\"role\": \"user\", \"content\": \"Say hello from the health check.\"}\n  ],\n  \"max_tokens\": 20,\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/v1/chat/completions"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('LLM API responded', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  const data = pm.response.json();",
+                  "  pm.expect(data).to.have.property('choices');",
+                  "  pm.expect(data.choices[0]).to.have.property('message');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "3. Basic Responses (No Tools)",
+      "item": [
+        {
+          "name": "Create Simple Text Response",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"What is 2+2?\",\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Response created successfully', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response).to.have.property('id');",
+                  "  pm.expect(response).to.have.property('object', 'response');",
+                  "  pm.expect(response).to.have.property('status');",
+                  "  pm.collectionVariables.set('response_id', response.id);",
+                  "});",
+                  "pm.test('Response has output', function () {",
+                  "  pm.expect(response).to.have.property('output');",
+                  "});",
+                  "pm.test('Status is completed or in_progress', function () {",
+                  "  pm.expect(response.status).to.be.oneOf(['completed', 'in_progress']);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Get Response by ID",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Response retrieved', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response.id).to.equal(pm.collectionVariables.get('response_id'));",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "4. Tool Calling - Google Search",
+      "item": [
+        {
+          "name": "Create Response with Search Tool",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Search for the latest news about OpenAI and summarize the top 3 results\",\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"google_search\",\n        \"description\": \"Search the web using Google\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"q\": {\n              \"type\": \"string\",\n              \"description\": \"Search query\"\n            },\n            \"num\": {\n              \"type\": \"integer\",\n              \"description\": \"Number of results\"\n            }\n          },\n          \"required\": [\"q\"]\n        }\n      }\n    }\n  ],\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Response created with tool call', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response).to.have.property('id');",
+                  "  pm.collectionVariables.set('response_id', response.id);",
+                  "});",
+                  "pm.test('Tool was executed', function () {",
+                  "  // Response should contain tool execution metadata",
+                  "  pm.expect(response.output).to.exist;",
+                  "});",
+                  "pm.test('Response contains search results', function () {",
+                  "  const outputText = JSON.stringify(response.output).toLowerCase();",
+                  "  pm.expect(outputText).to.satisfy(function(text) {",
+                  "    return text.includes('openai') || text.includes('search') || text.includes('result');",
+                  "  });",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "5. Tool Calling - Multi-Step",
+      "item": [
+        {
+          "name": "Search and Scrape Chain",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Search for the official OpenAI website, then scrape the homepage and tell me what's their main product\",\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"google_search\",\n        \"description\": \"Search the web\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"q\": {\"type\": \"string\"}},\n          \"required\": [\"q\"]\n        }\n      }\n    },\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"scrape\",\n        \"description\": \"Scrape webpage content\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"url\": {\"type\": \"string\", \"description\": \"URL to scrape\"}\n          },\n          \"required\": [\"url\"]\n        }\n      }\n    }\n  ],\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Multi-step tool execution completed', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response.status).to.equal('completed');",
+                  "});",
+                  "pm.test('Response contains final answer', function () {",
+                  "  pm.expect(response.output).to.exist;",
+                  "  const outputText = JSON.stringify(response.output).toLowerCase();",
+                  "  pm.expect(outputText.length).to.be.greaterThan(50);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "6. Tool Calling - File Search",
+      "item": [
+        {
+          "name": "Index Document",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Index this document: OpenAI released GPT-4 in March 2023. It's a large multimodal model.\",\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"file_search_index\",\n        \"description\": \"Index text for search\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"document_id\": {\"type\": \"string\"},\n            \"text\": {\"type\": \"string\"},\n            \"metadata\": {\"type\": \"object\"}\n          },\n          \"required\": [\"document_id\", \"text\"]\n        }\n      }\n    }\n  ],\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Document indexed', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Query Indexed Documents",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"When was GPT-4 released? Search my indexed documents.\",\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"file_search_query\",\n        \"description\": \"Query indexed documents\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"query\": {\"type\": \"string\"},\n            \"top_k\": {\"type\": \"integer\"}\n          },\n          \"required\": [\"query\"]\n        }\n      }\n    }\n  ],\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('File search executed', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Response mentions GPT-4', function () {",
+                  "  const outputText = JSON.stringify(response.output).toLowerCase();",
+                  "  pm.expect(outputText).to.include('gpt-4');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "7. Conversation Continuity",
+      "item": [
+        {
+          "name": "Create Response with Conversation",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"My name is Alice and I live in Paris\",\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Response created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.collectionVariables.set('response_id', response.id);",
+                  "  if (response.conversation) {",
+                  "    pm.collectionVariables.set('conversation_id', response.conversation.id);",
+                  "  }",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Continue from Previous Response",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"What's my name and where do I live?\",\n  \"previous_response_id\": \"{{response_id}}\",\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Conversation continued', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Response remembers context', function () {",
+                  "  const outputText = JSON.stringify(response.output).toLowerCase();",
+                  "  pm.expect(outputText).to.include('alice');",
+                  "  pm.expect(outputText).to.include('paris');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "8. Error Handling",
+      "item": [
+        {
+          "name": "Invalid Tool Name",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Use the invalid_tool\",\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"invalid_tool_that_does_not_exist\",\n        \"description\": \"Non-existent tool\",\n        \"parameters\": {\"type\": \"object\", \"properties\": {}}\n      }\n    }\n  ],\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Handles invalid tool gracefully', function () {",
+                  "  // Should either return error or complete without tool",
+                  "  pm.expect(pm.response.code).to.be.oneOf([200, 400, 422]);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Missing Required Parameters",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"input\": \"Test without model\",\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Validates required fields', function () {",
+                  "  pm.response.to.have.status(400);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Cancel Response",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{response_id}}/cancel"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Cancel endpoint exists', function () {",
+                  "  pm.expect(pm.response.code).to.be.oneOf([200, 404, 409]);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "9. Complex Scenario",
+      "item": [
+        {
+          "name": "Search, Scrape, Analyze Chain",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Search for recent AI news, scrape the top result, and execute Python code to count how many times 'AI' appears in the content\",\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"google_search\",\n        \"description\": \"Search the web\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"q\": {\"type\": \"string\"}},\n          \"required\": [\"q\"]\n        }\n      }\n    },\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"scrape\",\n        \"description\": \"Scrape webpage\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"url\": {\"type\": \"string\"}},\n          \"required\": [\"url\"]\n        }\n      }\n    },\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"python_exec\",\n        \"description\": \"Execute Python code\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"code\": {\"type\": \"string\"},\n            \"language\": {\"type\": \"string\"}\n          },\n          \"required\": [\"code\"]\n        }\n      }\n    }\n  ],\n  \"stream\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Complex tool chain executed', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response.status).to.equal('completed');",
+                  "});",
+                  "pm.test('Response contains analysis', function () {",
+                  "  pm.expect(response.output).to.exist;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "10. Basic Background Mode",
+      "item": [
+        {
+          "name": "Create Background Response",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Write a detailed analysis of the history of space exploration, covering major milestones from the 1950s to present day.\",\n  \"background\": true,\n  \"store\": true\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Background response created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response).to.have.property('id');",
+                  "  pm.expect(response).to.have.property('status');",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});",
+                  "pm.test('Status is queued or in_progress', function () {",
+                  "  pm.expect(response.status).to.be.oneOf(['queued', 'in_progress']);",
+                  "});",
+                  "pm.test('Response has no output yet', function () {",
+                  "  // Background responses should not have output immediately",
+                  "  if (response.output) {",
+                  "    pm.expect(response.output).to.be.empty;",
+                  "  }",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Poll Background Response",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Response retrieved', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response.id).to.equal(pm.collectionVariables.get('background_response_id'));",
+                  "});",
+                  "pm.test('Status is valid', function () {",
+                  "  pm.expect(response.status).to.be.oneOf(['queued', 'in_progress', 'completed', 'failed', 'cancelled']);",
+                  "});",
+                  "// Log current status for debugging",
+                  "console.log('Current status: ' + response.status);",
+                  "",
+                  "// If completed, verify output exists",
+                  "if (response.status === 'completed') {",
+                  "  pm.test('Completed response has output', function () {",
+                  "    pm.expect(response.output).to.exist;",
+                  "  });",
+                  "}"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Wait and Poll Again (2s)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "prerequest",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "// Wait 2 seconds before polling",
+                  "setTimeout(function(){}, 2000);"
+                ]
+              }
+            },
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Response still accessible', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "console.log('Status after 2s: ' + response.status);"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "11. Background with Webhooks",
+      "item": [
+        {
+          "name": "Create Background Response with Webhook",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Generate a comprehensive guide to machine learning algorithms with examples.\",\n  \"background\": true,\n  \"store\": true,\n  \"metadata\": {\n    \"webhook_url\": \"{{webhook_url}}\",\n    \"webhook_events\": [\"response.completed\", \"response.failed\"]\n  }\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Background response with webhook created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response).to.have.property('id');",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});",
+                  "pm.test('Initial status is queued or in_progress', function () {",
+                  "  pm.expect(response.status).to.be.oneOf(['queued', 'in_progress']);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Verify Webhook Metadata",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Response has metadata', function () {",
+                  "  pm.expect(response).to.have.property('metadata');",
+                  "});",
+                  "pm.test('Webhook URL stored in metadata', function () {",
+                  "  if (response.metadata) {",
+                  "    pm.expect(response.metadata.webhook_url).to.exist;",
+                  "  }",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "12. Background with Tool Calling",
+      "item": [
+        {
+          "name": "Background Response with Google Search",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Search for the latest developments in quantum computing and provide a detailed summary with sources.\",\n  \"background\": true,\n  \"store\": true,\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"google_search\",\n        \"description\": \"Search the web using Google\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"q\": {\n              \"type\": \"string\",\n              \"description\": \"Search query\"\n            },\n            \"num\": {\n              \"type\": \"integer\",\n              \"description\": \"Number of results\"\n            }\n          },\n          \"required\": [\"q\"]\n        }\n      }\n    }\n  ]\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Background response with tools created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response).to.have.property('id');",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});",
+                  "pm.test('Tools accepted', function () {",
+                  "  pm.expect(response.status).to.be.oneOf(['queued', 'in_progress']);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Poll Tool Execution Progress",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Tool execution trackable', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "// Check if tool execution metadata exists",
+                  "if (response.metadata && response.metadata.tool_calls) {",
+                  "  console.log('Tool calls detected: ' + JSON.stringify(response.metadata.tool_calls));",
+                  "}"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Background Multi-Step Tool Chain",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Search for the OpenAI homepage, scrape its content, and analyze the main products mentioned.\",\n  \"background\": true,\n  \"store\": true,\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"google_search\",\n        \"description\": \"Search the web\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"q\": {\"type\": \"string\"}},\n          \"required\": [\"q\"]\n        }\n      }\n    },\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"scrape\",\n        \"description\": \"Scrape webpage content\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"url\": {\"type\": \"string\"}},\n          \"required\": [\"url\"]\n        }\n      }\n    }\n  ]\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Multi-step background task created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "13. Background Cancellation",
+      "item": [
+        {
+          "name": "Create Long-Running Background Task",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Write a comprehensive 10,000 word essay on the history of artificial intelligence, including detailed examples, citations, and analysis.\",\n  \"background\": true,\n  \"store\": true\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Long-running task created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Cancel Background Response",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}/cancel"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Cancel request accepted', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Status is cancelled or terminal', function () {",
+                  "  pm.expect(response.status).to.be.oneOf(['cancelled', 'completed', 'failed']);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Cancel Again (Idempotent)",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}/cancel"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Idempotent cancellation', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.test('Returns final state', function () {",
+                  "  const response = pm.response.json();",
+                  "  pm.expect(response).to.have.property('status');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Verify Cancelled Status",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Cancelled response persists', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response.status).to.equal('cancelled');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "14. Background Conversation Continuity",
+      "item": [
+        {
+          "name": "Background Response with Context",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"My name is Sarah and I'm a software engineer working on AI systems.\",\n  \"background\": true,\n  \"store\": true\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Context response created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});",
+                  "if (response.conversation) {",
+                  "  pm.collectionVariables.set('conversation_id', response.conversation.id);",
+                  "}"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Wait for First Response",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "prerequest",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "// Wait 3 seconds before polling (fixed delay; the task may still be running)",
+                  "setTimeout(function(){}, 3000);"
+                ]
+              }
+            },
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('First response status checked', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "console.log('Status: ' + response.status);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Continue Conversation in Background",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"What's my name and profession? Please provide a detailed response.\",\n  \"previous_response_id\": \"{{background_response_id}}\",\n  \"background\": true,\n  \"store\": true\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Continuation created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Poll Continuation Response",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Continuation trackable', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "// When completed, check if context was maintained",
+                  "if (response.status === 'completed') {",
+                  "  pm.test('Context maintained', function () {",
+                  "    const outputText = JSON.stringify(response.output).toLowerCase();",
+                  "    pm.expect(outputText).to.satisfy(function(text) {",
+                  "      return text.includes('sarah') || text.includes('engineer');",
+                  "    });",
+                  "  });",
+                  "}"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "15. Background Error Handling",
+      "item": [
+        {
+          "name": "Background Without Store (Should Fail)",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Test background without store\",\n  \"background\": true,\n  \"store\": false\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Rejects background without store', function () {",
+                  "  // Per OpenAI docs: background requires store=true",
+                  "  pm.expect(pm.response.code).to.be.oneOf([400, 422]);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Poll Non-Existent Background Response",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/resp_nonexistent123"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Returns 404 for missing response', function () {",
+                  "  pm.response.to.have.status(404);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Cancel Non-Existent Background Response",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/resp_nonexistent123/cancel"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('Returns error for missing response', function () {",
+                  "  pm.expect(pm.response.code).to.be.oneOf([404, 400]);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "16. Complex Background Scenarios",
+      "item": [
+        {
+          "name": "Parallel Background Tasks",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Analyze the differences between neural networks and decision trees in machine learning.\",\n  \"background\": true,\n  \"store\": true,\n  \"metadata\": {\n    \"task_id\": \"parallel_1\",\n    \"webhook_url\": \"{{webhook_url}}\"\n  }\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('First parallel task created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "pm.environment.set('parallel_task_1', response.id);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Background with Multiple Tools",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Research the latest AI breakthroughs, scrape relevant articles, and write Python code to analyze the word frequency of 'neural network' across all sources.\",\n  \"background\": true,\n  \"store\": true,\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"google_search\",\n        \"description\": \"Search the web\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"q\": {\"type\": \"string\"}},\n          \"required\": [\"q\"]\n        }\n      }\n    },\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"scrape\",\n        \"description\": \"Scrape webpage\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"url\": {\"type\": \"string\"}},\n          \"required\": [\"url\"]\n        }\n      }\n    },\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"python_exec\",\n        \"description\": \"Execute Python code\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\"code\": {\"type\": \"string\"}},\n          \"required\": [\"code\"]\n        }\n      }\n    }\n  ],\n  \"metadata\": {\n    \"webhook_url\": \"{{webhook_url}}\",\n    \"complexity\": \"high\"\n  }\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Complex multi-tool background task created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Poll Complex Task Progress",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Complex task progressing', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "console.log('Status: ' + response.status);",
+                  "if (response.metadata) {",
+                  "  console.log('Metadata: ' + JSON.stringify(response.metadata));",
+                  "}"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "17. Background Monitoring & Observability",
+      "item": [
+        {
+          "name": "Create Monitored Background Task",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Generate a detailed technical specification for a REST API.\",\n  \"background\": true,\n  \"store\": true,\n  \"metadata\": {\n    \"trace_id\": \"test-trace-001\",\n    \"user_id\": \"test-user\",\n    \"priority\": \"high\",\n    \"webhook_url\": \"{{webhook_url}}\"\n  }\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Monitored task created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.collectionVariables.set('background_response_id', response.id);",
+                  "});",
+                  "pm.test('Trace metadata preserved', function () {",
+                  "  if (response.metadata) {",
+                  "    pm.expect(response.metadata.trace_id).to.equal('test-trace-001');",
+                  "  }",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Check Response Timing",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{background_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Timing metadata exists', function () {",
+                  "  pm.expect(response).to.have.property('created_at');",
+                  "});",
+                  "if (response.status === 'completed') {",
+                  "  pm.test('Completion time recorded', function () {",
+                  "    pm.expect(response).to.have.property('completed_at');",
+                  "  });",
+                  "}"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "18. Long-Running Research Task",
+      "item": [
+        {
+          "name": "Create Long Research Task",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{default_model_id}}\",\n  \"input\": \"Research the economic impact of semaglutide on global healthcare systems.\\nDo:\\n- Include specific figures, trends, statistics, and measurable outcomes.\\n- Prioritize reliable, up-to-date sources: peer-reviewed research, health organizations (e.g., WHO, CDC), regulatory agencies, or pharmaceutical earnings reports.\\n- Include inline citations and return all source metadata.\\n\\nBe analytical, avoid generalities, and ensure that each section supports data-backed reasoning that could inform healthcare policy or financial modeling.\",\n  \"background\": true,\n  \"store\": true,\n  \"metadata\": {\n    \"task_type\": \"research\",\n    \"complexity\": \"very_high\",\n    \"estimated_duration\": \"long\",\n    \"webhook_url\": \"{{webhook_url}}\"\n  }\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Long research task created', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(response.background).to.be.true;",
+                  "  pm.expect(response.store).to.be.true;",
+                  "  pm.collectionVariables.set('long_task_response_id', response.id);",
+                  "});",
+                  "pm.test('Task metadata preserved', function () {",
+                  "  pm.expect(response.metadata).to.have.property('task_type', 'research');",
+                  "  pm.expect(response.metadata).to.have.property('complexity', 'very_high');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Poll Long Task Status (Initial)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{long_task_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Long task is processing', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  const validStatuses = ['queued', 'in_progress', 'completed'];",
+                  "  pm.expect(validStatuses).to.include(response.status);",
+                  "});",
+                  "console.log('Long task status: ' + response.status);",
+                  "console.log('Task ID: ' + response.id);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Wait and Poll Long Task (10s)",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{long_task_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Poll request succeeded', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "console.log('Status after delay: ' + response.status);",
+                  "if (response.status === 'completed' && response.output) {",
+                  "  console.log('Task completed with output length: ' + JSON.stringify(response.output).length + ' chars');",
+                  "}",
+                  "if (response.status === 'failed' && response.error) {",
+                  "  console.log('Task failed with error: ' + JSON.stringify(response.error));",
+                  "}"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Final Status Check",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{long_task_response_id}}"
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const response = pm.response.json();",
+                  "pm.test('Long task reports a recognized status', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  const terminalStatuses = ['completed', 'failed', 'cancelled'];",
+                  "  const inProgressStatuses = ['queued', 'in_progress'];",
+                  "  const allStatuses = [...terminalStatuses, ...inProgressStatuses];",
+                  "  pm.expect(allStatuses).to.include(response.status);",
+                  "});",
+                  "console.log('Final status: ' + response.status);",
+                  "if (response.usage) {",
+                  "  console.log('Token usage: ' + JSON.stringify(response.usage));",
+                  "}"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
diff --git a/tests/automation/test-all.postman.json b/tests/automation/test-all.postman.json
new file mode 100644
index 00000000..34efb6f9
--- /dev/null
+++ b/tests/automation/test-all.postman.json
@@ -0,0 +1,485 @@
+{
+  "info": {
+    "name": "Gateway Full Stack Tests",
+    "description": "Covers auth, media, LLM, MCP, and Responses APIs exclusively through the Kong gateway.",
+    "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
+  },
+  "item": [
+    {
+      "name": "1. Authentication",
+      "item": [
+        {
+          "name": "Request Guest Token",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{}"
+            },
+            "url": "{{kong_url}}/auth/guest-login",
+            "description": "Provision a guest identity for downstream authenticated calls."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const authPayload = pm.response.json();",
+                  "pm.test('guest token issued', function () {",
+                  "  pm.response.to.have.status(201);",
+                  "  pm.expect(authPayload).to.have.property('access_token');",
+                  "});",
+                  "pm.collectionVariables.set('guest_access_token', authPayload.access_token);",
+                  "pm.collectionVariables.set('guest_user_id', authPayload.user_id || 'guest');",
+                  "pm.collectionVariables.set('guest_refresh_token', authPayload.refresh_token || '');"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "2. Model Catalogue",
+      "item": [
+        {
+          "name": "List Available Models",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/v1/models",
+            "description": "Fetch available models via Kong."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var payload = pm.response.json();",
+                  "pm.test('models request succeeded', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(payload.data).to.be.an('array').that.is.not.empty;",
+                  "});",
+                  "pm.collectionVariables.set('model_id', payload.data[0].id);",
+                  "pm.collectionVariables.set('model_label', payload.data[0].id || payload.data[0].name);"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "3. Media Workflows",
+      "item": [
+        {
+          "name": "Upload Sample Image",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "X-Media-Service-Key",
+                "value": "{{media_service_key}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"source\": {\n    \"type\": \"data_url\",\n    \"data_url\": \"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAuMBg6W7NcgAAAAASUVORK5CYII=\"\n  },\n  \"user_id\": \"{{guest_user_id}}\",\n  \"filename\": \"semaglutide.png\"\n}"
+            },
+            "url": "{{kong_url}}/media/v1/media",
+            "description": "Upload a tiny PNG via data URL and capture jan_id + presigned URL."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const mediaPayload = pm.response.json();",
+                  "pm.test('media upload succeeded', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(mediaPayload.id).to.match(/^jan_/);",
+                  "});",
+                  "pm.test('presigned download url issued', function () {",
+                  "  pm.expect(mediaPayload.presigned_url).to.be.a('string').and.not.empty;",
+                  "});",
+                  "pm.test('presigned url is external', function () {",
+                  "  pm.expect(mediaPayload.presigned_url).to.match(/^https?:\\/\\//);",
+                  "});",
+                  "pm.collectionVariables.set('media_id', mediaPayload.id);",
+                  "pm.collectionVariables.set('media_placeholder', `data:${mediaPayload.mime};${mediaPayload.id}`);",
+                  "pm.collectionVariables.set('media_presigned_url', mediaPayload.presigned_url);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Upload Remote Image",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "X-Media-Service-Key",
+                "value": "{{media_service_key}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"source\": {\n    \"type\": \"remote_url\",\n    \"url\": \"https://httpbin.org/image/png\"\n  },\n  \"user_id\": \"{{guest_user_id}}\",\n  \"filename\": \"semaglutide-remote.png\"\n}"
+            },
+            "url": "{{kong_url}}/media/v1/media",
+            "description": "Fetch an external image via remote_url ingestion."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const remotePayload = pm.response.json();",
+                  "pm.test('remote media upload succeeded', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(remotePayload.id).to.match(/^jan_/);",
+                  "});",
+                  "pm.test('remote mime is image', function () {",
+                  "  pm.expect(remotePayload.mime).to.match(/^image\\//);",
+                  "});",
+                  "pm.test('remote media has bytes', function () {",
+                  "  pm.expect(remotePayload.bytes).to.be.above(0);",
+                  "});",
+                  "pm.collectionVariables.set('media_remote_id', remotePayload.id);"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Resolve Placeholder",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "X-Media-Service-Key",
+                "value": "{{media_service_key}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"payload\": {\n    \"hero_image\": \"{{media_placeholder}}\"\n  }\n}"
+            },
+            "url": "{{kong_url}}/media/v1/media/resolve",
+            "description": "Swap jan_* placeholder for a signed download URL."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const body = pm.response.json();",
+                  "pm.test('resolve succeeded', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "});",
+                  "let resolvedPayload = body.payload;",
+                  "if (typeof resolvedPayload === 'string') {",
+                  "  try {",
+                  "    resolvedPayload = JSON.parse(resolvedPayload);",
+                  "  } catch (err) {",
+                  "    // fallback to raw string",
+                  "  }",
+                  "}",
+                  "const flattened = typeof resolvedPayload === 'string'",
+                  "  ? resolvedPayload",
+                  "  : JSON.stringify(resolvedPayload || {});",
+                  "pm.test('jan placeholder replaced', function () {",
+                  "  pm.expect(flattened).to.include('http');",
+                  "  pm.expect(flattened).to.not.include('data:image');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Prepare Upload Slot",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              },
+              {
+                "key": "X-Media-Service-Key",
+                "value": "{{media_service_key}}"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"mime_type\": \"image/png\",\n  \"user_id\": \"{{guest_user_id}}\"\n}"
+            },
+            "url": "{{kong_url}}/media/v1/media/prepare-upload",
+            "description": "Reserve a jan_id and obtain a presigned upload URL."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "const prep = pm.response.json();",
+                  "pm.test('upload slot issued', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(prep.upload_url).to.be.a('string').and.not.empty;",
+                  "  pm.expect(prep.id).to.match(/^jan_/);",
+                  "});",
+                  "pm.test('upload url is external', function () {",
+                  "  pm.expect(prep.upload_url).to.match(/^https?:\\/\\//);",
+                  "});",
+                  "pm.collectionVariables.set('upload_media_id', prep.id);",
+                  "pm.collectionVariables.set('upload_url', prep.upload_url);"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "4. LLM Completion",
+      "item": [
+        {
+          "name": "Call Chat Completions with jan_id",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{model_id}}\",\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": [\n        {\n          \"type\": \"text\",\n          \"text\": \"Describe this image briefly, mention semaglutide, and cite notable visual cues.\"\n        },\n        {\n          \"type\": \"image_url\",\n          \"image_url\": {\n            \"url\": \"{{media_placeholder}}\"\n          }\n        }\n      ]\n    }\n  ],\n  \"max_tokens\": 12000,\n  \"temperature\": 0.4\n}"
+            },
+            "url": "{{kong_url}}/v1/chat/completions",
+            "description": "Ensure models can reference stored media via jan_id through the gateway."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var resp = pm.response.json();",
+                  "pm.test('chat completion ok', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(resp.choices).to.be.an('array').that.is.not.empty;",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "5. MCP Tools",
+      "item": [
+        {
+          "name": "List Tools",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"jsonrpc\": \"2.0\",\n  \"method\": \"tools/list\",\n  \"params\": {},\n  \"id\": 1\n}"
+            },
+            "url": "{{kong_url}}/mcp",
+            "description": "Verify MCP tool discovery via Kong."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "pm.test('mcp tools listed', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(pm.response.text()).to.include('google_search');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    },
+    {
+      "name": "6. Responses API",
+      "item": [
+        {
+          "name": "Create Analytical Response",
+          "request": {
+            "method": "POST",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              },
+              {
+                "key": "Content-Type",
+                "value": "application/json"
+              }
+            ],
+            "body": {
+              "mode": "raw",
+              "raw": "{\n  \"model\": \"{{model_id}}\",\n  \"input\": \"Research the economic impact of semaglutide on global healthcare systems.\\nDo:\\n- Include specific figures, trends, statistics, and measurable outcomes.\\n- Prioritize reliable, up-to-date sources: peer-reviewed research, health organizations (e.g., WHO, CDC), regulatory agencies, or pharmaceutical earnings reports.\\n- Include inline citations and return all source metadata.\\n\\nBe analytical, avoid generalities, and ensure that each section supports data-backed reasoning that could inform healthcare policy or financial modeling.\",\n  \"stream\": false,\n  \"metadata\": {\n    \"suite\": \"gateway-e2e\",\n    \"media_reference\": \"{{media_id}}\"\n  },\n  \"tool_choice\": {\n    \"type\": \"auto\"\n  },\n  \"tools\": [\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"google_search\",\n        \"description\": \"Search the public web for authoritative results\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"q\": {\n              \"type\": \"string\",\n              \"description\": \"query\"\n            },\n            \"num\": {\n              \"type\": \"integer\",\n              \"description\": \"number of results\"\n            }\n          },\n          \"required\": [\n            \"q\"\n          ]\n        }\n      }\n    },\n    {\n      \"type\": \"function\",\n      \"function\": {\n        \"name\": \"scrape\",\n        \"description\": \"Scrape the contents of a URL\",\n        \"parameters\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"url\": {\n              \"type\": \"string\"\n            },\n            \"include_markdown\": {\n              \"type\": \"boolean\"\n            }\n          },\n          \"required\": [\n            \"url\"\n          ]\n        }\n      }\n    }\n  ]\n}"
+            },
+            "url": "{{kong_url}}/responses/v1/responses",
+            "description": "Run the complex semaglutide prompt with tool orchestration."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var body = pm.response.json();",
+                  "pm.test('response request accepted', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(body.id).to.match(/^resp_/);",
+                  "});",
+                  "pm.collectionVariables.set('response_id', body.id);",
+                  "pm.expect(body.status).to.be.oneOf(['completed', 'in_progress']);",
+                  "pm.test('response includes a status field', function () {",
+                  "  pm.expect(body).to.have.property('status');",
+                  "});"
+                ]
+              }
+            }
+          ]
+        },
+        {
+          "name": "Fetch Response",
+          "request": {
+            "method": "GET",
+            "header": [
+              {
+                "key": "Authorization",
+                "value": "Bearer {{guest_access_token}}"
+              }
+            ],
+            "url": "{{kong_url}}/responses/v1/responses/{{response_id}}",
+            "description": "Fetch the stored response by id and confirm it has not failed."
+          },
+          "event": [
+            {
+              "listen": "test",
+              "script": {
+                "type": "text/javascript",
+                "exec": [
+                  "var result = pm.response.json();",
+                  "pm.test('response fetched', function () {",
+                  "  pm.response.to.have.status(200);",
+                  "  pm.expect(result.status).to.not.eql('failed');",
+                  "});",
+                  "pm.expect(result).to.have.property('output');"
+                ]
+              }
+            }
+          ]
+        }
+      ]
+    }
+  ],
+  "variable": [
+    {
+      "key": "gateway_url",
+      "value": "http://localhost:8000"
+    },
+    {
+      "key": "kong_url",
+      "value": "http://localhost:8000"
+    },
+    {
+      "key": "media_service_key",
+      "value": "changeme-media-key"
+    },
+    {
+      "key": "guest_access_token",
+      "value": ""
+    },
+    {
+      "key": "guest_user_id",
+      "value": ""
+    },
+    {
+      "key": "model_id",
+      "value": ""
+    },
+    {
+      "key": "media_id",
+      "value": ""
+    },
+    {
+      "key": "media_placeholder",
+      "value": ""
+    },
+    {
+      "key": "media_presigned_url",
+      "value": ""
+    },
+    {
+      "key": "upload_media_id",
+      "value": ""
+    },
+    {
+      "key": "upload_url",
+      "value": ""
+    },
+    {
+      "key": "response_id",
+      "value": ""
+    },
+    {
+      "key": "media_remote_id",
+      "value": ""
+    }
+  ]
+}
diff --git a/tests/docker-entrypoint.sh b/tests/docker-entrypoint.sh
deleted file mode 100644
index 4e1ddfb8..00000000
--- a/tests/docker-entrypoint.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-# Prefer mounted /tests; fallback to baked-in /app
-if [[ -d "/tests" ]]; then
-  WORKDIR="/tests"
-else
-  WORKDIR="/app"
-fi
-
-cd "$WORKDIR"
-
-# Load .env if present
-if [[ -f .env ]]; then
-  set -a
-  source .env
-  set +a
-fi
-
-# If first arg is a known test file name or empty, use run-loadtest.sh
-if [[ $# -eq 0 ]]; then
-  exec bash ./run-loadtest.sh
-fi
-
-case "$1" in
-  *.js)
-    # Run k6 directly with provided script
-    shift
-    exec k6 run "$@"
-    ;;
-  run|run-all)
-    shift || true
-    exec bash ./run-loadtest.sh "$@"
-    ;;
-  *)
-    # Pass through to k6 for custom commands
-    exec k6 "$@"
-    ;;
-esac
diff --git a/tests/grafana/HOW_TO_READ_RESULT.md b/tests/grafana/HOW_TO_READ_RESULT.md
deleted file mode 100644
index e4b0865f..00000000
--- a/tests/grafana/HOW_TO_READ_RESULT.md
+++ /dev/null
@@ -1,406 +0,0 @@
-# How to Read Test Results with Grafana
-
-This guide shows you how to set up Grafana with Docker to visualize your K6 test results locally.
-
-## Overview
-
-This setup provides:
-- **Local Grafana instance** running in Docker
-- **Prometheus** for metrics storage
-- **Pre-built dashboard** for K6 test visualization
-- **Real-time monitoring** of test performance
-- **Historical analysis** of test trends
-
-## Prerequisites
-
-- Docker and Docker Compose installed
-- Basic understanding of Docker containers
-- Ports 3000 (Grafana) and 9090 (Prometheus) available
-
-## Quick Start
-
-### 1. Create Docker Compose Setup
-
-Create a `docker-compose.yml` file in your `tests/` directory:
-
-```yaml
-version: '3.8'
-
-services:
-  prometheus:
-    image: prom/prometheus:latest
-    container_name: janai-prometheus
-    ports:
-      - "9090:9090"
-    volumes:
-      - ./prometheus.yml:/etc/prometheus/prometheus.yml
-      - prometheus_data:/prometheus
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-      - '--storage.tsdb.path=/prometheus'
-      - '--web.console.libraries=/etc/prometheus/console_libraries'
-      - '--web.console.templates=/etc/prometheus/consoles'
-      - '--storage.tsdb.retention.time=200h'
-      - '--web.enable-lifecycle'
-    networks:
-      - monitoring
-
-  grafana:
-    image: grafana/grafana:latest
-    container_name: janai-grafana
-    ports:
-      - "3000:3000"
-    volumes:
-      - grafana_data:/var/lib/grafana
-      - ./grafana-dashboard.json:/var/lib/grafana/dashboards/k6-dashboard.json
-      - ./grafana-provisioning:/etc/grafana/provisioning
-    environment:
-      - GF_SECURITY_ADMIN_PASSWORD=admin
-      - GF_USERS_ALLOW_SIGN_UP=false
-    networks:
-      - monitoring
-
-volumes:
-  prometheus_data:
-  grafana_data:
-
-networks:
-  monitoring:
-    driver: bridge
-```
-
-### 2. Create Prometheus Configuration
-
-Create `prometheus.yml` in your `tests/` directory:
-
-```yaml
-global:
-  scrape_interval: 15s
-  evaluation_interval: 15s
-
-rule_files:
-  # - "first_rules.yml"
-  # - "second_rules.yml"
-
-scrape_configs:
-  - job_name: 'prometheus'
-    static_configs:
-      - targets: ['localhost:9090']
-
-  - job_name: 'k6'
-    static_configs:
-      - targets: ['host.docker.internal:9090']
-    metrics_path: /api/v1/write
-    scheme: http
-```
-
-### 3. Create Grafana Provisioning
-
-Create the directory structure and files:
-
-```bash
-mkdir -p grafana-provisioning/datasources
-mkdir -p grafana-provisioning/dashboards
-```
-
-Create `grafana-provisioning/datasources/prometheus.yml`:
-
-```yaml
-apiVersion: 1
-
-datasources:
-  - name: Prometheus
-    type: prometheus
-    access: proxy
-    url: http://prometheus:9090
-    isDefault: true
-    editable: true
-```
-
-Create `grafana-provisioning/dashboards/dashboard.yml`:
-
-```yaml
-apiVersion: 1
-
-providers:
-  - name: 'default'
-    orgId: 1
-    folder: ''
-    type: file
-    disableDeletion: false
-    updateIntervalSeconds: 10
-    allowUiUpdates: true
-    options:
-      path: /var/lib/grafana/dashboards
-```
-
-### 4. Start the Monitoring Stack
-
-```bash
-# Start Grafana and Prometheus
-docker-compose up -d
-
-# Check if containers are running
-docker-compose ps
-```
-
-### 5. Access Grafana
-
-Open your browser and go to:
-- **Grafana**: http://localhost:3000
-- **Username**: `admin`
-- **Password**: `admin`
-
-## Running Tests with Metrics
-
-### Method 1: Direct K6 with Prometheus Remote Write
-
-```bash
-# Set environment variables
-export K6_PROMETHEUS_RW_SERVER_URL="http://localhost:9090/api/v1/write"
-export K6_PROMETHEUS_RW_TREND_STATS="p(95),p(99),min,max"
-export K6_PROMETHEUS_RW_PUSH_INTERVAL="5s"
-
-# Run test with metrics export
-k6 run --out experimental-prometheus-rw src/test-completion-standard.js
-```
-
-### Method 2: Using Docker with Host Network
-
-```bash
-# Run K6 test with metrics to local Prometheus
-docker run --rm -it \
-   --network host \
-   -e BASE=https://api-stag.jan.ai \
-   -e MODEL=jan-v1-4b \
-   -e DEBUG=true \
-   -e K6_PROMETHEUS_RW_SERVER_URL="http://localhost:9090/api/v1/write" \
-   -e K6_PROMETHEUS_RW_TREND_STATS="p(95),p(99),min,max" \
-   -e K6_PROMETHEUS_RW_PUSH_INTERVAL="5s" \
-   janai/k6-tests:local run test-completion-standard
-```
-
-### Method 3: Using Our Test Runner
-
-```bash
-# Set environment variables
-export K6_PROMETHEUS_RW_SERVER_URL="http://localhost:9090/api/v1/write"
-export K6_PROMETHEUS_RW_TREND_STATS="p(95),p(99),min,max"
-export K6_PROMETHEUS_RW_PUSH_INTERVAL="5s"
-
-# Run using our test runner
-./run-loadtest.sh test-completion-standard
-```
-
-## Dashboard Features
-
-### Main Panels
-
-1. **LLM Performance Overview**
-   - Response time percentiles (p95, p99)
-   - Tokens per second
-   - Time to first byte (TTFB)
-   - Queue time metrics
-
-2. **HTTP Performance**
-   - Request duration trends
-   - Request rate (RPS)
-   - Error rate percentage
-   - Response size distribution
-
-3. **Custom K6 Metrics**
-   - Guest login time
-   - Token refresh time
-   - Completion time (non-streaming)
-   - Streaming completion time
-   - Tool call response time (extended)
-
-4. **Test Segmentation**
-   - By test case (test-completion-standard, test-responses, etc.)
-   - By test ID (individual run tracking)
-   - By environment (staging vs production)
-
-### Key Metrics to Monitor
-
-```prometheus
-# Response times
-k6_http_req_duration{testid="test-completion-standard_20250923_042450_1"}
-
-# Custom completion metrics
-k6_completion_time_ms{testid="test-completion-standard_20250923_042450_1"}
-k6_guest_login_time_ms{testid="test-completion-standard_20250923_042450_1"}
-
-# Tool call metrics (extended timeouts)
-k6_response_time_with_tools_ms{testid="test-responses_20250923_042450_1"}
-
-# Error rates
-k6_http_req_failed{testid="test-completion-standard_20250923_042450_1"}
-
-# Throughput
-k6_http_reqs{testid="test-completion-standard_20250923_042450_1"}
-```
-
-## Dashboard Navigation
-
-### Time Range Selection
-- **Last 5 minutes**: For real-time monitoring
-- **Last hour**: For recent test analysis
-- **Last 24 hours**: For daily trends
-- **Last 7 days**: For weekly patterns
-
-### Panel Interactions
-- **Click and drag**: Zoom into specific time ranges
-- **Panel menu**: Access panel options and edit
-- **Refresh**: Manual refresh or auto-refresh (5s, 10s, 30s, 1m, 5m, 15m, 30m, 1h)
-
-### Alerting
-- **Threshold alerts**: Set up alerts for response time limits
-- **Error rate alerts**: Monitor failure rates
-- **Performance regression**: Detect performance degradation
-
-## Troubleshooting
-
-### Common Issues
-
-1. **Metrics not appearing in Grafana**
-   ```bash
-   # Check Prometheus targets
-   curl http://localhost:9090/api/v1/targets
-   
-   # Check if K6 is sending metrics
-   curl http://localhost:9090/api/v1/query?query=k6_http_reqs
-   ```
-
-2. **Connection refused errors**
-   ```bash
-   # Check if containers are running
-   docker-compose ps
-   
-   # Check container logs
-   docker-compose logs prometheus
-   docker-compose logs grafana
-   ```
-
-3. **Dashboard not loading**
-   ```bash
-   # Check Grafana logs
-   docker-compose logs grafana
-   
-   # Restart Grafana
-   docker-compose restart grafana
-   ```
-
-### Port Conflicts
-
-If ports 3000 or 9090 are in use:
-
-```yaml
-# Modify docker-compose.yml
-services:
-  grafana:
-    ports:
-      - "3001:3000"  # Change to 3001
-  prometheus:
-    ports:
-      - "9091:9090"  # Change to 9091
-```
-
-Then update your K6 command:
-```bash
-export K6_PROMETHEUS_RW_SERVER_URL="http://localhost:9091/api/v1/write"
-```
-
-## Advanced Configuration
-
-### Custom Dashboard Panels
-
-You can add custom panels to monitor specific metrics:
-
-1. **Add Panel**: Click "+" → "Add panel"
-2. **Query**: Use Prometheus queries like:
-   ```prometheus
-   # Average response time
-   avg(k6_http_req_duration)
-   
-   # Error rate percentage
-   rate(k6_http_req_failed[5m]) * 100
-   
-   # Requests per second
-   rate(k6_http_reqs[5m])
-   ```
-
-### Alerting Rules
-
-Create alerting rules in Prometheus:
-
-```yaml
-# prometheus-alerts.yml
-groups:
-  - name: k6-alerts
-    rules:
-      - alert: HighResponseTime
-        expr: k6_http_req_duration > 5
-        for: 1m
-        labels:
-          severity: warning
-        annotations:
-          summary: "High response time detected"
-          
-      - alert: HighErrorRate
-        expr: rate(k6_http_req_failed[5m]) > 0.05
-        for: 1m
-        labels:
-          severity: critical
-        annotations:
-          summary: "High error rate detected"
-```
-
-## Cleanup
-
-To stop and remove all containers:
-
-```bash
-# Stop containers
-docker-compose down
-
-# Remove volumes (WARNING: This deletes all data)
-docker-compose down -v
-
-# Remove images
-docker-compose down --rmi all
-```
-
-## Example Workflow
-
-1. **Start monitoring stack**:
-   ```bash
-   docker-compose up -d
-   ```
-
-2. **Run a test with metrics**:
-   ```bash
-   export K6_PROMETHEUS_RW_SERVER_URL="http://localhost:9090/api/v1/write"
-   ./run-loadtest.sh test-completion-standard
-   ```
-
-3. **View results in Grafana**:
-   - Open http://localhost:3000
-   - Login with admin/admin
-   - Navigate to "K6 Load Test Dashboard"
-   - Select time range "Last 5 minutes"
-
-4. **Analyze performance**:
-   - Check response time trends
-   - Monitor error rates
-   - Compare different test runs
-   - Set up alerts for thresholds
-
-## Next Steps
-
-- **Set up alerts** for performance thresholds
-- **Create custom dashboards** for specific use cases
-- **Integrate with CI/CD** for automated monitoring
-- **Export dashboards** for team sharing
-- **Configure retention policies** for long-term analysis
-
-This setup gives you comprehensive visibility into your K6 test performance with real-time monitoring and historical analysis capabilities! 📊✨
diff --git a/tests/grafana/README.md b/tests/grafana/README.md
deleted file mode 100644
index edc74fae..00000000
--- a/tests/grafana/README.md
+++ /dev/null
@@ -1,144 +0,0 @@
-# Grafana Monitoring Setup
-
-This directory contains all Grafana and Prometheus monitoring components for K6 load tests.
-
-## 📁 Directory Structure
-
-```
-grafana/
-├── README.md                           # This file
-├── docker-compose.yml                  # Docker Compose for Grafana + Prometheus
-├── grafana-dashboard.json              # Pre-built K6 dashboard
-├── prometheus.yml                      # Prometheus configuration
-└── grafana-provisioning/               # Grafana auto-provisioning
-    ├── dashboards/
-    │   └── dashboard.yml              # Dashboard provisioning config
-    └── datasources/
-        └── prometheus.yml              # Prometheus datasource config
-
-../ (parent directory)
-├── setup-monitoring.sh                 # Linux/Mac setup script
-├── setup-monitoring.bat                # Windows setup script
-├── run-test-with-monitoring.sh         # Linux/Mac test runner with metrics
-└── run-test-with-monitoring.bat        # Windows test runner with metrics
-```
-
-## 🚀 Quick Start
-
-### 1. Start Monitoring Stack
-
-**Linux/Mac:**
-```bash
-../setup-monitoring.sh
-```
-
-**Windows:**
-```cmd
-..\setup-monitoring.bat
-```
-
-### 2. Run Tests with Metrics
-
-**Linux/Mac:**
-```bash
-../run-test-with-monitoring.sh test-completion-standard
-```
-
-**Windows:**
-```cmd
-..\run-test-with-monitoring.bat test-completion-standard
-```
-
-### 3. View Results
-
-- **Grafana Dashboard**: http://localhost:3000 (admin/admin)
-- **Prometheus**: http://localhost:9090
-
-## 📊 What's Included
-
-### Grafana Dashboard Features
-- **HTTP Performance Metrics**: Request duration, throughput, error rates
-- **Test Segmentation**: Filter by Test ID and Test Case
-- **Real-time Monitoring**: 5-second refresh intervals
-- **Comprehensive Coverage**: All K6 built-in metrics
-
-### Prometheus Configuration
-- **Remote Write Receiver**: Enabled for K6 metrics
-- **Data Retention**: 200 hours (~8 days), set via `--storage.tsdb.retention.time=200h` in `docker-compose.yml`
-- **Scrape Intervals**: Optimized for load testing
-
-### Auto-Provisioning
-- **Dashboard**: Automatically loads K6 dashboard on startup
-- **Datasource**: Prometheus connection configured automatically
-- **No Manual Setup**: Everything works out of the box
-
-## 🔧 Manual Setup (Alternative)
-
-If you prefer manual setup:
-
-```bash
-# Start services
-docker-compose up -d
-
-# Wait for services to start
-sleep 10
-
-# Run test with metrics
-export K6_PROMETHEUS_RW_SERVER_URL="http://localhost:9090/api/v1/write"
-k6 run --out experimental-prometheus-rw ../src/test-completion-standard.js
-```
-
-## 📈 Available Metrics
-
-The dashboard displays these K6 metrics:
-
-- `k6_http_reqs_total` - Total HTTP requests
-- `k6_http_req_duration_p95` - 95th percentile response time
-- `k6_http_req_failed_rate` - HTTP error rate
-- `k6_vus` - Virtual users
-- `k6_iterations_total` - Total test iterations
-- `k6_checks_rate` - Check success rate
-
-## 🛠️ Troubleshooting
-
-### Services Not Starting
-```bash
-# Check logs
-docker-compose logs
-
-# Restart services
-docker-compose down
-docker-compose up -d
-```
-
-### No Metrics in Grafana
-1. Verify Prometheus is running: http://localhost:9090
-2. Check K6 environment variable: `K6_PROMETHEUS_RW_SERVER_URL`
-3. Ensure test is using `--out experimental-prometheus-rw`
-
-### Dashboard Not Loading
-1. Check Grafana logs: `docker-compose logs grafana`
-2. Verify dashboard file: `grafana-dashboard.json`
-3. Check provisioning config: `grafana-provisioning/dashboards/dashboard.yml`
-
-## 📚 Documentation
-
-- **Complete Setup Guide**: See `HOW_TO_READ_RESULT.md` in this directory
-- **Main Test Documentation**: See `../README.md`
-- **Adding New Tests**: See `../HOW_TO_ADD_TESTS.md`
-
-## 🔄 Updates
-
-When updating the dashboard or configuration:
-
-1. **Dashboard**: Update `grafana-dashboard.json`
-2. **Config**: Update `prometheus.yml` or provisioning files
-3. **Restart**: Run `docker-compose restart` to apply changes
-
-## 🌐 External Access
-
-To access from other machines:
-
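-1. Check the `docker-compose.yml` port mappings (`3000:3000`, `9090:9090`): Docker already publishes them on all interfaces (`0.0.0.0`) by default, so make sure they are not restricted to `127.0.0.1` and that the host firewall allows these ports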
-2. Update Prometheus URL in scripts: `http://YOUR_IP:9090/api/v1/write`
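-3. Recreate services so that any `docker-compose.yml` changes take effect: `docker-compose up -d` (`docker-compose restart` does not apply compose file changes)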
diff --git a/tests/grafana/docker-compose.yml b/tests/grafana/docker-compose.yml
deleted file mode 100644
index 48e778da..00000000
--- a/tests/grafana/docker-compose.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-version: '3.8'
-
-services:
-  prometheus:
-    image: prom/prometheus:latest
-    container_name: janai-prometheus
-    ports:
-      - "9090:9090"
-    volumes:
-      - ./prometheus.yml:/etc/prometheus/prometheus.yml
-      - prometheus_data:/prometheus
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-      - '--storage.tsdb.path=/prometheus'
-      - '--web.console.libraries=/etc/prometheus/console_libraries'
-      - '--web.console.templates=/etc/prometheus/consoles'
-      - '--storage.tsdb.retention.time=200h'
-      - '--web.enable-lifecycle'
-      - '--web.enable-remote-write-receiver'
-    networks:
-      - monitoring
-
-  grafana:
-    image: grafana/grafana:latest
-    container_name: janai-grafana
-    ports:
-      - "3000:3000"
-    volumes:
-      - grafana_data:/var/lib/grafana
-      - ./grafana-dashboard.json:/var/lib/grafana/dashboards/k6-dashboard.json
-      - ./grafana-provisioning:/etc/grafana/provisioning
-    environment:
-      - GF_SECURITY_ADMIN_PASSWORD=admin
-      - GF_USERS_ALLOW_SIGN_UP=false
-    networks:
-      - monitoring
-
-volumes:
-  prometheus_data:
-  grafana_data:
-
-networks:
-  monitoring:
-    driver: bridge
diff --git a/tests/grafana/grafana-dashboard.json b/tests/grafana/grafana-dashboard.json
deleted file mode 100644
index 1394def4..00000000
--- a/tests/grafana/grafana-dashboard.json
+++ /dev/null
@@ -1,1419 +0,0 @@
-{
-  "annotations": {
-    "list": [
-      {
-        "builtIn": 1,
-        "datasource": {
-          "type": "grafana",
-          "uid": "-- Grafana --"
-        },
-        "enable": true,
-        "hide": true,
-        "iconColor": "rgba(0, 211, 255, 1)",
-        "name": "Annotations & Alerts",
-        "type": "dashboard"
-      }
-    ]
-  },
-  "editable": true,
-  "fiscalYearStartMonth": 0,
-  "graphTooltip": 0,
-  "id": 31273,
-  "links": [],
-  "panels": [
-    {
-      "collapsed": false,
-      "gridPos": {
-        "h": 1,
-        "w": 24,
-        "x": 0,
-        "y": 0
-      },
-      "id": 1,
-      "panels": [],
-      "title": "📊 Overview",
-      "type": "row"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "thresholds"
-          },
-          "mappings": [],
-          "max": 1,
-          "min": 0,
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 0.02
-              }
-            ]
-          },
-          "unit": "percentunit"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 6,
-        "x": 0,
-        "y": 1
-      },
-      "id": 2,
-      "options": {
-        "minVizHeight": 75,
-        "minVizWidth": 75,
-        "orientation": "auto",
-        "reduceOptions": {
-          "calcs": [
-            "lastNotNull"
-          ],
-          "fields": "",
-          "values": false
-        },
-        "showThresholdLabels": false,
-        "showThresholdMarkers": true,
-        "sizing": "auto"
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "rate(k6_http_req_failed_total{testid=~\"$testid\", test_case=~\"$test_case\"}[1m]) / rate(k6_http_reqs_total{testid=~\"$testid\", test_case=~\"$test_case\"}[1m])",
-          "refId": "A"
-        }
-      ],
-      "title": "Error Rate",
-      "type": "gauge"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "requests/s",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 80
-              }
-            ]
-          },
-          "unit": "reqps"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 18,
-        "x": 6,
-        "y": 1
-      },
-      "id": 3,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "rate(k6_http_reqs_total{testid=~\"$testid\", test_case=~\"$test_case\"}[1m])",
-          "legendFormat": "{{scenario}} - {{method}}",
-          "refId": "A"
-        }
-      ],
-      "title": "Request Rate (RPS)",
-      "type": "timeseries"
-    },
-    {
-      "collapsed": false,
-      "gridPos": {
-        "h": 1,
-        "w": 24,
-        "x": 0,
-        "y": 9
-      },
-      "id": 4,
-      "panels": [],
-      "title": "🚀 LLM Performance Metrics",
-      "type": "row"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 1000
-              }
-            ]
-          },
-          "unit": "ms"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 0,
-        "y": 10
-      },
-      "id": 5,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_ttfb_ms_p95{testid=~\"$testid\", test_case=~\"$test_case\", status=\"200\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p95)",
-          "refId": "A"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_ttfb_ms_p99{testid=~\"$testid\", test_case=~\"$test_case\", status=\"200\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p99)",
-          "refId": "B"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_ttfb_ms_min{testid=~\"$testid\", test_case=~\"$test_case\", status=\"200\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (min)",
-          "refId": "C"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_ttfb_ms_max{testid=~\"$testid\", test_case=~\"$test_case\", status=\"200\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (max)",
-          "refId": "D"
-        }
-      ],
-      "title": "Time to First Byte (TTFB) - LLM Response",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 10
-              }
-            ]
-          },
-          "unit": "token"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 12,
-        "y": 10
-      },
-      "id": 6,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_tokens_per_sec_p95{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p95)",
-          "refId": "A"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_tokens_per_sec_p99{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p99)",
-          "refId": "B"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_tokens_per_sec_min{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (min)",
-          "refId": "C"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_tokens_per_sec_max{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (max)",
-          "refId": "D"
-        }
-      ],
-      "title": "Tokens Per Second - Generation Speed",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 5000
-              }
-            ]
-          },
-          "unit": "ms"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 8,
-        "x": 0,
-        "y": 18
-      },
-      "id": 7,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_total_ms_p95{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p95)",
-          "refId": "A"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_total_ms_p99{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p99)",
-          "refId": "B"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_total_ms_min{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (min)",
-          "refId": "C"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_total_ms_max{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (max)",
-          "refId": "D"
-        }
-      ],
-      "title": "Total Response Time - End-to-End",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 1000
-              }
-            ]
-          },
-          "unit": "ms"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 8,
-        "x": 8,
-        "y": 18
-      },
-      "id": 8,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_receiving_ms_p95{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p95)",
-          "refId": "A"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_receiving_ms_p99{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p99)",
-          "refId": "B"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_receiving_ms_min{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (min)",
-          "refId": "C"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_receiving_ms_max{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (max)",
-          "refId": "D"
-        }
-      ],
-      "title": "Receiving Time - Stream/Response Duration",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 500
-              }
-            ]
-          },
-          "unit": "ms"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 8,
-        "x": 16,
-        "y": 18
-      },
-      "id": 9,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_queue_ms_p95{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p95)",
-          "refId": "A"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_llm_queue_ms_p99{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{prompt}} (p99)",
-          "refId": "B"
-        }
-      ],
-      "title": "Queue Time - Server Processing Delay",
-      "type": "timeseries"
-    },
-    {
-      "collapsed": false,
-      "gridPos": {
-        "h": 1,
-        "w": 24,
-        "x": 0,
-        "y": 26
-      },
-      "id": 10,
-      "panels": [],
-      "title": "🌐 HTTP Performance",
-      "type": "row"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 1
-              }
-            ]
-          },
-          "unit": "s"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 0,
-        "y": 27
-      },
-      "id": 11,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_http_req_duration_p95{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{method}} (p95)",
-          "refId": "A"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_http_req_duration_p99{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{method}} (p99)",
-          "refId": "B"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_http_req_duration_min{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{method}} (min)",
-          "refId": "C"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_http_req_duration_max{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "{{scenario}} - {{method}} (max)",
-          "refId": "D"
-        }
-      ],
-      "title": "HTTP Request Duration",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": {
-            "mode": "palette-classic"
-          },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": {
-              "legend": false,
-              "tooltip": false,
-              "vis": false,
-              "viz": false
-            },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": {
-              "type": "linear"
-            },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": {
-              "group": "A",
-              "mode": "none"
-            },
-            "thresholdsStyle": {
-              "mode": "off"
-            }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              {
-                "color": "green",
-                "value": 0
-              },
-              {
-                "color": "red",
-                "value": 10
-              }
-            ]
-          },
-          "unit": "short"
-        },
-        "overrides": []
-      },
-      "gridPos": {
-        "h": 8,
-        "w": 12,
-        "x": 12,
-        "y": 27
-      },
-      "id": 12,
-      "options": {
-        "legend": {
-          "calcs": [],
-          "displayMode": "list",
-          "placement": "bottom",
-          "showLegend": true
-        },
-        "tooltip": {
-          "hideZeros": false,
-          "mode": "single",
-          "sort": "none"
-        }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_vus{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "Virtual Users - {{testid}}",
-          "refId": "A"
-        },
-        {
-          "datasource": {
-            "type": "prometheus",
-            "uid": "${DS_PROMETHEUS}"
-          },
-          "expr": "k6_vus_max{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "Max VUs - {{testid}}",
-          "refId": "B"
-        }
-      ],
-      "title": "Virtual Users",
-      "type": "timeseries"
-    },
-    {
-      "collapsed": false,
-      "gridPos": {
-        "h": 1,
-        "w": 24,
-        "x": 0,
-        "y": 35
-      },
-      "id": 13,
-      "panels": [],
-      "title": "⏱️ HTTP Timing Breakdown (k6 built-in)",
-      "type": "row"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 1 } ] },
-          "unit": "s"
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 36 },
-      "id": 14,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "k6_http_req_blocked_p95{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p95)", "refId": "A" },
-        { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "k6_http_req_blocked_p99{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p99)", "refId": "B" },
-        { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "k6_http_req_blocked_min{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (min)", "refId": "C" },
-        { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "k6_http_req_blocked_max{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (max)", "refId": "D" }
-      ],
-      "title": "HTTP Blocked Time",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 1 } ] }, "unit": "s" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 36 },
-      "id": 15,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "expr": "k6_http_req_connecting_p95{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p95)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_connecting_p99{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p99)", "refId": "B", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_connecting_min{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (min)", "refId": "C", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_connecting_max{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (max)", "refId": "D", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } }
-      ],
-      "title": "HTTP Connecting Time",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 1 } ] }, "unit": "s" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 36 },
-      "id": 16,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "expr": "k6_http_req_tls_handshaking_p95{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p95)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_tls_handshaking_p99{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p99)", "refId": "B", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_tls_handshaking_min{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (min)", "refId": "C", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_tls_handshaking_max{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (max)", "refId": "D", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } }
-      ],
-      "title": "HTTP TLS Handshaking",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 1 } ] }, "unit": "s" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 44 },
-      "id": 17,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "expr": "k6_http_req_waiting_p95{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p95)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_waiting_p99{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p99)", "refId": "B", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_waiting_min{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (min)", "refId": "C", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_waiting_max{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (max)", "refId": "D", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } }
-      ],
-      "title": "HTTP Waiting (TTFB)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 1 } ] }, "unit": "s" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 44 },
-      "id": 18,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "expr": "k6_http_req_sending_p95{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p95)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_sending_p99{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p99)", "refId": "B", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_sending_min{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (min)", "refId": "C", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_sending_max{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (max)", "refId": "D", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } }
-      ],
-      "title": "HTTP Sending Time",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 1 } ] }, "unit": "s" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 44 },
-      "id": 19,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "expr": "k6_http_req_receiving_p95{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p95)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_receiving_p99{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p99)", "refId": "B", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_receiving_min{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (min)", "refId": "C", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_http_req_receiving_max{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (max)", "refId": "D", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } }
-      ],
-      "title": "HTTP Receiving Time",
-      "type": "timeseries"
-    },
-    {
-      "collapsed": false,
-      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 52 },
-      "id": 20,
-      "panels": [],
-      "title": "📈 Throughput, Iterations & Checks",
-      "type": "row"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0.95 }, { "color": "red", "value": 0.8 } ] }, "unit": "percentunit" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 6, "x": 0, "y": 53 },
-      "id": 21,
-      "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" },
-      "pluginVersion": "12.1.0",
-      "targets": [ { "expr": "k6_checks_rate{testid=~\"$testid\", test_case=~\"$test_case\"}", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } } ],
-      "title": "Checks Rate",
-      "type": "gauge"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 1e7 } ] }, "unit": "Bps" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 12, "x": 6, "y": 53 },
-      "id": 22,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "expr": "rate(k6_data_sent_total{testid=~\"$testid\", test_case=~\"$test_case\"}[1m])", "legendFormat": "Data Sent (B/s)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "rate(k6_data_received_total{testid=~\"$testid\", test_case=~\"$test_case\"}[1m])", "legendFormat": "Data Received (B/s)", "refId": "B", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } }
-      ],
-      "title": "Data Throughput (bytes/sec)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 10 } ] }, "unit": "ops" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 6, "x": 18, "y": 53 },
-      "id": 23,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [ { "expr": "rate(k6_dropped_iterations_total{testid=~\"$testid\", test_case=~\"$test_case\"}[1m])", "legendFormat": "Dropped Iterations (1m rate)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } } ],
-      "title": "Dropped Iterations (rate)",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 5 } ] }, "unit": "s" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 61 },
-      "id": 24,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        { "expr": "k6_iteration_duration_p95{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p95)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_iteration_duration_p99{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (p99)", "refId": "B", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_iteration_duration_min{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (min)", "refId": "C", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } },
-        { "expr": "k6_iteration_duration_max{testid=~\"$testid\", test_case=~\"$test_case\"}", "legendFormat": "{{scenario}} (max)", "refId": "D", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } }
-      ],
-      "title": "Iteration Duration",
-      "type": "timeseries"
-    },
-    {
-      "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-      "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": 0 }, { "color": "red", "value": 200 } ] }, "unit": "ops" }, "overrides": [] },
-      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 61 },
-      "id": 25,
-      "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } },
-      "pluginVersion": "12.1.0",
-      "targets": [ { "expr": "rate(k6_iterations_total{testid=~\"$testid\", test_case=~\"$test_case\"}[1m])", "legendFormat": "Iterations (1m rate)", "refId": "A", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" } } ],
-      "title": "Iterations Rate",
-      "type": "timeseries"
-    },
-    {
-      "datasource": {
-        "type": "prometheus",
-        "uid": "${DS_PROMETHEUS}"
-      },
-      "fieldConfig": {
-        "defaults": {
-          "color": { "mode": "palette-classic" },
-          "custom": {
-            "axisBorderShow": false,
-            "axisCenteredZero": false,
-            "axisColorMode": "text",
-            "axisLabel": "",
-            "axisPlacement": "auto",
-            "barAlignment": 0,
-            "barWidthFactor": 0.6,
-            "drawStyle": "line",
-            "fillOpacity": 10,
-            "gradientMode": "none",
-            "hideFrom": { "legend": false, "tooltip": false, "vis": false, "viz": false },
-            "insertNulls": false,
-            "lineInterpolation": "linear",
-            "lineWidth": 1,
-            "pointSize": 5,
-            "scaleDistribution": { "type": "linear" },
-            "showPoints": "never",
-            "spanNulls": false,
-            "stacking": { "group": "A", "mode": "none" },
-            "thresholdsStyle": { "mode": "off" }
-          },
-          "mappings": [],
-          "thresholds": {
-            "mode": "absolute",
-            "steps": [
-              { "color": "green", "value": 0 },
-              { "color": "red", "value": 0.05 }
-            ]
-          },
-          "unit": "percentunit"
-        },
-        "overrides": []
-      },
-      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 69 },
-      "id": 26,
-      "options": {
-        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
-        "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" }
-      },
-      "pluginVersion": "12.1.0",
-      "targets": [
-        {
-          "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" },
-          "expr": "k6_http_req_failed_rate{testid=~\"$testid\", test_case=~\"$test_case\"}",
-          "legendFormat": "Failed Rate - {{scenario}}",
-          "refId": "A"
-        }
-      ],
-      "title": "HTTP Failed Rate (built-in)",
-      "type": "timeseries"
-    }
-  ],
-  "preload": false,
-  "refresh": "5s",
-  "schemaVersion": 41,
-  "tags": [
-    "k6",
-    "load-testing",
-    "llm",
-    "jan-server"
-  ],
-  "templating": {
-    "list": [
-      {
-        "current": {
-          "text": "Mimir-Prod",
-          "value": "mimir-prod"
-        },
-        "label": "Datasource",
-        "name": "DS_PROMETHEUS",
-        "options": [],
-        "query": "prometheus",
-        "refresh": 1,
-        "regex": "",
-        "type": "datasource"
-      },
-      {
-        "current": {
-          "text": [
-            "All"
-          ],
-          "value": [
-            "$__all"
-          ]
-        },
-        "datasource": {
-          "type": "prometheus",
-          "uid": "${DS_PROMETHEUS}"
-        },
-        "definition": "label_values(k6_http_reqs_total, testid)",
-        "includeAll": true,
-        "label": "Test ID",
-        "multi": true,
-        "name": "testid",
-        "options": [],
-        "query": {
-          "query": "label_values(k6_http_reqs_total, testid)",
-          "refId": "StandardVariableQuery"
-        },
-        "refresh": 1,
-        "regex": "",
-        "sort": 1,
-        "type": "query"
-      },
-      {
-        "current": {
-          "text": [
-            "All"
-          ],
-          "value": [
-            "$__all"
-          ]
-        },
-        "datasource": {
-          "type": "prometheus",
-          "uid": "${DS_PROMETHEUS}"
-        },
-        "definition": "label_values(k6_http_reqs_total, test_case)",
-        "includeAll": true,
-        "label": "Test Case",
-        "multi": true,
-        "name": "test_case",
-        "options": [],
-        "query": {
-          "query": "label_values(k6_http_reqs_total, test_case)",
-          "refId": "StandardVariableQuery"
-        },
-        "refresh": 1,
-        "regex": "",
-        "type": "query"
-      }
-    ]
-  },
-  "time": {
-    "from": "now-1h",
-    "to": "now"
-  },
-  "timepicker": {},
-  "timezone": "",
-  "title": "Jan Server Load Test - k6 & LLM Metrics",
-  "uid": "lkkajsdfq234124asd",
-  "version": 2
-}
\ No newline at end of file
diff --git a/tests/grafana/grafana-provisioning/datasources/prometheus.yml b/tests/grafana/grafana-provisioning/datasources/prometheus.yml
deleted file mode 100644
index 1a57b69c..00000000
--- a/tests/grafana/grafana-provisioning/datasources/prometheus.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-apiVersion: 1
-
-datasources:
-  - name: Prometheus
-    type: prometheus
-    access: proxy
-    url: http://prometheus:9090
-    isDefault: true
-    editable: true
diff --git a/tests/grafana/prometheus.yml b/tests/grafana/prometheus.yml
deleted file mode 100644
index cc563d47..00000000
--- a/tests/grafana/prometheus.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-global:
-  scrape_interval: 15s
-  evaluation_interval: 15s
-
-rule_files:
-  # - "first_rules.yml"
-  # - "second_rules.yml"
-
-scrape_configs:
-  - job_name: 'prometheus'
-    static_configs:
-      - targets: ['localhost:9090']
diff --git a/tests/run-loadtest.bat b/tests/run-loadtest.bat
deleted file mode 100644
index 735844d8..00000000
--- a/tests/run-loadtest.bat
+++ /dev/null
@@ -1,218 +0,0 @@
-@echo off
-setlocal enabledelayedexpansion
-
-REM Load Test Runner Script for Jan Server (Windows Batch Version)
-REM Usage: run-loadtest.bat [test_case_name]
-REM Examples:
-REM   run-loadtest.bat                    # Run all test cases
-REM   run-loadtest.bat health-check      # Run only health-check test
-REM   run-loadtest.bat --list            # Show available test cases
-
-echo Jan Server Load Test Runner
-echo ============================
-
-REM Load environment variables from .env file if it exists
-if exist ".env" (
-    echo Loading environment from .env file...
-    for /f "usebackq tokens=1,2 delims==" %%a in (".env") do (
-        if not "%%a"=="" if not "%%a:~0,1%"=="#" (
-            set "%%a=%%b"
-        )
-    )
-)
-
-REM Default configuration
-set "DEFAULT_BASE_URL=https://api-dev.jan.ai"
-set "DEFAULT_MODEL=jan-v1-4b"
-set "DEFAULT_DURATION_MIN=5"
-set "DEFAULT_NONSTREAM_RPS=2"
-set "DEFAULT_STREAM_RPS=1"
-
-REM Environment variables (can be overridden)
-if not defined BASE set "BASE=%DEFAULT_BASE_URL%"
-if not defined MODEL set "MODEL=%DEFAULT_MODEL%"
-if not defined DURATION_MIN set "DURATION_MIN=%DEFAULT_DURATION_MIN%"
-if not defined NONSTREAM_RPS set "NONSTREAM_RPS=%DEFAULT_NONSTREAM_RPS%"
-if not defined STREAM_RPS set "STREAM_RPS=%DEFAULT_STREAM_RPS%"
-if not defined LOADTEST_TOKEN set "LOADTEST_TOKEN="
-if not defined API_KEY set "API_KEY="
-
-REM Set k6 executable path
-set "K6_EXE=k6"
-
-REM Validate environment
-if "%BASE%"=="" (
-    echo [ERROR] BASE URL is required
-    exit /b 1
-)
-
-if "%API_KEY%"=="" if "%LOADTEST_TOKEN%"=="" (
-    echo [WARNING] Neither API_KEY nor LOADTEST_TOKEN is set. Test might fail.
-)
-
-REM Handle different arguments
-set "TEST_CASE=%~1"
-
-if "%TEST_CASE%"=="--list" goto :list_test_cases
-if "%TEST_CASE%"=="-l" goto :list_test_cases
-if "%TEST_CASE%"=="--help" goto :list_test_cases
-if "%TEST_CASE%"=="-h" goto :list_test_cases
-
-REM Get available test cases
-set "TEST_CASES="
-if not exist "src" (
-    echo [ERROR] Source directory not found: src
-    exit /b 1
-)
-
-for %%f in (src\*.js) do (
-    set "filename=%%~nf"
-    set "TEST_CASES=!TEST_CASES! !filename!"
-)
-
-if "%TEST_CASE%"=="" (
-    REM No argument provided - run all test cases
-    echo [INFO] No specific test case provided, running all test cases...
-    goto :run_all_tests
-)
-
-REM Specific test case provided
-echo [INFO] Running specific test case: %TEST_CASE%
-goto :run_single_test
-
-:list_test_cases
-echo [INFO] Available test cases:
-for %%f in (src\*.js) do (
-    echo   - %%~nf (src\%%~nf.js)
-)
-echo.
-echo Usage:
-echo   %~nx0                    # Run all test cases
-echo   %~nx0 [test_case_name]   # Run specific test case
-echo.
-echo Examples:
-echo   %~nx0                    # Run all tests
-echo   %~nx0 health-check       # Run only health-check test
-echo   %~nx0 completion-flow    # Run full completion API flow test
-echo   %~nx0 chat-completion    # Run chat completion test
-echo   %~nx0 --list             # Show this help
-goto :end
-
-:run_all_tests
-echo [INFO] Running all test cases
-echo ====================================================
-
-set "FAILED_TESTS="
-set "TOTAL_TESTS=0"
-
-for %%t in (%TEST_CASES%) do (
-    set /a TOTAL_TESTS+=1
-    echo.
-    echo [INFO] 📋 Running test case: %%t
-    echo [INFO] ----------------------------------------------------
-    
-    call :run_test "%%t"
-    if errorlevel 1 (
-        echo [ERROR] ❌ Test case '%%t' failed
-        set "FAILED_TESTS=!FAILED_TESTS! %%t"
-    ) else (
-        echo [SUCCESS] ✅ Test case '%%t' completed successfully
-    )
-    
-    REM Add a delay between tests
-    if !TOTAL_TESTS! gtr 1 (
-        echo [INFO] Waiting 10 seconds before next test...
-        timeout /t 10 /nobreak >nul
-    )
-)
-
-REM Summary
-echo.
-echo ====================================================
-echo [INFO] 📊 TEST EXECUTION SUMMARY
-echo ====================================================
-echo [INFO] Total tests: %TOTAL_TESTS%
-
-set "FAILED_COUNT=0"
-for %%t in (%FAILED_TESTS%) do set /a FAILED_COUNT+=1
-set /a PASSED_COUNT=%TOTAL_TESTS%-%FAILED_COUNT%
-
-echo [INFO] Passed: %PASSED_COUNT%
-echo [INFO] Failed: %FAILED_COUNT%
-
-if %FAILED_COUNT%==0 (
-    echo [SUCCESS] 🎉 All tests passed!
-    exit /b 0
-) else (
-    echo [ERROR] 💥 Failed tests:%FAILED_TESTS%
-    exit /b 1
-)
-goto :end
-
-:run_single_test
-call :run_test "%TEST_CASE%"
-exit /b %errorlevel%
-
-:run_test
-set "TEST_CASE=%~1"
-set "TIMESTAMP=%date:~-4,4%%date:~-10,2%%date:~-7,2%_%time:~0,2%%time:~3,2%%time:~6,2%"
-set "TIMESTAMP=%TIMESTAMP: =0%"
-set "RESULTS_DIR=results"
-set "OUTPUT_FILE=%RESULTS_DIR%\%TEST_CASE%_%TIMESTAMP%.json"
-set "TEST_FILE=src\%TEST_CASE%.js"
-
-REM Check if test file exists
-if not exist "%TEST_FILE%" (
-    echo [ERROR] Test file not found: %TEST_FILE%
-    echo [INFO] Available test cases:
-    for %%f in (src\*.js) do (
-        echo   - %%~nf
-    )
-    exit /b 1
-)
-
-REM Create results directory if it doesn't exist
-if not exist "%RESULTS_DIR%" mkdir "%RESULTS_DIR%"
-
-echo [INFO] Running test case: %TEST_CASE%
-echo [INFO] Test file: %TEST_FILE%
-echo [INFO] Configuration:
-echo [INFO]   Base URL: %BASE%
-echo [INFO]   Model: %MODEL%
-echo [INFO]   Duration: %DURATION_MIN% minutes
-echo [INFO]   Non-stream RPS: %NONSTREAM_RPS%
-echo [INFO]   Stream RPS: %STREAM_RPS%
-echo [INFO]   Output: %OUTPUT_FILE%
-
-REM Generate unique test ID for metrics segmentation
-set "TEST_ID=%TEST_CASE%_%TIMESTAMP%_%RANDOM%"
-echo [INFO] Test ID: %TEST_ID%
-
-REM Execute k6
-echo [INFO] Running k6 test...
-"%K6_EXE%" run ^
-    --summary-export="%OUTPUT_FILE%" ^
-    --out json="%OUTPUT_FILE%" ^
-    --tag testid="%TEST_ID%" ^
-    --tag test_case="%TEST_CASE%" ^
-    --tag environment="%BASE%" ^
-    "%TEST_FILE%"
-
-REM Check if test completed successfully
-if errorlevel 1 (
-    echo [ERROR] Test case '%TEST_CASE%' failed
-    exit /b 1
-) else (
-    echo [SUCCESS] Test case '%TEST_CASE%' completed successfully
-    
-    REM Show results file location
-    if exist "%OUTPUT_FILE%" (
-        echo [INFO] Test Results Summary:
-        echo ==================== METRICS SUMMARY ====================
-        echo [INFO] Results saved to: %OUTPUT_FILE%
-        echo ==========================================================
-    )
-    exit /b 0
-)
-
-:end
\ No newline at end of file
diff --git a/tests/run-loadtest.sh b/tests/run-loadtest.sh
deleted file mode 100755
index 7b397bf6..00000000
--- a/tests/run-loadtest.sh
+++ /dev/null
@@ -1,352 +0,0 @@
-#!/bin/bash
-
-# Load Test Runner Script for Jan Server
-# Usage: ./run-loadtest.sh [test_case_name]
-
-set -e
-
-# Load environment variables from .env file if it exists
-SCRIPT_DIR=$(dirname "$0")
-if [[ -f "$SCRIPT_DIR/.env" ]]; then
-    echo "Loading environment from .env file..."
-    set -a  # automatically export all variables
-    source "$SCRIPT_DIR/.env"
-    set +a
-fi
-
-# Default configuration
-DEFAULT_BASE_URL="https://api-stag.jan.ai"
-DEFAULT_MODEL="jan-v1-4b"
-DEFAULT_DURATION_MIN=5
-DEFAULT_NONSTREAM_RPS=2
-DEFAULT_STREAM_RPS=1
-
-# Environment variables (can be overridden)
-export BASE="${BASE:-$DEFAULT_BASE_URL}"
-export MODEL="${MODEL:-$DEFAULT_MODEL}"
-export DURATION_MIN="${DURATION_MIN:-$DEFAULT_DURATION_MIN}"
-export NONSTREAM_RPS="${NONSTREAM_RPS:-$DEFAULT_NONSTREAM_RPS}"
-export STREAM_RPS="${STREAM_RPS:-$DEFAULT_STREAM_RPS}"
-export DEBUG="${DEBUG:-false}"
-export SINGLE_RUN="${SINGLE_RUN:-false}"
-
-# Cloudflare load test token (required for API access)
-export LOADTEST_TOKEN="${LOADTEST_TOKEN:-}"
-
-# Guest authentication - no API keys needed
-# Tests automatically use guest login
-
-# Prometheus remote write configuration (following k6 docs)
-export K6_PROMETHEUS_RW_SERVER_URL="${K6_PROMETHEUS_RW_SERVER_URL:-}"
-export K6_PROMETHEUS_RW_USERNAME="${K6_PROMETHEUS_RW_USERNAME:-}"
-export K6_PROMETHEUS_RW_PASSWORD="${K6_PROMETHEUS_RW_PASSWORD:-}"
-export K6_PROMETHEUS_RW_TREND_STATS="${K6_PROMETHEUS_RW_TREND_STATS:-p(95),p(99),min,max}"
-export K6_PROMETHEUS_RW_PUSH_INTERVAL="${K6_PROMETHEUS_RW_PUSH_INTERVAL:-5s}"
-
-# Colors for output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-NC='\033[0m' # No Color
-
-# Function to print colored output
-log_info() {
-    echo -e "${BLUE}[INFO]${NC} $1"
-}
-
-log_success() {
-    echo -e "${GREEN}[SUCCESS]${NC} $1"
-}
-
-log_warning() {
-    echo -e "${YELLOW}[WARNING]${NC} $1"
-}
-
-log_error() {
-    echo -e "${RED}[ERROR]${NC} $1"
-}
-
-# Function to check if k6 is installed
-check_k6() {
-    if ! command -v k6 &> /dev/null; then
-        log_error "k6 is not installed. Please install k6 first."
-        echo "macOS: brew install k6"
-        echo "Ubuntu/Debian: See README.md for installation instructions"
-        exit 1
-    fi
-}
-
-# Function to validate environment
-validate_env() {
-    if [[ -z "$BASE" ]]; then
-        log_error "BASE URL is required"
-        exit 1
-    fi
-    
-    # Check for Cloudflare load test token
-    if [[ -z "$LOADTEST_TOKEN" ]]; then
-        log_warning "LOADTEST_TOKEN is not set - this may be required for Cloudflare API access"
-        log_info "Set LOADTEST_TOKEN environment variable or add it to .env file"
-    else
-        log_info "Cloudflare load test token configured: [CONFIGURED]"
-    fi
-    
-    # Guest authentication - no API keys needed
-    log_info "Using guest authentication (no API keys required)"
-    
-    # Validate Prometheus endpoint format if provided
-    if [[ -n "$K6_PROMETHEUS_RW_SERVER_URL" ]]; then
-        if [[ ! "$K6_PROMETHEUS_RW_SERVER_URL" =~ ^https?:// ]]; then
-            log_error "K6_PROMETHEUS_RW_SERVER_URL must start with http:// or https://"
-            exit 1
-        fi
-        log_info "Prometheus remote write endpoint configured"
-    fi
-}
-
-# Function to get all available test cases by scanning src folder
-get_available_test_cases() {
-    local script_dir=$(dirname "$0")
-    local src_dir="$script_dir/src"
-    
-    if [[ ! -d "$src_dir" ]]; then
-        log_error "Source directory not found: $src_dir"
-        return 1
-    fi
-    
-    # Find all .js files in src directory and extract base names
-    find "$src_dir" -name "*.js" -type f | while read -r file; do
-        basename "$file" .js
-    done | sort
-}
-
-# Function to run all test cases
-run_all_test_cases() {
-    local available_tests=($(get_available_test_cases))
-    local failed_tests=()
-    local total_tests=${#available_tests[@]}
-    
-    log_info "Running all test cases (${total_tests} total)"
-    log_info "===================================================="
-    
-    for test_case in "${available_tests[@]}"; do
-        log_info ""
-        log_info "📋 Running test case: $test_case"
-        log_info "----------------------------------------------------"
-        
-        if run_single_test_case "$test_case"; then
-            log_success "✅ Test case '$test_case' completed successfully"
-        else
-            log_error "❌ Test case '$test_case' failed"
-            failed_tests+=("$test_case")
-        fi
-        
-        # Add a delay between tests to avoid overwhelming the system
-        if [[ ${#available_tests[@]} -gt 1 ]]; then
-            log_info "Waiting 10 seconds before next test..."
-            sleep 10
-        fi
-    done
-    
-    # Summary
-    log_info ""
-    log_info "===================================================="
-    log_info "📊 TEST EXECUTION SUMMARY"
-    log_info "===================================================="
-    log_info "Total tests: $total_tests"
-    log_info "Passed: $((total_tests - ${#failed_tests[@]}))"
-    log_info "Failed: ${#failed_tests[@]}"
-    
-    if [[ ${#failed_tests[@]} -eq 0 ]]; then
-        log_success "🎉 All tests passed!"
-        return 0
-    else
-        log_error "💥 Failed tests: ${failed_tests[*]}"
-        return 1
-    fi
-}
-
-# Function to run a specific test case (renamed from run_test_case)
-run_single_test_case() {
-    local test_case="$1"
-    local script_dir=$(dirname "$0")
-    local timestamp=$(date +"%Y%m%d_%H%M%S")
-    local results_dir="$script_dir/results"
-    local output_file="$results_dir/${test_case}_${timestamp}.json"
-    local test_file="$script_dir/src/${test_case}.js"
-    
-    # Check if test file exists
-    if [[ ! -f "$test_file" ]]; then
-        log_error "Test file not found: $test_file"
-        log_info "Available test cases:"
-        local available_tests=($(get_available_test_cases))
-        for available_test in "${available_tests[@]}"; do
-            log_info "  - $available_test"
-        done
-        return 1
-    fi
-    
-    # Create results directory if it doesn't exist
-    mkdir -p "$results_dir"
-    
-    log_info "Running test case: $test_case"
-    log_info "Test file: $test_file"
-    log_info "Running command: k6 run $test_file"
-    log_info "Configuration:"
-    log_info "  Base URL: $BASE"
-    log_info "  Model: $MODEL"
-    log_info "  Duration: ${DURATION_MIN} minutes"
-    log_info "  Non-stream RPS: $NONSTREAM_RPS"
-    log_info "  Stream RPS: $STREAM_RPS"
-    log_info "  Debug Mode: $DEBUG"
-    log_info "  Single Run: $SINGLE_RUN"
-    if [[ -n "$LOADTEST_TOKEN" ]]; then
-        log_info "  Load Test Token: [CONFIGURED]"
-    else
-        log_info "  Load Test Token: [NOT SET]"
-    fi
-    log_info "  Output: $output_file"
-    
-    # Generate unique test ID for metrics segmentation
-    local test_id="${test_case}_$(date +%Y%m%d_%H%M%S)_$$"
-    log_info "Test ID: $test_id"
-    
-    # Execute k6 with conditional Prometheus output
-    if [[ -n "$K6_PROMETHEUS_RW_SERVER_URL" ]]; then
-        log_info "Prometheus remote write endpoint configured: [CONFIGURED]"
-        
-        # Validate that it's not localhost in CI environment
-        if [[ "$K6_PROMETHEUS_RW_SERVER_URL" == *"localhost"* ]] || [[ "$K6_PROMETHEUS_RW_SERVER_URL" == *"127.0.0.1"* ]]; then
-            log_warning "Prometheus endpoint appears to be localhost - this will not work in CI environment!"
-        fi
-        
-        # Set optional k6 environment variables for Prometheus remote write
-        if [[ -n "$K6_PROMETHEUS_RW_USERNAME" ]]; then
-            log_info "Using Prometheus with basic auth (username: [CONFIGURED])"
-            export K6_PROMETHEUS_RW_USERNAME
-        fi
-        
-        if [[ -n "$K6_PROMETHEUS_RW_PASSWORD" ]]; then
-            export K6_PROMETHEUS_RW_PASSWORD
-        fi
-        
-        export K6_PROMETHEUS_RW_TREND_STATS
-        export K6_PROMETHEUS_RW_PUSH_INTERVAL
-        
-        # Run k6 with Prometheus remote write output
-        log_info "Running with Prometheus remote write metrics export"
-        log_info "Trend stats: $K6_PROMETHEUS_RW_TREND_STATS"
-        log_info "Push interval: $K6_PROMETHEUS_RW_PUSH_INTERVAL"
-        
-        k6 run \
-            --summary-export="$output_file" \
-            --out json="$output_file" \
-            --out experimental-prometheus-rw \
-            --tag testid="$test_id" \
-            --tag test_case="$test_case" \
-            --tag environment="${BASE##*/}" \
-            "$test_file"
-    else
-        log_info "No Prometheus endpoint configured, running without metrics export"
-        k6 run \
-            --summary-export="$output_file" \
-            --out json="$output_file" \
-            --tag testid="$test_id" \
-            --tag test_case="$test_case" \
-            --tag environment="${BASE##*/}" \
-            "$test_file"
-    fi
-    
-    # Check if test completed successfully
-    if [[ $? -eq 0 ]]; then
-        log_success "Test case '$test_case' completed successfully"
-        
-        # Extract and display key metrics
-        if [[ -f "$output_file" ]]; then
-            log_info "Test Results Summary:"
-            
-            # Parse JSON output for key metrics (requires jq)
-            if command -v jq &> /dev/null; then
-                echo "==================== METRICS SUMMARY ===================="
-                jq -r '.metrics | to_entries[] | select(.key | contains("completion_") or contains("conversation_") or contains("response_") or contains("guest_") or contains("refresh_")) | "\(.key): \(.value.avg // .value.count)"' "$output_file" 2>/dev/null || true
-                echo "=========================================================="
-            fi
-            
-            if [[ -n "$K6_PROMETHEUS_RW_SERVER_URL" ]]; then
-                log_success "Metrics sent to Prometheus directly via k6"
-            fi
-        fi
-        return 0
-    else
-        log_error "Test case '$test_case' failed"
-        return 1
-    fi
-}
-
-# Function to list available test cases
-list_test_cases() {
-    local available_tests=($(get_available_test_cases))
-    
-    if [[ ${#available_tests[@]} -eq 0 ]]; then
-        log_warning "No test cases found in src/ directory"
-        log_info "Create .js files in src/ directory to add test cases"
-        return 1
-    fi
-    
-    log_info "Available test cases (${#available_tests[@]} total):"
-    for test_case in "${available_tests[@]}"; do
-        local test_file="src/${test_case}.js"
-        if [[ -f "$test_file" ]]; then
-            log_info "  - $test_case (src/${test_case}.js)"
-        else
-            log_warning "  - $test_case (file missing: $test_file)"
-        fi
-    done
-    log_info ""
-    log_info "Usage:"
-    log_info "  $0                    # Run all test cases"
-    log_info "  $0 [test_case_name]   # Run specific test case"
-    log_info ""
-    log_info "Examples:"
-    log_info "  $0                                    # Run all tests"
-    log_info "  $0 test-completion-standard          # Run only standard completion test"
-    log_info "  $0 test-completion-conversation      # Run only conversation test"
-    log_info "  $0 test-responses                     # Run only response API test"
-    log_info "  $0 --list                             # Show this help"
-}
-
-# Main execution
-main() {
-    local test_case="$1"
-    
-    log_info "Jan Server Load Test Runner"
-    log_info "============================"
-    
-    # Check prerequisites
-    check_k6
-    validate_env
-    
-    # Handle different arguments
-    case "$test_case" in
-        "--list"|"-l"|"--help"|"-h")
-            list_test_cases
-            exit 0
-            ;;
-        "")
-            # No argument provided - run all test cases
-            log_info "No specific test case provided, running all test cases..."
-            run_all_test_cases
-            exit $?
-            ;;
-        *)
-            # Specific test case provided
-            log_info "Running specific test case: $test_case"
-            run_single_test_case "$test_case"
-            exit $?
-            ;;
-    esac
-}
-
-# Execute main function with all arguments
-main "$@"
diff --git a/tests/run-test-with-monitoring.bat b/tests/run-test-with-monitoring.bat
deleted file mode 100644
index d931b0d5..00000000
--- a/tests/run-test-with-monitoring.bat
+++ /dev/null
@@ -1,62 +0,0 @@
-@echo off
-REM Test Script with Grafana Monitoring
-REM This script runs a K6 test and sends metrics to Grafana
-
-echo ========================================
-echo   K6 Test with Grafana Monitoring
-echo ========================================
-echo.
-
-REM Check if monitoring stack is running
-curl -s http://localhost:9090/api/v1/query?query=up >nul 2>&1
-if %errorlevel% neq 0 (
-    echo ❌ Prometheus is not running. Please start the monitoring stack first:
-    echo    .\setup-monitoring.bat
-    echo    or
-    echo    docker-compose -f grafana\docker-compose.yml up -d
-    pause
-    exit /b 1
-)
-
-echo ✅ Prometheus is running
-echo.
-
-REM Set environment variables for metrics
-set K6_PROMETHEUS_RW_SERVER_URL=http://localhost:9090/api/v1/write
-set K6_PROMETHEUS_RW_TREND_STATS=p(95),p(99),min,max
-set K6_PROMETHEUS_RW_PUSH_INTERVAL=5s
-
-echo 📊 Metrics will be sent to: %K6_PROMETHEUS_RW_SERVER_URL%
-echo.
-
-REM Get test case from command line or use default
-if "%1"=="" (
-    set TEST_CASE=test-completion-standard
-) else (
-    set TEST_CASE=%1
-)
-
-echo 🧪 Running test: %TEST_CASE%
-echo.
-
-REM Run the test
-if exist ".\run-loadtest.bat" (
-    .\run-loadtest.bat %TEST_CASE%
-) else (
-    echo ❌ run-loadtest.bat not found. Running k6 directly...
-    k6 run --out experimental-prometheus-rw "src\%TEST_CASE%.js"
-)
-
-echo.
-echo ✅ Test completed!
-echo.
-echo 📈 View results in Grafana:
-echo    http://localhost:3000
-echo    Username: admin
-echo    Password: admin
-echo.
-echo 🔍 Check Prometheus metrics:
-echo    http://localhost:9090
-echo.
-
-pause
diff --git a/tests/run-test-with-monitoring.sh b/tests/run-test-with-monitoring.sh
deleted file mode 100644
index af4d4e76..00000000
--- a/tests/run-test-with-monitoring.sh
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/bin/bash
-
-# Test Script with Grafana Monitoring
-# This script runs a K6 test and sends metrics to Grafana
-
-set -euo pipefail
-
-echo "========================================"
-echo "  K6 Test with Grafana Monitoring"
-echo "========================================"
-echo
-
-# Check if monitoring stack is running
-if ! curl -s http://localhost:9090/api/v1/query?query=up >/dev/null 2>&1; then
-    echo "❌ Prometheus is not running. Please start the monitoring stack first:"
-    echo "   ./setup-monitoring.sh"
-    echo "   or"
-    echo "   docker-compose -f grafana/docker-compose.yml up -d"
-    exit 1
-fi
-
-echo "✅ Prometheus is running"
-echo
-
-# Set environment variables for metrics
-export K6_PROMETHEUS_RW_SERVER_URL="http://localhost:9090/api/v1/write"
-export K6_PROMETHEUS_RW_TREND_STATS="p(95),p(99),min,max"
-export K6_PROMETHEUS_RW_PUSH_INTERVAL="5s"
-
-echo "📊 Metrics will be sent to: $K6_PROMETHEUS_RW_SERVER_URL"
-echo
-
-# Get test case from command line or use default
-TEST_CASE="${1:-test-completion-standard}"
-
-echo "🧪 Running test: $TEST_CASE"
-echo
-
-# Run the test
-if [ -f "./run-loadtest.sh" ]; then
-    ./run-loadtest.sh "$TEST_CASE"
-else
-    echo "❌ run-loadtest.sh not found. Running k6 directly..."
-    k6 run --out experimental-prometheus-rw "src/$TEST_CASE.js"
-fi
-
-echo
-echo "✅ Test completed!"
-echo
-echo "📈 View results in Grafana:"
-echo "   http://localhost:3000"
-echo "   Username: admin"
-echo "   Password: admin"
-echo
-echo "🔍 Check Prometheus metrics:"
-echo "   http://localhost:9090"
-echo
diff --git a/tests/setup-monitoring.bat b/tests/setup-monitoring.bat
deleted file mode 100644
index 627b22e0..00000000
--- a/tests/setup-monitoring.bat
+++ /dev/null
@@ -1,67 +0,0 @@
-@echo off
-REM Grafana Monitoring Setup Script for K6 Tests
-REM This script sets up Grafana and Prometheus for monitoring K6 test results
-
-echo ========================================
-echo   K6 Test Monitoring Setup
-echo ========================================
-echo.
-
-REM Check if Docker is running
-docker info >nul 2>&1
-if %errorlevel% neq 0 (
-    echo ❌ Docker is not running. Please start Docker and try again.
-    pause
-    exit /b 1
-)
-
-REM Check if docker-compose is available
-docker-compose --version >nul 2>&1
-if %errorlevel% neq 0 (
-    echo ❌ docker-compose is not installed. Please install docker-compose and try again.
-    pause
-    exit /b 1
-)
-
-echo ✅ Docker and docker-compose are available
-echo.
-
-REM Start the monitoring stack
-echo 🚀 Starting Grafana and Prometheus...
-docker-compose -f grafana\docker-compose.yml up -d
-
-echo.
-echo ⏳ Waiting for services to start...
-timeout /t 10 /nobreak >nul
-
-REM Check if services are running
-docker-compose -f grafana\docker-compose.yml ps | findstr "Up" >nul
-if %errorlevel% equ 0 (
-    echo ✅ Services started successfully!
-    echo.
-    echo 📊 Access your monitoring dashboard:
-    echo    Grafana:    http://localhost:3000
-    echo    Prometheus: http://localhost:9090
-    echo.
-    echo 🔐 Grafana credentials:
-    echo    Username: admin
-    echo    Password: admin
-    echo.
-    echo 🧪 To run tests with metrics:
-    echo    set K6_PROMETHEUS_RW_SERVER_URL=http://localhost:9090/api/v1/write
-    echo    .\run-loadtest.bat test-completion-standard
-    echo    or
-    echo    .\run-test-with-monitoring.bat test-completion-standard
-    echo.
-    echo 📈 The K6 dashboard will be automatically loaded in Grafana
-    echo.
-    echo 🌐 Opening Grafana in your browser...
-    start http://localhost:3000
-) else (
-    echo ❌ Failed to start services. Check the logs:
-    echo    docker-compose -f grafana\docker-compose.yml logs
-    pause
-    exit /b 1
-)
-
-pause
diff --git a/tests/setup-monitoring.sh b/tests/setup-monitoring.sh
deleted file mode 100644
index 2f5ecbd0..00000000
--- a/tests/setup-monitoring.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/bash
-
-# Grafana Monitoring Setup Script for K6 Tests
-# This script sets up Grafana and Prometheus for monitoring K6 test results
-
-set -euo pipefail
-
-echo "========================================"
-echo "  K6 Test Monitoring Setup"
-echo "========================================"
-echo
-
-# Check if Docker is running
-if ! docker info >/dev/null 2>&1; then
-    echo "❌ Docker is not running. Please start Docker and try again."
-    exit 1
-fi
-
-# Check if docker-compose is available
-if ! command -v docker-compose >/dev/null 2>&1; then
-    echo "❌ docker-compose is not installed. Please install docker-compose and try again."
-    exit 1
-fi
-
-echo "✅ Docker and docker-compose are available"
-echo
-
-# Start the monitoring stack
-echo "🚀 Starting Grafana and Prometheus..."
-docker-compose -f grafana/docker-compose.yml up -d
-
-echo
-echo "⏳ Waiting for services to start..."
-sleep 10
-
-# Check if services are running
-if docker-compose -f grafana/docker-compose.yml ps | grep -q "Up"; then
-    echo "✅ Services started successfully!"
-    echo
-    echo "📊 Access your monitoring dashboard:"
-    echo "   Grafana:    http://localhost:3000"
-    echo "   Prometheus: http://localhost:9090"
-    echo
-    echo "🔐 Grafana credentials:"
-    echo "   Username: admin"
-    echo "   Password: admin"
-    echo
-    echo "🧪 To run tests with metrics:"
-    echo "   export K6_PROMETHEUS_RW_SERVER_URL=\"http://localhost:9090/api/v1/write\""
-    echo "   ./run-loadtest.sh test-completion-standard"
-    echo "   or"
-    echo "   ./run-test-with-monitoring.sh test-completion-standard"
-    echo
-    echo "📈 The K6 dashboard will be automatically loaded in Grafana"
-    echo
-else
-    echo "❌ Failed to start services. Check the logs:"
-    echo "   docker-compose -f grafana/docker-compose.yml logs"
-    exit 1
-fi
diff --git a/tests/src/health-check.js.example b/tests/src/health-check.js.example
deleted file mode 100644
index f70f60b4..00000000
--- a/tests/src/health-check.js.example
+++ /dev/null
@@ -1,71 +0,0 @@
-import http from 'k6/http';
-import { check, sleep } from 'k6';
-import { Trend, Counter } from 'k6/metrics';
-
-// ====== Config via ENV (with defaults) ======
-const BASE = __ENV.BASE || 'https://api-dev.jan.ai';
-const DURATION_MINUTES = Number(__ENV.DURATION_MIN || 2);
-const TARGET_RPS = Number(__ENV.HEALTH_RPS || 5);
-const API_KEY = __ENV.API_KEY || '';
-const LOADTEST_TOKEN = __ENV.LOADTEST_TOKEN || '';
-
-// ====== Common headers ======
-function buildHeaders(extra = {}) {
-  const h = { 'Content-Type': 'application/json', ...extra };
-  if (API_KEY) h['Authorization'] = `Bearer ${API_KEY}`;
-  if (LOADTEST_TOKEN) h['x-loadtest-token'] = LOADTEST_TOKEN;
-  return h;
-}
-
-// ====== Custom metrics ======
-const healthResponseTime = new Trend('health_response_time_ms', true);
-const healthErrors = new Counter('health_errors');
-
-// ====== Scenarios ======
-const minutes = (n) => `${n}m`;
-
-export const options = {
-  scenarios: {
-    health_check: {
-      executor: 'ramping-arrival-rate',
-      startRate: 1,
-      timeUnit: '1s',
-      preAllocatedVUs: 10,
-      maxVUs: 50,
-      stages: [
-        { duration: minutes(1), target: TARGET_RPS },
-        { duration: minutes(DURATION_MINUTES - 2 > 0 ? DURATION_MINUTES - 2 : 1), target: TARGET_RPS },
-        { duration: minutes(1), target: 0 },
-      ],
-      exec: 'checkHealth',
-      tags: { scenario: 'health_check' },
-    },
-  },
-  thresholds: {
-    'http_req_failed': ['rate<0.01'],
-    'health_response_time_ms': ['p(95)<500'],
-  },
-  discardResponseBodies: false,
-};
-
-// ====== Exec functions ======
-export function checkHealth() {
-  // Health check endpoint
-  let res = http.get(`${BASE}/health`, { headers: buildHeaders() });
-  
-  const status = String(res.status || 0);
-  const tags = { scenario: 'health_check', status };
-  
-  healthResponseTime.add(res.timings.duration, tags);
-  
-  const ok = check(res, {
-    'status 200': (r) => r.status === 200,
-    'response time < 1000ms': (r) => r.timings.duration < 1000,
-  });
-
-  if (!ok) {
-    healthErrors.add(1);
-  }
-  
-  sleep(1);
-}
diff --git a/tests/src/test-completion-conversation.js b/tests/src/test-completion-conversation.js
deleted file mode 100644
index ca87c055..00000000
--- a/tests/src/test-completion-conversation.js
+++ /dev/null
@@ -1,731 +0,0 @@
-import http from 'k6/http';
-import { check, sleep } from 'k6';
-import { Trend, Counter } from 'k6/metrics';
-
-// ====== Config via ENV (with defaults) ======
-const BASE = __ENV.BASE || 'https://api-dev.jan.ai';
-const MODEL = __ENV.MODEL || 'jan-v1-4b';
-const DEBUG = __ENV.DEBUG === 'true' || __ENV.DEBUG === '1';
-const API_KEY = __ENV.API_KEY || '';
-const LOADTEST_TOKEN = __ENV.LOADTEST_TOKEN || '';
-
-// ====== Global state ======
-let accessToken = '';
-let refreshToken = '';
-let conversationId = '';
-
-// ====== Common headers ======
-function buildHeaders(extra = {}) {
-  const h = { 'Content-Type': 'application/json' };
-  for (const key in extra) {
-    if (extra.hasOwnProperty(key)) {
-      h[key] = extra[key];
-    }
-  }
-  if (API_KEY) h['Authorization'] = `Bearer ${API_KEY}`;
-  if (LOADTEST_TOKEN) h['x-loadtest-token'] = LOADTEST_TOKEN;
-  if (accessToken) h['Authorization'] = `Bearer ${accessToken}`;
-  return h;
-}
-
-// ====== Test Configuration ======
-const TEST_ID = `test-completion-conversation-${Date.now()}`;
-const TEST_CASE = 'completion-conversation';
-
-// ====== Custom metrics ======
-const guestLoginTime = new Trend('guest_login_time_ms', true);
-const refreshTokenTime = new Trend('refresh_token_time_ms', true);
-const conversationTime = new Trend('conversation_time_ms', true);
-const completionTime = new Trend('completion_time_ms', true);
-const listConversationsTime = new Trend('list_conversations_time_ms', true);
-const conversationItemsTime = new Trend('conversation_items_time_ms', true);
-const errors = new Counter('conversation_errors');
-const successes = new Counter('conversation_successes');
-
-// ====== LLM-specific metrics ======
-const ttfb = new Trend('llm_ttfb_ms', true);
-const recvTime = new Trend('llm_receiving_ms', true);
-const totalDur = new Trend('llm_total_ms', true);
-const queueDur = new Trend('llm_queue_ms', true);
-// tokens per second is NOT a time metric; don't mark as time
-const tokRate = new Trend('llm_tokens_per_sec');
-const llmErrors = new Counter('llm_errors');
-
-// ====== Helper functions ======
-// helper: record timings with tags (scenario + status + promptType)
-function recordTimings(res, scenario, promptType) {
-  const status = String(res.status || 0);
-  const tags = { scenario, status, prompt: promptType };
-
-  ttfb.add(res.timings.waiting, tags);
-  recvTime.add(res.timings.receiving, tags);
-  totalDur.add(res.timings.duration, tags);
-
-  // custom: queue time header (ms)
-  const q = res.headers['X-Queue-Time'];
-  if (q) {
-    const val = parseFloat(q);
-    if (!isNaN(val)) queueDur.add(val, tags);
-  }
-
-  // custom: tokens/sec if usage present
-  if (status === '200') {
-    try {
-      const j = res.json();
-      const comp = j.usage?.completion_tokens || 0;
-      if (comp > 0) {
-        tokRate.add(comp / (res.timings.duration / 1000), tags);
-      }
-    } catch {}
-  }
-}
-export const options = {
-  iterations: 1,
-  vus: 1,
-  thresholds: {
-    'http_req_failed': ['rate<0.05'],
-    'guest_login_time_ms': ['p(95)<2000'],
-    'refresh_token_time_ms': ['p(95)<2000'],
-    'conversation_time_ms': ['p(95)<3000'],
-    'completion_time_ms': ['p(95)<10000'],
-    'list_conversations_time_ms': ['p(95)<3000'],
-    'conversation_items_time_ms': ['p(95)<3000'],
-  },
-  discardResponseBodies: false,
-  tags: {
-    testid: TEST_ID,
-    test_case: TEST_CASE,
-  },
-};
-
-// ====== Debug Functions ======
-function debugLog(message, data = null) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ${message}`);
-    if (data) {
-      console.log(`[DEBUG] Data:`, JSON.stringify(data, null, 2));
-    }
-  }
-}
-
-function debugRequest(method, url, headers, body) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ====== REQUEST ======`);
-    console.log(`[DEBUG] Method: ${method}`);
-    console.log(`[DEBUG] URL: ${url}`);
-    console.log(`[DEBUG] Headers:`, JSON.stringify(headers, null, 2));
-    if (body) {
-      console.log(`[DEBUG] Body:`, JSON.stringify(JSON.parse(body), null, 2));
-    }
-    console.log(`[DEBUG] ====================`);
-  }
-}
-
-function debugResponse(response) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ====== RESPONSE ======`);
-    console.log(`[DEBUG] Status: ${response.status}`);
-    console.log(`[DEBUG] Headers:`, JSON.stringify(response.headers, null, 2));
-    console.log(`[DEBUG] Body:`, response.body);
-    console.log(`[DEBUG] =====================`);
-  }
-}
-
-
-// ====== Test Functions ======
-function guestLogin() {
-  console.log('[GUEST LOGIN] Starting guest login...');
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify({});
-  const url = `${BASE}/v1/auth/guest-login`;
-  
-  debugRequest('POST', url, headers, body);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  guestLoginTime.add(duration);
-  
-  const ok = check(res, {
-    'guest login status 200': (r) => r.status === 200,
-    'guest login has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      accessToken = body.access_token;
-      
-      // Extract refresh token from Set-Cookie header
-      const setCookieHeader = res.headers['Set-Cookie'];
-      if (setCookieHeader) {
-        const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-        if (refreshTokenMatch) {
-          refreshToken = refreshTokenMatch[1];
-          console.log(`[GUEST LOGIN] ✅ Success! Token: ${accessToken.substring(0, 20)}...`);
-          console.log(`[GUEST LOGIN] ✅ Refresh token extracted`);
-        } else {
-          console.log('[GUEST LOGIN] ⚠️ No refresh token found in cookies');
-        }
-      }
-      
-      return true;
-    } catch (e) {
-      console.log('[GUEST LOGIN] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[GUEST LOGIN] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function refreshAccessToken() {
-  if (!refreshToken) {
-    console.log('[REFRESH TOKEN] ⚠️ No refresh token available, skipping');
-    return false;
-  }
-  
-  console.log('[REFRESH TOKEN] Refreshing access token...');
-  
-  const headers = {
-    'Content-Type': 'application/json',
-    'Cookie': `jan_refresh_token=${refreshToken}`,
-    'Authorization': `Bearer ${accessToken}`
-  };
-  
-  const url = `${BASE}/v1/auth/refresh-token`;
-  
-  debugRequest('GET', url, headers);
-  
-  const startTime = Date.now();
-  const res = http.get(url, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  refreshTokenTime.add(duration);
-  
-  const ok = check(res, {
-    'refresh token status 200': (r) => r.status === 200,
-    'refresh token has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      accessToken = body.access_token;
-      
-      // Update refresh token from new Set-Cookie header
-      const setCookieHeader = res.headers['Set-Cookie'];
-      if (setCookieHeader) {
-        const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-        if (refreshTokenMatch) {
-          refreshToken = refreshTokenMatch[1];
-        }
-      }
-      
-      console.log(`[REFRESH TOKEN] ✅ Success! New token: ${accessToken.substring(0, 20)}...`);
-      console.log(`[REFRESH TOKEN] ✅ Expires in: ${body.expires_in} seconds`);
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[REFRESH TOKEN] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[REFRESH TOKEN] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function createConversation() {
-  console.log('[CREATE CONV] Creating new conversation...');
-  
-  const payload = {
-    title: "Test Conversation - K6 Load Test"
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/conversations`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  conversationTime.add(duration);
-  
-  const ok = check(res, {
-    'create conversation status 2xx': (r) => r.status >= 200 && r.status < 300,
-    'create conversation has id': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.id && body.id.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      conversationId = body.id;
-      console.log(`[CREATE CONV] ✅ Success! Status: ${res.status}, Conversation ID: ${conversationId}`);
-      console.log(`[CREATE CONV] ✅ Title: ${body.title}`);
-      console.log(`[CREATE CONV] ✅ Object: ${body.object}`);
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[CREATE CONV] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log(`[CREATE CONV] ❌ Failed! Status: ${res.status}, Body: ${res.body}`);
-    errors.add(1);
-    return false;
-  }
-}
-
-function addMessageToConversation(message, isFirstMessage = false) {
-  console.log(`[ADD MESSAGE] Adding non-streaming message to conversation...`);
-  
-  const payload = {
-    model: MODEL,
-    messages: [
-      { role: 'user', content: message }
-    ],
-    temperature: 0.7,
-    max_tokens: 150,
-    stream: false,
-    conversation: conversationId,
-    store: true
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/conv/chat/completions`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  completionTime.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'conversation_nonstream', 'standard');
-  
-  const ok = check(res, {
-    'conversation completion status 200': (r) => r.status === 200,
-    'conversation completion has choices': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.choices && body.choices.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      const content = body.choices[0].message.content;
-      console.log(`[ADD MESSAGE] ✅ Success! Response ID: ${body.id}`);
-      console.log(`[ADD MESSAGE] ✅ Content: ${content.substring(0, 80)}...`);
-      if (isFirstMessage) {
-        console.log(`[ADD MESSAGE] ✅ First message in conversation ${conversationId}`);
-      }
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[ADD MESSAGE] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[ADD MESSAGE] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function addStreamingMessageToConversation(message, isFirstMessage = false) {
-  console.log(`[ADD STREAMING MESSAGE] Adding streaming message to conversation...`);
-  
-  const payload = {
-    model: MODEL,
-    messages: [
-      { role: 'user', content: message }
-    ],
-    temperature: 0.7,
-    max_tokens: 150,
-    stream: true,
-    conversation: conversationId,
-    store: true
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/conv/chat/completions`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  completionTime.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'conversation_stream', 'standard');
-  
-  const ok = check(res, {
-    'streaming completion status 200': (r) => r.status === 200,
-    'streaming completion has content': (r) => r.body && r.body.length > 0,
-    'streaming completion is event-stream': (r) => r.headers['Content-Type'] && r.headers['Content-Type'].includes('text/event-stream')
-  });
-  
-  if (ok) {
-    const lines = res.body.split('\n');
-    let chunkCount = 0;
-    let hasContent = false;
-    let hasDone = false;
-    let responseId = '';
-    
-    for (let i = 0; i < lines.length; i++) {
-      const line = lines[i].trim();
-      if (line.startsWith('data: ')) {
-        if (line.includes('[DONE]')) {
-          hasDone = true;
-          console.log(`[ADD STREAMING MESSAGE] ✅ Received completion signal: data: [DONE]`);
-        } else {
-          try {
-            chunkCount++;
-            const data = JSON.parse(line.substring(6));
-            if (data.id && !responseId) {
-              responseId = data.id;
-            }
-            if (data.choices && data.choices[0] && data.choices[0].delta && data.choices[0].delta.content) {
-              hasContent = true;
-            }
-          } catch (e) {
-            // Ignore parsing errors for non-JSON lines
-          }
-        }
-      }
-    }
-    
-    console.log(`[ADD STREAMING MESSAGE] ✅ Success! Response ID: ${responseId}`);
-    console.log(`[ADD STREAMING MESSAGE] ✅ Received ${chunkCount} chunks`);
-    console.log(`[ADD STREAMING MESSAGE] ✅ Has content: ${hasContent}`);
-    console.log(`[ADD STREAMING MESSAGE] ✅ Stream completed: ${hasDone}`);
-    
-    if (!hasDone) {
-      console.log('[ADD STREAMING MESSAGE] ⚠️ Warning: No [DONE] signal received');
-    }
-    
-    if (isFirstMessage) {
-      console.log(`[ADD STREAMING MESSAGE] ✅ First streaming message in conversation ${conversationId}`);
-    }
-    
-    successes.add(1);
-    return true;
-  } else {
-    console.log('[ADD STREAMING MESSAGE] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function getConversation() {
-  if (!conversationId) {
-    console.log('[GET CONV] ⚠️ No conversation ID available, skipping');
-    return false;
-  }
-  
-  console.log('[GET CONV] Loading conversation details...');
-  
-  const headers = buildHeaders();
-  const url = `${BASE}/v1/conversations/${conversationId}`;
-  
-  debugRequest('GET', url, headers);
-  
-  const startTime = Date.now();
-  const res = http.get(url, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  conversationTime.add(duration);
-  
-  const ok = check(res, {
-    'get conversation status 200': (r) => r.status === 200,
-    'get conversation has data': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.id && body.title;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      console.log(`[GET CONV] ✅ Success! ID: ${body.id}`);
-      console.log(`[GET CONV] ✅ Title: ${body.title}`);
-      console.log(`[GET CONV] ✅ Object: ${body.object}`);
-      console.log(`[GET CONV] ✅ Created: ${body.created_at}`);
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[GET CONV] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[GET CONV] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function listConversations() {
-  console.log('[LIST CONV] Loading conversations list...');
-  
-  const headers = buildHeaders();
-  const url = `${BASE}/v1/conversations`;
-  
-  debugRequest('GET', url, headers);
-  
-  const startTime = Date.now();
-  const res = http.get(url, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  listConversationsTime.add(duration);
-  
-  const ok = check(res, {
-    'list conversations status 200': (r) => r.status === 200,
-    'list conversations has data': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.object === 'list' && body.data && Array.isArray(body.data);
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      console.log(`[LIST CONV] ✅ Success! Object: ${body.object}`);
-      console.log(`[LIST CONV] ✅ Total conversations: ${body.data.length}`);
-      console.log(`[LIST CONV] ✅ Has more: ${body.has_more}`);
-      
-      // Check if our conversation is in the list
-      const ourConv = body.data.find(conv => conv.id === conversationId);
-      if (ourConv) {
-        console.log(`[LIST CONV] ✅ Found our conversation: ${ourConv.title}`);
-      }
-      
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[LIST CONV] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[LIST CONV] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function getConversationItems() {
-  if (!conversationId) {
-    console.log('[GET ITEMS] ⚠️ No conversation ID available, skipping');
-    return false;
-  }
-  
-  console.log('[GET ITEMS] Loading conversation items...');
-  
-  const headers = buildHeaders();
-  const url = `${BASE}/v1/conversations/${conversationId}/items`;
-  
-  debugRequest('GET', url, headers);
-  
-  const startTime = Date.now();
-  const res = http.get(url, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  conversationItemsTime.add(duration);
-  
-  const ok = check(res, {
-    'get items status 200': (r) => r.status === 200,
-    'get items has data': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.data && Array.isArray(body.data);
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      console.log(`[GET ITEMS] ✅ Success! Object: ${body.object}`);
-      console.log(`[GET ITEMS] ✅ Items count: ${body.data.length}`);
-      console.log(`[GET ITEMS] ✅ Total: ${body.total}`);
-      console.log(`[GET ITEMS] ✅ Has more: ${body.has_more}`);
-      
-      if (body.data.length > 0) {
-        console.log('[GET ITEMS] ✅ Items:');
-        body.data.forEach((item, index) => {
-          console.log(`[GET ITEMS]   ${index + 1}. ID: ${item.id}, Role: ${item.role}, Type: ${item.type}`);
-          if (item.content && item.content.length > 0 && item.content[0].text) {
-            const content = item.content[0].text.value;
-            const preview = content.length > 50 ? content.substring(0, 50) + '...' : content;
-            console.log(`[GET ITEMS]      Content: ${preview}`);
-          }
-        });
-      }
-      
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[GET ITEMS] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[GET ITEMS] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-// ====== Main Test Function ======
-export default function() {
-  console.log('\n========================================');
-  console.log('  CONVERSATION COMPLETION TESTS');
-  console.log('========================================');
-  console.log(`Base URL: ${BASE}`);
-  console.log(`Model: ${MODEL}`);
-  console.log(`Debug Mode: ${DEBUG ? 'ENABLED' : 'DISABLED'}`);
-  console.log('');
-  
-  // Step 1: Guest Login
-  console.log('[1/8] Guest Login');
-  if (!guestLogin()) {
-    console.log('❌ Guest login failed, aborting test');
-    return;
-  }
-  sleep(1);
-  
-  // Step 2: Refresh Token
-  console.log('\n[2/8] Refresh Token');
-  refreshAccessToken();
-  sleep(1);
-  
-  // Step 3: Create Conversation
-  console.log('\n[3/8] Create Conversation');
-  if (!createConversation()) {
-    console.log('❌ Create conversation failed, aborting test');
-    return;
-  }
-  sleep(1);
-  
-  // Step 4: Add First Message (Non-Streaming)
-  console.log('\n[4/9] Add First Message to Conversation (Non-Streaming)');
-  refreshAccessToken(); // Refresh before completion
-  addMessageToConversation('Hello! What is artificial intelligence?', true);
-  sleep(1);
-  
-  // Step 5: Add Second Message (Streaming)
-  console.log('\n[5/9] Add Second Message to Conversation (Streaming)');
-  refreshAccessToken(); // Refresh before completion
-  addStreamingMessageToConversation('Can you explain machine learning in simple terms?');
-  sleep(1);
-  
-  // Step 6: Get Conversation Details
-  console.log('\n[6/9] Get Conversation Details');
-  getConversation();
-  sleep(1);
-  
-  // Step 7: List All Conversations
-  console.log('\n[7/9] List All Conversations');
-  listConversations();
-  sleep(1);
-  
-  // Step 8: Get Conversation Items
-  console.log('\n[8/9] Get Conversation Items');
-  getConversationItems();
-  
-  // Summary
-  console.log('\n========================================');
-  console.log('           TEST SUMMARY');
-  console.log('========================================');
-  console.log('✅ Guest authentication');
-  console.log('✅ Token refresh');
-  console.log('✅ Conversation creation');
-  console.log('✅ Non-streaming message exchange');
-  console.log('✅ Streaming message exchange');
-  console.log('✅ Conversation retrieval');
-  console.log('✅ Conversation listing');
-  console.log('✅ Item retrieval');
-  console.log('');
-  console.log('Conversation completion tests completed!');
-  console.log(`Conversation ID: ${conversationId}`);
-  console.log('========================================\n');
-  
-  sleep(2);
-}
diff --git a/tests/src/test-completion-standard.js b/tests/src/test-completion-standard.js
deleted file mode 100644
index a0eee982..00000000
--- a/tests/src/test-completion-standard.js
+++ /dev/null
@@ -1,525 +0,0 @@
-import http from 'k6/http';
-import { check, sleep } from 'k6';
-import { Trend, Counter } from 'k6/metrics';
-
-// ====== Config via ENV (with defaults) ======
-const BASE = __ENV.BASE || 'https://api-dev.jan.ai';
-const MODEL = __ENV.MODEL || 'jan-v1-4b';
-const DEBUG = __ENV.DEBUG === 'true' || __ENV.DEBUG === '1';
-const API_KEY = __ENV.API_KEY || '';
-const LOADTEST_TOKEN = __ENV.LOADTEST_TOKEN || '';
-
-// ====== Global state ======
-let accessToken = '';
-let refreshToken = '';
-
-// ====== Common headers ======
-function buildHeaders(extra = {}) {
-  const h = { 'Content-Type': 'application/json' };
-  for (const key in extra) {
-    if (extra.hasOwnProperty(key)) {
-      h[key] = extra[key];
-    }
-  }
-  if (API_KEY) h['Authorization'] = `Bearer ${API_KEY}`;
-  if (LOADTEST_TOKEN) h['x-loadtest-token'] = LOADTEST_TOKEN;
-  if (accessToken) h['Authorization'] = `Bearer ${accessToken}`;
-  return h;
-}
-
-// ====== Test Configuration ======
-const TEST_ID = `test-completion-standard-${Date.now()}`;
-const TEST_CASE = 'completion-standard';
-
-// ====== Custom metrics ======
-const guestLoginTime = new Trend('guest_login_time_ms', true);
-const refreshTokenTime = new Trend('refresh_token_time_ms', true);
-const modelsTime = new Trend('models_time_ms', true);
-const completionTime = new Trend('completion_time_ms', true);
-const streamingTime = new Trend('streaming_time_ms', true);
-const errors = new Counter('completion_errors');
-const successes = new Counter('completion_successes');
-
-// ====== LLM-specific metrics ======
-const ttfb = new Trend('llm_ttfb_ms', true);
-const recvTime = new Trend('llm_receiving_ms', true);
-const totalDur = new Trend('llm_total_ms', true);
-const queueDur = new Trend('llm_queue_ms', true);
-// tokens per second is NOT a time metric; don't mark as time
-const tokRate = new Trend('llm_tokens_per_sec');
-const llmErrors = new Counter('llm_errors');
-
-// ====== Helper functions ======
-// helper: record timings with tags (scenario + status + promptType)
-function recordTimings(res, scenario, promptType) {
-  const status = String(res.status || 0);
-  const tags = { scenario, status, prompt: promptType };
-
-  ttfb.add(res.timings.waiting, tags);
-  recvTime.add(res.timings.receiving, tags);
-  totalDur.add(res.timings.duration, tags);
-
-  // custom: queue time header (ms)
-  const q = res.headers['X-Queue-Time'];
-  if (q) {
-    const val = parseFloat(q);
-    if (!isNaN(val)) queueDur.add(val, tags);
-  }
-
-  // custom: tokens/sec if usage present
-  if (status === '200') {
-    try {
-      const j = res.json();
-      const comp = j.usage?.completion_tokens || 0;
-      if (comp > 0) {
-        tokRate.add(comp / (res.timings.duration / 1000), tags);
-      }
-    } catch {}
-  }
-}
-
-// ====== Options ======
-export const options = {
-  iterations: 1,
-  vus: 1,
-  thresholds: {
-    'http_req_failed': ['rate<0.05'],
-    'guest_login_time_ms': ['p(95)<2000'],
-    'refresh_token_time_ms': ['p(95)<2000'],
-    'models_time_ms': ['p(95)<2000'],
-    'completion_time_ms': ['p(95)<10000'],
-    'streaming_time_ms': ['p(95)<15000'],
-  },
-  discardResponseBodies: false,
-  tags: {
-    testid: TEST_ID,
-    test_case: TEST_CASE,
-  },
-};
-
-// ====== Debug Functions ======
-function debugLog(message, data = null) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ${message}`);
-    if (data) {
-      console.log(`[DEBUG] Data:`, JSON.stringify(data, null, 2));
-    }
-  }
-}
-
-function debugRequest(method, url, headers, body) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ====== REQUEST ======`);
-    console.log(`[DEBUG] Method: ${method}`);
-    console.log(`[DEBUG] URL: ${url}`);
-    console.log(`[DEBUG] Headers:`, JSON.stringify(headers, null, 2));
-    if (body) {
-      console.log(`[DEBUG] Body:`, JSON.stringify(JSON.parse(body), null, 2));
-    }
-    console.log(`[DEBUG] ====================`);
-  }
-}
-
-function debugResponse(response) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ====== RESPONSE ======`);
-    console.log(`[DEBUG] Status: ${response.status}`);
-    console.log(`[DEBUG] Headers:`, JSON.stringify(response.headers, null, 2));
-    console.log(`[DEBUG] Body:`, response.body);
-    console.log(`[DEBUG] =====================`);
-  }
-}
-
-
-// ====== Test Functions ======
-function guestLogin() {
-  console.log('[GUEST LOGIN] Starting guest login...');
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify({});
-  const url = `${BASE}/v1/auth/guest-login`;
-  
-  debugRequest('POST', url, headers, body);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  guestLoginTime.add(duration);
-  
-  const ok = check(res, {
-    'guest login status 200': (r) => r.status === 200,
-    'guest login has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      accessToken = body.access_token;
-      
-      // Extract refresh token from Set-Cookie header
-      const setCookieHeader = res.headers['Set-Cookie'];
-      if (setCookieHeader) {
-        const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-        if (refreshTokenMatch) {
-          refreshToken = refreshTokenMatch[1];
-          console.log(`[GUEST LOGIN] ✅ Success! Token: ${accessToken.substring(0, 20)}...`);
-          console.log(`[GUEST LOGIN] ✅ Refresh token extracted`);
-        } else {
-          console.log('[GUEST LOGIN] ⚠️ No refresh token found in cookies');
-        }
-      }
-      
-      return true;
-    } catch (e) {
-      console.log('[GUEST LOGIN] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[GUEST LOGIN] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function refreshAccessToken() {
-  if (!refreshToken) {
-    console.log('[REFRESH TOKEN] ⚠️ No refresh token available, skipping');
-    return false;
-  }
-  
-  console.log('[REFRESH TOKEN] Refreshing access token...');
-  
-  const headers = {
-    'Content-Type': 'application/json',
-    'Cookie': `jan_refresh_token=${refreshToken}`,
-    'Authorization': `Bearer ${accessToken}`
-  };
-  
-  const url = `${BASE}/v1/auth/refresh-token`;
-  
-  debugRequest('GET', url, headers);
-  
-  const startTime = Date.now();
-  const res = http.get(url, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  refreshTokenTime.add(duration);
-  
-  const ok = check(res, {
-    'refresh token status 200': (r) => r.status === 200,
-    'refresh token has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      accessToken = body.access_token;
-      
-      // Update refresh token from new Set-Cookie header
-      const setCookieHeader = res.headers['Set-Cookie'];
-      if (setCookieHeader) {
-        const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-        if (refreshTokenMatch) {
-          refreshToken = refreshTokenMatch[1];
-        }
-      }
-      
-      console.log(`[REFRESH TOKEN] ✅ Success! New token: ${accessToken.substring(0, 20)}...`);
-      console.log(`[REFRESH TOKEN] ✅ Expires in: ${body.expires_in} seconds`);
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[REFRESH TOKEN] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[REFRESH TOKEN] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function testModels() {
-  console.log('[MODELS] Testing models endpoint...');
-  
-  const headers = buildHeaders();
-  const url = `${BASE}/v1/models`;
-  
-  debugRequest('GET', url, headers);
-  
-  const startTime = Date.now();
-  const res = http.get(url, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  modelsTime.add(duration);
-  
-  const ok = check(res, {
-    'models status 200': (r) => r.status === 200,
-    'models has data': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.object === 'list' && body.data && Array.isArray(body.data);
-      } catch (e) {
-        return false;
-      }
-    },
-    'models includes target model': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.data.some(model => model.id === MODEL);
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      console.log(`[MODELS] ✅ Success! Found ${body.data.length} models`);
-      console.log(`[MODELS] ✅ Target model ${MODEL} is available`);
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[MODELS] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[MODELS] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function testNonStreamingCompletion() {
-  console.log('[NON-STREAMING] Testing standard completion...');
-  
-  const payload = {
-    model: MODEL,
-    messages: [
-      { role: 'user', content: 'Hello! Tell me a short interesting fact about artificial intelligence.' }
-    ],
-    temperature: 0.7,
-    max_tokens: 150,
-    stream: false
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/chat/completions`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  completionTime.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'completion_nonstream', 'standard');
-  
-  const ok = check(res, {
-    'completion status 200': (r) => r.status === 200,
-    'completion has choices': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.choices && body.choices.length > 0;
-      } catch (e) {
-        return false;
-      }
-    },
-    'completion has content': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.choices[0].message && body.choices[0].message.content;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      const content = body.choices[0].message.content;
-      console.log(`[NON-STREAMING] ✅ Success! Response ID: ${body.id}`);
-      console.log(`[NON-STREAMING] ✅ Content: ${content.substring(0, 100)}...`);
-      console.log(`[NON-STREAMING] ✅ Usage: ${body.usage.total_tokens} tokens`);
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[NON-STREAMING] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[NON-STREAMING] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function testStreamingCompletion() {
-  console.log('[STREAMING] Testing streaming completion...');
-  
-  const payload = {
-    model: MODEL,
-    messages: [
-      { role: 'user', content: 'Write a short poem about technology in exactly 4 lines.' }
-    ],
-    temperature: 0.8,
-    max_tokens: 100,
-    stream: true
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/chat/completions`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  streamingTime.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'completion_stream', 'standard');
-  
-  const ok = check(res, {
-    'streaming status 200': (r) => r.status === 200,
-    'streaming has content': (r) => r.body && r.body.length > 0,
-    'streaming is event-stream': (r) => r.headers['Content-Type'] && r.headers['Content-Type'].includes('text/event-stream')
-  });
-  
-  if (ok) {
-    const lines = res.body.split('\n');
-    let chunkCount = 0;
-    let hasContent = false;
-    let hasDone = false;
-    
-    for (let i = 0; i < lines.length; i++) {
-      const line = lines[i].trim();
-      if (line.startsWith('data: ')) {
-        if (line.includes('[DONE]')) {
-          hasDone = true;
-          console.log(`[STREAMING] ✅ Received completion signal: data: [DONE]`);
-        } else {
-          try {
-            chunkCount++;
-            const data = JSON.parse(line.substring(6));
-            if (data.choices && data.choices[0].delta && data.choices[0].delta.content) {
-              hasContent = true;
-            }
-          } catch (e) {
-            // Ignore parsing errors for non-JSON lines
-          }
-        }
-      }
-    }
-    
-    console.log(`[STREAMING] ✅ Success! Received ${chunkCount} chunks`);
-    console.log(`[STREAMING] ✅ Has content: ${hasContent}`);
-    console.log(`[STREAMING] ✅ Stream completed: ${hasDone}`);
-    
-    if (!hasDone) {
-      console.log('[STREAMING] ⚠️ Warning: No [DONE] signal received');
-    }
-    
-    successes.add(1);
-    return true;
-  } else {
-    console.log('[STREAMING] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-// ====== Main Test Function ======
-export default function() {
-  console.log('\n========================================');
-  console.log('  STANDARD COMPLETION TESTS');
-  console.log('========================================');
-  console.log(`Base URL: ${BASE}`);
-  console.log(`Model: ${MODEL}`);
-  console.log(`Debug Mode: ${DEBUG ? 'ENABLED' : 'DISABLED'}`);
-  console.log('');
-  
-  // Step 1: Guest Login
-  console.log('[1/5] Guest Login');
-  if (!guestLogin()) {
-    console.log('❌ Guest login failed, aborting test');
-    return;
-  }
-  sleep(1);
-  
-  // Step 2: Refresh Token
-  console.log('\n[2/5] Refresh Token');
-  refreshAccessToken();
-  sleep(1);
-  
-  // Step 3: Test Models
-  console.log('\n[3/5] Test Models Endpoint');
-  testModels();
-  sleep(1);
-  
-  // Step 4: Non-Streaming Completion
-  console.log('\n[4/5] Non-Streaming Completion');
-  refreshAccessToken(); // Refresh before completion
-  testNonStreamingCompletion();
-  sleep(1);
-  
-  // Step 5: Streaming Completion
-  console.log('\n[5/5] Streaming Completion');
-  refreshAccessToken(); // Refresh before completion
-  testStreamingCompletion();
-  
-  // Summary
-  console.log('\n========================================');
-  console.log('           TEST SUMMARY');
-  console.log('========================================');
-  console.log('✅ Guest authentication');
-  console.log('✅ Token refresh');
-  console.log('✅ Models endpoint');
-  console.log('✅ Non-streaming completions');
-  console.log('✅ Streaming completions');
-  console.log('');
-  console.log('Standard completion tests completed!');
-  console.log('========================================\n');
-  
-  sleep(2);
-}
diff --git a/tests/src/test-responses.js b/tests/src/test-responses.js
deleted file mode 100644
index 884e1e8b..00000000
--- a/tests/src/test-responses.js
+++ /dev/null
@@ -1,695 +0,0 @@
-import http from 'k6/http';
-import { check, sleep } from 'k6';
-import { Trend, Counter } from 'k6/metrics';
-
-// ====== Config via ENV (with defaults) ======
-const BASE = __ENV.BASE || 'https://api-dev.jan.ai';
-const MODEL = __ENV.MODEL || 'jan-v1-4b';
-const DEBUG = __ENV.DEBUG === 'true' || __ENV.DEBUG === '1';
-const API_KEY = __ENV.API_KEY || '';
-const LOADTEST_TOKEN = __ENV.LOADTEST_TOKEN || '';
-
-// ====== Global state ======
-let accessToken = '';
-let refreshToken = '';
-
-// ====== Common headers ======
-function buildHeaders(extra = {}) {
-  const h = { 'Content-Type': 'application/json' };
-  for (const key in extra) {
-    if (extra.hasOwnProperty(key)) {
-      h[key] = extra[key];
-    }
-  }
-  if (API_KEY) h['Authorization'] = `Bearer ${API_KEY}`;
-  if (LOADTEST_TOKEN) h['x-loadtest-token'] = LOADTEST_TOKEN;
-  if (accessToken) h['Authorization'] = `Bearer ${accessToken}`;
-  return h;
-}
-
-// ====== Test Configuration ======
-const TEST_ID = `test-responses-${Date.now()}`;
-const TEST_CASE = 'responses';
-
-// ====== Custom metrics ======
-const guestLoginTime = new Trend('guest_login_time_ms', true);
-const refreshTokenTime = new Trend('refresh_token_time_ms', true);
-const responseTime = new Trend('response_time_ms', true);
-const responseStreamTime = new Trend('response_stream_time_ms', true);
-const responseTimeWithTools = new Trend('response_time_with_tools_ms', true);
-const responseStreamTimeWithTools = new Trend('response_stream_time_with_tools_ms', true);
-const errors = new Counter('response_errors');
-const successes = new Counter('response_successes');
-
-// ====== LLM-specific metrics ======
-const ttfb = new Trend('llm_ttfb_ms', true);
-const recvTime = new Trend('llm_receiving_ms', true);
-const totalDur = new Trend('llm_total_ms', true);
-const queueDur = new Trend('llm_queue_ms', true);
-// tokens per second is NOT a time metric; don't mark as time
-const tokRate = new Trend('llm_tokens_per_sec');
-const llmErrors = new Counter('llm_errors');
-
-// ====== Helper functions ======
-// helper: record timings with tags (scenario + status + promptType)
-function recordTimings(res, scenario, promptType) {
-  const status = String(res.status || 0);
-  const tags = { scenario, status, prompt: promptType };
-
-  ttfb.add(res.timings.waiting, tags);
-  recvTime.add(res.timings.receiving, tags);
-  totalDur.add(res.timings.duration, tags);
-
-  // custom: queue time header (ms)
-  const q = res.headers['X-Queue-Time'];
-  if (q) {
-    const val = parseFloat(q);
-    if (!isNaN(val)) queueDur.add(val, tags);
-  }
-
-  // custom: tokens/sec if usage present
-  if (status === '200') {
-    try {
-      const j = res.json();
-      const comp = j.usage?.completion_tokens || 0;
-      if (comp > 0) {
-        tokRate.add(comp / (res.timings.duration / 1000), tags);
-      }
-    } catch {}
-  }
-}
-export const options = {
-  iterations: 1,
-  vus: 1,
-  thresholds: {
-    'http_req_failed': ['rate<0.05'],
-    'guest_login_time_ms': ['p(95)<2000'],
-    'refresh_token_time_ms': ['p(95)<2000'],
-    'response_time_ms': ['p(95)<60000'],
-    'response_stream_time_ms': ['p(95)<60000'],
-    'response_time_with_tools_ms': ['p(95)<300000'],
-    'response_stream_time_with_tools_ms': ['p(95)<300000'],
-  },
-  discardResponseBodies: false,
-  tags: {
-    testid: TEST_ID,
-    test_case: TEST_CASE,
-  },
-};
-
-// ====== Debug Functions ======
-function debugLog(message, data = null) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ${message}`);
-    if (data) {
-      console.log(`[DEBUG] Data:`, JSON.stringify(data, null, 2));
-    }
-  }
-}
-
-function debugRequest(method, url, headers, body) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ====== REQUEST ======`);
-    console.log(`[DEBUG] Method: ${method}`);
-    console.log(`[DEBUG] URL: ${url}`);
-    console.log(`[DEBUG] Headers:`, JSON.stringify(headers, null, 2));
-    if (body) {
-      console.log(`[DEBUG] Body:`, JSON.stringify(JSON.parse(body), null, 2));
-    }
-    console.log(`[DEBUG] ====================`);
-  }
-}
-
-function debugResponse(response) {
-  if (DEBUG) {
-    console.log(`[DEBUG] ====== RESPONSE ======`);
-    console.log(`[DEBUG] Status: ${response.status}`);
-    console.log(`[DEBUG] Headers:`, JSON.stringify(response.headers, null, 2));
-    console.log(`[DEBUG] Body:`, response.body);
-    console.log(`[DEBUG] =====================`);
-  }
-}
-
-
-// ====== Test Functions ======
-function guestLogin() {
-  console.log('[GUEST LOGIN] Starting guest login...');
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify({});
-  const url = `${BASE}/v1/auth/guest-login`;
-  
-  debugRequest('POST', url, headers, body);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  guestLoginTime.add(duration);
-  
-  const ok = check(res, {
-    'guest login status 200': (r) => r.status === 200,
-    'guest login has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      accessToken = body.access_token;
-      
-      // Extract refresh token from Set-Cookie header
-      const setCookieHeader = res.headers['Set-Cookie'];
-      if (setCookieHeader) {
-        const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-        if (refreshTokenMatch) {
-          refreshToken = refreshTokenMatch[1];
-          console.log(`[GUEST LOGIN] ✅ Success! Token: ${accessToken.substring(0, 20)}...`);
-          console.log(`[GUEST LOGIN] ✅ Refresh token extracted`);
-        } else {
-          console.log('[GUEST LOGIN] ⚠️ No refresh token found in cookies');
-        }
-      }
-      
-      return true;
-    } catch (e) {
-      console.log('[GUEST LOGIN] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[GUEST LOGIN] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function refreshAccessToken() {
-  if (!refreshToken) {
-    console.log('[REFRESH TOKEN] ⚠️ No refresh token available, skipping');
-    return false;
-  }
-  
-  console.log('[REFRESH TOKEN] Refreshing access token...');
-  
-  const headers = {
-    'Content-Type': 'application/json',
-    'Cookie': `jan_refresh_token=${refreshToken}`,
-    'Authorization': `Bearer ${accessToken}`
-  };
-  
-  const url = `${BASE}/v1/auth/refresh-token`;
-  
-  debugRequest('GET', url, headers);
-  
-  const startTime = Date.now();
-  const res = http.get(url, { headers });
-  
-  debugResponse(res);
-  
-  const duration = Date.now() - startTime;
-  refreshTokenTime.add(duration);
-  
-  const ok = check(res, {
-    'refresh token status 200': (r) => r.status === 200,
-    'refresh token has access_token': (r) => {
-      try {
-        const body = JSON.parse(r.body);
-        return body.access_token && body.access_token.length > 0;
-      } catch (e) {
-        return false;
-      }
-    }
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      accessToken = body.access_token;
-      
-      // Update refresh token from new Set-Cookie header
-      const setCookieHeader = res.headers['Set-Cookie'];
-      if (setCookieHeader) {
-        const refreshTokenMatch = setCookieHeader.match(/jan_refresh_token=([^;]+)/);
-        if (refreshTokenMatch) {
-          refreshToken = refreshTokenMatch[1];
-        }
-      }
-      
-      console.log(`[REFRESH TOKEN] ✅ Success! New token: ${accessToken.substring(0, 20)}...`);
-      console.log(`[REFRESH TOKEN] ✅ Expires in: ${body.expires_in} seconds`);
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[REFRESH TOKEN] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[REFRESH TOKEN] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function testResponseApiNonStreamWithoutTools() {
-  console.log('[RESPONSE NO-STREAM] Testing response API non-stream without tools...');
-  
-  const payload = {
-    model: MODEL,
-    input: [
-      {
-        role: 'user',
-        content: 'Tell me about the latest advancements in renewable energy technology.'
-      }
-    ],
-    stream: false
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/responses`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  responseTime.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'response_nonstream', 'no_tools');
-  
-  const ok = check(res, {
-    'response non-stream status 200': (r) => r.status === 200,
-    'response non-stream has content': (r) => r.body && r.body.length > 0
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      console.log(`[RESPONSE NO-STREAM] ✅ Success! Response received`);
-      if (body.object) {
-        console.log(`[RESPONSE NO-STREAM] ✅ Object: ${body.object}`);
-      }
-      if (body.choices && body.choices.length > 0) {
-        const content = body.choices[0].message.content;
-        console.log(`[RESPONSE NO-STREAM] ✅ Content: ${content.substring(0, 100)}...`);
-      }
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[RESPONSE NO-STREAM] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[RESPONSE NO-STREAM] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function testResponseApiNonStreamWithTools() {
-  console.log('[RESPONSE TOOLS] Testing response API non-stream with tools...');
-  
-  const payload = {
-    model: MODEL,
-    input: [
-      {
-        role: 'user',
-        content: 'Google all news about the latest FED rate and summarize it for me'
-      }
-    ],
-    stream: false,
-    tools: [
-      {
-        type: 'function',
-        function: {
-          name: 'google_search',
-          description: 'Tool to perform web searches via Serper API and retrieve rich results. It is able to retrieve organic search results, people also ask, related searches, and knowledge graph.',
-          parameters: {
-            type: 'object',
-            properties: {
-              autocorrect: {
-                description: 'Whether to autocorrect spelling in query',
-                type: 'boolean'
-              },
-              gl: {
-                description: 'Optional region code for search results in ISO 3166-1 alpha-2 format (e.g. us, uk)',
-                type: 'string'
-              },
-              hl: {
-                description: 'Optional language code for search results in ISO 639-1 format (e.g. en, es)',
-                type: 'string'
-              },
-              num: {
-                description: 'Number of search results to return (1-100, default 10)',
-                type: 'integer',
-                minimum: 1,
-                maximum: 100
-              },
-              q: {
-                description: 'Search query string',
-                type: 'string'
-              }
-            },
-            required: ['q']
-          }
-        }
-      }
-    ]
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/responses`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  responseTimeWithTools.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'response_nonstream', 'with_tools');
-  
-  const ok = check(res, {
-    'response tools status 200': (r) => r.status === 200,
-    'response tools has content': (r) => r.body && r.body.length > 0
-  });
-  
-  if (ok) {
-    try {
-      const body = JSON.parse(res.body);
-      console.log(`[RESPONSE TOOLS] ✅ Success! Response with tools received`);
-      if (body.object) {
-        console.log(`[RESPONSE TOOLS] ✅ Object: ${body.object}`);
-      }
-      if (body.choices && body.choices.length > 0) {
-        const choice = body.choices[0];
-        if (choice.message && choice.message.content) {
-          const content = choice.message.content;
-          console.log(`[RESPONSE TOOLS] ✅ Content: ${content.substring(0, 100)}...`);
-        }
-        if (choice.message && choice.message.tool_calls) {
-          console.log(`[RESPONSE TOOLS] ✅ Tool calls: ${choice.message.tool_calls.length}`);
-        }
-      }
-      successes.add(1);
-      return true;
-    } catch (e) {
-      console.log('[RESPONSE TOOLS] ❌ Failed to parse response');
-      errors.add(1);
-      return false;
-    }
-  } else {
-    console.log('[RESPONSE TOOLS] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function testResponseApiStreamWithoutTools() {
-  console.log('[RESPONSE STREAM] Testing response API stream without tools...');
-  
-  const payload = {
-    model: MODEL,
-    input: [
-      {
-        role: 'user',
-        content: 'Write a detailed explanation of quantum computing in 3 paragraphs.'
-      }
-    ],
-    stream: true
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/responses`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  responseStreamTime.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'response_stream', 'no_tools');
-  
-  const ok = check(res, {
-    'response stream status 200': (r) => r.status === 200,
-    'response stream has content': (r) => r.body && r.body.length > 0,
-    'response stream is event-stream': (r) => r.headers['Content-Type'] && r.headers['Content-Type'].includes('text/event-stream')
-  });
-  
-  if (ok) {
-    const lines = res.body.split('\n');
-    let chunkCount = 0;
-    let hasContent = false;
-    let hasMetadata = false;
-    let hasDone = false;
-    
-    for (let i = 0; i < lines.length; i++) {
-      const line = lines[i].trim();
-      if (line.startsWith('data: ')) {
-        if (line.includes('[DONE]')) {
-          hasDone = true;
-          console.log(`[RESPONSE STREAM] ✅ Received completion signal: data: [DONE]`);
-        } else {
-          try {
-            chunkCount++;
-            const data = JSON.parse(line.substring(6));
-            if (data.choices && data.choices[0] && data.choices[0].delta && data.choices[0].delta.content) {
-              hasContent = true;
-            }
-            if (data.metadata) {
-              hasMetadata = true;
-            }
-          } catch (e) {
-            // Ignore parsing errors for non-JSON lines
-          }
-        }
-      }
-    }
-    
-    console.log(`[RESPONSE STREAM] ✅ Success! Received ${chunkCount} chunks`);
-    console.log(`[RESPONSE STREAM] ✅ Has content: ${hasContent}`);
-    console.log(`[RESPONSE STREAM] ✅ Has metadata: ${hasMetadata}`);
-    console.log(`[RESPONSE STREAM] ✅ Stream completed: ${hasDone}`);
-    
-    if (!hasDone) {
-      console.log('[RESPONSE STREAM] ⚠️ Warning: No [DONE] signal received');
-    }
-    
-    successes.add(1);
-    return true;
-  } else {
-    console.log('[RESPONSE STREAM] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-function testResponseApiStreamWithTools() {
-  console.log('[RESPONSE STREAM TOOLS] Testing response API stream with tools...');
-  
-  const payload = {
-    model: MODEL,
-    input: [
-      {
-        role: 'user',
-        content: 'Search for the latest AI breakthroughs and explain their significance'
-      }
-    ],
-    stream: true,
-    tools: [
-      {
-        type: 'function',
-        function: {
-          name: 'google_search',
-          description: 'Tool to perform web searches via Serper API and retrieve rich results.',
-          parameters: {
-            type: 'object',
-            properties: {
-              q: {
-                description: 'Search query string',
-                type: 'string'
-              },
-              num: {
-                description: 'Number of search results to return',
-                type: 'integer',
-                minimum: 1,
-                maximum: 10
-              }
-            },
-            required: ['q']
-          }
-        }
-      }
-    ]
-  };
-  
-  const headers = buildHeaders();
-  const body = JSON.stringify(payload);
-  const url = `${BASE}/v1/responses`;
-  
-  debugRequest('POST', url, headers, body);
-  debugLog('Payload details:', payload);
-  
-  const startTime = Date.now();
-  const res = http.post(url, body, { headers });
-  
-  debugResponse(res);
-  
-  const endTime = Date.now();
-  const duration = endTime - startTime;
-  responseStreamTimeWithTools.add(duration);
-  
-  // Record LLM-specific timings
-  recordTimings(res, 'response_stream', 'with_tools');
-  
-  const ok = check(res, {
-    'response stream tools status 200': (r) => r.status === 200,
-    'response stream tools has content': (r) => r.body && r.body.length > 0,
-    'response stream tools is event-stream': (r) => r.headers['Content-Type'] && r.headers['Content-Type'].includes('text/event-stream')
-  });
-  
-  if (ok) {
-    const lines = res.body.split('\n');
-    let chunkCount = 0;
-    let hasContent = false;
-    let hasToolCalls = false;
-    let hasDone = false;
-    
-    for (let i = 0; i < lines.length; i++) {
-      const line = lines[i].trim();
-      if (line.startsWith('data: ')) {
-        if (line.includes('[DONE]')) {
-          hasDone = true;
-          console.log(`[RESPONSE STREAM TOOLS] ✅ Received completion signal: data: [DONE]`);
-        } else {
-          try {
-            chunkCount++;
-            const data = JSON.parse(line.substring(6));
-            if (data.choices && data.choices[0]) {
-              const choice = data.choices[0];
-              if (choice.delta && choice.delta.content) {
-                hasContent = true;
-              }
-              if (choice.delta && choice.delta.tool_calls) {
-                hasToolCalls = true;
-              }
-            }
-          } catch (e) {
-            // Ignore parsing errors for non-JSON lines
-          }
-        }
-      }
-    }
-    
-    console.log(`[RESPONSE STREAM TOOLS] ✅ Success! Received ${chunkCount} chunks`);
-    console.log(`[RESPONSE STREAM TOOLS] ✅ Has content: ${hasContent}`);
-    console.log(`[RESPONSE STREAM TOOLS] ✅ Has tool calls: ${hasToolCalls}`);
-    console.log(`[RESPONSE STREAM TOOLS] ✅ Stream completed: ${hasDone}`);
-    
-    if (!hasDone) {
-      console.log('[RESPONSE STREAM TOOLS] ⚠️ Warning: No [DONE] signal received');
-    }
-    
-    successes.add(1);
-    return true;
-  } else {
-    console.log('[RESPONSE STREAM TOOLS] ❌ Failed');
-    errors.add(1);
-    return false;
-  }
-}
-
-// ====== Main Test Function ======
-export default function() {
-  console.log('\n========================================');
-  console.log('  RESPONSE API TESTS');
-  console.log('========================================');
-  console.log(`Base URL: ${BASE}`);
-  console.log(`Model: ${MODEL}`);
-  console.log(`Debug Mode: ${DEBUG ? 'ENABLED' : 'DISABLED'}`);
-  console.log('');
-  
-  // Step 1: Guest Login
-  console.log('[1/6] Guest Login');
-  if (!guestLogin()) {
-    console.log('❌ Guest login failed, aborting test');
-    return;
-  }
-  sleep(1);
-  
-  // Step 2: Refresh Token
-  console.log('\n[2/6] Refresh Token');
-  refreshAccessToken();
-  sleep(1);
-  
-  // Step 3: Response API Non-Stream Without Tools
-  console.log('\n[3/6] Response API Non-Stream Without Tools');
-  refreshAccessToken(); // Refresh before response
-  testResponseApiNonStreamWithoutTools();
-  sleep(2);
-  
-  // Step 4: Response API Non-Stream With Tools
-  console.log('\n[4/6] Response API Non-Stream With Tools');
-  refreshAccessToken(); // Refresh before response
-  testResponseApiNonStreamWithTools();
-  sleep(2);
-  
-  // Step 5: Response API Stream Without Tools
-  console.log('\n[5/6] Response API Stream Without Tools');
-  refreshAccessToken(); // Refresh before response
-  testResponseApiStreamWithoutTools();
-  sleep(2);
-  
-  // Step 6: Response API Stream With Tools
-  console.log('\n[6/6] Response API Stream With Tools');
-  refreshAccessToken(); // Refresh before response
-  testResponseApiStreamWithTools();
-  
-  // Summary
-  console.log('\n========================================');
-  console.log('           TEST SUMMARY');
-  console.log('========================================');
-  console.log('✅ Guest authentication');
-  console.log('✅ Token refresh');
-  console.log('✅ Response API non-stream without tools');
-  console.log('✅ Response API non-stream with tools');
-  console.log('✅ Response API stream without tools');
-  console.log('✅ Response API stream with tools');
-  console.log('');
-  console.log('Response API tests completed!');
-  console.log('========================================\n');
-  
-  sleep(2);
-}
diff --git a/tests/test-run-single-flow.bat b/tests/test-run-single-flow.bat
deleted file mode 100644
index 68d6020a..00000000
--- a/tests/test-run-single-flow.bat
+++ /dev/null
@@ -1,53 +0,0 @@
-@echo off
-REM Jan Server Test Runner - Single Flow Tests
-REM Usage: test-run-single-flow.bat [BASE_URL] [MODEL] [DEBUG] [TEST_TYPE]
-
-set BASE_URL=%1
-if "%BASE_URL%"=="" set BASE_URL=https://api-stag.jan.ai
-
-set MODEL=%2
-if "%MODEL%"=="" set MODEL=jan-v1-4b
-
-set DEBUG_MODE=%3
-if "%DEBUG_MODE%"=="" set DEBUG_MODE=true
-
-set TEST_TYPE=%4
-if "%TEST_TYPE%"=="" set TEST_TYPE=conversation
-
-echo ========================================
-echo   JAN SERVER TEST - SINGLE RUN
-echo ========================================
-echo Base URL: %BASE_URL%
-echo Model: %MODEL%
-echo Debug Mode: %DEBUG_MODE%
-echo Test Type: %TEST_TYPE%
-echo ========================================
-echo.
-
-REM Set environment variables for single run
-set BASE=%BASE_URL%
-set MODEL=%MODEL%
-set SINGLE_RUN=true
-set DEBUG=%DEBUG_MODE%
-
-REM Run the selected test
-if "%TEST_TYPE%"=="standard" (
-    echo Running Standard Completion Test...
-    k6 run src\test-completion-standard.js
-) else if "%TEST_TYPE%"=="conversation" (
-    echo Running Conversation Flow Test...
-    k6 run src\test-completion-conversation.js
-) else if "%TEST_TYPE%"=="responses" (
-    echo Running Response API Test...
-    k6 run src\test-responses.js
-) else (
-    echo Invalid test type. Available options: standard, conversation, responses
-    echo Running default conversation test...
-    k6 run src\test-completion-conversation.js
-)
-
-echo.
-echo ========================================
-echo   TEST COMPLETED
-echo ========================================
-pause