Multi-Tenant Isolation
Series
Building Production RAG
Adding multi-tenancy to a RAG system is one of those tasks that sounds straightforward — "just filter by tenant ID" — until you're in a post-incident review explaining why customer A's confidential pricing data appeared in customer B's answer. The failure modes are subtle, the blast radius is large, and most teams underinvest in isolation architecture until after the first incident.
This post covers the three isolation models, how to implement each with real vector databases, and what to test to confirm isolation actually holds.
The Three Isolation Models
Choose based on your compliance requirements and tenant count:
Per-tenant index: Every tenant gets a dedicated collection or index. Isolation is enforced by infrastructure — there is no code path that can accidentally cross tenant boundaries. Required for SOC 2 Type II, HIPAA, or any compliance regime that requires data residency guarantees. Cost: proportional to number of tenants. Practical up to ~500 tenants.
Namespace isolation: A shared cluster with one namespace per tenant, enforced by the vector database's access control. Middle ground — one logical database, but query paths are namespace-scoped. Qdrant, Weaviate, and Pinecone all support this natively.
Query-time filter: A single shared index where every document is tagged with tenant_id and every query is filtered by the current user's tenant. The simplest to implement and the most dangerous to get wrong.
Implementation: Per-Tenant Collections (Qdrant)
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance, Filter, FieldCondition, MatchValue
from typing import Optional
import hashlib
class TenantIsolatedVectorStore:
def __init__(self, qdrant_url: str, embedding_dim: int = 1024):
    """Connect to Qdrant and remember the vector size for new collections.

    Args:
        qdrant_url: Passed to ``QdrantClient`` as its ``url`` argument.
        embedding_dim: Vector size used when a tenant collection is created.
    """
    self.dim = embedding_dim
    self.client = QdrantClient(url=qdrant_url)
def _collection_name(self, tenant_id: str) -> str:
"""Deterministic, opaque collection name — avoids leaking tenant identifiers."""
return f"tenant_{hashlib.sha256(tenant_id.encode()).hexdigest()[:16]}"
def ensure_tenant_collection(self, tenant_id: str) -> str:
name = self._collection_name(tenant_id)
existing = {c.name for c in self.client.get_collections().collections}
if name not in existing:
self.client.create_collection(
collection_name=name,
vectors_config=VectorParams(size=self.dim, distance=Distance.COSINE),
)
return name
def upsert_chunks(
self,
tenant_id: str,
chunks: list[dict], # [{id, text, embedding, metadata}, ...]
) -> None:
from qdrant_client.models import PointStruct
collection = self.ensure_tenant_collection(tenant_id)
points = [
PointStruct(
id=chunk["id"],
vector=chunk["embedding"],
payload={
"text": chunk["text"],
"doc_id": chunk.get("doc_id", ""),
**chunk.get("metadata", {}),
# Never include tenant_id in payload — it's implicit in the collection
},
)
for chunk in chunks
]
self.client.upsert(collection_name=collection, points=points)
def search(
self,
tenant_id: str,
query_embedding: list[float],
top_k: int = 20,
) -> list[dict]:
collection = self._collection_name(tenant_id)
results = self.client.search(
collection_name=collection,
query_vector=query_embedding,
limit=top_k,
with_payload=True,
)
return [
{"id": str(r.id), "text": r.payload["text"], "score": r.score, **r.payload}
for r in results
]
The key design choice: use a hashed, opaque collection name rather than tenant_acme_corp. This avoids leaking tenant identifiers in error messages, logs, and admin console screens.
Implementation: Namespace Isolation with ACLs
For Weaviate (supports multi-tenancy natively since v1.20):
import weaviate
from weaviate.classes.config import Configure, Property, DataType
class WeaviateMultiTenantStore:
def __init__(self, weaviate_url: str):
    """Open a Weaviate connection and make sure the shared collection exists.

    NOTE(review): ``weaviate_url`` is forwarded as the ``host`` argument of
    ``connect_to_local`` — confirm callers pass a hostname, not a full URL.
    """
    connection = weaviate.connect_to_local(host=weaviate_url)
    self.client = connection
    self._ensure_class()
def _ensure_class(self):
if not self.client.collections.exists("RagChunk"):
self.client.collections.create(
name="RagChunk",
multi_tenancy_config=Configure.multi_tenancy(enabled=True),
properties=[
Property(name="text", data_type=DataType.TEXT),
Property(name="doc_id", data_type=DataType.TEXT),
Property(name="chunk_index", data_type=DataType.INT),
],
vectorizer_config=Configure.Vectorizer.none(), # bring your own vectors
)
def add_tenant(self, tenant_id: str) -> None:
    """Register ``tenant_id`` as a tenant of the ``RagChunk`` collection."""
    rag_chunks = self.client.collections.get("RagChunk")
    from weaviate.classes.tenants import Tenant

    new_tenant = Tenant(name=tenant_id)
    rag_chunks.tenants.create([new_tenant])
def upsert(self, tenant_id: str, chunks: list[dict]) -> None:
collection = self.client.collections.get("RagChunk")
tenant_collection = collection.with_tenant(tenant_id)
with tenant_collection.batch.fixed_size(batch_size=100) as batch:
for chunk in chunks:
batch.add_object(
properties={"text": chunk["text"], "doc_id": chunk["doc_id"]},
vector=chunk["embedding"],
uuid=chunk["id"],
)
def search(self, tenant_id: str, query_vector: list[float], top_k: int = 20) -> list[dict]:
collection = self.client.collections.get("RagChunk")
results = collection.with_tenant(tenant_id).query.near_vector(
near_vector=query_vector,
limit=top_k,
return_properties=["text", "doc_id"],
)
return [
{"id": str(obj.uuid), "text": obj.properties["text"], "score": obj.metadata.certainty}
for obj in results.objects
]
Weaviate enforces the tenant boundary at the database layer — a query issued without with_tenant() fails rather than returning cross-tenant data.
Implementation: Query-Time Filter (the Dangerous One)
If you must use a shared index with metadata filters, the implementation must be airtight:
from functools import wraps
from typing import Callable
class FilterEnforcedSearchClient:
"""
Wraps a vector DB client and injects mandatory tenant_id filter on every search.
Do not expose the underlying client directly.
"""
def __init__(self, base_client, tenant_id: str):
self._client = base_client
self._tenant_id = tenant_id
def search(self, query_embedding: list[float], top_k: int = 20, **kwargs) -> list[dict]:
# CRITICAL: this filter MUST be applied before any user-controlled parameters
mandatory_filter = {"tenant_id": {"$eq": self._tenant_id}}
user_filter = kwargs.pop("filter", {})
# Combine: user filter is ANDed with mandatory tenant filter
combined_filter = {"$and": [mandatory_filter, user_filter]} if user_filter else mandatory_filter
results = self._client.search(
query_embedding=query_embedding,
top_k=top_k,
filter=combined_filter,
**kwargs,
)
# Double-check: validate no result has wrong tenant_id (defense in depth)
validated = []
for r in results:
if r.get("tenant_id") != self._tenant_id:
# Log this as a CRITICAL security event, do not return the result
import logging
logging.critical(
"TENANT ISOLATION VIOLATION: expected %s, got %s, doc_id %s",
self._tenant_id, r.get("tenant_id"), r.get("id"),
)
continue
validated.append(r)
return validated
The defense-in-depth validation on every result is not paranoia — it's the audit trail that proves your isolation is working. A critical log event on a tenant mismatch should page someone immediately.
Access Control: Per-Tenant Document ACLs
Within a tenant, you may need document-level ACLs (only users in role X can retrieve from documents tagged Y):
from enum import Enum
# Document access levels used for ACL filtering within a tenant.
# Subclasses str, so each member compares equal to its string value.
class AccessLevel(str, Enum):
# NOTE(review): the members look ordered from least to most sensitive, but
# no code here relies on an ordering — confirm before depending on it.
PUBLIC = "public"
INTERNAL = "internal"
CONFIDENTIAL = "confidential"
RESTRICTED = "restricted"
def get_user_access_levels(user_id: str, tenant_id: str) -> set[AccessLevel]:
    """Return the access levels granted to a user within a tenant.

    Placeholder implementation: both arguments are ignored and the result is
    always ``PUBLIC`` and ``INTERNAL``. Replace it with a lookup against your
    auth service.
    """
    # Example: look up user's roles and map to access levels
    placeholder_levels = {AccessLevel.INTERNAL, AccessLevel.PUBLIC}
    return placeholder_levels  # placeholder
def build_acl_filter(
user_id: str,
tenant_id: str,
base_filter: Optional[dict] = None,
) -> dict:
allowed_levels = get_user_access_levels(user_id, tenant_id)
acl_filter = {"access_level": {"$in": [level.value for level in allowed_levels]}}
if base_filter:
return {"$and": [acl_filter, base_filter]}
return acl_filter
Apply ACL filters server-side, never client-side. Any ACL logic that runs in the browser or in a client SDK can be bypassed.
Testing Isolation: The Adversarial Test Suite
Isolation must be tested explicitly — it's not something you verify by code review:
import pytest
class TestTenantIsolation:
def test_tenant_a_cannot_see_tenant_b_documents(self, store):
    """A search scoped to tenant_a returns its own document and none of tenant_b's."""
    seeded = (
        ("tenant_a", "a1", "secret A"),
        ("tenant_b", "b1", "secret B"),
    )
    for tenant, doc_id, text in seeded:
        store.upsert(tenant, [{"id": doc_id, "text": text, "embedding": [...]}])

    hits = store.search("tenant_a", query_embedding=[...], top_k=10)
    returned_ids = [hit["id"] for hit in hits]

    assert "b1" not in returned_ids, "Tenant B document leaked into Tenant A results"
    assert "a1" in returned_ids
def test_nonexistent_tenant_returns_empty(self, store):
    """Searching under a tenant that was never populated yields an empty list."""
    unknown_tenant = "nonexistent_tenant_xyz"
    hits = store.search(unknown_tenant, query_embedding=[...], top_k=10)
    assert hits == []
def test_deleted_tenant_documents_not_retrievable(self, store):
store.upsert("tenant_c", [{"id": "c1", "text": "to be deleted", "embedding": [...]}])
store.delete_tenant("tenant_c")
results = store.search("tenant_c", query_embedding=[...], top_k=10)
assert results == []
Run this test suite in CI on every commit. If any test fails, block the deploy immediately — isolation failures are security incidents, not bugs.
Key Takeaways
- Multi-tenant RAG isolation is a security problem, not just an engineering convenience — treat isolation failures as incidents and design accordingly.
- Per-tenant index separation is the safest model and required for strict compliance; prefer it when tenant count is below 500.
- Vector databases with native multi-tenancy support (Weaviate, Qdrant) enforce isolation at the database layer — prefer this over application-layer filtering.
- Query-time metadata filters on a shared index work but require defense-in-depth: mandatory server-side filter injection plus post-query result validation.
- Document-level ACLs (not just tenant-level) are often required in enterprise deployments; implement them server-side and test them adversarially.
- Write explicit isolation tests that attempt to retrieve cross-tenant data and gate deploys on them — isolation that isn't tested isn't isolation.