Overview
This page provides practical examples for common KnowledgeBase patterns. For a step-by-step first setup, see the Getting Started guide.

Multiple Documents
Process multiple documents and query across all of them:

from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
# Embedding provider built from the default OpenAIEmbeddingConfig.
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())

# Chroma collection "multi_doc_kb" in embedded mode with db_path "./kb_db".
connection = ConnectionConfig(mode=Mode.EMBEDDED, db_path="./kb_db")
chroma_config = ChromaConfig(
    collection_name="multi_doc_kb",
    vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
    connection=connection,
)
vectordb = ChromaProvider(chroma_config)

# A single knowledge base built from three sources of different file types.
document_paths = ["doc1.pdf", "doc2.md", "doc3.docx"]
kb = KnowledgeBase(
    sources=document_paths,
    embedding_provider=embedding,
    vectordb=vectordb,
)

agent = Agent("anthropic/claude-sonnet-4-5")

# The knowledge base is handed to the task through its context list.
task = Task(
    description="Compare the information across all documents",
    context=[kb],
)

result = agent.do(task)
print(result)
Directory Processing
Process all supported files in a directory:

from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
# Embedding provider built from the default OpenAIEmbeddingConfig.
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())

# Chroma collection "directory_kb" in embedded mode with db_path "./kb_db".
connection = ConnectionConfig(mode=Mode.EMBEDDED, db_path="./kb_db")
chroma_config = ChromaConfig(
    collection_name="directory_kb",
    vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
    connection=connection,
)
vectordb = ChromaProvider(chroma_config)

# Passing a directory path processes all supported files in that directory.
source_dirs = ["data/"]
kb = KnowledgeBase(
    sources=source_dirs,
    embedding_provider=embedding,
    vectordb=vectordb,
)

agent = Agent("anthropic/claude-sonnet-4-5")

# The knowledge base is handed to the task through its context list.
task = Task(
    description="Summarize the key information from all files",
    context=[kb],
)

result = agent.do(task)
print(result)
Custom Loaders and Splitters
Use specific loaders and splitters for precise control over document processing:

from upsonic import Agent, Task, KnowledgeBase
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.config import PdfLoaderConfig
from upsonic.text_splitter.recursive import RecursiveChunker, RecursiveChunkingConfig
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
# Explicit PDF loader, built from the default PdfLoaderConfig.
loader = PdfLoader(PdfLoaderConfig())

# Explicit recursive splitter with chunk_size=512 and chunk_overlap=50.
chunking_config = RecursiveChunkingConfig(chunk_size=512, chunk_overlap=50)
splitter = RecursiveChunker(chunking_config)

embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())

# In-memory Chroma collection "custom_kb".
chroma_config = ChromaConfig(
    collection_name="custom_kb",
    vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
    connection=ConnectionConfig(mode=Mode.IN_MEMORY),
)
vectordb = ChromaProvider(chroma_config)

# The loader and splitter are passed as one-element lists for the one source.
kb = KnowledgeBase(
    sources=["document.pdf"],
    embedding_provider=embedding,
    vectordb=vectordb,
    loaders=[loader],
    splitters=[splitter],
)

agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
    description="Extract key information from the document",
    context=[kb],
)

result = agent.do(task)
print(result)
Indexed Processing (Per-Source Loaders)
Use different loaders and splitters for different sources by matching them by index:

from upsonic import Agent, Task, KnowledgeBase
from upsonic.loaders.pdf import PdfLoader
from upsonic.loaders.markdown import MarkdownLoader
from upsonic.loaders.config import PdfLoaderConfig, MarkdownLoaderConfig
from upsonic.text_splitter.recursive import RecursiveChunker, RecursiveChunkingConfig
from upsonic.text_splitter.semantic import SemanticChunker, SemanticChunkingConfig
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())

# One loader per source, in the same order as the sources list below:
# position 0 pairs with "manual.pdf", position 1 with "guide.md".
pdf_loader = PdfLoader(PdfLoaderConfig())
markdown_loader = MarkdownLoader(MarkdownLoaderConfig())
loaders = [pdf_loader, markdown_loader]

# One splitter per source, in the same order as the loaders.
recursive_splitter = RecursiveChunker(RecursiveChunkingConfig(chunk_size=512))
semantic_config = SemanticChunkingConfig(
    embedding_provider=embedding,  # the semantic chunker is given the same embedding provider
    chunk_size=1024,
)
semantic_splitter = SemanticChunker(semantic_config)
splitters = [recursive_splitter, semantic_splitter]

# In-memory Chroma collection "indexed_kb".
chroma_config = ChromaConfig(
    collection_name="indexed_kb",
    vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
    connection=ConnectionConfig(mode=Mode.IN_MEMORY),
)
vectordb = ChromaProvider(chroma_config)

kb = KnowledgeBase(
    sources=["manual.pdf", "guide.md"],
    embedding_provider=embedding,
    vectordb=vectordb,
    loaders=loaders,
    splitters=splitters,
)

agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
    description="What information is available in both documents?",
    context=[kb],
)

result = agent.do(task)
print(result)
When using multiple loaders/splitters, the count must match the number of file sources. String content sources don’t need loaders.
Multiple Knowledge Bases
Query multiple knowledge bases in a single task using `name` and `description` so the agent can distinguish them:
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
def _in_memory_chroma(collection_name):
    """Return a ChromaProvider for an in-memory collection with the given name."""
    return ChromaProvider(ChromaConfig(
        collection_name=collection_name,
        vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    ))


# Both knowledge bases share one embedding provider but use separate collections.
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())

# name and description are set so the agent can tell the knowledge bases apart.
kb1 = KnowledgeBase(
    sources=["technical_docs/"],
    embedding_provider=embedding,
    vectordb=_in_memory_chroma("tech_kb"),
    name="technical_docs",
    description="Technical API documentation and architecture specs",
)

kb2 = KnowledgeBase(
    sources=["user_guides/"],
    embedding_provider=embedding,
    vectordb=_in_memory_chroma("guides_kb"),
    name="user_guides",
    description="User guides, tutorials, and FAQs",
)

agent = Agent("anthropic/claude-sonnet-4-5")

# Both knowledge bases are supplied to the same task through its context list.
knowledge_bases = [kb1, kb2]
task = Task(
    description="Compare technical documentation with user guides",
    context=knowledge_bases,
)

result = agent.do(task)
print(result)
Async Usage
Use async/await for better performance in async applications:

import asyncio
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
async def main() -> None:
    """Build a knowledge base from document.pdf and print the agent's summary.

    Uses an in-memory Chroma collection named "async_kb" and awaits the
    agent's ``do_async`` call rather than the synchronous ``do``.
    """
    embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
    vectordb = ChromaProvider(ChromaConfig(
        collection_name="async_kb",
        vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    ))

    kb = KnowledgeBase(
        sources=["document.pdf"],
        embedding_provider=embedding,
        vectordb=vectordb,
    )

    agent = Agent("anthropic/claude-sonnet-4-5")
    task = Task(
        description="Summarize the document",
        context=[kb],
    )

    # do_async is awaited, so it has to run inside this coroutine.
    result = await agent.do_async(task)
    print(result)


# Start an event loop and run the coroutine to completion.
asyncio.run(main())
Streaming Response
Stream the agent’s response for real-time output:

from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())

# In-memory Chroma collection "streaming_kb".
vectordb = ChromaProvider(ChromaConfig(
    collection_name="streaming_kb",
    vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
    connection=ConnectionConfig(mode=Mode.IN_MEMORY),
))

kb = KnowledgeBase(
    sources=["document.pdf"],
    embedding_provider=embedding,
    vectordb=vectordb,
)

agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
    description="Explain the main concepts",
    context=[kb],
)

# Synchronous streaming: print each piece as it arrives, without a newline,
# flushing so the output appears immediately.
for text in agent.stream(task):
    print(text, end='', flush=True)
# Finish the streamed output with a single newline.
print()
Async Streaming
import asyncio
from upsonic import Agent, Task, KnowledgeBase
from upsonic.embeddings import OpenAIEmbedding, OpenAIEmbeddingConfig
from upsonic.vectordb import ChromaProvider, ChromaConfig, ConnectionConfig, Mode
async def main() -> None:
    """Build a knowledge base from document.pdf and stream the agent's answer.

    Uses an in-memory Chroma collection named "async_stream_kb" and iterates
    over ``agent.astream`` with ``async for``, printing each piece as it
    arrives.
    """
    embedding = OpenAIEmbedding(OpenAIEmbeddingConfig())
    vectordb = ChromaProvider(ChromaConfig(
        collection_name="async_stream_kb",
        vector_size=1536,  # NOTE(review): presumably the embedding dimension; confirm
        connection=ConnectionConfig(mode=Mode.IN_MEMORY),
    ))

    kb = KnowledgeBase(
        sources=["document.pdf"],
        embedding_provider=embedding,
        vectordb=vectordb,
    )

    agent = Agent("anthropic/claude-sonnet-4-5")
    task = Task(
        description="Explain the main concepts",
        context=[kb],
    )

    # Print each streamed piece without a newline, flushing so it shows at once.
    async for text in agent.astream(task):
        print(text, end='', flush=True)
    # Finish the streamed output with a single newline.
    print()


# Start an event loop and run the coroutine to completion.
asyncio.run(main())
SuperMemory (Zero-Config RAG)
SuperMemory handles embeddings internally — no embedding provider needed:

from upsonic import Agent, Task, KnowledgeBase
from upsonic.vectordb import SuperMemoryProvider, SuperMemoryConfig
# SuperMemory-backed store; the api_key value is a placeholder to replace
# with a real key.
supermemory_config = SuperMemoryConfig(
    collection_name="my_kb",
    api_key="sm_your_api_key_here",
)
vectordb = SuperMemoryProvider(supermemory_config)

# No embedding_provider is passed here: only sources and the vector store.
kb_sources = ["document.pdf", "data/"]
kb = KnowledgeBase(
    sources=kb_sources,
    vectordb=vectordb,
)

agent = Agent("anthropic/claude-sonnet-4-5")
task = Task(
    description="What are the main topics in the documents?",
    context=[kb],
)

result = agent.do(task)
print(result)

