Set up Vecta's SDK for local development
Last updated: December 19, 2024
Category: getting-started
Local SDK Quickstart
Run Vecta evaluations entirely on your local machine or private infrastructure. This is ideal when you are working with sensitive data, developing offline, or need full control over the evaluation process.
Prerequisites
- Python 3.9+ (the code examples use built-in generic type hints such as list[str])
- Local vector database (ChromaDB, Weaviate, etc.)
- OpenAI API key (for LLM-based evaluations)
Step 1: Install Vecta SDK
pip install vecta[local]
The [local] extra includes dependencies for the local vector database connectors.
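As a quick sanity check, you can confirm the install from a Python shell by importing the client and one of the connectors. This assumes the same top-level import paths used in the examples later in this guide:
from vecta import VectaClient, ChromaLocalConnector
print("Vecta SDK installed")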
Step 2: Set Up Your Vector Database
ChromaDB Example
import chromadb
from vecta import VectaClient, ChromaLocalConnector
# Initialize ChromaDB
chroma_client = chromadb.Client()
collection = chroma_client.create_collection("my_knowledge_base")
# Add some sample documents
documents = [
    "Vecta is a RAG evaluation platform that helps teams build reliable AI systems.",
    "RAG stands for Retrieval-Augmented Generation, combining search with language models.",
    "Vector databases store embeddings that enable semantic search capabilities."
]
collection.add(
    documents=documents,
    ids=[f"doc_{i}" for i in range(len(documents))],
    metadatas=[{"source": f"doc_{i}.txt"} for i in range(len(documents))]
)
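Before wiring the collection to Vecta, you can verify the documents are searchable with a plain ChromaDB query. This is a standard chromadb call, independent of Vecta; the query text is just an example:
# Sanity-check the collection with a semantic query
results = collection.query(query_texts=["What is Vecta?"], n_results=2)
print(results["documents"])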
Weaviate Example
import weaviate
from vecta import VectaClient, WeaviateLocalConnector
# Connect to local Weaviate instance
weaviate_client = weaviate.Client("http://localhost:8080")
# Create Vecta connector
connector = WeaviateLocalConnector(
    client=weaviate_client,
    class_name="Document"
)
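If your local Weaviate instance has no data yet, you can seed the Document class with a sample object before running evaluations. A minimal sketch using the weaviate-client v3 API to match the weaviate.Client(...) call above; the "content" property name is an assumption, so adjust it to your schema:
# Seed one sample object; "content" is a hypothetical property name
weaviate_client.data_object.create(
    data_object={"content": "Vecta is a RAG evaluation platform."},
    class_name="Document"
)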
Step 3: Initialize Vecta Client
from vecta import VectaClient
# Create ChromaDB connector
connector = ChromaLocalConnector(chroma_client, "my_knowledge_base")
# Initialize Vecta client
vecta = VectaClient(
    vector_db_connector=connector,
    openai_api_key="your-openai-api-key"  # For LLM evaluations
)
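To avoid hardcoding the key, you can read it from an environment variable instead. A small sketch using the standard library, assuming OPENAI_API_KEY is set in your shell:
import os
vecta = VectaClient(
    vector_db_connector=connector,
    openai_api_key=os.environ["OPENAI_API_KEY"]  # set in your shell beforehand
)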
Step 4: Create a Local Benchmark
Auto-Generate from Your Data
# Generate benchmark from your local vector database
benchmark = vecta.create_benchmark(
    name="Local Knowledge Base Test",
    description="Testing retrieval on my local documents",
    num_questions=50,
    question_types=["factual", "conceptual"]
)
print(f"Generated {len(benchmark.questions)} test questions")
Load Existing Benchmark
# Load benchmark from CSV file
benchmark = vecta.load_benchmark("my_benchmark.csv")
# Or create manually
from vecta.types import BenchmarkQuestion
questions = [
    BenchmarkQuestion(
        question="What is Vecta?",
        answer="Vecta is a RAG evaluation platform",
        chunk_ids=["doc_0"],
        metadata={"difficulty": "easy"}
    ),
    BenchmarkQuestion(
        question="What does RAG stand for?",
        answer="Retrieval-Augmented Generation",
        chunk_ids=["doc_1"],
        metadata={"difficulty": "medium"}
    )
]
benchmark = vecta.create_benchmark_from_questions(
    name="Manual Benchmark",
    questions=questions
)
Step 5: Run Local Evaluations
Retrieval-Only Evaluation
def my_retrieval_function(query: str) -> list[str]:
    """Your custom retrieval function."""
    # Use your vector database's search
    results = connector.semantic_search(query, k=5)
    return [result.id for result in results]

# Run retrieval evaluation
results = vecta.evaluate_retrieval(
    benchmark=benchmark,
    retrieval_function=my_retrieval_function,
    evaluation_name="Local Retrieval Test"
)
print(f"Chunk F1: {results.chunk_level.f1_score:.3f}")
print(f"Precision: {results.chunk_level.precision:.3f}")
print(f"Recall: {results.chunk_level.recall:.3f}")
Full RAG Evaluation
def my_rag_function(query: str) -> tuple[list[str], str]:
    """Your complete RAG pipeline."""
    # 1. Retrieve relevant chunks
    search_results = connector.semantic_search(query, k=3)
    chunk_ids = [result.id for result in search_results]
    # 2. Build context
    context = "\n".join([result.content for result in search_results])
    # 3. Generate answer (replace with your LLM)
    prompt = f"Answer the question based on this context:\n\nContext: {context}\n\nQuestion: {query}"
    # Using OpenAI (built into VectaClient)
    generated_answer = vecta.llm_client.generate(prompt)
    return chunk_ids, generated_answer

# Run full RAG evaluation
results = vecta.evaluate_retrieval_and_generation(
    benchmark=benchmark,
    retrieval_generation_function=my_rag_function,
    evaluation_name="Local RAG Pipeline Test"
)
print(f"Retrieval F1: {results.chunk_level.f1_score:.3f}")
print(f"Generation Accuracy: {results.generation_metrics.accuracy:.3f}")
print(f"Generation Factuality: {results.generation_metrics.factuality:.3f}")
Generation-Only Evaluation
def my_generation_function(query: str) -> str:
    """Test generation without retrieval."""
    # Your LLM generation logic
    prompt = f"Answer this question: {query}"
    return vecta.llm_client.generate(prompt)

# Evaluate generation quality
results = vecta.evaluate_generation_only(
    benchmark=benchmark,
    generation_function=my_generation_function,
    evaluation_name="LLM Generation Test"
)
print(f"Accuracy: {results.generation_metrics.accuracy:.3f}")
print(f"Factuality: {results.generation_metrics.factuality:.3f}")
Step 6: Analyze Results Locally
Export Results
# Save results to file
results.save("evaluation_results.json")
# Export to CSV for analysis
results.to_csv("detailed_results.csv")
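The exported CSV can then be analyzed with standard tooling. A quick sketch using pandas; the column names depend on what the export produces, so inspect them first:
import pandas as pd
df = pd.read_csv("detailed_results.csv")
print(df.columns.tolist())   # see which per-question fields were exported
print(df.describe())         # summary statistics for the numeric metric columns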
View Detailed Metrics
# Per-question breakdown
for question_result in results.question_results:
    print(f"Question: {question_result.question}")
    print(f"Expected: {question_result.expected_answer}")
    print(f"Generated: {question_result.generated_answer}")
    print(f"F1 Score: {question_result.chunk_f1:.3f}")
    print("---")
Generate Reports
# Create HTML report
report = vecta.generate_report(results)
report.save("evaluation_report.html")
# Create summary statistics
summary = vecta.create_summary(results)
print(summary)