SDK Reference
Providers
Niitaka instruments LLM providers by patching the client at import time. Call the instrument function once at startup — all subsequent calls are captured automatically.
Instrument all at once
If you use multiple providers or want zero-maintenance instrumentation, use auto_instrument(). It silently skips providers that aren't installed.
# Instruments OpenAI, Anthropic, Gemini, and Groq (skips any that aren't installed)
niitaka.auto_instrument()
# Equivalent to:
niitaka.instrument_openai()
niitaka.instrument_anthropic()
niitaka.instrument_gemini()
niitaka.instrument_groq()
# LiteLLM is NOT included in auto_instrument() — call it separately if you use it:
niitaka.instrument_litellm()
If you use LiteLLM, call instrument_litellm() instead of the individual provider instruments. A single call covers every provider LiteLLM supports — switch between gpt-4o, anthropic/claude-3-haiku-20240307, and gemini/gemini-1.5-flash by changing the model string in your variant config, with no extra instrumentation code. See the LiteLLM section below.
OpenAI
pip install "niitaka-sdk[openai]"
import niitaka
import openai
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
niitaka.instrument_openai()
client = openai.OpenAI()
with niitaka.start_session(goal="...", agent_id="my-agent"):
# Chat completions
response = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "Hello"}],
)
# Streaming — also supported
stream = client.chat.completions.create(
model="gpt-4o", messages=[...], stream=True
)
What gets logged
Anthropic
pip install "niitaka-sdk[anthropic]"
import niitaka
import anthropic
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
niitaka.instrument_anthropic()
client = anthropic.Anthropic()
with niitaka.start_session(goal="...", agent_id="my-agent"):
message = client.messages.create(
model="claude-opus-4-7",
max_tokens=1024,
messages=[{"role": "user", "content": "Hello"}],
)
What gets logged
Google Gemini
pip install "niitaka-sdk[gemini]"
import niitaka
import google.generativeai as genai
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
niitaka.instrument_gemini()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
model = genai.GenerativeModel("gemini-1.5-pro")
with niitaka.start_session(goal="...", agent_id="my-agent"):
response = model.generate_content("Hello")
What gets logged
Groq
pip install "niitaka-sdk[groq]"
import niitaka
from groq import Groq
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
niitaka.instrument_groq()
client = Groq()
with niitaka.start_session(goal="...", agent_id="my-agent"):
response = client.chat.completions.create(
model="llama-3.3-70b-versatile",
messages=[{"role": "user", "content": "Hello"}],
)
What gets logged
LangChain
pip install "niitaka-sdk[langchain]"
import niitaka
from niitaka import NiitakaCallbackHandler
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
llm = ChatOpenAI(model="gpt-4o")
handler = NiitakaCallbackHandler()
with niitaka.start_session(goal="...", agent_id="my-agent"):
# Pass handler in the config dict — works with any LangChain chain
response = llm.invoke(
[HumanMessage(content="Hello")],
config={"callbacks": [handler]},
)
What gets logged
LiteLLM
pip install litellm niitaka-sdk
import niitaka
import litellm
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
niitaka.instrument_litellm() # patches both litellm.completion and litellm.acompletion
with niitaka.start_session(goal="...", agent_id="my-agent"):
# Sync — unified interface across all providers
response = litellm.completion(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Hello"}],
)
# Async — also fully supported
response = await litellm.acompletion(
model="anthropic/claude-3-haiku-20240307",
messages=[{"role": "user", "content": "Hello"}],
)
# LiteLLM model name conventions:
# OpenAI: "gpt-4o", "gpt-4o-mini"
# Anthropic: "anthropic/claude-3-haiku-20240307"
# Gemini: "gemini/gemini-1.5-flash"
# Groq: "groq/llama-3.3-70b-versatile"
# Ollama: "ollama/llama3.2" (see Ollama section below)
What gets logged
Ollama
# 1. Install Ollama from https://ollama.com and pull a model
ollama pull llama3.2
ollama pull deepseek-r1:8b
# 2. Install Python deps — no separate Ollama SDK needed
pip install litellm niitaka-sdk
import os
import niitaka
import litellm
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
niitaka.instrument_litellm() # captures Ollama calls automatically
with niitaka.start_session(goal="...", agent_id="my-agent"):
response = litellm.completion(
model="ollama/llama3.2", # ollama/<model-name> — no API key needed
messages=[{"role": "user", "content": "Hello"}],
api_base="http://localhost:11434", # default Ollama address
)
print(response.choices[0].message.content)
# Popular models (run "ollama pull <name>" first):
# ollama/llama3.2 — Meta Llama 3.2 (1B / 3B)
# ollama/deepseek-r1:8b — DeepSeek R1 reasoning model
# ollama/mistral — Mistral 7B
# ollama/deepseek-coder — optimised for code generation
# ollama/phi4 — Microsoft Phi-4 (14B)
# ollama/qwen2.5-coder — Alibaba Qwen 2.5 Coder
What gets logged
Other local LLM servers
LM Studio, vLLM, and llama.cpp server all expose an OpenAI-compatible /v1/chat/completions endpoint. Use the openai/ prefix in LiteLLM with a custom api_base — no extra instrumentation needed beyond instrument_litellm().
LM Studio — localhost:1234/v1 — GUI app, easy model browser
vLLM — localhost:8000/v1 — Production-grade, batching, multi-GPU
llama.cpp server — localhost:8080/v1 — Lightest option, runs on CPU
# LM Studio, vLLM, and llama.cpp all expose an OpenAI-compatible API.
# Use model="openai/<any-name>" + api_base pointing to the server.
# api_key can be any non-empty string — local servers don't validate it.
import litellm
# LM Studio — download from lmstudio.ai, enable the local server in the app
response = litellm.completion(
model="openai/local-model",
messages=[{"role": "user", "content": "Hello"}],
api_base="http://localhost:1234/v1",
api_key="none",
)
# vLLM — production-grade inference server, good for teams / self-hosted infra
# python -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-3.1-8B-Instruct
response = litellm.completion(
model="openai/meta-llama/Llama-3.1-8B-Instruct",
messages=[{"role": "user", "content": "Hello"}],
api_base="http://localhost:8000/v1",
api_key="none",
)
# llama.cpp server — lightest option, runs on CPU, no GPU required
# ./llama-server --model model.gguf --port 8080
response = litellm.completion(
model="openai/local",
messages=[{"role": "user", "content": "Hello"}],
api_base="http://localhost:8080/v1",
api_key="none",
)
# Niitaka instruments all of these identically via instrument_litellm().
# Cost is always $0.00. Token counts depend on what the server reports.
Async support
All providers support async usage — instrumentation is applied at import time so await calls are captured automatically, just like sync ones. LiteLLM is the recommended approach for async workloads because litellm.acompletion gives you a single async interface across all providers.
import niitaka
import litellm
niitaka.configure(api_key=os.getenv("NIITAKA_API_KEY"))
niitaka.instrument_litellm()
async def run_agent(prompt: str) -> str:
async with niitaka.start_session(goal="...", agent_id="my-agent"):
response = await litellm.acompletion(
model="gpt-4o-mini",
messages=[{"role": "user", "content": prompt}],
)
return response.choices[0].message.content
# Run multiple sessions concurrently — each gets its own Niitaka session:
import asyncio
results = await asyncio.gather(
run_agent("Summarise report A"),
run_agent("Summarise report B"),
run_agent("Summarise report C"),
)