
Embedding Providers

The providers module defines an abstract base class for embedding providers, three concrete implementations backed by OpenAI, Ollama, and FastEmbed, and a get_embedding_provider() factory that auto-detects the best available backend.

Quick Start

from markdown_vault_mcp.providers import get_embedding_provider

# Auto-detect based on environment variables
provider = get_embedding_provider()

# Embed a batch of texts
vectors = provider.embed(["hello world", "example text"])
print(f"Dimension: {provider.dimension}")

Provider Selection

The get_embedding_provider() function auto-detects the best available provider:

  1. OpenAI — if OPENAI_API_KEY is set
  2. Ollama — if an Ollama server is reachable at OLLAMA_HOST (default: http://localhost:11434)
  3. FastEmbed — if the package is installed

Override with EMBEDDING_PROVIDER=openai|ollama|fastembed.
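
For example, to pin the backend explicitly rather than rely on auto-detection (a minimal sketch; "fastembed" can be swapped for "openai" or "ollama"):

import os

from markdown_vault_mcp.providers import get_embedding_provider

# Force the local FastEmbed backend; auto-detection is skipped entirely.
os.environ["EMBEDDING_PROVIDER"] = "fastembed"

provider = get_embedding_provider()
print(type(provider).__name__, provider.dimension)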

API Reference

EmbeddingProvider

Bases: ABC

Abstract base class for embedding providers.

dimension abstractmethod property

Embedding dimension size.

Returns:

  • int: Integer dimension of each embedding vector.

embed(texts) abstractmethod

Embed a batch of texts.

Parameters:

  • texts (list[str]): List of strings to embed. Required.

Returns:

  • list[list[float]]: List of embedding vectors, one per input text.

Source code in src/markdown_vault_mcp/providers.py
@abstractmethod
def embed(self, texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts.

    Args:
        texts: List of strings to embed.

    Returns:
        List of embedding vectors, one per input text.
    """
    ...
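
Any other backend can be plugged in by subclassing EmbeddingProvider and implementing both abstract members. The sketch below is purely illustrative: the hash-based vectors are a stand-in for a real model, not anything the library ships.

import hashlib

from markdown_vault_mcp.providers import EmbeddingProvider


class HashProvider(EmbeddingProvider):
    """Toy provider that derives fixed-size vectors from a SHA-256 hash."""

    @property
    def dimension(self) -> int:
        # Known up front for this toy backend; real providers may probe lazily.
        return 8

    def embed(self, texts: list[str]) -> list[list[float]]:
        vectors: list[list[float]] = []
        for text in texts:
            digest = hashlib.sha256(text.encode("utf-8")).digest()
            # Map the first 8 bytes onto floats in [0, 1), one per dimension.
            vectors.append([byte / 255.0 for byte in digest[: self.dimension]])
        return vectors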

OllamaProvider()

Bases: EmbeddingProvider

Embedding provider backed by the Ollama REST API.

Configuration via environment variables:

  • OLLAMA_HOST: base URL of the Ollama server (default: http://localhost:11434).
  • MARKDOWN_VAULT_MCP_OLLAMA_MODEL: model name to use (default: nomic-embed-text).
  • MARKDOWN_VAULT_MCP_OLLAMA_CPU_ONLY: set to true to force CPU-only inference (default: false).

Initialise OllamaProvider from environment variables.

Raises:

  • ImportError: If httpx is not installed.

Source code in src/markdown_vault_mcp/providers.py
def __init__(self) -> None:
    """Initialise OllamaProvider from environment variables.

    Raises:
        ImportError: If ``httpx`` is not installed.
    """
    try:
        import httpx
    except ImportError as exc:
        raise ImportError(
            "OllamaProvider requires 'httpx'. "
            "Install it with: pip install 'markdown-vault-mcp[embeddings-api]'"
        ) from exc

    self._httpx = httpx
    self._host = os.environ.get("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
    self._model = os.environ.get(f"{_ENV_PREFIX}_OLLAMA_MODEL", "nomic-embed-text")
    cpu_only_raw = os.environ.get(f"{_ENV_PREFIX}_OLLAMA_CPU_ONLY", "false").lower()
    self._cpu_only = cpu_only_raw in ("1", "true", "yes")
    self._dimension: int | None = None

    logger.debug(
        "OllamaProvider initialised: host=%s model=%s cpu_only=%s",
        self._host,
        self._model,
        self._cpu_only,
    )

dimension property

Embedding dimension size.

Embeds a test string on first access to determine the dimension.

Returns:

  • int: Integer dimension of each embedding vector.

embed(texts)

Embed a batch of texts via the Ollama REST API.

Parameters:

  • texts (list[str]): List of strings to embed. Required.

Returns:

  • list[list[float]]: List of embedding vectors, one per input text.

Raises:

  • RuntimeError: If the Ollama API returns an error response.

Source code in src/markdown_vault_mcp/providers.py
def embed(self, texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts via the Ollama REST API.

    Args:
        texts: List of strings to embed.

    Returns:
        List of embedding vectors, one per input text.

    Raises:
        RuntimeError: If the Ollama API returns an error response.
    """
    payload: dict[str, object] = {"model": self._model, "input": texts}
    if self._cpu_only:
        payload["options"] = {"num_gpu": 0}

    url = f"{self._host}/api/embed"
    logger.debug("POST %s model=%s texts=%d", url, self._model, len(texts))

    with self._httpx.Client() as client:
        response = client.post(url, json=payload, timeout=30.0)

    if response.status_code != 200:
        raise RuntimeError(
            f"Ollama API error {response.status_code}: {response.text}"
        )

    data = response.json()
    embeddings: list[list[float]] = data["embeddings"]

    # Cache dimension from first successful call.
    if self._dimension is None and embeddings:
        self._dimension = len(embeddings[0])

    return embeddings
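
Putting the configuration together, a typical call looks like the sketch below. It assumes an Ollama server is running locally and already has the nomic-embed-text model pulled.

import os

from markdown_vault_mcp.providers import OllamaProvider

# Both variables are optional; these are the documented defaults made explicit.
os.environ["OLLAMA_HOST"] = "http://localhost:11434"
os.environ["MARKDOWN_VAULT_MCP_OLLAMA_MODEL"] = "nomic-embed-text"

provider = OllamaProvider()
try:
    vectors = provider.embed(["first note", "second note"])
    print(len(vectors), provider.dimension)
except RuntimeError as exc:
    # Raised when the Ollama API answers with a non-200 status code.
    print(f"Ollama request failed: {exc}")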

OpenAIProvider()

Bases: EmbeddingProvider

Embedding provider backed by the OpenAI Embeddings API.

Configuration via environment variables:

  • OPENAI_API_KEY: required API key.

Uses the text-embedding-3-small model.

Initialise OpenAIProvider from environment variables.

Raises:

  • ImportError: If httpx is not installed.
  • RuntimeError: If OPENAI_API_KEY is not set.

Source code in src/markdown_vault_mcp/providers.py
def __init__(self) -> None:
    """Initialise OpenAIProvider from environment variables.

    Raises:
        ImportError: If ``httpx`` is not installed.
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
    """
    try:
        import httpx
    except ImportError as exc:
        raise ImportError(
            "OpenAIProvider requires 'httpx'. "
            "Install it with: pip install 'markdown-vault-mcp[embeddings-api]'"
        ) from exc

    self._httpx = httpx
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OpenAIProvider requires the OPENAI_API_KEY environment variable."
        )
    self._api_key = api_key
    self._dimension: int | None = None

    logger.debug("OpenAIProvider initialised: model=%s", self._MODEL)

dimension property

Embedding dimension size.

Embeds a test string on first access to determine the dimension.

Returns:

  • int: Integer dimension of each embedding vector.

embed(texts)

Embed a batch of texts via the OpenAI Embeddings API.

Parameters:

  • texts (list[str]): List of strings to embed. Required.

Returns:

  • list[list[float]]: List of embedding vectors in input order.

Raises:

  • RuntimeError: If the OpenAI API returns an error response.

Source code in src/markdown_vault_mcp/providers.py
def embed(self, texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts via the OpenAI Embeddings API.

    Args:
        texts: List of strings to embed.

    Returns:
        List of embedding vectors in input order.

    Raises:
        RuntimeError: If the OpenAI API returns an error response.
    """
    payload = {"input": texts, "model": self._MODEL}
    headers = {
        "Authorization": f"Bearer {self._api_key}",
        "Content-Type": "application/json",
    }

    logger.debug(
        "POST %s model=%s texts=%d", self._ENDPOINT, self._MODEL, len(texts)
    )

    with self._httpx.Client() as client:
        response = client.post(
            self._ENDPOINT, json=payload, headers=headers, timeout=30.0
        )

    if response.status_code != 200:
        raise RuntimeError(
            f"OpenAI API error {response.status_code}: {response.text}"
        )

    data = response.json()
    # Sort by index to guarantee input order is preserved.
    items: list[dict] = sorted(data["data"], key=lambda d: d["index"])
    embeddings: list[list[float]] = [item["embedding"] for item in items]

    # Cache dimension from first successful call.
    if self._dimension is None and embeddings:
        self._dimension = len(embeddings[0])

    return embeddings
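
A minimal usage sketch, assuming OPENAI_API_KEY is already exported in the environment:

from markdown_vault_mcp.providers import OpenAIProvider

provider = OpenAIProvider()  # raises RuntimeError if OPENAI_API_KEY is missing

try:
    vectors = provider.embed(["query text"])
    print(provider.dimension, len(vectors[0]))
except RuntimeError as exc:
    # Raised on any non-200 response from the OpenAI Embeddings API.
    print(f"OpenAI request failed: {exc}")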

FastEmbedProvider(model_name=None, cache_dir=None)

Bases: EmbeddingProvider

Embedding provider backed by the local fastembed library.

The fastembed package is imported lazily at instantiation time so that it does not need to be installed unless this provider is used.

Initialise FastEmbed model.

Parameters:

  • model_name (str | None): FastEmbed model identifier. Default: None.
  • cache_dir (str | None): Optional model cache directory. Default: None.

Raises:

  • ImportError: If fastembed is not installed.

Source code in src/markdown_vault_mcp/providers.py
def __init__(
    self,
    model_name: str | None = None,
    cache_dir: str | None = None,
) -> None:
    """Initialise FastEmbed model.

    Args:
        model_name: FastEmbed model identifier.
        cache_dir: Optional model cache directory.

    Raises:
        ImportError: If ``fastembed`` is not installed.
    """
    try:
        from fastembed import TextEmbedding
    except ImportError as exc:
        raise ImportError(
            "FastEmbedProvider requires 'fastembed'. "
            "Install it with: pip install 'markdown-vault-mcp[embeddings]'"
        ) from exc

    self._model_name = model_name or os.environ.get(
        f"{_ENV_PREFIX}_FASTEMBED_MODEL", "nomic-ai/nomic-embed-text-v1.5"
    )
    self._cache_dir = cache_dir or os.environ.get(
        f"{_ENV_PREFIX}_FASTEMBED_CACHE_DIR"
    )
    kwargs: dict[str, object] = {"model_name": self._model_name}
    if self._cache_dir:
        kwargs["cache_dir"] = self._cache_dir
    self._model = TextEmbedding(**kwargs)
    self._dimension: int | None = None
    logger.debug(
        "FastEmbedProvider initialised: model=%s cache_dir=%s",
        self._model_name,
        self._cache_dir,
    )

dimension property

Embedding dimension size from the loaded model.

Returns:

  • int: Integer dimension of each embedding vector.

embed(texts)

Embed a batch of texts using the local fastembed model.

Parameters:

  • texts (list[str]): List of strings to embed. Required.

Returns:

  • list[list[float]]: List of embedding vectors, one per input text.

Source code in src/markdown_vault_mcp/providers.py
def embed(self, texts: list[str]) -> list[list[float]]:
    """Embed a batch of texts using the local fastembed model.

    Args:
        texts: List of strings to embed.

    Returns:
        List of embedding vectors, one per input text.
    """
    vectors = [
        vector.tolist()
        for vector in self._model.embed(
            texts, batch_size=_FASTEMBED_ONNX_BATCH_SIZE
        )
    ]
    if self._dimension is None and vectors:
        self._dimension = len(vectors[0])
    return vectors
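
A local-only usage sketch; the cache directory below is an example path, and both arguments can be omitted to fall back on the MARKDOWN_VAULT_MCP_FASTEMBED_* environment variables or the documented defaults.

from markdown_vault_mcp.providers import FastEmbedProvider

provider = FastEmbedProvider(
    model_name="nomic-ai/nomic-embed-text-v1.5",  # the documented default model
    cache_dir="/tmp/fastembed-cache",             # example path, not required
)

vectors = provider.embed(["works offline once the model has been downloaded"])
print(provider.dimension, len(vectors))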

get_embedding_provider()

Auto-detect and return an embedding provider.

Checks the EMBEDDING_PROVIDER environment variable first. When that variable is not set, probes for available providers in this order:

  1. If OPENAI_API_KEY is set → OpenAIProvider.
  2. If Ollama is reachable at OLLAMA_HOST → OllamaProvider.
  3. If fastembed can be imported → FastEmbedProvider.
  4. Raises RuntimeError with installation instructions.

Returns:

  • EmbeddingProvider: An initialised EmbeddingProvider instance.

Raises:

  • RuntimeError: If no provider is available and EMBEDDING_PROVIDER is not set, or if the explicitly requested provider cannot be initialised.
  • ValueError: If EMBEDDING_PROVIDER is set to an unrecognised value.

Source code in src/markdown_vault_mcp/providers.py
def get_embedding_provider() -> EmbeddingProvider:
    """Auto-detect and return an embedding provider.

    Checks the ``EMBEDDING_PROVIDER`` environment variable first. When that
    variable is not set, probes for available providers in this order:

    1. If ``OPENAI_API_KEY`` is set → :class:`OpenAIProvider`.
    2. If Ollama is reachable at ``OLLAMA_HOST`` → :class:`OllamaProvider`.
    3. If ``fastembed`` can be imported →
       :class:`FastEmbedProvider`.
    4. Raises :class:`RuntimeError` with installation instructions.

    Returns:
        An initialised :class:`EmbeddingProvider` instance.

    Raises:
        RuntimeError: If no provider is available and ``EMBEDDING_PROVIDER``
            is not set, or if the explicitly requested provider cannot be
            initialised.
        ValueError: If ``EMBEDDING_PROVIDER`` is set to an unrecognised value.
    """
    explicit = os.environ.get("EMBEDDING_PROVIDER", "").strip().lower()

    if explicit == "openai":
        logger.info("Using OpenAIProvider (EMBEDDING_PROVIDER=openai)")
        return OpenAIProvider()

    if explicit == "ollama":
        logger.info("Using OllamaProvider (EMBEDDING_PROVIDER=ollama)")
        return OllamaProvider()

    if explicit == "fastembed":
        logger.info(
            "Using FastEmbedProvider (EMBEDDING_PROVIDER=%s)",
            explicit,
        )
        return FastEmbedProvider()

    if explicit:
        raise ValueError(
            f"Unrecognised EMBEDDING_PROVIDER value: {explicit!r}. "
            "Valid values: 'openai', 'ollama', 'fastembed'."
        )

    # Auto-detect: OpenAI API key present?
    if os.environ.get("OPENAI_API_KEY"):
        logger.info("Auto-detected OpenAIProvider (OPENAI_API_KEY is set)")
        return OpenAIProvider()

    # Auto-detect: Ollama reachable?
    host = os.environ.get("OLLAMA_HOST", "http://localhost:11434").rstrip("/")
    try:
        import httpx

        with httpx.Client(timeout=2.0) as client:
            response = client.get(f"{host}/api/tags")
        if response.status_code == 200:
            logger.info("Auto-detected OllamaProvider (Ollama reachable at %s)", host)
            return OllamaProvider()
    except Exception:
        logger.debug("Ollama not reachable at %s, skipping", host)

    # Auto-detect: fastembed importable?
    try:
        import fastembed  # noqa: F401

        logger.info("Auto-detected FastEmbedProvider")
        return FastEmbedProvider()
    except ImportError:
        logger.debug("fastembed not available, skipping")

    raise RuntimeError(
        "No embedding provider is available. Install one of:\n"
        "  pip install 'markdown-vault-mcp[embeddings-api]'  # httpx for Ollama or OpenAI\n"
        "  pip install 'markdown-vault-mcp[embeddings]'       # fastembed (local)\n"
        "Or set OPENAI_API_KEY for the OpenAI provider, "
        "or start an Ollama server for the Ollama provider."
    )
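
Callers that want to fail cleanly can catch both documented failure modes; a minimal sketch:

from markdown_vault_mcp.providers import get_embedding_provider

try:
    provider = get_embedding_provider()
except ValueError as exc:
    # EMBEDDING_PROVIDER was set to something other than openai/ollama/fastembed.
    raise SystemExit(f"Bad configuration: {exc}") from exc
except RuntimeError as exc:
    # No backend is available, or the requested one could not be initialised.
    raise SystemExit(f"No embedding provider: {exc}") from exc

print(f"Using {type(provider).__name__} (dimension={provider.dimension})")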