# Quickstart
This guide will walk you through installing Embex, generating real embeddings, and performing semantic search.
We recommend starting with LanceDB because it runs embedded locally—no Docker, no cloud setup, and no API keys required!
## 1. Installation

Install the Embex client and an embedding library.

**Python**

```bash
pip install embex lancedb sentence-transformers
```

**TypeScript**

```bash
npm install @bridgerust/embex lancedb @xenova/transformers
```
## 2. Write Code

Create a file named `quickstart.py` or `quickstart.ts`. We will use a small, efficient model (`all-MiniLM-L6-v2`) to generate embeddings locally.
**Python**

```python
import asyncio

from embex import EmbexClient, Vector
from sentence_transformers import SentenceTransformer


async def main():
    # 1. Setup Embedding Model
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # 2. Initialize Client (uses LanceDB by default)
    client = await EmbexClient.new_async(provider="lancedb", url="./data")

    # 3. Create Collection
    # 'all-MiniLM-L6-v2' produces 384-dimensional vectors
    await client.create_collection("products", dimension=384)

    # 4. Prepare Data
    documents = [
        {"id": "1", "text": "Apple iPhone 14", "category": "electronics"},
        {"id": "2", "text": "Samsung Galaxy S23", "category": "electronics"},
        {"id": "3", "text": "Organic Bananas", "category": "groceries"},
    ]

    # 5. Generate Embeddings & Insert
    vectors = []
    for doc in documents:
        embedding = model.encode(doc["text"]).tolist()
        vectors.append(Vector(
            id=doc["id"],
            vector=embedding,
            metadata={"text": doc["text"], "category": doc["category"]}
        ))

    await client.insert("products", vectors)

    # 6. Semantic Search
    query_text = "smartphone"
    query_vector = model.encode(query_text).tolist()

    results = await client.search(
        collection_name="products",
        vector=query_vector,
        limit=1
    )

    print(f"Query: '{query_text}'")
    print(f"Match: {results[0].metadata['text']}")


if __name__ == "__main__":
    asyncio.run(main())
```

**TypeScript**

```typescript
import { EmbexClient, Vector } from "@bridgerust/embex";
import { pipeline } from "@xenova/transformers";

async function main() {
  // 1. Setup Embedding Model
  const generateEmbedding = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');

  // Helper to get array from tensor
  const embed = async (text: string) => {
    const output = await generateEmbedding(text, { pooling: 'mean', normalize: true });
    return Array.from(output.data);
  };

  // 2. Initialize Client
  const client = await EmbexClient.newAsync("lancedb", "./data");

  // 3. Create Collection (384 dimensions for MiniLM)
  await client.createCollection("products", 384);

  // 4. Prepare Data
  const documents = [
    { id: "1", text: "Apple iPhone 14", category: "electronics" },
    { id: "2", text: "Samsung Galaxy S23", category: "electronics" },
    { id: "3", text: "Organic Bananas", category: "groceries" },
  ];

  // 5. Generate Embeddings & Insert
  const vectors: Vector[] = [];
  for (const doc of documents) {
    vectors.push({
      id: doc.id,
      vector: await embed(doc.text),
      metadata: { text: doc.text, category: doc.category }
    });
  }

  await client.insert("products", vectors);

  // 6. Semantic Search
  const queryText = "smartphone";
  const queryVector = await embed(queryText);

  const results = await client.search({
    collection_name: "products",
    vector: queryVector,
    limit: 1
  });

  console.log(`Query: '${queryText}'`);
  console.log(`Match: ${results[0].metadata.text}`);
}

main();
```
## 3. Run It

**Python**

```bash
python quickstart.py
```

**TypeScript**

```bash
npx tsx quickstart.ts
```

You just built a semantic search engine! Notice how searching for “smartphone” found “Apple iPhone 14” even though the word “smartphone” isn’t in the text? That’s the power of vector embeddings. 🚀
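If everything is wired up correctly, either script should print something along these lines (the match text comes from the metadata stored at insert time):

```text
Query: 'smartphone'
Match: Apple iPhone 14
```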
## Next Steps
- Deploy to Production: Ready to scale? Switch to Qdrant or Pinecone by changing just the initialization line; see the sketch after this list.
- Learn Core Concepts: Understand Collections and Filters.
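As a minimal sketch of that switch, assuming a `"qdrant"` provider string and a locally running Qdrant instance (the provider name and URL here are illustrative placeholders, not confirmed values; check the provider guide for what your deployment needs):

```python
import asyncio

from embex import EmbexClient


async def main():
    # Same quickstart flow, but pointed at a remote vector database instead of
    # an embedded LanceDB directory. The provider name and URL are assumptions
    # for illustration only.
    client = await EmbexClient.new_async(provider="qdrant", url="http://localhost:6333")
    # ...create the collection, insert vectors, and search exactly as above...


asyncio.run(main())
```

The rest of the quickstart code stays the same, which is the point: the embedding and search calls don't change when the backing provider does.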