# Paste of code

# Importing Embeddings
import chromadb
import time
from chromadb import Documents, EmbeddingFunction, Embeddings
from google.api_core import retry
from google.genai import types

# Retry helper for API errors
def is_retriable(e):
    """Return True when `e` is a `genai.errors.APIError` whose code is 429 or 503.

    Used as the predicate that decides whether a failed call is retried.
    """
    # NOTE(review): `genai` is not imported in the visible part of this file;
    # confirm it is bound before this predicate is first evaluated.
    if not isinstance(e, genai.errors.APIError):
        return False
    return e.code in {429, 503}

class GeminiEmbeddingFunction(EmbeddingFunction):
    """Embedding function that delegates to `client.models.embed_content`.

    The `document_mode` flag selects the task type sent with each request:
    "retrieval_document" when it is true, "retrieval_query" otherwise.
    """

    # True -> embed inputs as documents; False -> embed inputs as queries.
    document_mode = True

    @retry.Retry(predicate=is_retriable)
    def __call__(self, input: Documents) -> Embeddings:
        """Embed `input` and return one vector per returned embedding.

        The call is wrapped in `retry.Retry`, so errors accepted by
        `is_retriable` trigger another attempt instead of propagating.
        """
        # Pick the task type that matches the current mode.
        if self.document_mode:
            task_type = "retrieval_document"
        else:
            task_type = "retrieval_query"

        # NOTE(review): `client` is not defined in the visible part of this
        # file; it must already exist when this method is called.
        result = client.models.embed_content(
            model="models/text-embedding-004",
            contents=input,
            config=types.EmbedContentConfig(task_type=task_type),
        )

        # Keep only the raw vector of each embedding object.
        return [embedding.values for embedding in result.embeddings]

# Initialize Chroma client with on-disk storage.
# On Chroma 0.4+ `chromadb.Client(Settings(persist_directory=...))` builds an
# in-memory client (the directory is ignored unless persistence is enabled), so
# nothing would be written to ./chroma_db. `PersistentClient` is the supported
# way to store the database at that path and reload it on the next run.
chroma_client = chromadb.PersistentClient(path="./chroma_db")

# Open the target collection, creating it if it does not exist yet
DB_NAME = "googlecardb"
embed_fn = GeminiEmbeddingFunction()
embed_fn.document_mode = True  # embed as documents rather than queries
db = chroma_client.get_or_create_collection(
    name=DB_NAME,
    embedding_function=embed_fn,
)

# Insert the documents one at a time, using the list position as the ID
documents = concatenated_list  # expected to be defined before this point

for i, doc in enumerate(documents):
    doc_id = str(i)
    db.add(documents=[doc], ids=[doc_id])
    time.sleep(0.5)  # pause after each successful addition
    preview = str(doc[:100])
    print(f"Added document with ID: {i}, Content (first 100 chars): {preview}")

# Persist the entire Chroma client, not just the collection.
# Options left disabled:
#   db.persist()
# or
#   client = chromadb.PersistentClient(path="./output")

# Summarize how many documents were submitted
doc_total = len(documents)
print(f"\nFinished adding {doc_total} documents to the '{DB_NAME}' collection.")

# Ask the collection how many records it holds
stored_count = db.count()
print(f"Collection contains {stored_count} documents.")

# Show one record from the collection as a sanity check
sample = db.peek(1)
print(f"Sample document: {sample}")