AI Integration

Building AI-powered features with LLMs and vector stores

Add AI capabilities to your SaaS application with LLMs and vector databases.

Supported Providers

  • OpenAI (GPT-4, GPT-3.5)
  • Anthropic (Claude)
  • Google AI (Gemini)
  • Cloudflare Workers AI
  • Ollama (self-hosted)
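
Several of these providers expose OpenAI-compatible endpoints, so you can often reuse the openai SDK and simply point baseURL elsewhere. For example, against a local Ollama instance (its default port is 11434; the apiKey value is required by the SDK but ignored by Ollama):

import OpenAI from 'openai'

// Ollama serves an OpenAI-compatible API under /v1
const ollama = new OpenAI({
  baseURL: 'http://localhost:11434/v1',
  apiKey: 'ollama'
})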

Configuration

Environment Variables

# OpenAI
OPENAI_API_KEY=sk-xxx

# Anthropic
ANTHROPIC_API_KEY=sk-ant-xxx

# Cloudflare Workers AI
CLOUDFLARE_ACCOUNT_ID=your-account-id
CLOUDFLARE_AI_API_TOKEN=your-token
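
It's worth failing fast when a key is missing rather than surfacing provider errors at request time. A minimal startup check (the plugin path and variable list here are just an example):

// server/plugins/ai-env-check.ts (hypothetical)
export default defineNitroPlugin(() => {
  const required = ['OPENAI_API_KEY']
  const missing = required.filter((name) => !process.env[name])

  if (missing.length > 0) {
    console.warn(`Missing AI environment variables: ${missing.join(', ')}`)
  }
})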

Basic Chat Completion

// server/api/ai/chat.post.ts
import OpenAI from 'openai'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})

export default defineEventHandler(async (event) => {
  const { message } = await readBody(event)
  
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: message }
    ]
  })
  
  return {
    response: completion.choices[0].message.content
  }
})
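
From the client, this is an ordinary POST; with Nuxt's $fetch, for example:

// e.g. inside a component or composable
const { response } = await $fetch('/api/ai/chat', {
  method: 'POST',
  body: { message: 'Hello!' }
})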

Streaming Responses

// Reuses the `openai` client created in the previous example
export default defineEventHandler(async (event) => {
  const { message } = await readBody(event)

  const stream = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: message }],
    stream: true
  })

  // Stream plain text back to the client as tokens arrive
  setHeader(event, 'Content-Type', 'text/plain; charset=utf-8')

  const encoder = new TextEncoder()
  const readable = new ReadableStream({
    async start(controller) {
      for await (const chunk of stream) {
        const text = chunk.choices[0]?.delta?.content || ''
        controller.enqueue(encoder.encode(text))
      }
      controller.close()
    }
  })

  return readable
})
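
On the client, you can consume the stream incrementally with fetch and a reader (the route path here is illustrative):

const res = await fetch('/api/ai/chat-stream', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ message: 'Hello!' })
})

const reader = res.body!.getReader()
const decoder = new TextDecoder()
let text = ''

while (true) {
  const { done, value } = await reader.read()
  if (done) break
  text += decoder.decode(value, { stream: true })
  // update the UI with `text` here
}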

Set Up Vectorize (Cloudflare)

# Create vector index
wrangler vectorize create my-index \
  --dimensions=1536 \
  --metric=cosine
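
Then bind the index in wrangler.toml so it's available to your code as env.VECTORIZE (the binding name is up to you):

# wrangler.toml
[[vectorize]]
binding = "VECTORIZE"
index_name = "my-index"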

Embeddings and Vector Search

async function generateEmbedding(text: string) {
  const response = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: text
  })
  
  return response.data[0].embedding
}

// Store embeddings (`env.VECTORIZE` is the binding from wrangler.toml;
// in Nitro on Cloudflare it is exposed via event.context.cloudflare.env)
async function storeDocument(text: string, metadata: Record<string, any>) {
  const embedding = await generateEmbedding(text)

  await env.VECTORIZE.insert([{
    id: crypto.randomUUID(),
    values: embedding,
    metadata: { text, ...metadata }
  }])
}
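
Real documents are usually too long to embed whole, so you would typically split them into chunks first. A naive fixed-size splitter (the 1000-character size is arbitrary; production code would split on sentence or paragraph boundaries):

function chunkText(text: string, size = 1000): string[] {
  const chunks: string[] = []
  for (let i = 0; i < text.length; i += size) {
    chunks.push(text.slice(i, i + size))
  }
  return chunks
}

// Store each chunk as its own vector
async function storeLargeDocument(text: string, metadata: Record<string, any>) {
  for (const chunk of chunkText(text)) {
    await storeDocument(chunk, metadata)
  }
}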

// Search similar documents
async function searchDocuments(query: string, limit = 5) {
  const queryEmbedding = await generateEmbedding(query)

  const results = await env.VECTORIZE.query(queryEmbedding, {
    topK: limit,
    returnMetadata: 'all' // include the stored text so callers can build context
  })

  return results.matches
}

RAG (Retrieval Augmented Generation)

export default defineEventHandler(async (event) => {
  const { question } = await readBody(event)
  
  // 1. Find relevant documents
  const relevantDocs = await searchDocuments(question)
  
  // 2. Build context from documents
  const context = relevantDocs
    .map(doc => doc.metadata.text)
    .join('\n\n')
  
  // 3. Generate answer with context
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      {
        role: 'system',
        content: `Answer questions based on this context:\n\n${context}`
      },
      {
        role: 'user',
        content: question
      }
    ]
  })
  
  return {
    answer: completion.choices[0].message.content,
    sources: relevantDocs
  }
})

Function Calling

const tools = [{
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get current weather for a location',
    parameters: {
      type: 'object',
      properties: {
        location: {
          type: 'string',
          description: 'City name'
        }
      },
      required: ['location']
    }
  }
}]

const completion = await openai.chat.completions.create({
  model: 'gpt-4',
  messages: [{ role: 'user', content: 'What\'s the weather in Paris?' }],
  tools
})
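
The model doesn't execute the function itself; it returns a tool call that you run and feed back in a follow-up request. Roughly (getWeather is a hypothetical helper you'd implement yourself):

const toolCall = completion.choices[0].message.tool_calls?.[0]

if (toolCall) {
  const args = JSON.parse(toolCall.function.arguments)
  const weather = await getWeather(args.location) // hypothetical helper

  // Return the result so the model can produce a final answer
  const followUp = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'user', content: "What's the weather in Paris?" },
      completion.choices[0].message,
      {
        role: 'tool',
        tool_call_id: toolCall.id,
        content: JSON.stringify(weather)
      }
    ],
    tools
  })

  console.log(followUp.choices[0].message.content)
}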

Image Generation

// Generate image with DALL-E
export default defineEventHandler(async (event) => {
  const { prompt } = await readBody(event)
  
  const response = await openai.images.generate({
    model: 'dall-e-3',
    prompt,
    n: 1,
    size: '1024x1024'
  })
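  // Note: the returned image URL is temporary, so download and persist
  // the image (e.g. to object storage) if you need it long-term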
  
  return {
    url: response.data[0].url
  }
})

Best Practices

  1. Implement rate limiting (a minimal sketch follows this list)
  2. Cache responses when possible
  3. Handle errors gracefully
  4. Monitor token usage
  5. Set appropriate timeouts
  6. Sanitize user input
  7. Add content moderation
  8. Track costs per user
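
As a starting point for rate limiting, here is a minimal in-memory sketch (fine for a single instance; a real deployment would back this with KV, Redis, or similar):

// Per-user fixed-window counter, kept in process memory
const requestCounts = new Map<string, { count: number; resetAt: number }>()

function checkRateLimit(userId: string, limit = 20, windowMs = 60_000): boolean {
  const now = Date.now()
  const entry = requestCounts.get(userId)

  if (!entry || now > entry.resetAt) {
    requestCounts.set(userId, { count: 1, resetAt: now + windowMs })
    return true
  }

  if (entry.count >= limit) return false

  entry.count++
  return true
}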

Next Steps