AI Integration

Building AI-powered features with LLMs and vector stores

Add AI capabilities to your SaaS application with LLMs and vector databases.

Supported Providers

  • OpenAI (GPT-4, GPT-3.5)
  • Anthropic (Claude)
  • Google AI (Gemini)
  • Cloudflare Workers AI
  • Ollama (self-hosted)
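
Several of these providers expose OpenAI-compatible endpoints, so you can often reuse the openai SDK and simply point baseURL elsewhere. For example, against a local Ollama instance (its default port is 11434; the apiKey value is required by the SDK but ignored by Ollama):

import OpenAI from 'openai'

// Ollama serves an OpenAI-compatible API under /v1
const ollama = new OpenAI({
  baseURL: 'http://localhost:11434/v1',
  apiKey: 'ollama'
})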

Configuration

Environment Variables

# OpenAI
OPENAI_API_KEY=sk-xxx

# Anthropic
ANTHROPIC_API_KEY=sk-ant-xxx

# Cloudflare Workers AI
CLOUDFLARE_ACCOUNT_ID=your-account-id
CLOUDFLARE_AI_API_TOKEN=your-token
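
It's worth failing fast when a key is missing rather than surfacing provider errors at request time. A minimal startup check (the plugin path and variable list here are just an example):

// server/plugins/ai-env-check.ts (hypothetical)
export default defineNitroPlugin(() => {
  const required = ['OPENAI_API_KEY']
  const missing = required.filter((name) => !process.env[name])

  if (missing.length > 0) {
    console.warn(`Missing AI environment variables: ${missing.join(', ')}`)
  }
})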

Basic Chat Completion

// server/api/ai/chat.post.ts
import OpenAI from 'openai'

const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})

export default defineEventHandler(async (event) => {
  const { message } = await readBody(event)
  
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: message }
    ]
  })
  
  return {
    response: completion.choices[0].message.content
  }
})
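
From the client, this is an ordinary POST; with Nuxt's $fetch, for example:

// e.g. inside a component or composable
const { response } = await $fetch('/api/ai/chat', {
  method: 'POST',
  body: { message: 'Hello!' }
})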

Streaming Responses

// Reuses the `openai` client created in the previous example
export default defineEventHandler(async (event) => {
  const { message } = await readBody(event)

  const stream = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: message }],
    stream: true
  })

  // Stream plain text back to the client as tokens arrive
  setHeader(event, 'Content-Type', 'text/plain; charset=utf-8')

  const encoder = new TextEncoder()
  const readable = new ReadableStream({
    async start(controller) {
      for await (const chunk of stream) {
        const text = chunk.choices[0]?.delta?.content || ''
        controller.enqueue(encoder.encode(text))
      }
      controller.close()
    }
  })

  return readable
})
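
On the client, you can consume the stream incrementally with fetch and a reader (the route path here is illustrative):

const res = await fetch('/api/ai/chat-stream', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ message: 'Hello!' })
})

const reader = res.body!.getReader()
const decoder = new TextDecoder()
let text = ''

while (true) {
  const { done, value } = await reader.read()
  if (done) break
  text += decoder.decode(value, { stream: true })
  // update the UI with `text` here
}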

Set Up Vectorize (Cloudflare)

# Create vector index
wrangler vectorize create my-index \
  --dimensions=1536 \
  --metric=cosine
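
Then bind the index in wrangler.toml so it's available to your code as env.VECTORIZE (the binding name is up to you):

# wrangler.toml
[[vectorize]]
binding = "VECTORIZE"
index_name = "my-index"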

Embeddings and Vector Search

async function generateEmbedding(text: string) {
  const response = await openai.embeddings.create({
    model: 'text-embedding-3-small',
    input: text
  })
  
  return response.data[0].embedding
}

// Store embeddings (`env.VECTORIZE` is the binding from wrangler.toml;
// in Nitro on Cloudflare it is exposed via event.context.cloudflare.env)
async function storeDocument(text: string, metadata: Record<string, any>) {
  const embedding = await generateEmbedding(text)

  await env.VECTORIZE.insert([{
    id: crypto.randomUUID(),
    values: embedding,
    metadata: { text, ...metadata }
  }])
}
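
Real documents are usually too long to embed whole, so you would typically split them into chunks first. A naive fixed-size splitter (the 1000-character size is arbitrary; production code would split on sentence or paragraph boundaries):

function chunkText(text: string, size = 1000): string[] {
  const chunks: string[] = []
  for (let i = 0; i < text.length; i += size) {
    chunks.push(text.slice(i, i + size))
  }
  return chunks
}

// Store each chunk as its own vector
async function storeLargeDocument(text: string, metadata: Record<string, any>) {
  for (const chunk of chunkText(text)) {
    await storeDocument(chunk, metadata)
  }
}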

// Search similar documents
async function searchDocuments(query: string, limit = 5) {
  const queryEmbedding = await generateEmbedding(query)

  const results = await env.VECTORIZE.query(queryEmbedding, {
    topK: limit,
    returnMetadata: 'all' // include the stored text so callers can build context
  })

  return results.matches
}

RAG (Retrieval Augmented Generation)

export default defineEventHandler(async (event) => {
  const { question } = await readBody(event)
  
  // 1. Find relevant documents
  const relevantDocs = await searchDocuments(question)
  
  // 2. Build context from documents
  const context = relevantDocs
    .map(doc => doc.metadata.text)
    .join('\n\n')
  
  // 3. Generate answer with context
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      {
        role: 'system',
        content: `Answer questions based on this context:\n\n${context}`
      },
      {
        role: 'user',
        content: question
      }
    ]
  })
  
  return {
    answer: completion.choices[0].message.content,
    sources: relevantDocs
  }
})

Function Calling

const tools = [{
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get current weather for a location',
    parameters: {
      type: 'object',
      properties: {
        location: {
          type: 'string',
          description: 'City name'
        }
      },
      required: ['location']
    }
  }
}]

const completion = await openai.chat.completions.create({
  model: 'gpt-4',
  messages: [{ role: 'user', content: 'What\'s the weather in Paris?' }],
  tools
})
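
The model doesn't execute the function itself; it returns a tool call that you run and feed back in a follow-up request. Roughly (getWeather is a hypothetical helper you'd implement yourself):

const toolCall = completion.choices[0].message.tool_calls?.[0]

if (toolCall) {
  const args = JSON.parse(toolCall.function.arguments)
  const weather = await getWeather(args.location) // hypothetical helper

  // Return the result so the model can produce a final answer
  const followUp = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [
      { role: 'user', content: "What's the weather in Paris?" },
      completion.choices[0].message,
      {
        role: 'tool',
        tool_call_id: toolCall.id,
        content: JSON.stringify(weather)
      }
    ],
    tools
  })

  console.log(followUp.choices[0].message.content)
}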

Image Generation

// Generate image with DALL-E
export default defineEventHandler(async (event) => {
  const { prompt } = await readBody(event)
  
  const response = await openai.images.generate({
    model: 'dall-e-3',
    prompt,
    n: 1,
    size: '1024x1024'
  })
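  // Note: the returned image URL is temporary, so download and persist
  // the image (e.g. to object storage) if you need it long-term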
  
  return {
    url: response.data[0].url
  }
})

Best Practices

  1. Implement rate limiting (a minimal sketch follows this list)
  2. Cache responses when possible
  3. Handle errors gracefully
  4. Monitor token usage
  5. Set appropriate timeouts
  6. Sanitize user input
  7. Add content moderation
  8. Track costs per user
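
As a starting point for rate limiting, here is a minimal in-memory sketch (fine for a single instance; a real deployment would back this with KV, Redis, or similar):

// Per-user fixed-window counter, kept in process memory
const requestCounts = new Map<string, { count: number; resetAt: number }>()

function checkRateLimit(userId: string, limit = 20, windowMs = 60_000): boolean {
  const now = Date.now()
  const entry = requestCounts.get(userId)

  if (!entry || now > entry.resetAt) {
    requestCounts.set(userId, { count: 1, resetAt: now + windowMs })
    return true
  }

  if (entry.count >= limit) return false

  entry.count++
  return true
}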

Next Steps