feat: FluxAI multi-step autonomy + rate limiting + image pipeline

Two production-grade hardening additions and one cost optimisation. FLUXAI AUTONOMY RESTORED (api/chat) - Brings back the multi-step agentic flow that the system prompt was always designed for. The "temporarily removed maxSteps" comment is gone — replaced with the AI SDK 6 equivalent stopWhen: stepCountIs(5). - Cap at 5 chained tool calls per turn bounds latency + LLM cost. - maxDuration raised 30s → 60s to absorb tool-chain runs. - Result: one user prompt now triggers, e.g. search_installations → energy_savings_calculator → show_case_study → schedule_consultation in a single turn — exactly the SPIN methodology in the prompt. RATE LIMITING (src/lib/rateLimit.ts + api/chat) - Token-bucket per IP: 30 messages burst, sustained 30/minute. Trips to 429 with Retry-After + X-RateLimit-Remaining headers when abused. - IP extracted from x-forwarded-for (Nginx already passes this). - In-memory Map with 10-min GC of stale buckets — no Redis dep. If we scale to multiple replicas later, swap the Map for Upstash. - Protects the OpenAI quota from someone hammering the chat endpoint. IMAGE PIPELINE (src/lib/imageOptimizer.ts) - sharp-based optimizer: auto-orient (EXIF), cap at 2560px long side, re-encode WebP@85, content-hash filename. Re-uploads with same content reuse the same hash; new content gets a new URL — perfect cache invalidation without header tricks. - Opt-in via optimize=1 form/query param on /api/assets POST. - Hero CMS and Site Settings uploads turn it on automatically (those are user-facing brand assets where compression matters most). - App/news/parts uploads remain untouched (editors may be uploading CAD drawings, datasheets, etc. that shouldn't be transcoded). - Falls back gracefully to a no-op for unsupported formats (SVG, GIF, videos, anything sharp can't decode) so it never breaks an upload. 
DOCKERFILE - Adds vips/vips-dev for sharp on Alpine + --include=optional so the @img/sharp-linuxmusl-x64 prebuilt is downloaded - Explicitly copies node_modules/sharp + node_modules/@img to the runner stage (Next.js trace can miss conditional deps). NO DB SCHEMA CHANGES.
2026-05-04 14:48:37 -05:00
parent 09e6d0c7cf
commit a199891a3c
9 changed files with 303 additions and 25 deletions
@@ -31,6 +31,7 @@ import { NextRequest, NextResponse } from "next/server";
 import fs from "fs";
 import path from "path";
 import { revalidateContent, type RevalidateScope } from "@/lib/revalidate";
+import { optimizeImage, isOptimizable } from "@/lib/imageOptimizer";

 const SCOPE_ROOTS: Record<string, string> = {
  applications: path.join(process.cwd(), "public", "applications"),
@@ -186,6 +187,13 @@ export async function GET(request: NextRequest) {
 }

 // POST — Upload a file
+//
+// Optional query / form param `optimize` (accepted values: `true`, `1`, `on`)
+// routes the upload through the sharp pipeline: auto-orient, cap at 2560px,
+// encode to WebP, and save under a content-hashed filename. The same image
+// always produces the same hash, so re-uploading is idempotent. Different
+// content produces a different hash, so the browser cache invalidates
+// instantly without any header trickery.
 export async function POST(request: NextRequest) {
  try {
    const formData = await request.formData();
@@ -194,6 +202,12 @@ export async function POST(request: NextRequest) {
    const subPath = formData.get("path") as string || "";
    const file = formData.get("file") as File;

+    // Two ways to opt into optimization: ?optimize=1 query or form field "optimize".
+    const optFlag =
+      formData.get("optimize") ??
+      new URL(request.url).searchParams.get("optimize");
+    const shouldOptimize = optFlag === "true" || optFlag === "1" || optFlag === "on";
+
    if (!file) return NextResponse.json({ error: "Missing file" }, { status: 400 });
    if (!SCOPE_ROOTS[scope]) return NextResponse.json({ error: "Invalid scope" }, { status: 400 });
    if (!FLAT_SCOPES.has(scope) && !slug) return NextResponse.json({ error: "Missing slug" }, { status: 400 });
@@ -211,13 +225,26 @@ export async function POST(request: NextRequest) {

    fs.mkdirSync(dirPath, { recursive: true });

-    const safeName = file.name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9._-]/g, "");
-    const filePath = path.join(dirPath, safeName);
+    const inputBuffer: Buffer = Buffer.from(await file.arrayBuffer());
+
+    // Optimization branch: replace filename with a content-hashed WebP one.
+    let saveName = file.name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9._-]/g, "");
+    let outputBuffer: Buffer | Uint8Array = inputBuffer;
+    let optimizedMeta: { width: number | null; height: number | null; bytes: number } | null = null;
+
+    if (shouldOptimize && isOptimizable(file.name)) {
+      const opt = await optimizeImage(inputBuffer, file.name);
+      saveName = opt.filename;
+      outputBuffer = opt.buffer;
+      optimizedMeta = { width: opt.width, height: opt.height, bytes: opt.bytes };
+    }
+
+    const filePath = path.join(dirPath, saveName);
    const existed = fs.existsSync(filePath);

-    fs.writeFileSync(filePath, Buffer.from(await file.arrayBuffer()));
+    fs.writeFileSync(filePath, outputBuffer);

-    const rel = subPath ? `${subPath}/${safeName}` : safeName;
+    const rel = subPath ? `${subPath}/${saveName}` : saveName;

    // 🔥 Invalida caché para que la imagen aparezca sin recompilar
    revalidateContent({ scope: scope as RevalidateScope, slug });
@@ -225,12 +252,21 @@ export async function POST(request: NextRequest) {
    return NextResponse.json({
      success: true,
      file: {
-        name: safeName,
+        name: saveName,
        publicUrl: buildPublicUrl(scope, slug, rel),
        path: rel,
-        mediaType: getFileType(safeName),
-        size: getFileSize(file.size),
+        mediaType: getFileType(saveName),
+        size: getFileSize(outputBuffer.byteLength),
        overwritten: existed,
+        optimized: optimizedMeta !== null,
+        ...(optimizedMeta
+          ? {
+              width: optimizedMeta.width,
+              height: optimizedMeta.height,
+              originalBytes: file.size,
+              savedBytes: file.size - optimizedMeta.bytes,
+            }
+          : {}),
      }
    });
  } catch (error) {
@@ -1,9 +1,10 @@
 import { openai } from '@ai-sdk/openai';
-import { streamText, UIMessage, convertToModelMessages, tool } from 'ai';
+import { streamText, stepCountIs, UIMessage, convertToModelMessages, tool } from 'ai';
 import { z } from 'zod';
 import { prisma } from '@/lib/prisma';
+import { checkChatRateLimit } from '@/lib/rateLimit';

-export const maxDuration = 30;
+export const maxDuration = 60;

 // ─── PHYSICS CONSTANTS (NOT from DB — these are engineering benchmarks) ──────
 // These stay hardcoded because they are physical/scientific constants,
@@ -150,6 +151,25 @@ function industryFromSlug(slug: string): string {
 // ─── ROUTE HANDLER ──────────────────────────────────────────────

 export async function POST(req: Request) {
+  // ─── Rate limit (per-IP token bucket, 30 req/min) ──────────────
+  const rate = checkChatRateLimit(req);
+  if (!rate.ok) {
+    return new Response(
+      JSON.stringify({
+        error: "Too many requests. Please slow down.",
+        retryAfterSec: rate.retryAfterSec,
+      }),
+      {
+        status: 429,
+        headers: {
+          "Content-Type": "application/json",
+          "Retry-After": String(rate.retryAfterSec),
+          "X-RateLimit-Remaining": String(rate.remaining),
+        },
+      }
+    );
+  }
+
  const { messages, context }: {
    messages: UIMessage[];
    context?: { section?: string; activeTab?: string };
@@ -168,7 +188,11 @@ export async function POST(req: Request) {
    model: openai('gpt-4o'),
    system: systemPrompt + contextNote,
    messages: coreMessages,
-    // maxSteps has been temporarily removed to ensure compatibility with the installed AI SDK version
+    // 🔥 RESTORED: AI SDK 6 multi-step autonomy. The agent can now chain
+    // search → calculator → case-study → consultation in a single turn,
+    // exactly as the SPIN methodology in the system prompt was designed for.
+    // Cap at 5 steps to bound LLM cost and latency.
+    stopWhen: stepCountIs(5),
    tools: {

      // ══════════════════════════════════════════════════════════════
@@ -66,6 +66,7 @@ export default function HeroDashboard() {
    try {
      const fd = new FormData();
      fd.append("scope", "footage");
+      fd.append("optimize", "1");
      fd.append("file", file);
      const res = await fetch("/api/assets", { method: "POST", body: fd });
      const data = await res.json();
@@ -389,6 +389,7 @@ function ImageField({
    try {
      const fd = new FormData();
      fd.append("scope", "branding");
+      fd.append("optimize", "1");
      fd.append("file", file);
      const res = await fetch("/api/assets", { method: "POST", body: fd });
      const data = await res.json();
@@ -0,0 +1,128 @@
+// src/lib/imageOptimizer.ts
+// ─────────────────────────────────────────────────────────────────────────────
+// Server-side image optimization for CMS uploads.
+//
+// What it does:
+//   - Auto-orients (respects EXIF rotation from phone cameras)
+//   - Caps very large images at 2560px on the long side (no point storing
+//     phone megapixels — Next.js Image Optimizer will downsize on the fly)
+//   - Re-encodes to WebP at quality 85 (typically 60–80% smaller than JPEG)
+//   - Computes a content-hash filename, so re-uploading the same image under
+//     the same name resolves to the same file, AND new versions get a new URL
+//     (perfect cache invalidation on the browser side)
+//
+// What it does NOT do:
+//   - Generate responsive variants — next/image handles that automatically
+//   - Touch GIFs or videos — those pass through unchanged
+//   - Keep the original — the optimized buffer fully replaces it
+// ─────────────────────────────────────────────────────────────────────────────
+
+import "server-only";
+import sharp from "sharp";
+import crypto from "crypto";
+import path from "path";
+
+// Upper bound, in pixels, for the longer edge of a stored image; larger
+// inputs are scaled down before encoding.
+const MAX_LONG_SIDE = 2560;
+// Quality value handed to sharp's WebP encoder.
+const WEBP_QUALITY = 85;
+
+// File extensions sharp can decode and we want to optimize.
+// NOTE(review): HEIC/HEIF decoding depends on how the underlying libvips was
+// built — confirm the deployed sharp binary handles it. Files it cannot decode
+// take the pass-through path in optimizeImage instead of failing the upload.
+const OPTIMIZABLE = new Set([".jpg", ".jpeg", ".png", ".webp", ".tiff", ".heic", ".heif"]);
+
+/** What optimizeImage hands back: the bytes to store plus metadata about them. */
+export interface OptimizedImage {
+  /** Bytes to persist: the re-encoded WebP, or the untouched input on the pass-through path. */
+  buffer: Buffer;
+  ext: string;            // ".webp" for optimized, original ext otherwise
+  filename: string;       // sanitized name with content hash, e.g. "hero-9f3a2c1b.webp"
+  /** Output width in pixels; null when the input was passed through unprocessed. */
+  width: number | null;
+  /** Output height in pixels; null when the input was passed through unprocessed. */
+  height: number | null;
+  /** Byte length of `buffer`. */
+  bytes: number;
+}
+
+/**
+ * Tell whether a file should be sent through the optimizer, judging only by
+ * its extension (compared case-insensitively against OPTIMIZABLE).
+ *
+ * @param filename File name or path; only the trailing extension is inspected.
+ * @returns true when the lower-cased extension is in OPTIMIZABLE.
+ */
+export function isOptimizable(filename: string): boolean {
+  const extension = path.extname(filename).toLowerCase();
+  return OPTIMIZABLE.has(extension);
+}
+
+/**
+ * Turn an uploaded file name into a safe, lower-case base name with no
+ * extension.
+ *
+ * Whitespace runs become "-", anything outside [a-z0-9._-] is dropped, dash
+ * runs collapse to a single dash, leading/trailing dashes and dots are
+ * trimmed, and the result is capped at 60 characters. If nothing is left the
+ * placeholder "image" is returned.
+ */
+function sanitizeBaseName(name: string): string {
+  // Strip the final ".ext" segment, if any.
+  const stem = name.replace(/\.[^.]+$/, "");
+
+  const slug = stem
+    .toLowerCase()
+    .replace(/\s+/g, "-")
+    .replace(/[^a-z0-9._-]/g, "");
+
+  const tidied = slug.replace(/-+/g, "-").replace(/^[-.]+|[-.]+$/g, "");
+  const clipped = tidied.slice(0, 60);
+
+  return clipped === "" ? "image" : clipped;
+}
+
+/**
+ * Hex-encoded SHA-256 of `buffer`, truncated to its first `length` characters
+ * (8 by default). Used as the content fingerprint in generated filenames.
+ */
+function shortHash(buffer: Buffer, length = 8): string {
+  const hasher = crypto.createHash("sha256");
+  hasher.update(buffer);
+  const fullHex = hasher.digest("hex");
+  return fullHex.slice(0, length);
+}
+
+/**
+ * Optimize an uploaded image buffer.
+ *
+ * Supported inputs are auto-oriented from EXIF, scaled down so that neither
+ * edge exceeds MAX_LONG_SIDE, and re-encoded as WebP. If the extension isn't
+ * in OPTIMIZABLE, or sharp throws while processing, the input bytes are
+ * returned untouched (still under a content-hashed filename) — this keeps
+ * SVGs / GIFs / videos / unsupported formats working transparently.
+ *
+ * @param inputBuffer      Raw bytes of the uploaded file.
+ * @param originalFilename Client-supplied name; provides the extension and the
+ *                         sanitized base of the generated filename.
+ * @returns The bytes to store together with their filename, extension,
+ *          dimensions (null on the pass-through path) and byte length.
+ */
+export async function optimizeImage(
+  inputBuffer: Buffer,
+  originalFilename: string
+): Promise<OptimizedImage> {
+  const ext = path.extname(originalFilename).toLowerCase();
+  const baseName = sanitizeBaseName(originalFilename);
+
+  // Pass-through result shared by the "unsupported extension" and
+  // "sharp failed" paths: original bytes, original extension, hashed name.
+  const keepOriginal = (): OptimizedImage => ({
+    buffer: inputBuffer,
+    ext,
+    filename: `${baseName}-${shortHash(inputBuffer)}${ext}`,
+    width: null,
+    height: null,
+    bytes: inputBuffer.byteLength,
+  });
+
+  if (!OPTIMIZABLE.has(ext)) return keepOriginal();
+
+  try {
+    const pipeline = sharp(inputBuffer, { failOn: "none" }).rotate(); // honour EXIF
+
+    const meta = await pipeline.metadata();
+    const longSide = Math.max(meta.width ?? 0, meta.height ?? 0);
+
+    let processed = pipeline;
+    if (longSide > MAX_LONG_SIDE) {
+      // metadata() reports the stored dimensions, i.e. before the EXIF
+      // rotation is applied, so it cannot tell which edge will be the long one
+      // in the output. Bounding BOTH edges with fit "inside" caps the long
+      // side at MAX_LONG_SIDE whichever way the image ends up oriented.
+      processed = processed.resize({
+        width: MAX_LONG_SIDE,
+        height: MAX_LONG_SIDE,
+        withoutEnlargement: true,
+        fit: "inside",
+      });
+    }
+
+    const out = await processed
+      .webp({ quality: WEBP_QUALITY, effort: 4 })
+      .toBuffer({ resolveWithObject: true });
+
+    // Hash the encoded output so the filename identifies the stored bytes.
+    const hash = shortHash(out.data);
+
+    return {
+      buffer: out.data,
+      ext: ".webp",
+      filename: `${baseName}-${hash}.webp`,
+      width: out.info.width,
+      height: out.info.height,
+      bytes: out.data.byteLength,
+    };
+  } catch (error) {
+    // Deliberate best-effort: a decode/encode failure must not break the upload.
+    console.warn(`[imageOptimizer] Failed to optimize "${originalFilename}", keeping original:`, error);
+    return keepOriginal();
+  }
+}
@@ -0,0 +1,86 @@
+// src/lib/rateLimit.ts
+// ─────────────────────────────────────────────────────────────────────────────
+// Lightweight in-memory rate limiter (token bucket per IP).
+// Single Node process, no Redis dep — protects /api/chat from quota burning.
+// Scales to one container; if you add replicas, swap the Map for Upstash Redis.
+// ─────────────────────────────────────────────────────────────────────────────
+
+/** Mutable token-bucket state kept for one rate-limit key. */
+interface Bucket {
+  /** Tokens currently available; fractional values occur between refills. */
+  tokens: number;
+  /** Date.now() timestamp (ms) of the last time this bucket was evaluated. */
+  updatedAt: number;
+}
+
+/** Parameters of a token bucket. */
+interface RateLimitConfig {
+  capacity: number;       // Max tokens in the bucket
+  refillPerSec: number;   // Tokens added each second
+}
+
+// Bucket state for every key seen so far, held in this module's memory.
+const buckets = new Map<string, Bucket>();
+
+// Garbage-collect stale buckets every 10 min so memory doesn't grow unbounded
+// Date.now() timestamp (ms) of the most recent sweep.
+let lastGc = Date.now();
+// Minimum time between two sweeps (10 minutes, in ms).
+const GC_INTERVAL = 10 * 60 * 1000;
+// A bucket not evaluated for longer than this (30 minutes, in ms) is dropped.
+const STALE_THRESHOLD = 30 * 60 * 1000;
+
+/**
+ * Drop buckets that have not been evaluated for longer than STALE_THRESHOLD.
+ * Does nothing unless at least GC_INTERVAL has passed since the last sweep.
+ *
+ * @param now Current Date.now() timestamp in milliseconds.
+ */
+function gc(now: number) {
+  const sinceLastSweep = now - lastGc;
+  if (sinceLastSweep < GC_INTERVAL) return;
+
+  buckets.forEach((bucket, key) => {
+    const idleFor = now - bucket.updatedAt;
+    if (idleFor > STALE_THRESHOLD) buckets.delete(key);
+  });
+
+  lastGc = now;
+}
+
+/** Outcome of a single rate-limit check. */
+export interface RateLimitResult {
+  /** true when a token was consumed and the request may proceed. */
+  ok: boolean;
+  /** Whole tokens left after this check (0 when the request was rejected). */
+  remaining: number;
+  /** Seconds until one token is available again; 0 when `ok` is true. */
+  retryAfterSec: number;
+}
+
+/**
+ * Try to consume one token from the bucket identified by `key`.
+ *
+ * A key seen for the first time starts with a full bucket. On every call the
+ * bucket is refilled for the time elapsed since its last evaluation (never
+ * above `capacity`) and then charged one token if at least one is available.
+ *
+ * @param key    Identifier of the caller being limited.
+ * @param config Bucket size and refill rate; `refillPerSec` is expected to be
+ *               greater than 0, otherwise `retryAfterSec` is not finite.
+ * @returns `ok: true` with the whole tokens remaining, or `ok: false` with the
+ *          number of seconds until one token will be available.
+ */
+export function rateLimit(key: string, config: RateLimitConfig): RateLimitResult {
+  const now = Date.now();
+  gc(now);
+
+  // New keys go through the same path as existing ones, starting from a full
+  // bucket, so a capacity below 1 is rejected instead of being admitted with a
+  // negative `remaining`.
+  const bucket = buckets.get(key) ?? { tokens: config.capacity, updatedAt: now };
+  buckets.set(key, bucket);
+
+  // Date.now() is wall-clock time and can step backwards (NTP, manual change);
+  // clamp so a backwards step never drains tokens.
+  const elapsedSec = Math.max(0, now - bucket.updatedAt) / 1000;
+  const refilled = Math.min(config.capacity, bucket.tokens + elapsedSec * config.refillPerSec);
+  bucket.updatedAt = now;
+
+  if (refilled < 1) {
+    // Not enough for this request: keep the partial refill and report how long
+    // until the bucket reaches one full token.
+    bucket.tokens = refilled;
+    const retryAfterSec = Math.ceil((1 - refilled) / config.refillPerSec);
+    return { ok: false, remaining: 0, retryAfterSec };
+  }
+
+  bucket.tokens = refilled - 1;
+  return { ok: true, remaining: Math.floor(bucket.tokens), retryAfterSec: 0 };
+}
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Best-effort client address for use as a rate-limit key.
+ *
+ * Resolution order: first non-empty entry of `x-forwarded-for`, then
+ * `x-real-ip`, then the literal "unknown".
+ *
+ * @param req Incoming request whose headers are inspected.
+ * @returns A non-empty string identifying the caller.
+ */
+export function getClientIp(req: Request): string {
+  // Nginx sets x-forwarded-for; first value is the real client.
+  // NOTE(review): that only holds if the proxy overwrites a client-sent
+  // x-forwarded-for instead of appending to it — confirm the Nginx config,
+  // otherwise a caller can choose its own rate-limit key.
+  const forwarded = req.headers
+    .get("x-forwarded-for")
+    ?.split(",")
+    .map((entry) => entry.trim())
+    // Skip blank entries (e.g. ", 10.0.0.1") so "" never becomes the key.
+    .find((entry) => entry !== "");
+  if (forwarded) return forwarded;
+
+  const realIp = req.headers.get("x-real-ip")?.trim();
+  if (realIp) return realIp;
+
+  return "unknown";
+}
+
+// Token-bucket settings applied by checkChatRateLimit.
+const CHAT_LIMIT: RateLimitConfig = {
+  capacity: 30,        // Burst of 30 messages
+  refillPerSec: 0.5,   // = 30/min sustained
+};
+
+/**
+ * Rate-limit check for the chat endpoint: one bucket per client address,
+ * keyed as "chat:<ip>" and governed by CHAT_LIMIT.
+ *
+ * @param req Incoming chat request.
+ * @returns The result of consuming one token from that client's bucket.
+ */
+export function checkChatRateLimit(req: Request): RateLimitResult {
+  return rateLimit(`chat:${getClientIp(req)}`, CHAT_LIMIT);
+}