feat: FluxAI multi-step autonomy + rate limiting + image pipeline

Two production-grade hardening additions and one cost optimisation. FLUXAI AUTONOMY RESTORED (api/chat) - Brings back the multi-step agentic flow that the system prompt was always designed for. The "temporarily removed maxSteps" comment is gone — replaced with the AI SDK 6 equivalent stopWhen: stepCountIs(5). - Cap at 5 chained tool calls per turn bounds latency + LLM cost. - maxDuration raised 30s → 60s to absorb tool-chain runs. - Result: one user prompt now triggers, e.g. search_installations → energy_savings_calculator → show_case_study → schedule_consultation in a single turn — exactly the SPIN methodology in the prompt. RATE LIMITING (src/lib/rateLimit.ts + api/chat) - Token-bucket per IP: 30 messages burst, sustained 30/minute. Trips to 429 with Retry-After + X-RateLimit-Remaining headers when abused. - IP extracted from x-forwarded-for (Nginx already passes this). - In-memory Map with 10-min GC of stale buckets — no Redis dep. If we scale to multiple replicas later, swap the Map for Upstash. - Protects the OpenAI quota from someone hammering the chat endpoint. IMAGE PIPELINE (src/lib/imageOptimizer.ts) - sharp-based optimizer: auto-orient (EXIF), cap at 2560px long side, re-encode WebP@85, content-hash filename. Re-uploads with same content reuse the same hash; new content gets a new URL — perfect cache invalidation without header tricks. - Opt-in via optimize=1 form/query param on /api/assets POST. - Hero CMS and Site Settings uploads turn it on automatically (those are user-facing brand assets where compression matters most). - App/news/parts uploads remain untouched (editors may be uploading CAD drawings, datasheets, etc. that shouldn't be transcoded). - Falls back gracefully to a no-op for unsupported formats (SVG, GIF, videos, anything sharp can't decode) so it never breaks an upload. 
DOCKERFILE - Adds vips/vips-dev for sharp on Alpine + --include=optional so the @img/sharp-linuxmusl-x64 prebuilt is downloaded - Explicitly copies node_modules/sharp + node_modules/@img to the runner stage (Next.js trace can miss conditional deps). NO DB SCHEMA CHANGES.
2026-05-04 14:48:37 -05:00
parent 09e6d0c7cf
commit a199891a3c
9 changed files with 303 additions and 25 deletions
@@ -5,11 +5,15 @@
 # ── Stage 1: Install dependencies ──
 FROM node:22-alpine AS deps
-RUN apk add --no-cache libc6-compat
+# libc6-compat: glibc shim for prebuilt native binaries (Prisma engines)
 # vips-dev: required for sharp on Alpine — image processing native lib
 RUN apk add --no-cache libc6-compat vips-dev
 WORKDIR /app
 COPY package.json package-lock.json ./
-RUN npm ci
+# --include=optional ensures @img/sharp-linuxmusl-x64 (the Alpine sharp
 # prebuilt binary) is downloaded; otherwise sharp errors at runtime.
 RUN npm ci --include=optional
 # ── Stage 2: Build the application ──
 FROM node:22-alpine AS builder
@@ -36,6 +40,9 @@ WORKDIR /app
 ENV NODE_ENV=production
 ENV NEXT_TELEMETRY_DISABLED=1
 # vips runtime — required for sharp at runtime, not just build
 RUN apk add --no-cache vips
 # Security: run as non-root user
 RUN addgroup --system --gid 1001 nodejs
 RUN adduser --system --uid 1001 nextjs
@@ -52,6 +59,12 @@ COPY --from=builder /app/prisma ./prisma
 COPY --from=builder /app/node_modules/.prisma ./node_modules/.prisma
 COPY --from=builder /app/node_modules/@prisma ./node_modules/@prisma
 # Copy sharp binary explicitly — Next.js standalone trace usually picks it
 # up, but the @img/sharp-linuxmusl-x64 prebuilt is platform-conditional and
 # can be missed. Copying both directories guarantees runtime availability.
 COPY --from=builder /app/node_modules/sharp ./node_modules/sharp
 COPY --from=builder /app/node_modules/@img ./node_modules/@img
 # Copy i18n message files (required by next-intl at runtime)
 COPY --from=builder /app/messages ./messages
@@ -29,6 +29,7 @@
        "react": "19.2.4",
        "react-dom": "19.2.4",
        "resend": "^6.9.3",
        "sharp": "^0.34.5",
        "speakeasy": "^2.0.0",
        "tailwind-merge": "^3.5.0",
        "three": "^0.183.2",
@@ -754,7 +755,6 @@
      "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
      "integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
      "license": "MIT",
      "optional": true,
      "engines": {
        "node": ">=18"
      }
@@ -7056,16 +7056,6 @@
        }
      }
    },
    "node_modules/next-intl/node_modules/@swc/helpers": {
      "version": "0.5.19",
      "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.19.tgz",
      "integrity": "sha512-QamiFeIK3txNjgUTNppE6MiG3p7TdninpZu0E0PbqVh1a9FNLT2FRhisaa4NcaX52XVhA5l7Pk58Ft7Sqi/2sA==",
      "extraneous": true,
      "license": "Apache-2.0",
      "dependencies": {
        "tslib": "^2.8.0"
      }
    },
    "node_modules/next/node_modules/postcss": {
      "version": "8.4.31",
      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
@@ -8164,7 +8154,6 @@
      "version": "7.7.4",
      "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
      "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
      "devOptional": true,
      "license": "ISC",
      "bin": {
        "semver": "bin/semver.js"
@@ -8240,7 +8229,6 @@
      "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==",
      "hasInstallScript": true,
      "license": "Apache-2.0",
      "optional": true,
      "dependencies": {
        "@img/colour": "^1.0.0",
        "detect-libc": "^2.1.2",
@@ -30,6 +30,7 @@
    "react": "19.2.4",
    "react-dom": "19.2.4",
    "resend": "^6.9.3",
    "sharp": "^0.34.5",
    "speakeasy": "^2.0.0",
    "tailwind-merge": "^3.5.0",
    "three": "^0.183.2",
@@ -31,6 +31,7 @@ import { NextRequest, NextResponse } from "next/server";
 import fs from "fs";
 import path from "path";
 import { revalidateContent, type RevalidateScope } from "@/lib/revalidate";
 import { optimizeImage, isOptimizable } from "@/lib/imageOptimizer";
 const SCOPE_ROOTS: Record<string, string> = {
  applications: path.join(process.cwd(), "public", "applications"),
@@ -186,6 +187,13 @@ export async function GET(request: NextRequest) {
 }
 // POST — Upload a file
 //
 // Optional query / form param `optimize=true` (or `optimize=1`) routes the
 // upload through the sharp pipeline: auto-orient, cap at 2560px, encode to
 // WebP, and save under a content-hashed filename. The same image always
 // produces the same hash, so re-uploading is idempotent. Different content
 // produces a different hash, so the browser cache invalidates instantly
 // without any header trickery.
 export async function POST(request: NextRequest) {
  try {
    const formData = await request.formData();
@@ -194,6 +202,12 @@ export async function POST(request: NextRequest) {
    const subPath = formData.get("path") as string || "";
    const file = formData.get("file") as File;
    // Two ways to opt into optimization: ?optimize=1 query or form field "optimize".
    const optFlag =
      formData.get("optimize") ??
      new URL(request.url).searchParams.get("optimize");
    const shouldOptimize = optFlag === "true" || optFlag === "1" || optFlag === "on";
    if (!file) return NextResponse.json({ error: "Missing file" }, { status: 400 });
    if (!SCOPE_ROOTS[scope]) return NextResponse.json({ error: "Invalid scope" }, { status: 400 });
    if (!FLAT_SCOPES.has(scope) && !slug) return NextResponse.json({ error: "Missing slug" }, { status: 400 });
@@ -211,13 +225,26 @@ export async function POST(request: NextRequest) {
    fs.mkdirSync(dirPath, { recursive: true });
-    const safeName = file.name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9._-]/g, "");
+    const inputBuffer: Buffer = Buffer.from(await file.arrayBuffer());
-    const filePath = path.join(dirPath, safeName);
+
    // Optimization branch: replace filename with a content-hashed WebP one.
    let saveName = file.name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9._-]/g, "");
    let outputBuffer: Buffer | Uint8Array = inputBuffer;
    let optimizedMeta: { width: number | null; height: number | null; bytes: number } | null = null;
    if (shouldOptimize && isOptimizable(file.name)) {
      const opt = await optimizeImage(inputBuffer, file.name);
      saveName = opt.filename;
      outputBuffer = opt.buffer;
      optimizedMeta = { width: opt.width, height: opt.height, bytes: opt.bytes };
    }
    const filePath = path.join(dirPath, saveName);
    const existed = fs.existsSync(filePath);
-    fs.writeFileSync(filePath, Buffer.from(await file.arrayBuffer()));
+    fs.writeFileSync(filePath, outputBuffer);
-    const rel = subPath ? `${subPath}/${safeName}` : safeName;
+    const rel = subPath ? `${subPath}/${saveName}` : saveName;
    // 🔥 Invalidate the cache so the image shows up without a rebuild
    revalidateContent({ scope: scope as RevalidateScope, slug });
@@ -225,12 +252,21 @@ export async function POST(request: NextRequest) {
    return NextResponse.json({
      success: true,
      file: {
-        name: safeName,
+        name: saveName,
        publicUrl: buildPublicUrl(scope, slug, rel),
        path: rel,
-        mediaType: getFileType(safeName),
+        mediaType: getFileType(saveName),
-        size: getFileSize(file.size),
+        size: getFileSize(outputBuffer.byteLength),
        overwritten: existed,
        optimized: optimizedMeta !== null,
        ...(optimizedMeta
          ? {
              width: optimizedMeta.width,
              height: optimizedMeta.height,
              originalBytes: file.size,
              savedBytes: file.size - optimizedMeta.bytes,
            }
          : {}),
      }
    });
  } catch (error) {
@@ -1,9 +1,10 @@
 import { openai } from '@ai-sdk/openai';
-import { streamText, UIMessage, convertToModelMessages, tool } from 'ai';
+import { streamText, stepCountIs, UIMessage, convertToModelMessages, tool } from 'ai';
 import { z } from 'zod';
 import { prisma } from '@/lib/prisma';
 import { checkChatRateLimit } from '@/lib/rateLimit';
-export const maxDuration = 30;
+export const maxDuration = 60;
 // ─── PHYSICS CONSTANTS (NOT from DB — these are engineering benchmarks) ──────
 // These stay hardcoded because they are physical/scientific constants,
@@ -150,6 +151,25 @@ function industryFromSlug(slug: string): string {
 // ─── ROUTE HANDLER ──────────────────────────────────────────────
 export async function POST(req: Request) {
  // ─── Rate limit (per-IP token bucket, 30 req/min) ──────────────
  const rate = checkChatRateLimit(req);
  if (!rate.ok) {
    return new Response(
      JSON.stringify({
        error: "Too many requests. Please slow down.",
        retryAfterSec: rate.retryAfterSec,
      }),
      {
        status: 429,
        headers: {
          "Content-Type": "application/json",
          "Retry-After": String(rate.retryAfterSec),
          "X-RateLimit-Remaining": String(rate.remaining),
        },
      }
    );
  }
  const { messages, context }: {
    messages: UIMessage[];
    context?: { section?: string; activeTab?: string };
@@ -168,7 +188,11 @@ export async function POST(req: Request) {
    model: openai('gpt-4o'),
    system: systemPrompt + contextNote,
    messages: coreMessages,
-    // maxSteps has been temporarily removed to ensure compatibility with the installed AI SDK version
+    // 🔥 RESTORED: AI SDK 6 multi-step autonomy. The agent can now chain
    // search → calculator → case-study → consultation in a single turn,
    // exactly as the SPIN methodology in the system prompt was designed for.
    // Cap at 5 steps to bound LLM cost and latency.
    stopWhen: stepCountIs(5),
    tools: {
      // ══════════════════════════════════════════════════════════════
@@ -66,6 +66,7 @@ export default function HeroDashboard() {
    try {
      const fd = new FormData();
      fd.append("scope", "footage");
      fd.append("optimize", "1");
      fd.append("file", file);
      const res = await fetch("/api/assets", { method: "POST", body: fd });
      const data = await res.json();
@@ -389,6 +389,7 @@ function ImageField({
    try {
      const fd = new FormData();
      fd.append("scope", "branding");
      fd.append("optimize", "1");
      fd.append("file", file);
      const res = await fetch("/api/assets", { method: "POST", body: fd });
      const data = await res.json();
@@ -0,0 +1,128 @@
 // src/lib/imageOptimizer.ts
 // ─────────────────────────────────────────────────────────────────────────────
 // Server-side image optimization for CMS uploads.
 //
 // What it does:
 //   - Auto-orients (respects EXIF rotation from phone cameras)
 //   - Caps very large images at 2560px on the long side (no point storing
 //     phone megapixels — Next.js Image Optimizer will downsize on the fly)
 //   - Re-encodes to WebP at quality 85 (typically 60–80% smaller than JPEG)
 //   - Computes a content-hash filename so the same image can never collide
 //     with itself across re-uploads, AND new versions get a new URL (perfect
 //     cache invalidation on the browser side)
 //
 // What it does NOT do:
 //   - Generate responsive variants — next/image handles that automatically
 //   - Touch GIFs or videos — those pass through unchanged
 //   - Keep the original upload — the optimized buffer fully replaces it
 // ─────────────────────────────────────────────────────────────────────────────
 import "server-only";
 import sharp from "sharp";
 import crypto from "crypto";
 import path from "path";
 /** Upper bound, in pixels, for the longer edge of an optimized image. */
 const MAX_LONG_SIDE = 2560;
 /** Quality value passed to the WebP encoder. */
 const WEBP_QUALITY = 85;
 // Lower-case extensions (leading dot included) that are sent through the
 // optimizer. Anything else is left alone.
 const OPTIMIZABLE = new Set([
  ".jpg",
  ".jpeg",
  ".png",
  ".webp",
  ".tiff",
  ".heic",
  ".heif",
 ]);
 /** Result of running an upload through the optimizer. */
 export interface OptimizedImage {
  /** Bytes to persist: the re-encoded image, or the untouched input on fallback. */
  buffer: Buffer;
  /** ".webp" when re-encoded, otherwise the original (lower-cased) extension. */
  ext: string;
  /** Sanitized base name plus content hash, e.g. "hero-9f3a2c.webp". */
  filename: string;
  /** Output width in pixels; null when the image was not re-encoded. */
  width: number | null;
  /** Output height in pixels; null when the image was not re-encoded. */
  height: number | null;
  /** Size of `buffer` in bytes. */
  bytes: number;
 }
 /**
 * Tell whether a filename carries an extension the optimizer handles.
 * The comparison is case-insensitive and looks only at the extension.
 */
 export function isOptimizable(filename: string): boolean {
  const extension = path.extname(filename);
  return OPTIMIZABLE.has(extension.toLowerCase());
 }
 /**
 * Derive a filesystem-safe base name (without extension) from an uploaded
 * filename.
 *
 * Steps: drop the final extension, lower-case, turn whitespace runs into "-",
 * strip everything outside [a-z0-9._-], collapse repeated dashes, trim
 * leading/trailing dashes and dots, and cap the length at 60 characters.
 *
 * @param name Original filename as supplied by the client.
 * @returns The sanitized base name, or "image" when nothing usable remains.
 */
 function sanitizeBaseName(name: string): string {
  const stem = name.replace(/\.[^.]+$/, "");
  const cleaned = stem
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^a-z0-9._-]/g, "")
    .replace(/-+/g, "-")
    .replace(/^[-.]+|[-.]+$/g, "");
  // Truncating can cut right after a separator, so trim the tail again;
  // otherwise callers that append "-<hash>" end up with "name--<hash>".
  const truncated = cleaned.slice(0, 60).replace(/[-.]+$/, "");
  return truncated || "image";
 }
 /**
 * Produce a short, stable fingerprint of a buffer: the leading `length`
 * hex characters of its SHA-256 digest (8 by default).
 */
 function shortHash(buffer: Buffer, length = 8): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(buffer);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, length);
 }
 /**
 * Optimize an uploaded image buffer.
 *
 * For extensions listed in OPTIMIZABLE the image is auto-oriented from its
 * EXIF data, scaled down (never up) to fit inside a
 * MAX_LONG_SIDE × MAX_LONG_SIDE box, and re-encoded as WebP. The returned
 * filename is `<sanitized-base>-<hash>.webp`, where the hash is taken from the
 * encoded output.
 *
 * If the extension is not in OPTIMIZABLE, or sharp throws while processing,
 * the input bytes are returned untouched under `<sanitized-base>-<hash><ext>`
 * with `width`/`height` set to null — so SVGs / GIFs / videos / unsupported
 * formats keep working.
 *
 * @param inputBuffer Raw bytes of the uploaded file.
 * @param originalFilename Client-supplied filename; used for the extension
 *   check and as the basis of the output name.
 * @returns The bytes to store together with their filename and dimensions.
 *   This function does not reject on decode/encode failures; it falls back.
 */
 export async function optimizeImage(
  inputBuffer: Buffer,
  originalFilename: string
 ): Promise<OptimizedImage> {
  const ext = path.extname(originalFilename).toLowerCase();
  const baseName = sanitizeBaseName(originalFilename);
  // Single definition of the pass-through result used by both fallback paths.
  const keepOriginal = (): OptimizedImage => ({
    buffer: inputBuffer,
    ext,
    filename: `${baseName}-${shortHash(inputBuffer)}${ext}`,
    width: null,
    height: null,
    bytes: inputBuffer.byteLength,
  });
  if (!OPTIMIZABLE.has(ext)) {
    return keepOriginal();
  }
  try {
    const out = await sharp(inputBuffer, { failOn: "none" })
      .rotate() // honour EXIF
      // Bound BOTH axes instead of picking one from metadata(): metadata
      // reports width/height before EXIF orientation is applied, so choosing
      // the axis from it caps the wrong side of 90°/270°-rotated photos.
      // "inside" + withoutEnlargement keeps the aspect ratio and leaves
      // images that already fit untouched.
      .resize({
        width: MAX_LONG_SIDE,
        height: MAX_LONG_SIDE,
        fit: "inside",
        withoutEnlargement: true,
      })
      .webp({ quality: WEBP_QUALITY, effort: 4 })
      .toBuffer({ resolveWithObject: true });
    const hash = shortHash(out.data);
    return {
      buffer: out.data,
      ext: ".webp",
      filename: `${baseName}-${hash}.webp`,
      width: out.info.width,
      height: out.info.height,
      bytes: out.data.byteLength,
    };
  } catch (error) {
    // Deliberate best-effort: an image sharp cannot handle must not break
    // the upload, so log and store the original bytes instead.
    console.warn(`[imageOptimizer] Failed to optimize "${originalFilename}", keeping original:`, error);
    return keepOriginal();
  }
 }
@@ -0,0 +1,86 @@
 // src/lib/rateLimit.ts
 // ─────────────────────────────────────────────────────────────────────────────
 // Lightweight in-memory rate limiter (token bucket per IP).
 // Single Node process, no Redis dep — protects /api/chat from quota burning.
 // Scales to one container; if you add replicas, swap the Map for Upstash Redis.
 // ─────────────────────────────────────────────────────────────────────────────
 /** Per-key token bucket state. */
 interface Bucket {
  /** Tokens currently available; may be fractional between refills. */
  tokens: number;
  /** `Date.now()` timestamp (ms) of the last time this bucket was touched. */
  updatedAt: number;
 }
 interface RateLimitConfig {
  capacity: number;       // Max tokens in the bucket
  refillPerSec: number;   // Tokens added each second (expected to be > 0)
 }
 const buckets = new Map<string, Bucket>();
 // Garbage-collect stale buckets every 10 min so memory doesn't grow unbounded
 let lastGc = Date.now();
 const GC_INTERVAL = 10 * 60 * 1000;
 const STALE_THRESHOLD = 30 * 60 * 1000;
 /**
 * Drop buckets that have not been touched for STALE_THRESHOLD ms.
 * Runs at most once per GC_INTERVAL; otherwise returns immediately.
 */
 function gc(now: number) {
  if (now - lastGc < GC_INTERVAL) return;
  for (const [key, bucket] of buckets) {
    if (now - bucket.updatedAt > STALE_THRESHOLD) buckets.delete(key);
  }
  lastGc = now;
 }
 export interface RateLimitResult {
  /** True when the request was admitted (a token was consumed). */
  ok: boolean;
  /** Whole tokens left after this call; 0 when rejected. */
  remaining: number;
  /** Seconds until one token is available again; 0 when admitted. */
  retryAfterSec: number;
 }
 /**
 * Token-bucket check for `key`, consuming one token when available.
 *
 * A key that has never been seen starts with a full bucket and then goes
 * through the same admission check as every other call, so a capacity
 * below 1 rejects even the first request.
 *
 * @param key Identifier whose requests share one bucket.
 * @param config Bucket capacity and refill rate.
 * @returns Whether the call is admitted, the whole tokens left, and how long
 *   to wait before retrying when it is not.
 */
 export function rateLimit(key: string, config: RateLimitConfig): RateLimitResult {
  const now = Date.now();
  gc(now);
  let bucket = buckets.get(key);
  if (!bucket) {
    bucket = { tokens: config.capacity, updatedAt: now };
    buckets.set(key, bucket);
  }
  // Date.now() is wall-clock time and can step backwards; clamp so a
  // negative interval never drains tokens or inflates retryAfterSec.
  const elapsedSec = Math.max(0, now - bucket.updatedAt) / 1000;
  const available = Math.min(
    config.capacity,
    bucket.tokens + elapsedSec * config.refillPerSec
  );
  bucket.updatedAt = now;
  if (available < 1) {
    bucket.tokens = available;
    const retryAfterSec = Math.ceil((1 - available) / config.refillPerSec);
    return { ok: false, remaining: 0, retryAfterSec };
  }
  bucket.tokens = available - 1;
  return { ok: true, remaining: Math.floor(bucket.tokens), retryAfterSec: 0 };
 }
 // ── Helpers ──────────────────────────────────────────────────────────────────
 /**
 * Best-effort client address for rate-limit keying.
 *
 * Order of preference: the first non-empty entry of `x-forwarded-for`,
 * then `x-real-ip`, then the literal "unknown". Values are trimmed, and
 * blank entries are skipped so a malformed header never yields an empty key.
 *
 * NOTE(review): `x-forwarded-for` is only trustworthy if the reverse proxy
 * overwrites or sanitizes what the client sends — confirm the Nginx config,
 * otherwise callers can pick their own rate-limit bucket.
 *
 * @param req Incoming request whose headers are inspected.
 * @returns The resolved address string, or "unknown" when no header is usable.
 */
 export function getClientIp(req: Request): string {
  const forwardedFor = req.headers.get("x-forwarded-for");
  if (forwardedFor) {
    // Leftmost non-blank hop is treated as the originating client.
    const firstHop = forwardedFor
      .split(",")
      .map((hop) => hop.trim())
      .find((hop) => hop.length > 0);
    if (firstHop) return firstHop;
  }
  const realIp = req.headers.get("x-real-ip")?.trim();
  if (realIp) return realIp;
  return "unknown";
 }
 // Budget for the chat endpoint: a full bucket allows 30 back-to-back
 // messages, and it refills at half a token per second (30 per minute).
 const CHAT_LIMIT: RateLimitConfig = {
  refillPerSec: 0.5,
  capacity: 30,
 };
 /**
 * Apply the chat budget to the caller of `req`.
 * The bucket key is the client address prefixed with "chat:".
 */
 export function checkChatRateLimit(req: Request): RateLimitResult {
  const bucketKey = `chat:${getClientIp(req)}`;
  return rateLimit(bucketKey, CHAT_LIMIT);
 }