feat: FluxAI multi-step autonomy + rate limiting + image pipeline
Deploy to VPS / deploy (push) Has been cancelled

Two production-grade hardening additions and one cost optimisation.

FLUXAI AUTONOMY RESTORED (api/chat)
- Brings back the multi-step agentic flow that the system prompt was
  always designed for. The "temporarily removed maxSteps" comment is
  gone — replaced with the AI SDK 6 equivalent stopWhen: stepCountIs(5).
- Cap at 5 chained tool calls per turn bounds latency + LLM cost.
- maxDuration raised 30s → 60s to absorb tool-chain runs.
- Result: one user prompt now triggers, e.g. search_installations →
  energy_savings_calculator → show_case_study → schedule_consultation
  in a single turn — exactly the SPIN methodology in the prompt.

RATE LIMITING (src/lib/rateLimit.ts + api/chat)
- Token-bucket per IP: 30 messages burst, sustained 30/minute. Trips
  to 429 with Retry-After + X-RateLimit-Remaining headers when abused.
- IP extracted from x-forwarded-for (Nginx already passes this).
- In-memory Map with 10-min GC of stale buckets — no Redis dep.
  If we scale to multiple replicas later, swap the Map for Upstash.
- Protects the OpenAI quota from someone hammering the chat endpoint.

IMAGE PIPELINE (src/lib/imageOptimizer.ts)
- sharp-based optimizer: auto-orient (EXIF), cap at 2560px long side,
  re-encode WebP@85, content-hash filename. Re-uploads with same
  content reuse the same hash; new content gets a new URL — perfect
  cache invalidation without header tricks.
- Opt-in via optimize=1 form/query param on /api/assets POST.
- Hero CMS and Site Settings uploads turn it on automatically (those
  are user-facing brand assets where compression matters most).
- App/news/parts uploads remain untouched (editors may be uploading
  CAD drawings, datasheets, etc. that shouldn't be transcoded).
- Falls back gracefully to a no-op for unsupported formats (SVG, GIF,
  videos, anything sharp can't decode) so it never breaks an upload.

DOCKERFILE
- Adds vips/vips-dev for sharp on Alpine + --include=optional so the
  @img/sharp-linuxmusl-x64 prebuilt is downloaded
- Explicitly copies node_modules/sharp + node_modules/@img to the
  runner stage (Next.js trace can miss conditional deps).

NO DB SCHEMA CHANGES.
This commit is contained in:
2026-05-04 14:48:37 -05:00
parent 09e6d0c7cf
commit a199891a3c
9 changed files with 303 additions and 25 deletions
+15 -2
View File
@@ -5,11 +5,15 @@
# ── Stage 1: Install dependencies ── # ── Stage 1: Install dependencies ──
FROM node:22-alpine AS deps FROM node:22-alpine AS deps
RUN apk add --no-cache libc6-compat # libc6-compat: glibc shim for prebuilt native binaries (Prisma engines)
# vips-dev: required for sharp on Alpine — image processing native lib
RUN apk add --no-cache libc6-compat vips-dev
WORKDIR /app WORKDIR /app
COPY package.json package-lock.json ./ COPY package.json package-lock.json ./
RUN npm ci # --include=optional ensures @img/sharp-linuxmusl-x64 (the Alpine sharp
# prebuilt binary) is downloaded; otherwise sharp errors at runtime.
RUN npm ci --include=optional
# ── Stage 2: Build the application ── # ── Stage 2: Build the application ──
FROM node:22-alpine AS builder FROM node:22-alpine AS builder
@@ -36,6 +40,9 @@ WORKDIR /app
ENV NODE_ENV=production ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1 ENV NEXT_TELEMETRY_DISABLED=1
# vips runtime — required for sharp at runtime, not just build
RUN apk add --no-cache vips
# Security: run as non-root user # Security: run as non-root user
RUN addgroup --system --gid 1001 nodejs RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs RUN adduser --system --uid 1001 nextjs
@@ -52,6 +59,12 @@ COPY --from=builder /app/prisma ./prisma
COPY --from=builder /app/node_modules/.prisma ./node_modules/.prisma COPY --from=builder /app/node_modules/.prisma ./node_modules/.prisma
COPY --from=builder /app/node_modules/@prisma ./node_modules/@prisma COPY --from=builder /app/node_modules/@prisma ./node_modules/@prisma
# Copy sharp binary explicitly — Next.js standalone trace usually picks it
# up, but the @img/sharp-linuxmusl-x64 prebuilt is platform-conditional and
# can be missed. Copying both directories guarantees runtime availability.
COPY --from=builder /app/node_modules/sharp ./node_modules/sharp
COPY --from=builder /app/node_modules/@img ./node_modules/@img
# Copy i18n message files (required by next-intl at runtime) # Copy i18n message files (required by next-intl at runtime)
COPY --from=builder /app/messages ./messages COPY --from=builder /app/messages ./messages
+1 -13
View File
@@ -29,6 +29,7 @@
"react": "19.2.4", "react": "19.2.4",
"react-dom": "19.2.4", "react-dom": "19.2.4",
"resend": "^6.9.3", "resend": "^6.9.3",
"sharp": "^0.34.5",
"speakeasy": "^2.0.0", "speakeasy": "^2.0.0",
"tailwind-merge": "^3.5.0", "tailwind-merge": "^3.5.0",
"three": "^0.183.2", "three": "^0.183.2",
@@ -754,7 +755,6 @@
"resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz", "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
"integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==", "integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
"license": "MIT", "license": "MIT",
"optional": true,
"engines": { "engines": {
"node": ">=18" "node": ">=18"
} }
@@ -7056,16 +7056,6 @@
} }
} }
}, },
"node_modules/next-intl/node_modules/@swc/helpers": {
"version": "0.5.19",
"resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.19.tgz",
"integrity": "sha512-QamiFeIK3txNjgUTNppE6MiG3p7TdninpZu0E0PbqVh1a9FNLT2FRhisaa4NcaX52XVhA5l7Pk58Ft7Sqi/2sA==",
"extraneous": true,
"license": "Apache-2.0",
"dependencies": {
"tslib": "^2.8.0"
}
},
"node_modules/next/node_modules/postcss": { "node_modules/next/node_modules/postcss": {
"version": "8.4.31", "version": "8.4.31",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
@@ -8164,7 +8154,6 @@
"version": "7.7.4", "version": "7.7.4",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
"integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
"devOptional": true,
"license": "ISC", "license": "ISC",
"bin": { "bin": {
"semver": "bin/semver.js" "semver": "bin/semver.js"
@@ -8240,7 +8229,6 @@
"integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==", "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==",
"hasInstallScript": true, "hasInstallScript": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"optional": true,
"dependencies": { "dependencies": {
"@img/colour": "^1.0.0", "@img/colour": "^1.0.0",
"detect-libc": "^2.1.2", "detect-libc": "^2.1.2",
+1
View File
@@ -30,6 +30,7 @@
"react": "19.2.4", "react": "19.2.4",
"react-dom": "19.2.4", "react-dom": "19.2.4",
"resend": "^6.9.3", "resend": "^6.9.3",
"sharp": "^0.34.5",
"speakeasy": "^2.0.0", "speakeasy": "^2.0.0",
"tailwind-merge": "^3.5.0", "tailwind-merge": "^3.5.0",
"three": "^0.183.2", "three": "^0.183.2",
+43 -7
View File
@@ -31,6 +31,7 @@ import { NextRequest, NextResponse } from "next/server";
import fs from "fs"; import fs from "fs";
import path from "path"; import path from "path";
import { revalidateContent, type RevalidateScope } from "@/lib/revalidate"; import { revalidateContent, type RevalidateScope } from "@/lib/revalidate";
import { optimizeImage, isOptimizable } from "@/lib/imageOptimizer";
const SCOPE_ROOTS: Record<string, string> = { const SCOPE_ROOTS: Record<string, string> = {
applications: path.join(process.cwd(), "public", "applications"), applications: path.join(process.cwd(), "public", "applications"),
@@ -186,6 +187,13 @@ export async function GET(request: NextRequest) {
} }
// POST — Upload a file // POST — Upload a file
//
// Optional query / form param `optimize=true` (or `optimize=1`) routes the
// upload through the sharp pipeline: auto-orient, cap at 2560px, encode to
// WebP, and save under a content-hashed filename. The same image always
// produces the same hash, so re-uploading is idempotent. Different content
// produces a different hash, so the browser cache invalidates instantly
// without any header trickery.
export async function POST(request: NextRequest) { export async function POST(request: NextRequest) {
try { try {
const formData = await request.formData(); const formData = await request.formData();
@@ -194,6 +202,12 @@ export async function POST(request: NextRequest) {
const subPath = formData.get("path") as string || ""; const subPath = formData.get("path") as string || "";
const file = formData.get("file") as File; const file = formData.get("file") as File;
// Two ways to opt into optimization: ?optimize=1 query or form field "optimize".
const optFlag =
formData.get("optimize") ??
new URL(request.url).searchParams.get("optimize");
const shouldOptimize = optFlag === "true" || optFlag === "1" || optFlag === "on";
if (!file) return NextResponse.json({ error: "Missing file" }, { status: 400 }); if (!file) return NextResponse.json({ error: "Missing file" }, { status: 400 });
if (!SCOPE_ROOTS[scope]) return NextResponse.json({ error: "Invalid scope" }, { status: 400 }); if (!SCOPE_ROOTS[scope]) return NextResponse.json({ error: "Invalid scope" }, { status: 400 });
if (!FLAT_SCOPES.has(scope) && !slug) return NextResponse.json({ error: "Missing slug" }, { status: 400 }); if (!FLAT_SCOPES.has(scope) && !slug) return NextResponse.json({ error: "Missing slug" }, { status: 400 });
@@ -211,13 +225,26 @@ export async function POST(request: NextRequest) {
fs.mkdirSync(dirPath, { recursive: true }); fs.mkdirSync(dirPath, { recursive: true });
const safeName = file.name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9._-]/g, ""); const inputBuffer: Buffer = Buffer.from(await file.arrayBuffer());
const filePath = path.join(dirPath, safeName);
// Optimization branch: replace filename with a content-hashed WebP one.
let saveName = file.name.toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9._-]/g, "");
let outputBuffer: Buffer | Uint8Array = inputBuffer;
let optimizedMeta: { width: number | null; height: number | null; bytes: number } | null = null;
if (shouldOptimize && isOptimizable(file.name)) {
const opt = await optimizeImage(inputBuffer, file.name);
saveName = opt.filename;
outputBuffer = opt.buffer;
optimizedMeta = { width: opt.width, height: opt.height, bytes: opt.bytes };
}
const filePath = path.join(dirPath, saveName);
const existed = fs.existsSync(filePath); const existed = fs.existsSync(filePath);
fs.writeFileSync(filePath, Buffer.from(await file.arrayBuffer())); fs.writeFileSync(filePath, outputBuffer);
const rel = subPath ? `${subPath}/${safeName}` : safeName; const rel = subPath ? `${subPath}/${saveName}` : saveName;
// 🔥 Invalida caché para que la imagen aparezca sin recompilar // 🔥 Invalida caché para que la imagen aparezca sin recompilar
revalidateContent({ scope: scope as RevalidateScope, slug }); revalidateContent({ scope: scope as RevalidateScope, slug });
@@ -225,12 +252,21 @@ export async function POST(request: NextRequest) {
return NextResponse.json({ return NextResponse.json({
success: true, success: true,
file: { file: {
name: safeName, name: saveName,
publicUrl: buildPublicUrl(scope, slug, rel), publicUrl: buildPublicUrl(scope, slug, rel),
path: rel, path: rel,
mediaType: getFileType(safeName), mediaType: getFileType(saveName),
size: getFileSize(file.size), size: getFileSize(outputBuffer.byteLength),
overwritten: existed, overwritten: existed,
optimized: optimizedMeta !== null,
...(optimizedMeta
? {
width: optimizedMeta.width,
height: optimizedMeta.height,
originalBytes: file.size,
savedBytes: file.size - optimizedMeta.bytes,
}
: {}),
} }
}); });
} catch (error) { } catch (error) {
+27 -3
View File
@@ -1,9 +1,10 @@
import { openai } from '@ai-sdk/openai'; import { openai } from '@ai-sdk/openai';
import { streamText, UIMessage, convertToModelMessages, tool } from 'ai'; import { streamText, stepCountIs, UIMessage, convertToModelMessages, tool } from 'ai';
import { z } from 'zod'; import { z } from 'zod';
import { prisma } from '@/lib/prisma'; import { prisma } from '@/lib/prisma';
import { checkChatRateLimit } from '@/lib/rateLimit';
export const maxDuration = 30; export const maxDuration = 60;
// ─── PHYSICS CONSTANTS (NOT from DB — these are engineering benchmarks) ────── // ─── PHYSICS CONSTANTS (NOT from DB — these are engineering benchmarks) ──────
// These stay hardcoded because they are physical/scientific constants, // These stay hardcoded because they are physical/scientific constants,
@@ -150,6 +151,25 @@ function industryFromSlug(slug: string): string {
// ─── ROUTE HANDLER ────────────────────────────────────────────── // ─── ROUTE HANDLER ──────────────────────────────────────────────
export async function POST(req: Request) { export async function POST(req: Request) {
// ─── Rate limit (per-IP token bucket, 30 req/min) ──────────────
const rate = checkChatRateLimit(req);
if (!rate.ok) {
return new Response(
JSON.stringify({
error: "Too many requests. Please slow down.",
retryAfterSec: rate.retryAfterSec,
}),
{
status: 429,
headers: {
"Content-Type": "application/json",
"Retry-After": String(rate.retryAfterSec),
"X-RateLimit-Remaining": String(rate.remaining),
},
}
);
}
const { messages, context }: { const { messages, context }: {
messages: UIMessage[]; messages: UIMessage[];
context?: { section?: string; activeTab?: string }; context?: { section?: string; activeTab?: string };
@@ -168,7 +188,11 @@ export async function POST(req: Request) {
model: openai('gpt-4o'), model: openai('gpt-4o'),
system: systemPrompt + contextNote, system: systemPrompt + contextNote,
messages: coreMessages, messages: coreMessages,
// maxSteps has been temporarily removed to ensure compatibility with the installed AI SDK version // 🔥 RESTORED: AI SDK 6 multi-step autonomy. The agent can now chain
// search → calculator → case-study → consultation in a single turn,
// exactly as the SPIN methodology in the system prompt was designed for.
// Cap at 5 steps to bound LLM cost and latency.
stopWhen: stepCountIs(5),
tools: { tools: {
// ══════════════════════════════════════════════════════════════ // ══════════════════════════════════════════════════════════════
@@ -66,6 +66,7 @@ export default function HeroDashboard() {
try { try {
const fd = new FormData(); const fd = new FormData();
fd.append("scope", "footage"); fd.append("scope", "footage");
fd.append("optimize", "1");
fd.append("file", file); fd.append("file", file);
const res = await fetch("/api/assets", { method: "POST", body: fd }); const res = await fetch("/api/assets", { method: "POST", body: fd });
const data = await res.json(); const data = await res.json();
@@ -389,6 +389,7 @@ function ImageField({
try { try {
const fd = new FormData(); const fd = new FormData();
fd.append("scope", "branding"); fd.append("scope", "branding");
fd.append("optimize", "1");
fd.append("file", file); fd.append("file", file);
const res = await fetch("/api/assets", { method: "POST", body: fd }); const res = await fetch("/api/assets", { method: "POST", body: fd });
const data = await res.json(); const data = await res.json();
+128
View File
@@ -0,0 +1,128 @@
// src/lib/imageOptimizer.ts
// ─────────────────────────────────────────────────────────────────────────────
// Server-side image optimization for CMS uploads.
//
// What it does:
// - Auto-orients (respects EXIF rotation from phone cameras)
// - Caps very large images at 2560px on the long side (no point storing
// phone megapixels — Next.js Image Optimizer will downsize on the fly)
// - Re-encodes to WebP at quality 85 (typically 60–80% smaller than JPEG)
// - Computes a content-hash filename so the same image can never collide
// with itself across re-uploads, AND new versions get a new URL (perfect
// cache invalidation on the browser side)
//
// What it does NOT do:
// - Generate responsive variants — next/image handles that automatically
// - Touch GIFs or videos — those pass through unchanged
// - Keep the original bytes — the optimized buffer fully replaces them
// ─────────────────────────────────────────────────────────────────────────────
import "server-only";
import sharp from "sharp";
import crypto from "crypto";
import path from "path";
// Pixel cap for the longest edge, and the WebP encoder quality setting.
const MAX_LONG_SIDE = 2560;
const WEBP_QUALITY = 85;

// Extensions that are routed through the sharp pipeline.
const OPTIMIZABLE = new Set([".jpg", ".jpeg", ".png", ".webp", ".tiff", ".heic", ".heif"]);

/** What `optimizeImage` hands back to the upload route. */
export interface OptimizedImage {
  buffer: Buffer;
  ext: string; // ".webp" for optimized, original ext otherwise
  filename: string; // sanitized name with content hash, e.g. "hero-9f3a2c1b.webp"
  width: number | null; // null when the image was not processed
  height: number | null; // null when the image was not processed
  bytes: number;
}

/**
 * Tell whether a filename's extension is one this module will re-encode.
 * The comparison is case-insensitive.
 */
export function isOptimizable(filename: string): boolean {
  const extension = path.extname(filename);
  return OPTIMIZABLE.has(extension.toLowerCase());
}
/**
 * Turn an uploaded filename into a safe, extension-less slug.
 *
 * Steps: drop the final extension, lowercase, turn whitespace runs into "-",
 * strip everything outside [a-z0-9._-], collapse repeated "-", trim leading and
 * trailing "-" / ".", and cap the result at 60 characters.
 *
 * @param name Original filename as supplied by the uploader.
 * @returns The slug, or "image" when nothing usable is left.
 */
function sanitizeBaseName(name: string): string {
  const slug = name
    .replace(/\.[^.]+$/, "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^a-z0-9._-]/g, "")
    .replace(/-+/g, "-")
    .replace(/^[-.]+|[-.]+$/g, "")
    .slice(0, 60)
    // Truncation can land right after a separator; trim again so callers that
    // append "-<hash>" never end up with "name--hash".
    .replace(/[-.]+$/, "");

  return slug || "image";
}
/**
 * Hex-encoded SHA-256 digest of `buffer`, cut down to its first `length`
 * characters (8 by default).
 */
function shortHash(buffer: Buffer, length = 8): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(buffer);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, length);
}
/**
 * Optimize an uploaded image buffer.
 *
 * For extensions in OPTIMIZABLE the image is auto-oriented from EXIF, scaled
 * down (never up) so that neither side exceeds MAX_LONG_SIDE, and re-encoded
 * as WebP. The returned filename is `<sanitized-base>-<content-hash>.webp`.
 *
 * Falls back to a no-op (original bytes, original extension, hashed filename,
 * null dimensions) if the extension isn't in OPTIMIZABLE or sharp throws while
 * processing — this keeps SVGs / GIFs / videos / unsupported formats working
 * transparently.
 *
 * @param inputBuffer Raw bytes of the upload.
 * @param originalFilename Name supplied by the uploader; only its extension and
 *   sanitized base name are used.
 * @returns The buffer to store plus its filename, extension, size and (when
 *   processed) pixel dimensions. Never rejects because of a sharp failure.
 */
export async function optimizeImage(
  inputBuffer: Buffer,
  originalFilename: string
): Promise<OptimizedImage> {
  const ext = path.extname(originalFilename).toLowerCase();
  const baseName = sanitizeBaseName(originalFilename);

  // Single definition of the "leave it alone" result used by both fallbacks.
  const passthrough = (): OptimizedImage => ({
    buffer: inputBuffer,
    ext,
    filename: `${baseName}-${shortHash(inputBuffer)}${ext}`,
    width: null,
    height: null,
    bytes: inputBuffer.byteLength,
  });

  if (!OPTIMIZABLE.has(ext)) return passthrough();

  try {
    const out = await sharp(inputBuffer, { failOn: "none" })
      .rotate() // honour EXIF
      // Bound BOTH axes instead of picking one from metadata(): metadata()
      // reports the stored (pre-rotation) width/height, so an EXIF-rotated
      // portrait photo would otherwise be constrained on its short side and
      // come out taller than MAX_LONG_SIDE. "inside" + withoutEnlargement
      // keeps aspect ratio and leaves already-small images at their size.
      .resize({
        width: MAX_LONG_SIDE,
        height: MAX_LONG_SIDE,
        fit: "inside",
        withoutEnlargement: true,
      })
      .webp({ quality: WEBP_QUALITY, effort: 4 })
      .toBuffer({ resolveWithObject: true });

    // Hash the encoded output so the URL changes whenever the stored bytes do.
    const hash = shortHash(out.data);
    return {
      buffer: out.data,
      ext: ".webp",
      filename: `${baseName}-${hash}.webp`,
      width: out.info.width,
      height: out.info.height,
      bytes: out.data.byteLength,
    };
  } catch (error) {
    console.warn(`[imageOptimizer] Failed to optimize "${originalFilename}", keeping original:`, error);
    return passthrough();
  }
}
+86
View File
@@ -0,0 +1,86 @@
// src/lib/rateLimit.ts
// ─────────────────────────────────────────────────────────────────────────────
// Lightweight in-memory rate limiter (token bucket per IP).
// Single Node process, no Redis dep — protects /api/chat from quota burning.
// Scales to one container; if you add replicas, swap the Map for Upstash Redis.
// ─────────────────────────────────────────────────────────────────────────────
/** Per-key token-bucket state. */
interface Bucket {
  tokens: number; // Tokens currently available (may be fractional)
  updatedAt: number; // Epoch ms at which `tokens` was last recomputed
}

interface RateLimitConfig {
  capacity: number; // Max tokens in the bucket
  refillPerSec: number; // Tokens added each second
}

// All buckets for this process, keyed by the caller-supplied string.
const buckets = new Map<string, Bucket>();

// Garbage-collect stale buckets every 10 min so memory doesn't grow unbounded
let lastGc = Date.now();
const GC_INTERVAL = 10 * 60 * 1000;
const STALE_THRESHOLD = 30 * 60 * 1000;

/**
 * Drop buckets untouched for longer than STALE_THRESHOLD. Runs at most once
 * per GC_INTERVAL; it is invoked from `rateLimit`, not from a timer.
 */
function gc(now: number) {
  if (now - lastGc < GC_INTERVAL) return;
  for (const [key, bucket] of buckets) {
    if (now - bucket.updatedAt > STALE_THRESHOLD) buckets.delete(key);
  }
  lastGc = now;
}

export interface RateLimitResult {
  ok: boolean; // true when the request may proceed
  remaining: number; // Whole tokens left after this call (0 when denied)
  retryAfterSec: number; // Seconds until one token is available (0 when ok)
}

/**
 * Take one token from the bucket identified by `key`.
 *
 * A key seen for the first time starts with a full bucket. Tokens refill
 * continuously at `config.refillPerSec`, capped at `config.capacity`.
 *
 * @param key Bucket identifier (e.g. "chat:<ip>").
 * @param config Bucket capacity and refill rate.
 * @returns Whether the call is allowed, the whole tokens remaining, and how
 *   long to wait before retrying when it is not.
 */
export function rateLimit(key: string, config: RateLimitConfig): RateLimitResult {
  const now = Date.now();
  gc(now);

  // Unknown keys start full and then follow the same path as known ones, so
  // the capacity check applies to the very first request as well.
  let bucket = buckets.get(key);
  if (!bucket) {
    bucket = { tokens: config.capacity, updatedAt: now };
    buckets.set(key, bucket);
  }

  // Date.now() is wall-clock time and can step backwards; never let a negative
  // interval subtract tokens from the bucket.
  const elapsedSec = Math.max(0, (now - bucket.updatedAt) / 1000);
  const available = Math.min(
    config.capacity,
    bucket.tokens + elapsedSec * config.refillPerSec
  );
  bucket.updatedAt = now;

  if (available < 1) {
    bucket.tokens = available;
    const retryAfterSec = Math.ceil((1 - available) / config.refillPerSec);
    return { ok: false, remaining: 0, retryAfterSec };
  }

  bucket.tokens = available - 1;
  return { ok: true, remaining: Math.floor(bucket.tokens), retryAfterSec: 0 };
}
// ── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Best-effort client address for rate-limit keying.
 *
 * Order of preference: the first non-empty entry of `x-forwarded-for`, then
 * `x-real-ip`, then the literal "unknown".
 *
 * NOTE(review): if the reverse proxy appends to a client-supplied
 * X-Forwarded-For instead of overwriting it, the first entry is chosen by the
 * caller and can be rotated to dodge the limiter — confirm the Nginx config.
 *
 * @param req Incoming request; only its headers are read.
 * @returns A trimmed, non-empty identifier string.
 */
export function getClientIp(req: Request): string {
  // Nginx sets x-forwarded-for; first value is the real client.
  const forwarded = req.headers.get("x-forwarded-for");
  if (forwarded) {
    // Skip blank entries (e.g. "," or ", 1.2.3.4") so a malformed header can
    // never produce an empty key shared by unrelated clients.
    const first = forwarded
      .split(",")
      .map((entry) => entry.trim())
      .find((entry) => entry !== "");
    if (first) return first;
  }

  const real = req.headers.get("x-real-ip")?.trim();
  if (real) return real;

  return "unknown";
}
// Budget applied to the chat endpoint.
const CHAT_LIMIT: RateLimitConfig = {
  capacity: 30, // Burst of 30 messages
  refillPerSec: 0.5, // = 30/min sustained
};

/**
 * Run the chat limiter for the caller behind `req`.
 * The bucket key is "chat:" followed by whatever `getClientIp` returns.
 */
export function checkChatRateLimit(req: Request): RateLimitResult {
  const bucketKey = `chat:${getClientIp(req)}`;
  return rateLimit(bucketKey, CHAT_LIMIT);
}