From 1bc1f46bb81142d767d784181df6e3d21beca53d Mon Sep 17 00:00:00 2001 From: Samuel Prevost Date: Tue, 14 Apr 2026 23:19:16 +0200 Subject: [PATCH] feat(deskew): add debug logging, progress callbacks, and WASM safety - Add step-by-step console logging throughout the algorithm - Add onProgress callback for UI progress bar integration - Fix WASM OOM: clamp output dimensions with proper matrix scaling (previously clamped size but not the transform, causing cropping) - Fix waitForOpenCV race condition: probe cv.Mat() instead of checking constructor existence - Wrap all OpenCV mats in try/finally for guaranteed cleanup - Raise MAX_OUTPUT_DIM to 12288 for more leniency Co-Authored-By: Claude --- src/lib/deskew.ts | 525 ++++++++++++++++++++++++++++----------------- src/types/index.ts | 2 + 2 files changed, 328 insertions(+), 199 deletions(-) diff --git a/src/lib/deskew.ts b/src/lib/deskew.ts index 4f3e9f8..1a09750 100644 --- a/src/lib/deskew.ts +++ b/src/lib/deskew.ts @@ -1,9 +1,8 @@ /** * deskew.ts — Browser-based perspective correction using OpenCV.js (WASM) * - * Adapted from the reference algorithm. Accepts N datums (rectangles and/or - * lines), each with known real-world dimensions and a confidence score (1–5). - * Minimum: one rectangle. + * Accepts N datums (rectangles and/or lines), each with known real-world + * dimensions and a confidence score (1–5). Minimum: one rectangle. * * Algorithm: * 1. Pick the highest-confidence rectangle as primary reference. @@ -27,6 +26,10 @@ import type { RectDatum, } from "@/types" +// Max output dimension in pixels to avoid WASM OOM +// 12288 = ~576MB RGBA at square, but actual images are rarely square +const MAX_OUTPUT_DIM = 12288 + // ─── OpenCV helpers ────────────────────────────────────────────────────────── function pointsToMat(points: Point[]): InstanceType { @@ -75,7 +78,8 @@ function mul3x3(A: number[], B: number[]): number[] { for (let c = 0; c < 3; c++) { let sum = 0 for (let k = 0; k < 3; k++) { - sum += (A[r * 3 + k] ?? 0) * (B[k * 3 + c] ?? 0) + sum += + (A[r * 3 + k] ?? 0) * (B[k * 3 + c] ?? 0) } R[r * 3 + c] = sum } @@ -86,17 +90,20 @@ function mul3x3(A: number[], B: number[]): number[] { // ─── Validation ────────────────────────────────────────────────────────────── function pickPrimary(datums: Datum[]): RectDatum { - const rects = datums.filter((d): d is RectDatum => d.type === "rectangle") + const rects = datums.filter( + (d): d is RectDatum => d.type === "rectangle", + ) if (rects.length === 0) { throw new Error( "At least one rectangle datum is required for perspective correction.", ) } - // Highest confidence; tie-break by pixel area (larger = more precise corners) rects.sort((a, b) => { - if (b.confidence !== a.confidence) return b.confidence - a.confidence + if (b.confidence !== a.confidence) + return b.confidence - a.confidence const area = (r: RectDatum) => - dist(r.corners[0], r.corners[1]) * dist(r.corners[0], r.corners[3]) + dist(r.corners[0], r.corners[1]) * + dist(r.corners[0], r.corners[3]) return area(b) - area(a) }) return rects[0] as RectDatum @@ -109,7 +116,6 @@ function pickPrimary(datums: Datum[]): RectDatum { function cornersToAlgoOrder( corners: [Point, Point, Point, Point], ): [Point, Point, Point, Point] { - // App: [TL, TR, BR, BL] → Algo: [TL, TR, BL, BR] return [corners[0], corners[1], corners[3], corners[2]] } @@ -123,11 +129,8 @@ function canvasToBlob( return new Promise((resolve, reject) => { canvas.toBlob( (b) => { - if (b) { - resolve(b) - } else { - reject(new Error("toBlob failed")) - } + if (b) resolve(b) + else reject(new Error("toBlob failed")) }, type, quality, @@ -137,231 +140,355 @@ function canvasToBlob( // ─── Core ──────────────────────────────────────────────────────────────────── -export async function deskewImage(input: DeskewInput): Promise { - const { image, datums, scalePxPerMm: scale } = input +const log = (tag: string, ...args: unknown[]) => { + console.log(`[deskew:${tag}]`, ...args) +} + +export async function deskewImage( + input: DeskewInput, +): Promise { + const { image, datums, scalePxPerMm: scale, onProgress } = input + log("start", `${String(datums.length)} datums, scale=${String(scale)} px/mm`) + + const TOTAL_STEPS = 7 + const progress = async (step: number, label: string) => { + log(`progress`, `[${String(step + 1)}/${String(TOTAL_STEPS)}] ${label}`) + onProgress?.(step, TOTAL_STEPS, label) + // Yield to let the browser repaint + await new Promise((r) => { + requestAnimationFrame(r) + }) + } if (datums.length === 0) throw new Error("No datums provided.") const primary = pickPrimary(datums) + log("primary", primary.label, `${String(primary.widthMm)}×${String(primary.heightMm)}mm`, `conf=${String(primary.confidence)}`) // Load source image into OpenCV let srcCanvas: HTMLCanvasElement if (image instanceof HTMLCanvasElement) { srcCanvas = image + log("input", `canvas ${String(image.width)}×${String(image.height)}`) } else { srcCanvas = document.createElement("canvas") srcCanvas.width = image.naturalWidth srcCanvas.height = image.naturalHeight + log("input", `img ${String(image.naturalWidth)}×${String(image.naturalHeight)}, drawing to canvas`) const ctx = srcCanvas.getContext("2d") if (!ctx) throw new Error("Failed to get 2d context") ctx.drawImage(image, 0, 0) } - const src = cv.imread(srcCanvas) - const imgW = src.cols - const imgH = src.rows - // ================================================================ - // STEP 1 — Initial perspective correction from primary rectangle - // ================================================================ - const pw = primary.widthMm * scale - const ph = primary.heightMm * scale + await progress(0, "Loading image into OpenCV") - const algoCorners = cornersToAlgoOrder(primary.corners) - const srcPts = pointsToMat(algoCorners) - const dstInit = pointsToMat([ - { x: 0, y: 0 }, - { x: pw, y: 0 }, - { x: 0, y: ph }, - { x: pw, y: ph }, - ]) - const mInit = cv.getPerspectiveTransform(srcPts, dstInit) + // All OpenCV mats to clean up + const mats: InstanceType[] = [] + const track = >(m: T): T => { + mats.push(m) + return m + } - // ================================================================ - // STEP 2 — Measure all secondary datums, accumulate corrections - // ================================================================ - let xWSum = 0, - xWTotal = 0 - let yWSum = 0, - yWTotal = 0 - const reports: DatumReport[] = [] + try { + log("cv.imread", "reading source canvas into cv.Mat") + const src = track(cv.imread(srcCanvas)) + const imgW = src.cols + const imgH = src.rows + log("cv.imread", `done: ${String(imgW)}×${String(imgH)}, type=${String(src.type())}, channels=${String(src.channels())}`) - for (const datum of datums) { - const w = datum.confidence + // ============================================================ + // STEP 1 — Initial perspective correction from primary rect + // ============================================================ + await progress(1, "Computing initial homography") + const pw = primary.widthMm * scale + const ph = primary.heightMm * scale + log("step1", `dest rect: ${pw.toFixed(1)}×${ph.toFixed(1)} px`) - if (datum === primary) { - reports.push({ - label: datum.label, - type: "rectangle", - measuredMm: datum.widthMm, - expectedMm: datum.widthMm, - errorPercent: 0, - axisContribution: "both", - }) - continue - } + const algoCorners = cornersToAlgoOrder(primary.corners) + log("step1", `corners (algo order): ${JSON.stringify(algoCorners)}`) + const srcPts = track(pointsToMat(algoCorners)) - if (datum.type === "line") { - const [s, e] = transformPoints(datum.endpoints as Point[], mInit) - if (!s || !e) continue - const dx = Math.abs(e.x - s.x) - const dy = Math.abs(e.y - s.y) - const measured = dist(s, e) - const expected = datum.lengthMm * scale - const ratio = expected / measured + const dstInit = track( + pointsToMat([ + { x: 0, y: 0 }, + { x: pw, y: 0 }, + { x: 0, y: ph }, + { x: pw, y: ph }, + ]), + ) + log("step1", "calling getPerspectiveTransform (initial)") + const mInit = track( + cv.getPerspectiveTransform(srcPts, dstInit), + ) + log("step1", `mInit type=${String(mInit.type())}, rows=${String(mInit.rows)}, cols=${String(mInit.cols)}`) - // Axis contribution proportional to alignment - const total = dx + dy - if (total > 1e-6) { - const xFrac = dx / total - const yFrac = dy / total - xWSum += ratio * w * xFrac - xWTotal += w * xFrac - yWSum += ratio * w * yFrac - yWTotal += w * yFrac + // ============================================================ + // STEP 2 — Measure secondary datums, accumulate corrections + // ============================================================ + await progress(2, "Measuring secondary datums") + let xWSum = 0, + xWTotal = 0 + let yWSum = 0, + yWTotal = 0 + const reports: DatumReport[] = [] + + for (const datum of datums) { + const w = datum.confidence + + if (datum === primary) { + reports.push({ + label: datum.label, + type: "rectangle", + measuredMm: datum.widthMm, + expectedMm: datum.widthMm, + errorPercent: 0, + axisContribution: "both", + }) + continue } - reports.push({ - label: datum.label, - type: "line", - measuredMm: measured / scale, - expectedMm: datum.lengthMm, - errorPercent: Math.abs(1 - ratio) * 100, - axisContribution: dx > dy ? "x" : "y", - }) - } else { - // Secondary rectangle: top edge → X, left edge → Y - const ac = cornersToAlgoOrder(datum.corners) - const [tl, tr, bl] = transformPoints([ac[0], ac[1], ac[2]], mInit) - if (!tl || !tr || !bl) continue - const mW = dist(tl, tr) - const mH = dist(tl, bl) - const xR = (datum.widthMm * scale) / mW - const yR = (datum.heightMm * scale) / mH + if (datum.type === "line") { + const pts = transformPoints( + datum.endpoints as Point[], + mInit, + ) + const s = pts[0] + const e = pts[1] + if (!s || !e) continue + const dx = Math.abs(e.x - s.x) + const dy = Math.abs(e.y - s.y) + const measured = dist(s, e) + const expected = datum.lengthMm * scale + const ratio = expected / measured - xWSum += xR * w - xWTotal += w - yWSum += yR * w - yWTotal += w + const total = dx + dy + if (total > 1e-6) { + const xFrac = dx / total + const yFrac = dy / total + xWSum += ratio * w * xFrac + xWTotal += w * xFrac + yWSum += ratio * w * yFrac + yWTotal += w * yFrac + } - reports.push({ - label: datum.label, - type: "rectangle", - measuredMm: mW / scale, - expectedMm: datum.widthMm, - errorPercent: (Math.abs(1 - xR) + Math.abs(1 - yR)) * 50, - axisContribution: "both", - }) + reports.push({ + label: datum.label, + type: "line", + measuredMm: measured / scale, + expectedMm: datum.lengthMm, + errorPercent: Math.abs(1 - ratio) * 100, + axisContribution: dx > dy ? "x" : "y", + }) + } else { + const ac = cornersToAlgoOrder(datum.corners) + const pts = transformPoints( + [ac[0], ac[1], ac[2]], + mInit, + ) + const tl = pts[0] + const tr = pts[1] + const bl = pts[2] + if (!tl || !tr || !bl) continue + const mW = dist(tl, tr) + const mH = dist(tl, bl) + const xR = (datum.widthMm * scale) / mW + const yR = (datum.heightMm * scale) / mH + + xWSum += xR * w + xWTotal += w + yWSum += yR * w + yWTotal += w + + reports.push({ + label: datum.label, + type: "rectangle", + measuredMm: mW / scale, + expectedMm: datum.widthMm, + errorPercent: + (Math.abs(1 - xR) + Math.abs(1 - yR)) * 50, + axisContribution: "both", + }) + } + } + + // ============================================================ + // STEP 3 — Weighted corrections (1.0 = no secondary data) + // ============================================================ + await progress(3, "Computing axis corrections") + const xCorr: AxisCorrection = { + ratio: xWTotal > 0 ? xWSum / xWTotal : 1.0, + totalWeight: xWTotal, + } + const yCorr: AxisCorrection = { + ratio: yWTotal > 0 ? yWSum / yWTotal : 1.0, + totalWeight: yWTotal, + } + log("step3", `xCorr=${xCorr.ratio.toFixed(4)} (w=${xCorr.totalWeight.toFixed(1)}), yCorr=${yCorr.ratio.toFixed(4)} (w=${yCorr.totalWeight.toFixed(1)})`) + + // ============================================================ + // STEP 4 — Fold corrections, recompute transform + // ============================================================ + await progress(4, "Recomputing final transform") + const pwFinal = pw * xCorr.ratio + const phFinal = ph * yCorr.ratio + log("step4", `final dest rect: ${pwFinal.toFixed(1)}×${phFinal.toFixed(1)} px`) + + const dstFinal = track( + pointsToMat([ + { x: 0, y: 0 }, + { x: pwFinal, y: 0 }, + { x: 0, y: phFinal }, + { x: pwFinal, y: phFinal }, + ]), + ) + log("step4", "calling getPerspectiveTransform (final)") + const mFinal = track( + cv.getPerspectiveTransform(srcPts, dstFinal), + ) + log("step4", `mFinal type=${String(mFinal.type())}, rows=${String(mFinal.rows)}, cols=${String(mFinal.cols)}`) + + // ============================================================ + // STEP 5 — Output bounds + translation shift + // ============================================================ + await progress(5, "Computing output bounds") + const imgCorners: Point[] = [ + { x: 0, y: 0 }, + { x: imgW, y: 0 }, + { x: 0, y: imgH }, + { x: imgW, y: imgH }, + ] + const warped = transformPoints(imgCorners, mFinal) + if (warped.length < 4) { + throw new Error( + "Perspective transform produced invalid bounds", + ) + } + + let xMin = Infinity, + yMin = Infinity, + xMax = -Infinity, + yMax = -Infinity + for (const c of warped) { + xMin = Math.min(xMin, c.x) + yMin = Math.min(yMin, c.y) + xMax = Math.max(xMax, c.x) + yMax = Math.max(yMax, c.y) + } + + let outW = Math.ceil(xMax - xMin) + let outH = Math.ceil(yMax - yMin) + log("step5", `bounds: x=[${xMin.toFixed(1)}, ${xMax.toFixed(1)}], y=[${yMin.toFixed(1)}, ${yMax.toFixed(1)}]`) + log("step5", `raw output: ${String(outW)}×${String(outH)} px`) + + // Guard against absurd output sizes that crash WASM + if (outW <= 0 || outH <= 0) { + throw new Error( + `Invalid output dimensions: ${String(outW)}×${String(outH)}`, + ) + } + let downscale = 1 + if (outW > MAX_OUTPUT_DIM || outH > MAX_OUTPUT_DIM) { + downscale = MAX_OUTPUT_DIM / Math.max(outW, outH) + log("step5", `CLAMPING from ${String(outW)}×${String(outH)} by factor ${downscale.toFixed(4)}`) + outW = Math.ceil(outW * downscale) + outH = Math.ceil(outH * downscale) + } + log("step5", `final output: ${String(outW)}×${String(outH)} px (${String(Math.round(outW * outH * 4 / 1024 / 1024))} MB RGBA)`) + + const mData: number[] = readMat3x3(mFinal) + // Translate so the top-left warped corner is at (0,0), + // then scale down if we clamped the output size. + const tShift: number[] = [ + downscale, 0, -xMin * downscale, + 0, downscale, -yMin * downscale, + 0, 0, 1, + ] + const mOutData: number[] = mul3x3(tShift, mData) + const mOut = track( + cv.matFromArray(3, 3, cv.CV_64FC1, mOutData), + ) + + // ============================================================ + // STEP 6 — Warp + // ============================================================ + await progress(6, "Warping image (this may take a moment)") + log("step6", "calling warpPerspective...") + const dstMat = track(new cv.Mat()) + cv.warpPerspective( + src, + dstMat, + mOut, + new cv.Size(outW, outH), + cv.INTER_LANCZOS4 as number, + cv.BORDER_CONSTANT as number, + new cv.Scalar(0, 0, 0, 0), + ) + + log("step6", `warpPerspective done, dstMat: ${String(dstMat.cols)}×${String(dstMat.rows)}, type=${String(dstMat.type())}`) + + log("export", "cv.imshow to canvas") + const outCanvas = document.createElement("canvas") + outCanvas.width = outW + outCanvas.height = outH + cv.imshow(outCanvas, dstMat) + + log("export", "canvas.toBlob (PNG)") + const blob = await canvasToBlob(outCanvas, "image/png", 0.95) + log("export", `blob size: ${String(Math.round(blob.size / 1024))} KB`) + + const diagnostics: DeskewDiagnostics = { + primaryDatum: primary.label, + xCorrection: xCorr, + yCorrection: yCorr, + perDatum: reports, + outputWidthPx: outW, + outputHeightPx: outH, + } + + log("done", "success") + return { correctedImageBlob: blob, diagnostics } + } finally { + // Always clean up all OpenCV mats, even on error + for (const m of mats) { + try { + m.delete() + } catch { + // already deleted or invalid — ignore + } } } - - // ================================================================ - // STEP 3 — Weighted corrections (1.0 = no secondary data) - // ================================================================ - const xCorr: AxisCorrection = { - ratio: xWTotal > 0 ? xWSum / xWTotal : 1.0, - totalWeight: xWTotal, - } - const yCorr: AxisCorrection = { - ratio: yWTotal > 0 ? yWSum / yWTotal : 1.0, - totalWeight: yWTotal, - } - - // ================================================================ - // STEP 4 — Fold into destination rectangle, recompute transform - // ================================================================ - const pwFinal = pw * xCorr.ratio - const phFinal = ph * yCorr.ratio - - const dstFinal = pointsToMat([ - { x: 0, y: 0 }, - { x: pwFinal, y: 0 }, - { x: 0, y: phFinal }, - { x: pwFinal, y: phFinal }, - ]) - const mFinal = cv.getPerspectiveTransform(srcPts, dstFinal) - - // ================================================================ - // STEP 5 — Output bounds + translation shift - // ================================================================ - const imgCorners: Point[] = [ - { x: 0, y: 0 }, - { x: imgW, y: 0 }, - { x: 0, y: imgH }, - { x: imgW, y: imgH }, - ] - const warped = transformPoints(imgCorners, mFinal) - let xMin = Infinity, - yMin = Infinity, - xMax = -Infinity, - yMax = -Infinity - for (const c of warped) { - xMin = Math.min(xMin, c.x) - yMin = Math.min(yMin, c.y) - xMax = Math.max(xMax, c.x) - yMax = Math.max(yMax, c.y) - } - - const outW = Math.ceil(xMax - xMin) - const outH = Math.ceil(yMax - yMin) - - const mData: number[] = readMat3x3(mFinal) - const tShift: number[] = [1, 0, -xMin, 0, 1, -yMin, 0, 0, 1] - const mOutData: number[] = mul3x3(tShift, mData) - const mOut = cv.matFromArray(3, 3, cv.CV_64FC1, mOutData) - - // ================================================================ - // STEP 6 — Warp - // ================================================================ - const dstMat = new cv.Mat() - cv.warpPerspective( - src, - dstMat, - mOut, - new cv.Size(outW, outH), - cv.INTER_LANCZOS4 as number, - cv.BORDER_CONSTANT as number, - new cv.Scalar(0, 0, 0, 0), - ) - - const outCanvas = document.createElement("canvas") - outCanvas.width = outW - outCanvas.height = outH - cv.imshow(outCanvas, dstMat) - - // Cleanup OpenCV mats - src.delete() - srcPts.delete() - dstInit.delete() - mInit.delete() - dstFinal.delete() - mFinal.delete() - mOut.delete() - dstMat.delete() - - const blob = await canvasToBlob(outCanvas, "image/png", 0.95) - - const diagnostics: DeskewDiagnostics = { - primaryDatum: primary.label, - xCorrection: xCorr, - yCorrection: yCorr, - perDatum: reports, - outputWidthPx: outW, - outputHeightPx: outH, - } - - return { correctedImageBlob: blob, diagnostics } } // ─── OpenCV init ──────────────────────────────────────────────────────────── +let cvReady = false + /** Wait for OpenCV WASM to initialize. Call once at app startup. */ export function waitForOpenCV(): Promise { + log("opencv", "waitForOpenCV called, cvReady=" + String(cvReady)) return new Promise((resolve) => { - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition - if (cv.Mat) { + if (cvReady) { + log("opencv", "already ready") resolve() return } + + // Test if WASM is actually functional by trying to create a mat + try { + log("opencv", "probing cv.Mat()...") + const test = new cv.Mat() + test.delete() + cvReady = true + log("opencv", "probe succeeded, WASM ready") + resolve() + return + } catch { + log("opencv", "probe failed, waiting for onRuntimeInitialized") + // Not ready yet, wait for callback + } + cv.onRuntimeInitialized = () => { + cvReady = true + log("opencv", "onRuntimeInitialized fired, WASM ready") resolve() } }) diff --git a/src/types/index.ts b/src/types/index.ts index f9dd843..2727c54 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -49,6 +49,8 @@ export interface DeskewInput { exif: ExifData /** Output pixels per mm. */ scalePxPerMm: number + /** Called with (stepIndex 0-based, totalSteps, stepLabel) */ + onProgress?: (step: number, total: number, label: string) => void } export interface AxisCorrection {