feat(deskew): add debug logging, progress callbacks, and WASM safety

- Add step-by-step console logging throughout the algorithm
- Add onProgress callback for UI progress bar integration
- Fix WASM OOM: clamp output dimensions with proper matrix scaling
  (previously clamped size but not the transform, causing cropping)
- Fix waitForOpenCV race condition: probe cv.Mat() instead of
  checking constructor existence
- Wrap all OpenCV mats in try/finally for guaranteed cleanup
- Raise MAX_OUTPUT_DIM to 12288 for more leniency

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Samuel Prevost 2026-04-14 23:19:16 +02:00
parent 3e0284da4c
commit 1bc1f46bb8
2 changed files with 328 additions and 199 deletions

View File

@ -1,9 +1,8 @@
/** /**
* deskew.ts – Browser-based perspective correction using OpenCV.js (WASM) * deskew.ts – Browser-based perspective correction using OpenCV.js (WASM)
* *
* Adapted from the reference algorithm. Accepts N datums (rectangles and/or * Accepts N datums (rectangles and/or lines), each with known real-world
* lines), each with known real-world dimensions and a confidence score (1–5). * dimensions and a confidence score (1–5). Minimum: one rectangle.
* Minimum: one rectangle.
* *
* Algorithm: * Algorithm:
* 1. Pick the highest-confidence rectangle as primary reference. * 1. Pick the highest-confidence rectangle as primary reference.
@ -27,6 +26,10 @@ import type {
RectDatum, RectDatum,
} from "@/types" } from "@/types"
// Max output dimension (pixels per side) to avoid WASM OOM.
// A square 12288×12288 RGBA buffer is ~576 MiB, but actual images are rarely square.
const MAX_OUTPUT_DIM = 12288
// ─── OpenCV helpers ────────────────────────────────────────────────────────── // ─── OpenCV helpers ──────────────────────────────────────────────────────────
function pointsToMat(points: Point[]): InstanceType<typeof cv.Mat> { function pointsToMat(points: Point[]): InstanceType<typeof cv.Mat> {
@ -75,7 +78,8 @@ function mul3x3(A: number[], B: number[]): number[] {
for (let c = 0; c < 3; c++) { for (let c = 0; c < 3; c++) {
let sum = 0 let sum = 0
for (let k = 0; k < 3; k++) { for (let k = 0; k < 3; k++) {
sum += (A[r * 3 + k] ?? 0) * (B[k * 3 + c] ?? 0) sum +=
(A[r * 3 + k] ?? 0) * (B[k * 3 + c] ?? 0)
} }
R[r * 3 + c] = sum R[r * 3 + c] = sum
} }
@ -86,17 +90,20 @@ function mul3x3(A: number[], B: number[]): number[] {
// ─── Validation ────────────────────────────────────────────────────────────── // ─── Validation ──────────────────────────────────────────────────────────────
function pickPrimary(datums: Datum[]): RectDatum { function pickPrimary(datums: Datum[]): RectDatum {
const rects = datums.filter((d): d is RectDatum => d.type === "rectangle") const rects = datums.filter(
(d): d is RectDatum => d.type === "rectangle",
)
if (rects.length === 0) { if (rects.length === 0) {
throw new Error( throw new Error(
"At least one rectangle datum is required for perspective correction.", "At least one rectangle datum is required for perspective correction.",
) )
} }
// Highest confidence; tie-break by pixel area (larger = more precise corners)
rects.sort((a, b) => { rects.sort((a, b) => {
if (b.confidence !== a.confidence) return b.confidence - a.confidence if (b.confidence !== a.confidence)
return b.confidence - a.confidence
const area = (r: RectDatum) => const area = (r: RectDatum) =>
dist(r.corners[0], r.corners[1]) * dist(r.corners[0], r.corners[3]) dist(r.corners[0], r.corners[1]) *
dist(r.corners[0], r.corners[3])
return area(b) - area(a) return area(b) - area(a)
}) })
return rects[0] as RectDatum return rects[0] as RectDatum
@ -109,7 +116,6 @@ function pickPrimary(datums: Datum[]): RectDatum {
function cornersToAlgoOrder( function cornersToAlgoOrder(
corners: [Point, Point, Point, Point], corners: [Point, Point, Point, Point],
): [Point, Point, Point, Point] { ): [Point, Point, Point, Point] {
// App: [TL, TR, BR, BL] → Algo: [TL, TR, BL, BR]
return [corners[0], corners[1], corners[3], corners[2]] return [corners[0], corners[1], corners[3], corners[2]]
} }
@ -123,11 +129,8 @@ function canvasToBlob(
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
canvas.toBlob( canvas.toBlob(
(b) => { (b) => {
if (b) { if (b) resolve(b)
resolve(b) else reject(new Error("toBlob failed"))
} else {
reject(new Error("toBlob failed"))
}
}, },
type, type,
quality, quality,
@ -137,47 +140,91 @@ function canvasToBlob(
// ─── Core ──────────────────────────────────────────────────────────────────── // ─── Core ────────────────────────────────────────────────────────────────────
export async function deskewImage(input: DeskewInput): Promise<DeskewResult> { const log = (tag: string, ...args: unknown[]) => {
const { image, datums, scalePxPerMm: scale } = input console.log(`[deskew:${tag}]`, ...args)
}
export async function deskewImage(
input: DeskewInput,
): Promise<DeskewResult> {
const { image, datums, scalePxPerMm: scale, onProgress } = input
log("start", `${String(datums.length)} datums, scale=${String(scale)} px/mm`)
const TOTAL_STEPS = 7
const progress = async (step: number, label: string) => {
log(`progress`, `[${String(step + 1)}/${String(TOTAL_STEPS)}] ${label}`)
onProgress?.(step, TOTAL_STEPS, label)
// Yield to let the browser repaint
await new Promise((r) => {
requestAnimationFrame(r)
})
}
if (datums.length === 0) throw new Error("No datums provided.") if (datums.length === 0) throw new Error("No datums provided.")
const primary = pickPrimary(datums) const primary = pickPrimary(datums)
log("primary", primary.label, `${String(primary.widthMm)}×${String(primary.heightMm)}mm`, `conf=${String(primary.confidence)}`)
// Load source image into OpenCV // Load source image into OpenCV
let srcCanvas: HTMLCanvasElement let srcCanvas: HTMLCanvasElement
if (image instanceof HTMLCanvasElement) { if (image instanceof HTMLCanvasElement) {
srcCanvas = image srcCanvas = image
log("input", `canvas ${String(image.width)}×${String(image.height)}`)
} else { } else {
srcCanvas = document.createElement("canvas") srcCanvas = document.createElement("canvas")
srcCanvas.width = image.naturalWidth srcCanvas.width = image.naturalWidth
srcCanvas.height = image.naturalHeight srcCanvas.height = image.naturalHeight
log("input", `img ${String(image.naturalWidth)}×${String(image.naturalHeight)}, drawing to canvas`)
const ctx = srcCanvas.getContext("2d") const ctx = srcCanvas.getContext("2d")
if (!ctx) throw new Error("Failed to get 2d context") if (!ctx) throw new Error("Failed to get 2d context")
ctx.drawImage(image, 0, 0) ctx.drawImage(image, 0, 0)
} }
const src = cv.imread(srcCanvas)
await progress(0, "Loading image into OpenCV")
// All OpenCV mats to clean up
const mats: InstanceType<typeof cv.Mat>[] = []
const track = <T extends InstanceType<typeof cv.Mat>>(m: T): T => {
mats.push(m)
return m
}
try {
log("cv.imread", "reading source canvas into cv.Mat")
const src = track(cv.imread(srcCanvas))
const imgW = src.cols const imgW = src.cols
const imgH = src.rows const imgH = src.rows
log("cv.imread", `done: ${String(imgW)}×${String(imgH)}, type=${String(src.type())}, channels=${String(src.channels())}`)
// ================================================================ // ============================================================
// STEP 1 — Initial perspective correction from primary rectangle // STEP 1 — Initial perspective correction from primary rect
// ================================================================ // ============================================================
await progress(1, "Computing initial homography")
const pw = primary.widthMm * scale const pw = primary.widthMm * scale
const ph = primary.heightMm * scale const ph = primary.heightMm * scale
log("step1", `dest rect: ${pw.toFixed(1)}×${ph.toFixed(1)} px`)
const algoCorners = cornersToAlgoOrder(primary.corners) const algoCorners = cornersToAlgoOrder(primary.corners)
const srcPts = pointsToMat(algoCorners) log("step1", `corners (algo order): ${JSON.stringify(algoCorners)}`)
const dstInit = pointsToMat([ const srcPts = track(pointsToMat(algoCorners))
const dstInit = track(
pointsToMat([
{ x: 0, y: 0 }, { x: 0, y: 0 },
{ x: pw, y: 0 }, { x: pw, y: 0 },
{ x: 0, y: ph }, { x: 0, y: ph },
{ x: pw, y: ph }, { x: pw, y: ph },
]) ]),
const mInit = cv.getPerspectiveTransform(srcPts, dstInit) )
log("step1", "calling getPerspectiveTransform (initial)")
const mInit = track(
cv.getPerspectiveTransform(srcPts, dstInit),
)
log("step1", `mInit type=${String(mInit.type())}, rows=${String(mInit.rows)}, cols=${String(mInit.cols)}`)
// ================================================================ // ============================================================
// STEP 2 — Measure all secondary datums, accumulate corrections // STEP 2 — Measure secondary datums, accumulate corrections
// ================================================================ // ============================================================
await progress(2, "Measuring secondary datums")
let xWSum = 0, let xWSum = 0,
xWTotal = 0 xWTotal = 0
let yWSum = 0, let yWSum = 0,
@ -200,7 +247,12 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
} }
if (datum.type === "line") { if (datum.type === "line") {
const [s, e] = transformPoints(datum.endpoints as Point[], mInit) const pts = transformPoints(
datum.endpoints as Point[],
mInit,
)
const s = pts[0]
const e = pts[1]
if (!s || !e) continue if (!s || !e) continue
const dx = Math.abs(e.x - s.x) const dx = Math.abs(e.x - s.x)
const dy = Math.abs(e.y - s.y) const dy = Math.abs(e.y - s.y)
@ -208,7 +260,6 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
const expected = datum.lengthMm * scale const expected = datum.lengthMm * scale
const ratio = expected / measured const ratio = expected / measured
// Axis contribution proportional to alignment
const total = dx + dy const total = dx + dy
if (total > 1e-6) { if (total > 1e-6) {
const xFrac = dx / total const xFrac = dx / total
@ -228,9 +279,14 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
axisContribution: dx > dy ? "x" : "y", axisContribution: dx > dy ? "x" : "y",
}) })
} else { } else {
// Secondary rectangle: top edge → X, left edge → Y
const ac = cornersToAlgoOrder(datum.corners) const ac = cornersToAlgoOrder(datum.corners)
const [tl, tr, bl] = transformPoints([ac[0], ac[1], ac[2]], mInit) const pts = transformPoints(
[ac[0], ac[1], ac[2]],
mInit,
)
const tl = pts[0]
const tr = pts[1]
const bl = pts[2]
if (!tl || !tr || !bl) continue if (!tl || !tr || !bl) continue
const mW = dist(tl, tr) const mW = dist(tl, tr)
const mH = dist(tl, bl) const mH = dist(tl, bl)
@ -247,15 +303,17 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
type: "rectangle", type: "rectangle",
measuredMm: mW / scale, measuredMm: mW / scale,
expectedMm: datum.widthMm, expectedMm: datum.widthMm,
errorPercent: (Math.abs(1 - xR) + Math.abs(1 - yR)) * 50, errorPercent:
(Math.abs(1 - xR) + Math.abs(1 - yR)) * 50,
axisContribution: "both", axisContribution: "both",
}) })
} }
} }
// ================================================================ // ============================================================
// STEP 3 — Weighted corrections (1.0 = no secondary data) // STEP 3 — Weighted corrections (1.0 = no secondary data)
// ================================================================ // ============================================================
await progress(3, "Computing axis corrections")
const xCorr: AxisCorrection = { const xCorr: AxisCorrection = {
ratio: xWTotal > 0 ? xWSum / xWTotal : 1.0, ratio: xWTotal > 0 ? xWSum / xWTotal : 1.0,
totalWeight: xWTotal, totalWeight: xWTotal,
@ -264,24 +322,34 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
ratio: yWTotal > 0 ? yWSum / yWTotal : 1.0, ratio: yWTotal > 0 ? yWSum / yWTotal : 1.0,
totalWeight: yWTotal, totalWeight: yWTotal,
} }
log("step3", `xCorr=${xCorr.ratio.toFixed(4)} (w=${xCorr.totalWeight.toFixed(1)}), yCorr=${yCorr.ratio.toFixed(4)} (w=${yCorr.totalWeight.toFixed(1)})`)
// ================================================================ // ============================================================
// STEP 4 — Fold into destination rectangle, recompute transform // STEP 4 — Fold corrections, recompute transform
// ================================================================ // ============================================================
await progress(4, "Recomputing final transform")
const pwFinal = pw * xCorr.ratio const pwFinal = pw * xCorr.ratio
const phFinal = ph * yCorr.ratio const phFinal = ph * yCorr.ratio
log("step4", `final dest rect: ${pwFinal.toFixed(1)}×${phFinal.toFixed(1)} px`)
const dstFinal = pointsToMat([ const dstFinal = track(
pointsToMat([
{ x: 0, y: 0 }, { x: 0, y: 0 },
{ x: pwFinal, y: 0 }, { x: pwFinal, y: 0 },
{ x: 0, y: phFinal }, { x: 0, y: phFinal },
{ x: pwFinal, y: phFinal }, { x: pwFinal, y: phFinal },
]) ]),
const mFinal = cv.getPerspectiveTransform(srcPts, dstFinal) )
log("step4", "calling getPerspectiveTransform (final)")
const mFinal = track(
cv.getPerspectiveTransform(srcPts, dstFinal),
)
log("step4", `mFinal type=${String(mFinal.type())}, rows=${String(mFinal.rows)}, cols=${String(mFinal.cols)}`)
// ================================================================ // ============================================================
// STEP 5 — Output bounds + translation shift // STEP 5 — Output bounds + translation shift
// ================================================================ // ============================================================
await progress(5, "Computing output bounds")
const imgCorners: Point[] = [ const imgCorners: Point[] = [
{ x: 0, y: 0 }, { x: 0, y: 0 },
{ x: imgW, y: 0 }, { x: imgW, y: 0 },
@ -289,6 +357,12 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
{ x: imgW, y: imgH }, { x: imgW, y: imgH },
] ]
const warped = transformPoints(imgCorners, mFinal) const warped = transformPoints(imgCorners, mFinal)
if (warped.length < 4) {
throw new Error(
"Perspective transform produced invalid bounds",
)
}
let xMin = Infinity, let xMin = Infinity,
yMin = Infinity, yMin = Infinity,
xMax = -Infinity, xMax = -Infinity,
@ -300,18 +374,45 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
yMax = Math.max(yMax, c.y) yMax = Math.max(yMax, c.y)
} }
const outW = Math.ceil(xMax - xMin) let outW = Math.ceil(xMax - xMin)
const outH = Math.ceil(yMax - yMin) let outH = Math.ceil(yMax - yMin)
log("step5", `bounds: x=[${xMin.toFixed(1)}, ${xMax.toFixed(1)}], y=[${yMin.toFixed(1)}, ${yMax.toFixed(1)}]`)
log("step5", `raw output: ${String(outW)}×${String(outH)} px`)
// Guard against absurd output sizes that crash WASM
if (outW <= 0 || outH <= 0) {
throw new Error(
`Invalid output dimensions: ${String(outW)}×${String(outH)}`,
)
}
let downscale = 1
if (outW > MAX_OUTPUT_DIM || outH > MAX_OUTPUT_DIM) {
downscale = MAX_OUTPUT_DIM / Math.max(outW, outH)
log("step5", `CLAMPING from ${String(outW)}×${String(outH)} by factor ${downscale.toFixed(4)}`)
outW = Math.ceil(outW * downscale)
outH = Math.ceil(outH * downscale)
}
log("step5", `final output: ${String(outW)}×${String(outH)} px (${String(Math.round(outW * outH * 4 / 1024 / 1024))} MB RGBA)`)
const mData: number[] = readMat3x3(mFinal) const mData: number[] = readMat3x3(mFinal)
const tShift: number[] = [1, 0, -xMin, 0, 1, -yMin, 0, 0, 1] // Translate so the top-left warped corner is at (0,0),
// then scale down if we clamped the output size.
const tShift: number[] = [
downscale, 0, -xMin * downscale,
0, downscale, -yMin * downscale,
0, 0, 1,
]
const mOutData: number[] = mul3x3(tShift, mData) const mOutData: number[] = mul3x3(tShift, mData)
const mOut = cv.matFromArray(3, 3, cv.CV_64FC1, mOutData) const mOut = track(
cv.matFromArray(3, 3, cv.CV_64FC1, mOutData),
)
// ================================================================ // ============================================================
// STEP 6 — Warp // STEP 6 — Warp
// ================================================================ // ============================================================
const dstMat = new cv.Mat() await progress(6, "Warping image (this may take a moment)")
log("step6", "calling warpPerspective...")
const dstMat = track(new cv.Mat())
cv.warpPerspective( cv.warpPerspective(
src, src,
dstMat, dstMat,
@ -322,22 +423,17 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
new cv.Scalar(0, 0, 0, 0), new cv.Scalar(0, 0, 0, 0),
) )
log("step6", `warpPerspective done, dstMat: ${String(dstMat.cols)}×${String(dstMat.rows)}, type=${String(dstMat.type())}`)
log("export", "cv.imshow to canvas")
const outCanvas = document.createElement("canvas") const outCanvas = document.createElement("canvas")
outCanvas.width = outW outCanvas.width = outW
outCanvas.height = outH outCanvas.height = outH
cv.imshow(outCanvas, dstMat) cv.imshow(outCanvas, dstMat)
// Cleanup OpenCV mats log("export", "canvas.toBlob (PNG)")
src.delete()
srcPts.delete()
dstInit.delete()
mInit.delete()
dstFinal.delete()
mFinal.delete()
mOut.delete()
dstMat.delete()
const blob = await canvasToBlob(outCanvas, "image/png", 0.95) const blob = await canvasToBlob(outCanvas, "image/png", 0.95)
log("export", `blob size: ${String(Math.round(blob.size / 1024))} KB`)
const diagnostics: DeskewDiagnostics = { const diagnostics: DeskewDiagnostics = {
primaryDatum: primary.label, primaryDatum: primary.label,
@ -348,20 +444,51 @@ export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
outputHeightPx: outH, outputHeightPx: outH,
} }
log("done", "success")
return { correctedImageBlob: blob, diagnostics } return { correctedImageBlob: blob, diagnostics }
} finally {
// Always clean up all OpenCV mats, even on error
for (const m of mats) {
try {
m.delete()
} catch {
// already deleted or invalid — ignore
}
}
}
} }
// ─── OpenCV init ──────────────────────────────────────────────────────────── // ─── OpenCV init ────────────────────────────────────────────────────────────
let cvReady = false
/** Wait for OpenCV WASM to initialize. Call once at app startup. */ /** Wait for OpenCV WASM to initialize. Call once at app startup. */
export function waitForOpenCV(): Promise<void> { export function waitForOpenCV(): Promise<void> {
log("opencv", "waitForOpenCV called, cvReady=" + String(cvReady))
return new Promise<void>((resolve) => { return new Promise<void>((resolve) => {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition if (cvReady) {
if (cv.Mat) { log("opencv", "already ready")
resolve() resolve()
return return
} }
// Test if WASM is actually functional by trying to create a mat
try {
log("opencv", "probing cv.Mat()...")
const test = new cv.Mat()
test.delete()
cvReady = true
log("opencv", "probe succeeded, WASM ready")
resolve()
return
} catch {
log("opencv", "probe failed, waiting for onRuntimeInitialized")
// Not ready yet, wait for callback
}
cv.onRuntimeInitialized = () => { cv.onRuntimeInitialized = () => {
cvReady = true
log("opencv", "onRuntimeInitialized fired, WASM ready")
resolve() resolve()
} }
}) })

View File

@ -49,6 +49,8 @@ export interface DeskewInput {
exif: ExifData exif: ExifData
/** Output pixels per mm. */ /** Output pixels per mm. */
scalePxPerMm: number scalePxPerMm: number
/** Called with (stepIndex 0-based, totalSteps, stepLabel) */
onProgress?: (step: number, total: number, label: string) => void
} }
export interface AxisCorrection { export interface AxisCorrection {