feat(deskew): add debug logging, progress callbacks, and WASM safety

- Add step-by-step console logging throughout the algorithm
- Add onProgress callback for UI progress bar integration
- Fix WASM OOM: clamp output dimensions with proper matrix scaling
  (previously clamped size but not the transform, causing cropping)
- Fix waitForOpenCV race condition: probe cv.Mat() instead of
  checking constructor existence
- Wrap all OpenCV mats in try/finally for guaranteed cleanup
- Raise MAX_OUTPUT_DIM to 12288 for more leniency

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Samuel Prevost 2026-04-14 23:19:16 +02:00
parent 3e0284da4c
commit 1bc1f46bb8
2 changed files with 328 additions and 199 deletions

View File

@ -1,9 +1,8 @@
/**
* deskew.ts — Browser-based perspective correction using OpenCV.js (WASM)
*
* Adapted from the reference algorithm. Accepts N datums (rectangles and/or
* lines), each with known real-world dimensions and a confidence score (1–5).
* Minimum: one rectangle.
* Accepts N datums (rectangles and/or lines), each with known real-world
* dimensions and a confidence score (1–5). Minimum: one rectangle.
*
* Algorithm:
* 1. Pick the highest-confidence rectangle as primary reference.
@ -27,6 +26,10 @@ import type {
RectDatum,
} from "@/types"
// Max output dimension in pixels to avoid WASM OOM
// 12288 = ~576MB RGBA at square, but actual images are rarely square
const MAX_OUTPUT_DIM = 12288
// ─── OpenCV helpers ──────────────────────────────────────────────────────────
function pointsToMat(points: Point[]): InstanceType<typeof cv.Mat> {
@ -75,7 +78,8 @@ function mul3x3(A: number[], B: number[]): number[] {
for (let c = 0; c < 3; c++) {
let sum = 0
for (let k = 0; k < 3; k++) {
sum += (A[r * 3 + k] ?? 0) * (B[k * 3 + c] ?? 0)
sum +=
(A[r * 3 + k] ?? 0) * (B[k * 3 + c] ?? 0)
}
R[r * 3 + c] = sum
}
@ -86,17 +90,20 @@ function mul3x3(A: number[], B: number[]): number[] {
// ─── Validation ──────────────────────────────────────────────────────────────
function pickPrimary(datums: Datum[]): RectDatum {
const rects = datums.filter((d): d is RectDatum => d.type === "rectangle")
const rects = datums.filter(
(d): d is RectDatum => d.type === "rectangle",
)
if (rects.length === 0) {
throw new Error(
"At least one rectangle datum is required for perspective correction.",
)
}
// Highest confidence; tie-break by pixel area (larger = more precise corners)
rects.sort((a, b) => {
if (b.confidence !== a.confidence) return b.confidence - a.confidence
if (b.confidence !== a.confidence)
return b.confidence - a.confidence
const area = (r: RectDatum) =>
dist(r.corners[0], r.corners[1]) * dist(r.corners[0], r.corners[3])
dist(r.corners[0], r.corners[1]) *
dist(r.corners[0], r.corners[3])
return area(b) - area(a)
})
return rects[0] as RectDatum
@ -109,7 +116,6 @@ function pickPrimary(datums: Datum[]): RectDatum {
function cornersToAlgoOrder(
corners: [Point, Point, Point, Point],
): [Point, Point, Point, Point] {
// App: [TL, TR, BR, BL] → Algo: [TL, TR, BL, BR]
return [corners[0], corners[1], corners[3], corners[2]]
}
@ -123,11 +129,8 @@ function canvasToBlob(
return new Promise((resolve, reject) => {
canvas.toBlob(
(b) => {
if (b) {
resolve(b)
} else {
reject(new Error("toBlob failed"))
}
if (b) resolve(b)
else reject(new Error("toBlob failed"))
},
type,
quality,
@ -137,231 +140,355 @@ function canvasToBlob(
// ─── Core ────────────────────────────────────────────────────────────────────
export async function deskewImage(input: DeskewInput): Promise<DeskewResult> {
const { image, datums, scalePxPerMm: scale } = input
const log = (tag: string, ...args: unknown[]) => {
console.log(`[deskew:${tag}]`, ...args)
}
export async function deskewImage(
input: DeskewInput,
): Promise<DeskewResult> {
const { image, datums, scalePxPerMm: scale, onProgress } = input
log("start", `${String(datums.length)} datums, scale=${String(scale)} px/mm`)
const TOTAL_STEPS = 7
const progress = async (step: number, label: string) => {
log(`progress`, `[${String(step + 1)}/${String(TOTAL_STEPS)}] ${label}`)
onProgress?.(step, TOTAL_STEPS, label)
// Yield to let the browser repaint
await new Promise((r) => {
requestAnimationFrame(r)
})
}
if (datums.length === 0) throw new Error("No datums provided.")
const primary = pickPrimary(datums)
log("primary", primary.label, `${String(primary.widthMm)}×${String(primary.heightMm)}mm`, `conf=${String(primary.confidence)}`)
// Load source image into OpenCV
let srcCanvas: HTMLCanvasElement
if (image instanceof HTMLCanvasElement) {
srcCanvas = image
log("input", `canvas ${String(image.width)}×${String(image.height)}`)
} else {
srcCanvas = document.createElement("canvas")
srcCanvas.width = image.naturalWidth
srcCanvas.height = image.naturalHeight
log("input", `img ${String(image.naturalWidth)}×${String(image.naturalHeight)}, drawing to canvas`)
const ctx = srcCanvas.getContext("2d")
if (!ctx) throw new Error("Failed to get 2d context")
ctx.drawImage(image, 0, 0)
}
const src = cv.imread(srcCanvas)
const imgW = src.cols
const imgH = src.rows
// ================================================================
// STEP 1 — Initial perspective correction from primary rectangle
// ================================================================
const pw = primary.widthMm * scale
const ph = primary.heightMm * scale
await progress(0, "Loading image into OpenCV")
const algoCorners = cornersToAlgoOrder(primary.corners)
const srcPts = pointsToMat(algoCorners)
const dstInit = pointsToMat([
{ x: 0, y: 0 },
{ x: pw, y: 0 },
{ x: 0, y: ph },
{ x: pw, y: ph },
])
const mInit = cv.getPerspectiveTransform(srcPts, dstInit)
// All OpenCV mats to clean up
const mats: InstanceType<typeof cv.Mat>[] = []
const track = <T extends InstanceType<typeof cv.Mat>>(m: T): T => {
mats.push(m)
return m
}
// ================================================================
// STEP 2 — Measure all secondary datums, accumulate corrections
// ================================================================
let xWSum = 0,
xWTotal = 0
let yWSum = 0,
yWTotal = 0
const reports: DatumReport[] = []
try {
log("cv.imread", "reading source canvas into cv.Mat")
const src = track(cv.imread(srcCanvas))
const imgW = src.cols
const imgH = src.rows
log("cv.imread", `done: ${String(imgW)}×${String(imgH)}, type=${String(src.type())}, channels=${String(src.channels())}`)
for (const datum of datums) {
const w = datum.confidence
// ============================================================
// STEP 1 — Initial perspective correction from primary rect
// ============================================================
await progress(1, "Computing initial homography")
const pw = primary.widthMm * scale
const ph = primary.heightMm * scale
log("step1", `dest rect: ${pw.toFixed(1)}×${ph.toFixed(1)} px`)
if (datum === primary) {
reports.push({
label: datum.label,
type: "rectangle",
measuredMm: datum.widthMm,
expectedMm: datum.widthMm,
errorPercent: 0,
axisContribution: "both",
})
continue
}
const algoCorners = cornersToAlgoOrder(primary.corners)
log("step1", `corners (algo order): ${JSON.stringify(algoCorners)}`)
const srcPts = track(pointsToMat(algoCorners))
if (datum.type === "line") {
const [s, e] = transformPoints(datum.endpoints as Point[], mInit)
if (!s || !e) continue
const dx = Math.abs(e.x - s.x)
const dy = Math.abs(e.y - s.y)
const measured = dist(s, e)
const expected = datum.lengthMm * scale
const ratio = expected / measured
const dstInit = track(
pointsToMat([
{ x: 0, y: 0 },
{ x: pw, y: 0 },
{ x: 0, y: ph },
{ x: pw, y: ph },
]),
)
log("step1", "calling getPerspectiveTransform (initial)")
const mInit = track(
cv.getPerspectiveTransform(srcPts, dstInit),
)
log("step1", `mInit type=${String(mInit.type())}, rows=${String(mInit.rows)}, cols=${String(mInit.cols)}`)
// Axis contribution proportional to alignment
const total = dx + dy
if (total > 1e-6) {
const xFrac = dx / total
const yFrac = dy / total
xWSum += ratio * w * xFrac
xWTotal += w * xFrac
yWSum += ratio * w * yFrac
yWTotal += w * yFrac
// ============================================================
// STEP 2 — Measure secondary datums, accumulate corrections
// ============================================================
await progress(2, "Measuring secondary datums")
let xWSum = 0,
xWTotal = 0
let yWSum = 0,
yWTotal = 0
const reports: DatumReport[] = []
for (const datum of datums) {
const w = datum.confidence
if (datum === primary) {
reports.push({
label: datum.label,
type: "rectangle",
measuredMm: datum.widthMm,
expectedMm: datum.widthMm,
errorPercent: 0,
axisContribution: "both",
})
continue
}
reports.push({
label: datum.label,
type: "line",
measuredMm: measured / scale,
expectedMm: datum.lengthMm,
errorPercent: Math.abs(1 - ratio) * 100,
axisContribution: dx > dy ? "x" : "y",
})
} else {
// Secondary rectangle: top edge → X, left edge → Y
const ac = cornersToAlgoOrder(datum.corners)
const [tl, tr, bl] = transformPoints([ac[0], ac[1], ac[2]], mInit)
if (!tl || !tr || !bl) continue
const mW = dist(tl, tr)
const mH = dist(tl, bl)
const xR = (datum.widthMm * scale) / mW
const yR = (datum.heightMm * scale) / mH
if (datum.type === "line") {
const pts = transformPoints(
datum.endpoints as Point[],
mInit,
)
const s = pts[0]
const e = pts[1]
if (!s || !e) continue
const dx = Math.abs(e.x - s.x)
const dy = Math.abs(e.y - s.y)
const measured = dist(s, e)
const expected = datum.lengthMm * scale
const ratio = expected / measured
xWSum += xR * w
xWTotal += w
yWSum += yR * w
yWTotal += w
const total = dx + dy
if (total > 1e-6) {
const xFrac = dx / total
const yFrac = dy / total
xWSum += ratio * w * xFrac
xWTotal += w * xFrac
yWSum += ratio * w * yFrac
yWTotal += w * yFrac
}
reports.push({
label: datum.label,
type: "rectangle",
measuredMm: mW / scale,
expectedMm: datum.widthMm,
errorPercent: (Math.abs(1 - xR) + Math.abs(1 - yR)) * 50,
axisContribution: "both",
})
reports.push({
label: datum.label,
type: "line",
measuredMm: measured / scale,
expectedMm: datum.lengthMm,
errorPercent: Math.abs(1 - ratio) * 100,
axisContribution: dx > dy ? "x" : "y",
})
} else {
const ac = cornersToAlgoOrder(datum.corners)
const pts = transformPoints(
[ac[0], ac[1], ac[2]],
mInit,
)
const tl = pts[0]
const tr = pts[1]
const bl = pts[2]
if (!tl || !tr || !bl) continue
const mW = dist(tl, tr)
const mH = dist(tl, bl)
const xR = (datum.widthMm * scale) / mW
const yR = (datum.heightMm * scale) / mH
xWSum += xR * w
xWTotal += w
yWSum += yR * w
yWTotal += w
reports.push({
label: datum.label,
type: "rectangle",
measuredMm: mW / scale,
expectedMm: datum.widthMm,
errorPercent:
(Math.abs(1 - xR) + Math.abs(1 - yR)) * 50,
axisContribution: "both",
})
}
}
// ============================================================
// STEP 3 — Weighted corrections (1.0 = no secondary data)
// ============================================================
await progress(3, "Computing axis corrections")
const xCorr: AxisCorrection = {
ratio: xWTotal > 0 ? xWSum / xWTotal : 1.0,
totalWeight: xWTotal,
}
const yCorr: AxisCorrection = {
ratio: yWTotal > 0 ? yWSum / yWTotal : 1.0,
totalWeight: yWTotal,
}
log("step3", `xCorr=${xCorr.ratio.toFixed(4)} (w=${xCorr.totalWeight.toFixed(1)}), yCorr=${yCorr.ratio.toFixed(4)} (w=${yCorr.totalWeight.toFixed(1)})`)
// ============================================================
// STEP 4 — Fold corrections, recompute transform
// ============================================================
await progress(4, "Recomputing final transform")
const pwFinal = pw * xCorr.ratio
const phFinal = ph * yCorr.ratio
log("step4", `final dest rect: ${pwFinal.toFixed(1)}×${phFinal.toFixed(1)} px`)
const dstFinal = track(
pointsToMat([
{ x: 0, y: 0 },
{ x: pwFinal, y: 0 },
{ x: 0, y: phFinal },
{ x: pwFinal, y: phFinal },
]),
)
log("step4", "calling getPerspectiveTransform (final)")
const mFinal = track(
cv.getPerspectiveTransform(srcPts, dstFinal),
)
log("step4", `mFinal type=${String(mFinal.type())}, rows=${String(mFinal.rows)}, cols=${String(mFinal.cols)}`)
// ============================================================
// STEP 5 — Output bounds + translation shift
// ============================================================
await progress(5, "Computing output bounds")
const imgCorners: Point[] = [
{ x: 0, y: 0 },
{ x: imgW, y: 0 },
{ x: 0, y: imgH },
{ x: imgW, y: imgH },
]
const warped = transformPoints(imgCorners, mFinal)
if (warped.length < 4) {
throw new Error(
"Perspective transform produced invalid bounds",
)
}
let xMin = Infinity,
yMin = Infinity,
xMax = -Infinity,
yMax = -Infinity
for (const c of warped) {
xMin = Math.min(xMin, c.x)
yMin = Math.min(yMin, c.y)
xMax = Math.max(xMax, c.x)
yMax = Math.max(yMax, c.y)
}
let outW = Math.ceil(xMax - xMin)
let outH = Math.ceil(yMax - yMin)
log("step5", `bounds: x=[${xMin.toFixed(1)}, ${xMax.toFixed(1)}], y=[${yMin.toFixed(1)}, ${yMax.toFixed(1)}]`)
log("step5", `raw output: ${String(outW)}×${String(outH)} px`)
// Guard against absurd output sizes that crash WASM
if (outW <= 0 || outH <= 0) {
throw new Error(
`Invalid output dimensions: ${String(outW)}×${String(outH)}`,
)
}
let downscale = 1
if (outW > MAX_OUTPUT_DIM || outH > MAX_OUTPUT_DIM) {
downscale = MAX_OUTPUT_DIM / Math.max(outW, outH)
log("step5", `CLAMPING from ${String(outW)}×${String(outH)} by factor ${downscale.toFixed(4)}`)
outW = Math.ceil(outW * downscale)
outH = Math.ceil(outH * downscale)
}
log("step5", `final output: ${String(outW)}×${String(outH)} px (${String(Math.round(outW * outH * 4 / 1024 / 1024))} MB RGBA)`)
const mData: number[] = readMat3x3(mFinal)
// Translate so the top-left warped corner is at (0,0),
// then scale down if we clamped the output size.
const tShift: number[] = [
downscale, 0, -xMin * downscale,
0, downscale, -yMin * downscale,
0, 0, 1,
]
const mOutData: number[] = mul3x3(tShift, mData)
const mOut = track(
cv.matFromArray(3, 3, cv.CV_64FC1, mOutData),
)
// ============================================================
// STEP 6 — Warp
// ============================================================
await progress(6, "Warping image (this may take a moment)")
log("step6", "calling warpPerspective...")
const dstMat = track(new cv.Mat())
cv.warpPerspective(
src,
dstMat,
mOut,
new cv.Size(outW, outH),
cv.INTER_LANCZOS4 as number,
cv.BORDER_CONSTANT as number,
new cv.Scalar(0, 0, 0, 0),
)
log("step6", `warpPerspective done, dstMat: ${String(dstMat.cols)}×${String(dstMat.rows)}, type=${String(dstMat.type())}`)
log("export", "cv.imshow to canvas")
const outCanvas = document.createElement("canvas")
outCanvas.width = outW
outCanvas.height = outH
cv.imshow(outCanvas, dstMat)
log("export", "canvas.toBlob (PNG)")
const blob = await canvasToBlob(outCanvas, "image/png", 0.95)
log("export", `blob size: ${String(Math.round(blob.size / 1024))} KB`)
const diagnostics: DeskewDiagnostics = {
primaryDatum: primary.label,
xCorrection: xCorr,
yCorrection: yCorr,
perDatum: reports,
outputWidthPx: outW,
outputHeightPx: outH,
}
log("done", "success")
return { correctedImageBlob: blob, diagnostics }
} finally {
// Always clean up all OpenCV mats, even on error
for (const m of mats) {
try {
m.delete()
} catch {
// already deleted or invalid — ignore
}
}
}
// ================================================================
// STEP 3 — Weighted corrections (1.0 = no secondary data)
// ================================================================
const xCorr: AxisCorrection = {
ratio: xWTotal > 0 ? xWSum / xWTotal : 1.0,
totalWeight: xWTotal,
}
const yCorr: AxisCorrection = {
ratio: yWTotal > 0 ? yWSum / yWTotal : 1.0,
totalWeight: yWTotal,
}
// ================================================================
// STEP 4 — Fold into destination rectangle, recompute transform
// ================================================================
const pwFinal = pw * xCorr.ratio
const phFinal = ph * yCorr.ratio
const dstFinal = pointsToMat([
{ x: 0, y: 0 },
{ x: pwFinal, y: 0 },
{ x: 0, y: phFinal },
{ x: pwFinal, y: phFinal },
])
const mFinal = cv.getPerspectiveTransform(srcPts, dstFinal)
// ================================================================
// STEP 5 — Output bounds + translation shift
// ================================================================
const imgCorners: Point[] = [
{ x: 0, y: 0 },
{ x: imgW, y: 0 },
{ x: 0, y: imgH },
{ x: imgW, y: imgH },
]
const warped = transformPoints(imgCorners, mFinal)
let xMin = Infinity,
yMin = Infinity,
xMax = -Infinity,
yMax = -Infinity
for (const c of warped) {
xMin = Math.min(xMin, c.x)
yMin = Math.min(yMin, c.y)
xMax = Math.max(xMax, c.x)
yMax = Math.max(yMax, c.y)
}
const outW = Math.ceil(xMax - xMin)
const outH = Math.ceil(yMax - yMin)
const mData: number[] = readMat3x3(mFinal)
const tShift: number[] = [1, 0, -xMin, 0, 1, -yMin, 0, 0, 1]
const mOutData: number[] = mul3x3(tShift, mData)
const mOut = cv.matFromArray(3, 3, cv.CV_64FC1, mOutData)
// ================================================================
// STEP 6 — Warp
// ================================================================
const dstMat = new cv.Mat()
cv.warpPerspective(
src,
dstMat,
mOut,
new cv.Size(outW, outH),
cv.INTER_LANCZOS4 as number,
cv.BORDER_CONSTANT as number,
new cv.Scalar(0, 0, 0, 0),
)
const outCanvas = document.createElement("canvas")
outCanvas.width = outW
outCanvas.height = outH
cv.imshow(outCanvas, dstMat)
// Cleanup OpenCV mats
src.delete()
srcPts.delete()
dstInit.delete()
mInit.delete()
dstFinal.delete()
mFinal.delete()
mOut.delete()
dstMat.delete()
const blob = await canvasToBlob(outCanvas, "image/png", 0.95)
const diagnostics: DeskewDiagnostics = {
primaryDatum: primary.label,
xCorrection: xCorr,
yCorrection: yCorr,
perDatum: reports,
outputWidthPx: outW,
outputHeightPx: outH,
}
return { correctedImageBlob: blob, diagnostics }
}
// ─── OpenCV init ────────────────────────────────────────────────────────────
let cvReady = false
/** Wait for OpenCV WASM to initialize. Call once at app startup. */
export function waitForOpenCV(): Promise<void> {
log("opencv", "waitForOpenCV called, cvReady=" + String(cvReady))
return new Promise<void>((resolve) => {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (cv.Mat) {
if (cvReady) {
log("opencv", "already ready")
resolve()
return
}
// Test if WASM is actually functional by trying to create a mat
try {
log("opencv", "probing cv.Mat()...")
const test = new cv.Mat()
test.delete()
cvReady = true
log("opencv", "probe succeeded, WASM ready")
resolve()
return
} catch {
log("opencv", "probe failed, waiting for onRuntimeInitialized")
// Not ready yet, wait for callback
}
cv.onRuntimeInitialized = () => {
cvReady = true
log("opencv", "onRuntimeInitialized fired, WASM ready")
resolve()
}
})

View File

@ -49,6 +49,8 @@ export interface DeskewInput {
exif: ExifData
/** Output pixels per mm. */
scalePxPerMm: number
/** Called with (stepIndex (0-based), totalSteps, stepLabel). */
onProgress?: (step: number, total: number, label: string) => void
}
export interface AxisCorrection {