Fetching latest headlines…
Building an Automated Invoice Processing Pipeline with Node.js
NORTH AMERICA
🇺🇸 United States · May 7, 2026

Building an Automated Invoice Processing Pipeline with Node.js

0 views · 0 likes · 0 comments
Originally published by Dev.to

Accounts payable teams spend an average of 3.7 minutes manually processing each invoice. At 200 invoices per month, that's 12+ hours of data entry. Here's how to build an automated pipeline that brings this to under 10 seconds per document.

Pipeline Architecture

Email/SFTP/API → Receive → Extract → Validate → Enrich → Store → Notify

Each stage is independent and can fail gracefully without losing the document.

Stage 1: Document Ingestion

Accept invoices from multiple sources:

const express = require('express');
const multer  = require('multer');
const path    = require('path');

// Extensions (lower-case, with leading dot) that the upload filter lets through.
const ACCEPTED_INVOICE_EXTENSIONS = ['.pdf', '.docx', '.xlsx', '.png', '.jpg'];

/**
 * Upload filter: reports through `cb` whether the file's extension, taken from
 * its original client-side name and lower-cased, is in the accepted list.
 *
 * @param {object} req  - Incoming request (unused).
 * @param {object} file - Upload descriptor; only `originalname` is read.
 * @param {Function} cb - Called as `cb(null, isAccepted)`.
 */
function acceptInvoiceFile(req, file, cb) {
  const extension  = path.extname(file.originalname).toLowerCase();
  const isAccepted = ACCEPTED_INVOICE_EXTENSIONS.includes(extension);
  cb(null, isAccepted);
}

// Upload middleware: files are written under /tmp/invoices, capped at 20MB each.
const upload = multer({
  dest: '/tmp/invoices',
  limits: { fileSize: 20 * 1024 * 1024 }, // 20MB
  fileFilter: acceptInvoiceFile,
});

/**
 * POST /api/invoices/upload
 *
 * Accepts up to 20 files in the `files` field, turns each stored file into a
 * queued job, hands the batch to the queue and answers with the job ids.
 *
 * Responses:
 *   200 { jobs: [{ id, status }] }  - batch was queued
 *   400 { error }                   - the request carried no usable files
 * Any failure while building or queueing the batch is forwarded to the
 * Express error handler via `next(err)` instead of being left as an
 * unhandled rejection of the async handler.
 */
app.post('/api/invoices/upload', upload.array('files', 20), async (req, res, next) => {
  try {
    // `req.files` can be absent (e.g. a non-multipart request) or empty.
    const files = req.files ?? [];
    if (files.length === 0) {
      res.status(400).json({ error: 'No invoice files were uploaded' });
      return;
    }

    const jobs = files.map((file) => ({
      id:       generateJobId(),
      path:     file.path,
      filename: file.originalname,
      status:   'queued',
      attempts: 0, // processJob increments this counter on every failure
    }));

    await queue.addBatch(jobs);
    res.json({ jobs: jobs.map(({ id, status }) => ({ id, status })) });
  } catch (err) {
    next(err);
  }
});

Stage 2: Extraction

/**
 * Sends a queued invoice file to the extraction API and returns the parsed
 * JSON body of the response.
 *
 * @param {{ path: string, filename: string }} job - Job whose file is uploaded.
 * @returns {Promise<object>} The decoded JSON response of a successful call.
 * @throws {Error} `Extraction failed: <reason>` when the API answers with a
 *   non-2xx status. `<reason>` is the `message` field of the JSON error body
 *   when there is one, otherwise the HTTP status line.
 */
async function extractInvoiceData(job) {
  const requestedFields = [
    'invoice_number', 'invoice_date', 'due_date',
    'vendor_name', 'vendor_address', 'vendor_tax_id',
    'line_items', 'subtotal', 'tax_amount', 'total_amount',
    'currency', 'payment_terms',
  ];

  // NOTE(review): whether FormData accepts a Node read stream depends on which
  // FormData/fetch implementation is in scope here — confirm against the file's imports.
  const formData = new FormData();
  formData.append('file', fs.createReadStream(job.path), job.filename);
  formData.append('fields', JSON.stringify(requestedFields));

  const response = await fetch('https://parseflow.dev/api/extract', {
    method:  'POST',
    headers: { 'Authorization': `Bearer ${process.env.PARSEFLOW_KEY}` },
    body:    formData,
  });

  if (!response.ok) {
    // The error body is not guaranteed to be JSON (or to carry `message`), so
    // parse it defensively and fall back to the HTTP status instead of
    // surfacing a JSON parse error or "Extraction failed: undefined".
    let detail;
    try {
      const payload = await response.json();
      detail = payload?.message;
    } catch {
      detail = undefined;
    }

    const hasDetail = typeof detail === 'string' && detail !== '';
    const reason    = hasDetail
      ? detail
      : `HTTP ${response.status} ${response.statusText ?? ''}`.trim();
    throw new Error(`Extraction failed: ${reason}`);
  }

  return response.json();
}

Stage 3: Validation

Never trust extracted data without validation:

/**
 * Checks extracted invoice data for missing required fields and for
 * arithmetic inconsistencies between line items, subtotal, tax and total.
 *
 * Amounts are expected to be numbers. A cross-check is skipped when one of
 * the amounts it needs is absent or not a finite number, except that
 * non-numeric line-item totals are reported when a numeric subtotal exists.
 *
 * @param {object} data - Extracted invoice fields (`invoice_number`,
 *   `vendor_name`, `total_amount`, `subtotal`, `tax_amount`, `line_items`).
 * @returns {{ valid: boolean, errors: string[] }} `valid` is true only when
 *   `errors` is empty.
 */
function validateInvoice(data) {
  const errors    = [];
  const tolerance = 0.02; // 2 cents tolerance for rounding
  // Guards the tolerance against binary floating-point noise
  // (e.g. 1.02 - 1 evaluates to 0.020000000000000018).
  const floatSlack = 1e-9;

  const isAmount  = (value) => typeof value === 'number' && Number.isFinite(value);
  const mismatch  = (a, b) => Math.abs(a - b) > tolerance + floatSlack;

  // Required fields
  if (!data.invoice_number) errors.push('Missing invoice number');
  if (!data.vendor_name)    errors.push('Missing vendor name');
  if (!data.total_amount)   errors.push('Missing total amount');

  // Math validation: line items must add up to the subtotal
  if (data.line_items?.length > 0 && isAmount(data.subtotal)) {
    const lineTotal = data.line_items.reduce((sum, item) => sum + item?.total, 0);

    if (!isAmount(lineTotal)) {
      // A missing or non-numeric line total would otherwise turn the sum into
      // NaN (or a string) and make the comparison below pass silently.
      errors.push('Line items contain a missing or non-numeric total');
    } else if (mismatch(lineTotal, data.subtotal)) {
      errors.push(`Line items sum (${lineTotal}) != subtotal (${data.subtotal})`);
    }
  }

  // Math validation: subtotal + tax must equal the total. Checked for every
  // numeric triple, including a tax amount of 0 (tax-exempt invoices).
  if (isAmount(data.subtotal) && isAmount(data.tax_amount) && isAmount(data.total_amount)) {
    const expected = data.subtotal + data.tax_amount;
    if (mismatch(expected, data.total_amount)) {
      errors.push(`Subtotal + tax (${expected}) != total (${data.total_amount})`);
    }
  }

  // Duplicate detection
  // (check against your DB for same invoice_number + vendor)

  return { valid: errors.length === 0, errors };
}

Stage 4: Enrichment

Match the vendor to your supplier database:

/**
 * Looks up the invoice's vendor via `db.suppliers.findBestMatch` (described by
 * the original author as a fuzzy name match) and writes the outcome onto the
 * invoice object itself.
 *
 * - Match found: copies the supplier id, default GL account, default cost
 *   center, approver email and preferred payment method onto `data`.
 * - No match: flags `data` for manual review with a reason.
 *
 * @param {object} data - Extracted invoice; mutated in place.
 * @returns {Promise<object>} The same `data` object that was passed in.
 */
async function enrichInvoice(data) {
  const supplier = await db.suppliers.findBestMatch(data.vendor_name);

  if (!supplier) {
    return Object.assign(data, {
      requires_review: true,
      review_reason:   'Unknown vendor — manual matching required',
    });
  }

  return Object.assign(data, {
    supplier_id:    supplier.id,
    gl_account:     supplier.default_gl_account,
    cost_center:    supplier.default_cost_center,
    approver_email: supplier.approver_email,
    payment_method: supplier.preferred_payment_method,
  });
}

Stage 5: Notifications

/**
 * Emails the invoice's approver when the invoice needs a human decision:
 * its total exceeds the approval threshold, or it is flagged
 * `requires_review`. Does nothing otherwise.
 *
 * @param {object} invoice - Enriched invoice; reads `total_amount`,
 *   `requires_review`, `approver_email`, `invoice_number`, `vendor_name`.
 * @param {object} [options]
 * @param {number} [options.threshold=5000] - Totals strictly above this
 *   amount trigger a notification.
 * @returns {Promise<void>}
 */
async function notifyApprover(invoice, { threshold = 5000 } = {}) {
  // Only for invoices above threshold or flagged for review
  const needsApproval = invoice.total_amount > threshold || invoice.requires_review;
  if (!needsApproval) return;

  // NOTE(review): enrichInvoice sets `requires_review` without an
  // `approver_email` for unmatched vendors, so `to` can be undefined here —
  // confirm how emailService handles that or route those to a shared mailbox.
  await emailService.send({
    to:       invoice.approver_email,
    // Separator keeps the invoice number and vendor name from running together.
    subject:  `Invoice approval required: ${invoice.invoice_number} — ${invoice.vendor_name}`,
    template: 'invoice-approval',
    data:     invoice,
  });
}

Error Handling and Dead Letter Queue

/**
 * Runs one job through the pipeline: extract, validate, enrich, store and
 * notify. The job object is mutated to reflect the outcome and is always
 * written back with `db.jobs.update`, whichever path is taken.
 *
 * Outcomes (`job.status`):
 *   - 'completed'         : invoice stored and approver notification attempted
 *   - 'validation_failed' : `job.errors` set, job handed to `moveToReview`
 *   - 'retry'             : a step threw; `job.retry_after` is set to
 *                           attempts * 15 minutes from now
 *   - 'dead_letter'       : third failed attempt; `alertOps` is called
 *
 * @param {object} job - Queued job; `attempts` may be missing on first run.
 * @returns {Promise<void>}
 */
async function processJob(job) {
  try {
    job.status = 'processing';
    const extracted  = await extractInvoiceData(job);
    const validation = validateInvoice(extracted);

    if (!validation.valid) {
      job.status = 'validation_failed';
      job.errors = validation.errors;
      await moveToReview(job);
      return; // the `finally` below still persists the job
    }

    const enriched = await enrichInvoice(extracted);
    // NOTE(review): if notifyApprover throws after the invoice was created, a
    // retry will call db.invoices.create again for the same job — confirm the
    // store deduplicates on job_id.
    await db.invoices.create({ ...enriched, job_id: job.id });
    await notifyApprover(enriched);

    job.status = 'completed';
  } catch (err) {
    // Freshly queued jobs may not carry a counter yet; `undefined++` would
    // yield NaN and the dead-letter branch would never be reached.
    const previousAttempts = Number.isInteger(job.attempts) ? job.attempts : 0;
    job.attempts = previousAttempts + 1;

    if (job.attempts >= 3) {
      job.status = 'dead_letter';
      await alertOps(job, err);
    } else {
      job.status      = 'retry';
      job.retry_after = addMinutes(new Date(), job.attempts * 15);
    }
  } finally {
    // Persist on every path, including the validation early-return and a
    // failure inside alertOps.
    await db.jobs.update(job);
  }
}

Results

A pipeline like this, using ParseFlow for the extraction stage, processes a typical invoice in 4-8 seconds with 94%+ field accuracy across variable formats. The validation stage catches the remaining edge cases and routes them to a human reviewer queue rather than silently accepting bad data.

The full pipeline handles PDF, Word, and Excel with the same code path — no special-casing per format.

Comments (0)

Sign in to join the discussion

Be the first to comment!