Skip to content

Turn a contract into a template

Goal: take a signed agreement PDF and turn it into a reusable template — automatically replacing the parties, dollar amounts, and dates with {{placeholders}} you can fill for the next deal. It’s one of the most practical document-automation tasks in legal, and it needs no LLM and no API key — just a small on-device NER model.

It composes four packages into a pipeline — read the PDF, find fields with NER, then rewrite and redline as a template:

Terminal window
uv run examples/contract-to-template.py
Found 4 field(s) with on-device NER (no LLM, no key):
{{date_1}} <- 'March 1, 2026'
{{party_1}} <- 'Acme Corporation'
{{party_2}} <- 'Globex Industries'
{{amount_1}} <- '$50,000'
Redline (contract → template): 4 fields parameterized, 8 tracked change(s) → redline.docx
Fill it for a new deal:
...entered into as of June 30, 2026, by and between Initech LLC and Stark Industries.
examples/contract-to-template.py
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "kaos-pdf>=0.1.0,<0.2",
# "kaos-office>=0.1.4,<0.2",
# "kaos-content>=0.1.3,<0.2",
# "kaos-nlp-transformers>=0.1.5,<0.2",
# "fpdf2",
# ]
# ///
"""Turn a signed contract PDF into a reusable template — no LLM, no API key.
A genuinely practical legal task: take a *real* agreement, find its variable
fields (the parties, the dollar amounts, the dates), and swap them for template
placeholders so you can reuse the document. This composes four KAOS packages:
read the PDF + clean the text (kaos-pdf)
→ find fields with a local NER model (kaos-nlp-transformers / GLiNER)
→ rewrite as a {{template}} + redline the changes (kaos-content + kaos-office)
Then fill the template with new values to generate a fresh contract. Everything
runs locally — the only "model" is a small on-device NER model (downloaded once,
then cached; no provider key).
Run it:
uv run examples/contract-to-template.py
"""
from __future__ import annotations
import re
from pathlib import Path
import kaos_content as kc
import kaos_nlp_transformers as knt
import kaos_office as ko
import kaos_pdf
from fpdf import FPDF
from kaos_content.revision import Revisions
# Clause text of the stand-in "signed" agreement that the demo templatizes.
CLAUSES = [
    (
        "This Mutual Nondisclosure Agreement is entered into as of March 1, 2026, "
        "by and between Acme Corporation and Globex Industries."
    ),
    (
        "The receiving party shall pay liquidated damages of $50,000 for any breach "
        "of its confidentiality obligations under this Agreement."
    ),
]

# NER label -> template-field prefix. The keys are also the label list handed
# to the extractor, so adding an entry here adds a new kind of field.
FIELD_OF = dict(organization="party", money="amount", date="date")
# Loaded once at import time and reused by templatize(). Per the module
# docstring, the NER model is downloaded on first use and cached afterwards.
_extractor = knt.GLiNERExtractor.load()
def make_pdf(path: Path) -> Path:
    """Render each clause in CLAUSES to a PDF at *path* — a stand-in for a
    real signed PDF on disk. Returns *path* so calls can be chained."""
    line_height, clause_gap = 8, 3
    doc = FPDF()
    doc.add_page()
    doc.set_font("Helvetica", size=12)
    for body in CLAUSES:
        doc.multi_cell(0, line_height, body)
        # Line break between clauses, before the next full-width cell.
        doc.ln(clause_gap)
    doc.output(str(path))
    return path
def read_pdf_text(path: Path) -> str:
    """Return the text of page 0 of the PDF as one whitespace-normalized string.

    PDF line wrapping splits phrases across lines (e.g. 'Acme\\nCorporation'),
    so every run of whitespace is collapsed to a single space and the ends
    are trimmed.
    """
    page_text = kaos_pdf.extract_page_text(path, 0)
    collapsed = re.sub(r"\s+", " ", page_text)
    return collapsed.strip()
def templatize(text: str) -> tuple[str, dict[str, str]]:
    """Replace every party/amount/date in *text* with a {{field}} placeholder.

    NER runs over the whole document in a single call (context matters — in
    isolation a model may tag a generic phrase like 'the receiving party' as
    an org). Placeholders are numbered per field kind in document order,
    e.g. {{party_1}}, {{party_2}}, {{amount_1}}.

    Returns:
        The rewritten text and the placeholder → original-value map.
    """
    # Document order; when two spans start at the same offset, try the longer first.
    entities = sorted(
        _extractor.extract([text], labels=list(FIELD_OF), threshold=0.6)[0],
        key=lambda e: (e.start, -e.end),
    )
    counters: dict[str, int] = {}
    fields: dict[str, str] = {}
    pieces: list[str] = []
    cursor = 0  # end offset of the last span already replaced
    for e in entities:
        # NOTE(review): whether the extractor can return overlapping spans is
        # not visible here. Splicing two overlapping spans would garble the
        # text, so only the first span covering a region is replaced and it
        # alone consumes a field number.
        if e.start < cursor:
            continue
        base = FIELD_OF[e.label]
        counters[base] = counters.get(base, 0) + 1
        token = f"{{{{{base}_{counters[base]}}}}}"
        fields[token] = e.text
        pieces.append(text[cursor:e.start])
        pieces.append(token)
        cursor = e.end
    pieces.append(text[cursor:])
    # One left-to-right pass and a single join, instead of re-copying the
    # whole string once per entity.
    return "".join(pieces), fields
def sentences(text: str) -> list[str]:
    """Split *text* at whitespace that follows a period; drop empty pieces."""
    trimmed = (piece.strip() for piece in re.split(r"(?<=\.)\s+", text))
    return [piece for piece in trimmed if piece]
def write_docx(paragraphs: list[str], path: Path, title: str = "Mutual NDA") -> Path:
    """Write a DOCX with a level-1 heading followed by one paragraph per item.

    Args:
        paragraphs: Paragraph texts, written in order.
        path: Destination of the .docx file.
        title: Heading text. Defaults to "Mutual NDA", the heading this demo
            always used, so existing two-argument calls are unchanged.

    Returns:
        *path*, so the result can be passed straight to the redline step.
    """
    builder = kc.DocumentBuilder()
    builder.heading(1, title)
    for paragraph in paragraphs:
        builder.paragraph(paragraph)
    ko.write_docx(builder.build(), path)
    return path
def main():
    """Run the demo end to end: PDF → text → template → redline → filled contract.

    Every artifact is written under ./template-demo. Returns the detected
    fields, the tracked changes read back from the redline, and the first
    template sentence filled with the new deal's values.
    """
    out = Path.cwd() / "template-demo"
    out.mkdir(exist_ok=True)

    # Step 1 — produce the "signed" PDF, then pull its text back out, cleaned.
    text = read_pdf_text(make_pdf(out / "agreement.pdf"))
    original_sentences = sentences(text)
    print(f"Wrote files to {out}/ — open them to review.\n")
    print("Signed contract (agreement.pdf), text extracted & cleaned:")
    print(f" {original_sentences[0]}\n")

    # Step 2 — detect the variable fields and swap them for placeholders.
    template_text, fields = templatize(text)
    template_sentences = sentences(template_text)
    print(f"Found {len(fields)} field(s) with on-device NER (no LLM, no key):")
    for placeholder, original in fields.items():
        print(f" {placeholder:<12} <- {original!r}")
    print("\nReusable template (template.docx):")
    print(f" {template_sentences[0]}\n")

    # Step 3 — write both versions as DOCX and redline one against the other.
    # Each swap becomes a tracked change a human can review, which keeps the
    # parameterization auditable. Open redline.docx in Word.
    original_docx = write_docx(original_sentences, out / "agreement.docx")
    template_docx = write_docx(template_sentences, out / "template.docx")
    redline_path = out / "redline.docx"
    ko.write_redline(original_docx, template_docx, redline_path)
    redline_doc = ko.parse_docx(str(redline_path), track_changes=True)
    changes = list(Revisions.from_document(redline_doc))
    summary = (
        f"Redline (contract → template): {len(fields)} fields parameterized, "
        f"{len(changes)} tracked change(s) → redline.docx\n"
    )
    print(summary)

    # Step 4 — the reuse payoff: fill the template for a new deal.
    new_values = {
        "{{date_1}}": "June 30, 2026",
        "{{party_1}}": "Initech LLC",
        "{{party_2}}": "Stark Industries",
        "{{amount_1}}": "$250,000",
    }
    filled = template_sentences[0]
    for placeholder, replacement in new_values.items():
        filled = filled.replace(placeholder, replacement)
    print("Fill it for a new deal:")
    print(f" {filled}")
    return fields, changes, filled
if __name__ == "__main__":
    fields, changes, filled = main()
    # Self-check: NER recovered both named parties, numbered in document order...
    for placeholder, expected in (
        ("{{party_1}}", "Acme Corporation"),
        ("{{party_2}}", "Globex Industries"),
    ):
        assert fields.get(placeholder) == expected
    # ...as well as a date and a dollar amount...
    assert any("2026" in value for value in fields.values())
    assert any(value.startswith("$") for value in fields.values())
    # ...the redline holds at least one tracked change per field...
    assert len(changes) >= len(fields)
    # ...and the filled contract shows the new party, not the original one.
    assert "Initech LLC" in filled
    assert "Acme Corporation" not in filled

What to notice

  • Four packages, one pipeline. kaos-pdf reads the contract, kaos-nlp-transformers finds the fields with a local GLiNER model, kaos-content builds the document model, and kaos-office writes the template and the redline. That’s the KAOS thesis — small packages that snap together.
  • Real PDFs need a cleanup pass. Line wrapping splits phrases (“Acme\nCorporation”), so the example collapses whitespace before running NER. A realistic, one-line step you’ll reach for on any extracted PDF.
  • The redline is your audit trail. Comparing the original to the template produces tracked changes, so a human can review exactly which spans were parameterized before trusting the template. Nothing is replaced silently.
  • Context matters for NER. Run extraction over the whole document — in isolation a model may tag a generic phrase (“the receiving party”) as an org; with the full agreement as context it returns just the named parties.
  • Local and private. The only model is a small on-device NER model (downloaded once, then cached). The contract text never leaves the machine — exactly what you want for client documents. Pre-warm the cache with prefetch-models.
  • Fill = reuse. Because the fields are named, filling the template is a plain string substitution — and you can just as easily wire it to a form, a matter record, or a billing system.