API Reference

Integration examples

Complete, copy-paste examples for the most common integration scenarios.

cURL — full pipeline

Submit a job, poll until done, and download the output in one script.

bash
#!/bin/bash
# Full pipeline: upload a PDF, poll the job until it finishes, download the
# Markdown and JSON outputs, then delete the job.
# Requires: curl, jq.

# Abort on the first failed command, unset variable or failed pipeline stage.
set -euo pipefail

API_KEY="rg_your_key_here"
BASE="https://api.ragify.it"
FILE="document.pdf"

# 1. Submit job
# -f makes curl exit non-zero on HTTP errors (4xx/5xx) instead of handing an
# error body to jq.
JOB=$(curl -sf -X POST "$BASE/jobs" \
  -H "X-Api-Key: $API_KEY" \
  -F "file=@$FILE" \
  -F 'options={"format":["markdown","json"]}')

# Quote "$JOB" so the JSON reaches jq unmodified (no word splitting or globbing).
JOB_ID=$(printf '%s' "$JOB" | jq -r '.id')
# jq prints the literal string "null" when the field is missing; without this
# check the loop below would poll /jobs/null forever.
if [ -z "$JOB_ID" ] || [ "$JOB_ID" = "null" ]; then
  echo "Submission failed: $JOB" >&2
  exit 1
fi
echo "Job ID: $JOB_ID"

# 2. Poll until done
while true; do
  STATUS=$(curl -sf "$BASE/jobs/$JOB_ID" \
    -H "X-Api-Key: $API_KEY" | jq -r '.status')
  echo "Status: $STATUS"
  case "$STATUS" in
    "done")   break ;;
    "failed") echo "Job failed!" >&2; exit 1 ;;
  esac
  sleep 2
done

# 3. Download outputs
curl -sf -o output.md "$BASE/jobs/$JOB_ID/download/markdown" \
  -H "X-Api-Key: $API_KEY"
curl -sf -o output.json "$BASE/jobs/$JOB_ID/download/json" \
  -H "X-Api-Key: $API_KEY"

echo "Done! output.md and output.json saved."

# 4. Optional: delete job when finished
curl -sf -X DELETE "$BASE/jobs/$JOB_ID" \
  -H "X-Api-Key: $API_KEY"

Python

Synchronous (requests)

python
import time
import requests

# Placeholder key — replace it with your own; prefer loading it from an
# environment variable over committing it to source control.
API_KEY = "rg_your_key_here"
# API root; the endpoint paths used below (/jobs, /jobs/{id}, ...) are appended to it.
BASE = "https://api.ragify.it"
# Passed as the headers of every request in this example; carries the API key.
HEADERS = {"X-Api-Key": API_KEY}

def parse_pdf(pdf_path: str, formats: list[str] = ["markdown"]) -> dict:
    """Submit a PDF and wait for completion. Returns the job dict.

    Args:
        pdf_path: Path of the PDF file to upload.
        formats: Output formats to request, sent as the ``format`` option.
            The default list is only read, never mutated.

    Returns:
        The job dict from ``GET /jobs/{id}`` once its status is ``"done"``.

    Raises:
        requests.HTTPError: If the submit or a poll request returns an HTTP
            error status.
        requests.Timeout: If the server does not respond within the timeout.
        RuntimeError: If the job ends with status ``"failed"``.
    """
    import json  # local import: this snippet's import block does not include json

    # Serialise with json.dumps instead of patching up str(list): the latter
    # yields invalid JSON as soon as a value contains a quote or a backslash.
    options = json.dumps({"format": list(formats)})

    # Submit
    with open(pdf_path, "rb") as f:
        resp = requests.post(
            f"{BASE}/jobs",
            headers=HEADERS,
            files={"file": f},
            data={"options": options},
            timeout=120,  # requests waits forever unless a timeout is given
        )
    resp.raise_for_status()
    job_id = resp.json()["id"]
    print(f"Job submitted: {job_id}")

    # Poll
    while True:
        poll = requests.get(f"{BASE}/jobs/{job_id}", headers=HEADERS, timeout=30)
        # Surface HTTP errors here instead of failing below with a KeyError
        # on "status".
        poll.raise_for_status()
        job = poll.json()
        print(f"  status: {job['status']}")
        if job["status"] == "done":
            return job
        if job["status"] == "failed":
            raise RuntimeError(f"Job failed: {job.get('error_message')}")
        time.sleep(2)

def download_output(job_id: str, fmt: str, dest_path: str) -> None:
    """Stream one output of a job to a local file.

    Args:
        job_id: ID of the job whose output is downloaded.
        fmt: Output format to fetch; used as the last path segment of
            ``/jobs/{job_id}/download/{fmt}``.
        dest_path: File to write; it is opened in binary mode and overwritten.

    Raises:
        requests.HTTPError: If the download request returns an HTTP error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # With stream=True the connection stays open until the body is consumed or
    # the response is closed; the context manager releases it even when
    # raise_for_status() or the file write raises.
    with requests.get(
        f"{BASE}/jobs/{job_id}/download/{fmt}",
        headers=HEADERS,
        stream=True,
        timeout=30,
    ) as resp:
        resp.raise_for_status()
        with open(dest_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)

# Usage
if __name__ == "__main__":
    # Request both formats up front, then fetch each one after the job finishes.
    job = parse_pdf("report.pdf", formats=["markdown", "json"])
    print(f"Done! {job['page_count']} pages processed.")

    targets = (("markdown", "output.md"), ("json", "output.json"))
    for fmt, dest in targets:
        download_output(job["id"], fmt, dest)
    print("Files saved.")

Async (httpx + asyncio)

python
import asyncio
import httpx

# Placeholder key — replace it with your own; prefer loading it from an
# environment variable over committing it to source control.
API_KEY = "rg_your_key_here"
# API root; the endpoint paths used below are appended to it.
BASE = "https://api.ragify.it"

async def parse_pdf_async(pdf_path: str, formats: list[str] = ["markdown"]) -> dict:
    """Submit a PDF and poll every 2 seconds until the job finishes.

    Args:
        pdf_path: Path of the PDF file to upload.
        formats: Output formats to request, sent as the ``format`` option.
            The default list is only read, never mutated.

    Returns:
        The job dict from ``GET /jobs/{id}`` once its status is ``"done"``.

    Raises:
        httpx.HTTPStatusError: If the submit or a poll request returns an
            HTTP error status.
        RuntimeError: If the job ends with status ``"failed"``.
    """
    import json  # local import: this snippet's import block does not include json

    # Serialise with json.dumps instead of patching up str(list): the latter
    # yields invalid JSON as soon as a value contains a quote or a backslash.
    options = json.dumps({"format": list(formats)})

    # httpx defaults to a 5 s timeout; 120 s matches the batch example and
    # leaves room for larger uploads.
    async with httpx.AsyncClient(
        headers={"X-Api-Key": API_KEY}, timeout=120
    ) as client:
        # Submit
        with open(pdf_path, "rb") as f:
            resp = await client.post(
                f"{BASE}/jobs",
                files={"file": f},
                data={"options": options},
            )
        resp.raise_for_status()
        job_id = resp.json()["id"]

        # Poll
        while True:
            poll = await client.get(f"{BASE}/jobs/{job_id}")
            # Surface HTTP errors here instead of failing below with a
            # KeyError on "status".
            poll.raise_for_status()
            job = poll.json()
            if job["status"] == "done":
                return job
            if job["status"] == "failed":
                # Same message as the synchronous example.
                raise RuntimeError(f"Job failed: {job.get('error_message')}")
            await asyncio.sleep(2)

# Usage
# asyncio.run() starts an event loop, runs the coroutine to completion and
# returns its result — here the finished job dict.
job = asyncio.run(parse_pdf_async("report.pdf", ["markdown", "json"]))

Node.js / TypeScript

typescript
import fs from "fs";
import FormData from "form-data";
import fetch from "node-fetch"; // or native fetch in Node 18+

// Placeholder key — replace it with your own; prefer reading it from an
// environment variable over committing it to source control.
const API_KEY = "rg_your_key_here";
// API root; the endpoint paths used below are appended to it.
const BASE = "https://api.ragify.it";

/**
 * Upload a PDF, then poll the job every 2 seconds until it finishes.
 *
 * @param filePath - Path of the PDF to upload.
 * @param formats - Output formats to request (sent as the `format` option).
 * @returns The job object from `GET /jobs/{id}` once `status` is `"done"`.
 * @throws Error if the upload or a poll request returns a non-2xx response,
 *   or if the job ends with status `"failed"`.
 */
async function parsePDF(filePath: string, formats = ["markdown"]): Promise<any> {
  // 1. Submit
  const form = new FormData();
  form.append("file", fs.createReadStream(filePath));
  form.append("options", JSON.stringify({ format: formats }));

  const submitRes = await fetch(`${BASE}/jobs`, {
    method: "POST",
    // form.getHeaders() supplies the multipart Content-Type with its boundary.
    headers: { "X-Api-Key": API_KEY, ...form.getHeaders() },
    body: form,
  });
  if (!submitRes.ok) throw new Error(await submitRes.text());
  const { id: jobId } = await submitRes.json() as { id: string };
  console.log("Job:", jobId);

  // 2. Poll
  while (true) {
    const res = await fetch(`${BASE}/jobs/${jobId}`, {
      headers: { "X-Api-Key": API_KEY },
    });
    // An error response carries no `status` field; without this check the
    // loop would keep polling forever.
    if (!res.ok) throw new Error(await res.text());
    const job = await res.json() as any;
    console.log("Status:", job.status);
    if (job.status === "done") return job;
    if (job.status === "failed") throw new Error(job.error_message);
    await new Promise((r) => setTimeout(r, 2000));
  }
}

/**
 * Fetch one output of a job and return its raw bytes.
 *
 * @param jobId - ID of the job whose output is downloaded.
 * @param fmt - Output format; used as the last path segment of the URL.
 * @returns The response body as a Buffer.
 * @throws Error carrying the response text when the response is not OK.
 */
async function downloadOutput(jobId: string, fmt: string): Promise<Buffer> {
  const url = `${BASE}/jobs/${jobId}/download/${fmt}`;
  const response = await fetch(url, { headers: { "X-Api-Key": API_KEY } });
  if (response.ok) {
    const bytes = await response.arrayBuffer();
    return Buffer.from(bytes);
  }
  throw new Error(await response.text());
}

// Usage
// NOTE: top-level `await` only works in an ES module; in CommonJS wrap these
// lines in an async function.
const job = await parsePDF("report.pdf", ["markdown", "json"]);
// Only the Markdown output is saved here; call downloadOutput(job.id, "json")
// to fetch the JSON output that was also requested.
const md = await downloadOutput(job.id, "markdown");
fs.writeFileSync("output.md", md);
console.log(`Done! ${job.page_count} pages.`);

Python + LangChain / LlamaIndex

python
"""
Load a PDF via Ragify, then create a LangChain RAG pipeline.
"""
import json, time, requests
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Placeholder key — replace it with your own; prefer loading it from an
# environment variable over committing it to source control.
API_KEY = "rg_your_key_here"
# API root; the endpoint paths used below are appended to it.
BASE = "https://api.ragify.it"

def get_markdown(pdf_path: str) -> str:
    """Convert a PDF to Markdown via Ragify and return the Markdown text.

    Submits the file requesting only the ``markdown`` format, polls the job
    every 2 seconds until it is done, then downloads the Markdown output.

    Args:
        pdf_path: Path of the PDF file to upload.

    Returns:
        The body of the Markdown download as text.

    Raises:
        requests.HTTPError: If any request returns an HTTP error status.
        requests.Timeout: If the server does not respond within the timeout.
        RuntimeError: If the job ends with status ``"failed"``.
    """
    headers = {"X-Api-Key": API_KEY}
    with open(pdf_path, "rb") as f:
        submit = requests.post(
            f"{BASE}/jobs",
            headers=headers,
            files={"file": f},
            data={"options": '{"format":["markdown"]}'},
            timeout=120,
        )
    submit.raise_for_status()
    job_id = submit.json()["id"]

    while True:
        poll = requests.get(f"{BASE}/jobs/{job_id}", headers=headers, timeout=30)
        poll.raise_for_status()
        job = poll.json()
        if job["status"] == "done":
            break
        # Without this branch a failed job would be polled forever.
        if job["status"] == "failed":
            raise RuntimeError(f"Job failed: {job.get('error_message')}")
        time.sleep(2)

    download = requests.get(
        f"{BASE}/jobs/{job_id}/download/markdown", headers=headers, timeout=30
    )
    # Do not hand an error body to the caller as if it were Markdown.
    download.raise_for_status()
    return download.text

# Load and chunk
# Fetch the PDF's Markdown via Ragify, then split it at #, ## and ### headings.
md = get_markdown("knowledge_base.pdf")
splitter = MarkdownHeaderTextSplitter(
    # (heading marker, label) pairs; the label is presumably used as the
    # metadata key for that heading level — see the LangChain docs.
    headers_to_split_on=[("#","H1"),("##","H2"),("###","H3")]
)
chunks = splitter.split_text(md)

# Embed and index
# NOTE(review): OpenAIEmbeddings() is given no key here, so it presumably reads
# OPENAI_API_KEY from the environment — confirm for your setup.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(chunks, embeddings)
# k=4: ask the retriever for 4 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

print(f"Indexed {len(chunks)} chunks from PDF.")

n8n workflow

Use the HTTP Request node in n8n to integrate Ragify into your automation workflows.

Step 1 — Submit a PDF from Google Drive / S3 / URL

json
// n8n HTTP Request node — Submit job
{
  "method": "POST",
  "url": "https://api.ragify.it/jobs",
  "authentication": "genericCredentialType",
  "genericAuthType": "httpHeaderAuth",
  "sendHeaders": true,
  "headerParameters": {
    "parameters": [
      { "name": "X-Api-Key", "value": "={{ $credentials.apiKey }}" }
    ]
  },
  "sendBody": true,
  "contentType": "multipart-form-data",
  "bodyParameters": {
    "parameters": [
      { "name": "file", "value": "={{ $binary.data }}", "parameterType": "formBinaryData" },
      { "name": "options", "value": "{\"format\":[\"markdown\"]}" }
    ]
  }
}

Step 2 — Wait for completion (polling loop)

Add a Wait node (5 seconds) followed by an HTTP Request node that calls GET /jobs/{{$json.id}}, then an IF node checking status === "done". Loop back to the Wait node if the job is not done yet.

Step 3 — Download and use output

json
// n8n HTTP Request — Download Markdown
{
  "method": "GET",
  "url": "=https://api.ragify.it/jobs/{{ $('Submit job').item.json.id }}/download/markdown",
  "headerParameters": {
    "parameters": [
      { "name": "X-Api-Key", "value": "={{ $credentials.apiKey }}" }
    ]
  },
  "responseFormat": "text"
}

Tip

Store your API key as an n8n Credential (Header Auth type, header name X-Api-Key). Never hardcode keys in workflow nodes.

Make (Integromat) and Zapier

Both platforms support custom HTTP actions. Use the following configuration:

URL: https://api.ragify.it/jobs
Method: POST
Header: X-Api-Key: rg_your_key
Body type: multipart/form-data
Field: file — Binary PDF file
Field: options — {"format":["markdown"]}

Batch processing (Python)

Process an entire folder of PDFs concurrently using asyncio.

python
"""
Batch-parse all PDFs in a folder with up to 5 concurrent jobs.
"""
import asyncio, httpx, json
from pathlib import Path

# Placeholder key — replace it with your own; prefer loading it from an
# environment variable over committing it to source control.
API_KEY = "rg_your_key_here"
# API root; the endpoint paths used below are appended to it.
BASE = "https://api.ragify.it"
# Folder scanned for *.pdf files.
INPUT_DIR = Path("./pdfs")
# Folder that receives one .md file per processed PDF; created if missing.
OUTPUT_DIR = Path("./outputs")
OUTPUT_DIR.mkdir(exist_ok=True)

# Caps the number of PDFs that are in flight at the same time.
# NOTE(review): this is created at import time, before asyncio.run() starts the
# loop. That is fine on Python 3.10+; on older versions asyncio primitives bind
# to the event loop at construction — create it inside main() if you need to
# support those versions.
sem = asyncio.Semaphore(5)  # respect Pro concurrent limit

async def process_one(client: httpx.AsyncClient, pdf: Path) -> None:
    """Convert one PDF and save its Markdown output under OUTPUT_DIR.

    Holds a slot of the module-level semaphore for the whole
    submit / poll / download cycle, so at most that many PDFs are in flight.

    Args:
        client: Shared HTTP client; the caller configures the API key header.
        pdf: Path of the PDF file to upload.

    Raises:
        httpx.HTTPStatusError: If any request returns an HTTP error status.

    A job that ends with status ``"failed"`` is reported on stdout and skipped
    rather than raised.
    """
    async with sem:
        # Submit
        with open(pdf, "rb") as f:
            r = await client.post(
                f"{BASE}/jobs",
                files={"file": f},
                data={"options": '{"format":["markdown","json"]}'},
            )
        r.raise_for_status()
        job_id = r.json()["id"]

        # Poll
        while True:
            poll = await client.get(f"{BASE}/jobs/{job_id}")
            # Surface HTTP errors here instead of failing below with a
            # KeyError on "status".
            poll.raise_for_status()
            job = poll.json()
            if job["status"] == "done":
                break
            if job["status"] == "failed":
                # .get(): a missing error_message must not mask the failure
                # report with a KeyError.
                print(f"FAILED: {pdf.name}: {job.get('error_message')}")
                return
            await asyncio.sleep(2)

        # Download Markdown
        md = await client.get(f"{BASE}/jobs/{job_id}/download/markdown")
        # Never write an error body to disk as if it were the converted file.
        md.raise_for_status()
        out = OUTPUT_DIR / (pdf.stem + ".md")
        out.write_bytes(md.content)
        print(f"✓ {pdf.name} → {out.name} ({job['page_count']}p)")

async def main():
    """Process every ``*.pdf`` in INPUT_DIR concurrently through one shared client.

    Errors raised while processing a single PDF are collected and reported
    after the batch, so one bad file does not abort the others.

    Raises:
        SystemExit: With exit code 1 if any PDF raised an error.
    """
    pdfs = list(INPUT_DIR.glob("*.pdf"))
    print(f"Processing {len(pdfs)} PDFs...")
    async with httpx.AsyncClient(
        headers={"X-Api-Key": API_KEY}, timeout=120
    ) as client:
        # return_exceptions=True: without it the first exception leaves the
        # `async with`, closing the shared client under the jobs still running.
        results = await asyncio.gather(
            *(process_one(client, p) for p in pdfs),
            return_exceptions=True,
        )

    # gather() returns results in the order the awaitables were passed in.
    errors = [
        (pdf, result)
        for pdf, result in zip(pdfs, results)
        if isinstance(result, BaseException)
    ]
    for pdf, error in errors:
        print(f"ERROR: {pdf.name}: {error!r}")
    if errors:
        # Keep a non-zero exit status so callers still see the failure.
        raise SystemExit(1)

asyncio.run(main())