API Reference
Integration examples
Complete, copy-paste examples for the most common integration scenarios.
cURL — full pipeline
Submit a job, poll until done, and download the output in one script.
bash
#!/bin/bash
# Submit a PDF to the Ragify API, poll until the job finishes, then download
# the Markdown and JSON outputs. Requires curl and jq.
API_KEY="rg_your_key_here"
BASE="https://api.ragify.it"
FILE="document.pdf"

# 1. Submit job
# -f makes curl exit non-zero on HTTP errors instead of returning the error body.
JOB=$(curl -sf -X POST "$BASE/jobs" \
  -H "X-Api-Key: $API_KEY" \
  -F "file=@$FILE" \
  -F 'options={"format":["markdown","json"]}') || { echo "Submit failed!" >&2; exit 1; }
# Quote the JSON: an unquoted `echo $JOB` word-splits and glob-expands it.
JOB_ID=$(printf '%s' "$JOB" | jq -r '.id')
# jq prints "null" when the field is missing; stop rather than poll /jobs/null.
if [ -z "$JOB_ID" ] || [ "$JOB_ID" = "null" ]; then
  echo "No job ID in response: $JOB" >&2
  exit 1
fi
echo "Job ID: $JOB_ID"

# 2. Poll until done
while true; do
  STATUS=$(curl -s "$BASE/jobs/$JOB_ID" \
    -H "X-Api-Key: $API_KEY" | jq -r '.status')
  echo "Status: $STATUS"
  [ "$STATUS" = "done" ] && break
  [ "$STATUS" = "failed" ] && echo "Job failed!" && exit 1
  sleep 2
done

# 3. Download outputs
# -f again: without it an HTTP error body would be saved as the output file.
curl -sf -o output.md "$BASE/jobs/$JOB_ID/download/markdown" \
  -H "X-Api-Key: $API_KEY" || { echo "Markdown download failed!" >&2; exit 1; }
curl -sf -o output.json "$BASE/jobs/$JOB_ID/download/json" \
  -H "X-Api-Key: $API_KEY" || { echo "JSON download failed!" >&2; exit 1; }
echo "Done! output.md and output.json saved."
# 4. Optional: delete job when finished
curl -s -X DELETE "$BASE/jobs/$JOB_ID" \
-H "X-Api-Key: $API_KEY"
Python
Synchronous (requests)
python
import time
import requests
API_KEY = "rg_your_key_here"
BASE = "https://api.ragify.it"
HEADERS = {"X-Api-Key": API_KEY}
def parse_pdf(pdf_path: str, formats: "list[str] | None" = None) -> dict:
    """Submit a PDF and wait for completion. Returns the job dict.

    Args:
        pdf_path: Path of the PDF file to upload.
        formats: Output formats to request; defaults to ``["markdown"]``.

    Returns:
        The job dict from the final status poll, once its status is "done".

    Raises:
        requests.HTTPError: If the submit or a poll request returns an
            HTTP error status.
        RuntimeError: If the job ends with status "failed".
    """
    import json  # local import so the snippet stays self-contained

    # Avoid a shared mutable default; None stands for the default format list.
    if formats is None:
        formats = ["markdown"]

    # Submit
    with open(pdf_path, "rb") as f:
        resp = requests.post(
            f"{BASE}/jobs",
            headers=HEADERS,
            files={"file": f},
            # json.dumps handles quoting/escaping; str(list) is not JSON.
            data={"options": json.dumps({"format": list(formats)})},
        )
    resp.raise_for_status()
    job = resp.json()
    job_id = job["id"]
    print(f"Job submitted: {job_id}")

    # Poll
    while True:
        poll = requests.get(f"{BASE}/jobs/{job_id}", headers=HEADERS)
        # Surface HTTP errors here instead of a KeyError on the error body.
        poll.raise_for_status()
        job = poll.json()
        print(f" status: {job['status']}")
        if job["status"] == "done":
            return job
        if job["status"] == "failed":
            raise RuntimeError(f"Job failed: {job.get('error_message')}")
        time.sleep(2)
def download_output(job_id: str, fmt: str, dest_path: str) -> None:
    """Stream one output of a job to a local file.

    Args:
        job_id: ID of the job whose output is downloaded.
        fmt: Output format segment of the download URL (e.g. "markdown").
        dest_path: Path of the file to write; opened in binary write mode.

    Raises:
        requests.HTTPError: If the download request returns an HTTP error status.
    """
    url = f"{BASE}/jobs/{job_id}/download/{fmt}"
    response = requests.get(url, headers=HEADERS, stream=True)
    response.raise_for_status()
    with open(dest_path, "wb") as out_file:
        # stream=True plus 8 KiB chunks: the body is written piece by piece.
        out_file.writelines(response.iter_content(chunk_size=8192))
# Usage
if __name__ == "__main__":
job = parse_pdf("report.pdf", formats=["markdown", "json"])
print(f"Done! {job['page_count']} pages processed.")
download_output(job["id"], "markdown", "output.md")
download_output(job["id"], "json", "output.json")
print("Files saved.")
Async (httpx + asyncio)
python
import asyncio
import httpx
API_KEY = "rg_your_key_here"
BASE = "https://api.ragify.it"
async def parse_pdf_async(pdf_path: str, formats: "list[str] | None" = None) -> dict:
    """Submit a PDF and asynchronously wait for completion.

    Args:
        pdf_path: Path of the PDF file to upload.
        formats: Output formats to request; defaults to ``["markdown"]``.

    Returns:
        The job dict from the final status poll, once its status is "done".

    Raises:
        httpx.HTTPStatusError: If the submit or a poll request returns an
            HTTP error status.
        RuntimeError: If the job ends with status "failed".
    """
    import json  # local import so the snippet stays self-contained

    # Avoid a shared mutable default; None stands for the default format list.
    if formats is None:
        formats = ["markdown"]

    async with httpx.AsyncClient(headers={"X-Api-Key": API_KEY}) as client:
        # Submit
        with open(pdf_path, "rb") as f:
            resp = await client.post(
                f"{BASE}/jobs",
                files={"file": f},
                # json.dumps handles quoting/escaping; str(list) is not JSON.
                data={"options": json.dumps({"format": list(formats)})},
            )
        resp.raise_for_status()
        job_id = resp.json()["id"]

        # Poll
        while True:
            poll = await client.get(f"{BASE}/jobs/{job_id}")
            # Surface HTTP errors here instead of a KeyError on the error body.
            poll.raise_for_status()
            job = poll.json()
            if job["status"] == "done":
                return job
            if job["status"] == "failed":
                raise RuntimeError(job.get("error_message"))
            await asyncio.sleep(2)
# Usage
job = asyncio.run(parse_pdf_async("report.pdf", ["markdown", "json"]))
Node.js / TypeScript
typescript
import fs from "fs";
import FormData from "form-data";
import fetch from "node-fetch"; // or native fetch in Node 18+
const API_KEY = "rg_your_key_here";
const BASE = "https://api.ragify.it";
/**
 * Submit a PDF and poll until the job finishes.
 *
 * @param filePath Path of the PDF file to upload.
 * @param formats  Output formats to request (sent as `options.format`).
 * @returns The job object from the final status poll (status "done").
 * @throws Error if the submit or a poll request is not OK, or if the job
 *         ends with status "failed".
 */
async function parsePDF(filePath: string, formats = ["markdown"]): Promise<any> {
  // 1. Submit
  const form = new FormData();
  form.append("file", fs.createReadStream(filePath));
  form.append("options", JSON.stringify({ format: formats }));
  const submitRes = await fetch(`${BASE}/jobs`, {
    method: "POST",
    // form.getHeaders() supplies the multipart Content-Type with its boundary.
    headers: { "X-Api-Key": API_KEY, ...form.getHeaders() },
    body: form,
  });
  if (!submitRes.ok) throw new Error(await submitRes.text());
  const { id: jobId } = await submitRes.json() as { id: string };
  console.log("Job:", jobId);

  // 2. Poll
  while (true) {
    const res = await fetch(`${BASE}/jobs/${jobId}`, {
      headers: { "X-Api-Key": API_KEY },
    });
    // Without this check an error body has no `status`, so the loop never ends.
    if (!res.ok) throw new Error(await res.text());
    const job = await res.json() as any;
    console.log("Status:", job.status);
    if (job.status === "done") return job;
    if (job.status === "failed") throw new Error(job.error_message);
    await new Promise((r) => setTimeout(r, 2000));
  }
}
/**
 * Download one output of a job into memory.
 *
 * @param jobId ID of the job whose output is fetched.
 * @param fmt   Output format segment of the download URL (e.g. "markdown").
 * @returns The response body as a Buffer.
 * @throws Error carrying the response text when the response is not OK.
 */
async function downloadOutput(jobId: string, fmt: string): Promise<Buffer> {
  const url = `${BASE}/jobs/${jobId}/download/${fmt}`;
  const response = await fetch(url, { headers: { "X-Api-Key": API_KEY } });
  if (!response.ok) {
    const errorBody = await response.text();
    throw new Error(errorBody);
  }
  const bytes = await response.arrayBuffer();
  return Buffer.from(bytes);
}
// Usage
const job = await parsePDF("report.pdf", ["markdown", "json"]);
const md = await downloadOutput(job.id, "markdown");
fs.writeFileSync("output.md", md);
console.log(`Done! ${job.page_count} pages.`);
Python + LangChain / LlamaIndex
python
"""
Load a PDF via Ragify, then create a LangChain RAG pipeline.
"""
import json, time, requests
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
API_KEY = "rg_your_key_here"
BASE = "https://api.ragify.it"
def get_markdown(pdf_path: str) -> str:
    """Parse a PDF via Ragify and return its Markdown output as text.

    Args:
        pdf_path: Path of the PDF file to upload.

    Returns:
        The text body of the job's Markdown download.

    Raises:
        requests.HTTPError: If any request returns an HTTP error status.
        RuntimeError: If the job ends with status "failed".
    """
    headers = {"X-Api-Key": API_KEY}

    # Submit
    with open(pdf_path, "rb") as f:
        submit = requests.post(f"{BASE}/jobs", headers=headers,
                               files={"file": f},
                               data={"options": '{"format":["markdown"]}'})
    submit.raise_for_status()
    job_id = submit.json()["id"]

    # Poll
    while True:
        poll = requests.get(f"{BASE}/jobs/{job_id}", headers=headers)
        poll.raise_for_status()
        job = poll.json()
        if job["status"] == "done":
            break
        # Without this branch a failed job would be polled forever.
        if job["status"] == "failed":
            raise RuntimeError(f"Job failed: {job.get('error_message')}")
        time.sleep(2)

    # Download; check the status so an error body is never returned as Markdown.
    download = requests.get(f"{BASE}/jobs/{job_id}/download/markdown",
                            headers=headers)
    download.raise_for_status()
    return download.text
# Load and chunk
md = get_markdown("knowledge_base.pdf")
splitter = MarkdownHeaderTextSplitter(
headers_to_split_on=[("#","H1"),("##","H2"),("###","H3")]
)
chunks = splitter.split_text(md)
# Embed and index
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
print(f"Indexed {len(chunks)} chunks from PDF.")
n8n workflow
Use the HTTP Request node in n8n to integrate Ragify into your automation workflows.
Step 1 — Submit a PDF from Google Drive / S3 / URL
json
// n8n HTTP Request node — Submit job
{
"method": "POST",
"url": "https://api.ragify.it/jobs",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendHeaders": true,
"headerParameters": {
"parameters": [
{ "name": "X-Api-Key", "value": "={{ $credentials.apiKey }}" }
]
},
"sendBody": true,
"contentType": "multipart-form-data",
"bodyParameters": {
"parameters": [
{ "name": "file", "value": "={{ $binary.data }}", "parameterType": "formBinaryData" },
{ "name": "options", "value": "{\"format\":[\"markdown\"]}" }
]
}
}
Step 2 — Wait for completion (polling loop)
Add a Wait node (5 seconds) followed by an HTTP Request node that calls GET /jobs/{{$json.id}}, then an IF node checking status === "done". Loop back to the Wait node if the job is not done.
Step 3 — Download and use output
json
// n8n HTTP Request — Download Markdown
{
"method": "GET",
"url": "=https://api.ragify.it/jobs/{{ $('Submit job').item.json.id }}/download/markdown",
"headerParameters": {
"parameters": [
{ "name": "X-Api-Key", "value": "={{ $credentials.apiKey }}" }
]
},
"responseFormat": "text"
}
✦ Tip
Store your API key as an n8n Credential (Header Auth type, header name X-Api-Key). Never hardcode keys in workflow nodes.
Make (Integromat) and Zapier
Both platforms support custom HTTP actions. Use the following configuration:
URL
https://api.ragify.it/jobs
Method
POST
Header
X-Api-Key: rg_your_key
Body type
multipart/form-data
Field: file
Binary PDF file
Field: options
{"format":["markdown"]}
Batch processing (Python)
Process an entire folder of PDFs concurrently using asyncio.
python
"""
Batch-parse all PDFs in a folder with up to 5 concurrent jobs.
"""
import asyncio, httpx, json
from pathlib import Path
API_KEY = "rg_your_key_here"
BASE = "https://api.ragify.it"
INPUT_DIR = Path("./pdfs")
OUTPUT_DIR = Path("./outputs")
OUTPUT_DIR.mkdir(exist_ok=True)
sem = asyncio.Semaphore(5) # respect Pro concurrent limit
async def process_one(client: httpx.AsyncClient, pdf: Path) -> None:
    """Parse one PDF and save its Markdown output under OUTPUT_DIR.

    The module-level semaphore is held for the whole submit/poll/download
    cycle, so it bounds how many jobs are in flight at once.

    Args:
        client: Shared HTTP client used for all requests.
        pdf: Path of the PDF file to upload.

    Raises:
        httpx.HTTPStatusError: If the submit, a poll, or the download request
            returns an HTTP error status.
    """
    async with sem:
        # Submit
        with open(pdf, "rb") as f:
            r = await client.post(
                f"{BASE}/jobs",
                files={"file": f},
                data={"options": '{"format":["markdown","json"]}'},
            )
        r.raise_for_status()
        job_id = r.json()["id"]

        # Poll
        while True:
            poll = await client.get(f"{BASE}/jobs/{job_id}")
            # Surface HTTP errors here instead of a KeyError on the error body.
            poll.raise_for_status()
            job = poll.json()
            if job["status"] == "done":
                break
            if job["status"] == "failed":
                print(f"FAILED: {pdf.name}: {job['error_message']}")
                return
            await asyncio.sleep(2)

        # Download Markdown
        md = await client.get(f"{BASE}/jobs/{job_id}/download/markdown")
        # Check the status so an error body is never written as the .md file.
        md.raise_for_status()
        out = OUTPUT_DIR / (pdf.stem + ".md")
        out.write_bytes(md.content)
        print(f"✓ {pdf.name} → {out.name} ({job['page_count']}p)")
async def main():
    """Parse every ``*.pdf`` file in INPUT_DIR concurrently via process_one."""
    pdf_files = list(INPUT_DIR.glob("*.pdf"))
    print(f"Processing {len(pdf_files)} PDFs...")
    auth_headers = {"X-Api-Key": API_KEY}
    # One shared client for all tasks, with a 120-second timeout.
    async with httpx.AsyncClient(headers=auth_headers, timeout=120) as client:
        tasks = [process_one(client, pdf_file) for pdf_file in pdf_files]
        await asyncio.gather(*tasks)
asyncio.run(main())