Skip to content
Last updated

Extract structured data from a single document by providing a file and a JSON schema.

POST https://api.aitronos.com/v1/documents/extract

Request Body

file file required

Document file to process. Supported formats: PDF, DOCX, XLSX, JPEG, PNG, GIF, BMP, TIFF. Max size: 50 MB.

schema string required

JSON schema, serialized as a string, defining the structure of the data to extract.

organization_id string required

Your organization ID.

prompt string optional

Custom extraction instructions to guide the AI. Example: "Focus on extracting line items from the table".

model string optional · Defaults to ftg-3.0

Model selection: ftg-3.0, gpt-4o, or gpt-4o-mini.

vision_model string optional · Defaults to gpt-5

Vision analysis model for processing images and PDFs. Used for OCR and visual understanding.

sync boolean optional · Defaults to false

Process synchronously (true) or asynchronously (false).

include_raw_text boolean optional · Defaults to false

Include extracted text in response.

Returns

Returns a job object with extraction status and detailed confidence metrics. In synchronous mode, it includes the extracted data matching your schema. In asynchronous mode, it returns a job ID for status polling.

import requests
import json

# Example: extract structured invoice data from a single document in
# synchronous mode and print the result.

API_URL = "https://api.aitronos.com/v1/documents/extract"
TOKEN = "your_bearer_token_here"

headers = {
    "Authorization": f"Bearer {TOKEN}"
}

# Define schema
schema = {
    "properties": {
        "invoice_number": {"type": "string"},
        "date": {"type": "string"},
        "total_amount": {"type": "number"},
        "vendor_name": {"type": "string"}
    },
    "required": ["invoice_number", "total_amount"]
}

# Prepare request
# The schema must be sent as a JSON string, and the form fields travel as
# multipart text, so the boolean `sync` flag is passed as the string "true".
data = {
    "schema": json.dumps(schema),
    "organization_id": "org_abc123",
    "sync": "true",
    "model": "gpt-4o-mini"
}

# Extract data
# The context manager closes the file handle as soon as the upload is done,
# even if the request raises. requests applies no timeout by default, so an
# explicit one keeps a stalled connection from hanging forever; adjust the
# value to how long synchronous extraction is expected to take.
with open("invoice.pdf", "rb") as document:
    files = {
        "file": document
    }
    response = requests.post(
        API_URL,
        headers=headers,
        files=files,
        data=data,
        timeout=120,
    )
result = response.json()

if result.get('success') and result.get('status') == 'completed':
    extracted = result['extracted_data']
    print(f"Invoice: {extracted['invoice_number']}")
    print(f"Total: ${extracted['total_amount']}")
    print(f"Confidence: {result['confidence']:.2%}")
    print(f"Cost: CHF {result['cost_chf']:.4f}")
elif result.get('success'):
    # The job was accepted but has not finished (e.g. status "pending");
    # this is not an error, so report the job ID for later polling.
    print(f"Job {result.get('job_id')} status: {result.get('status')}")
else:
    # Error responses nest the human-readable text under error.message;
    # fall back to a flat error_message field if that is what was returned.
    error = result.get('error')
    if isinstance(error, dict):
        message = error.get('message', 'Unknown error')
    else:
        message = result.get('error_message', 'Unknown error')
    print(f"Error: {message}")

Response Examples

Successful Extraction (Synchronous)

{
  "success": true,
  "job_id": "job_abc123def456",
  "status": "completed",
  "extracted_data": {
    "invoice_number": "INV-2024-001",
    "date": "2024-12-16",
    "total_amount": 1250.00,
    "vendor_name": "Acme Corporation"
  },
  "confidence": 0.95,
  "processing_time": 2.3,
  "cost_chf": 0.015,
  "model_used": "gpt-4o",
  "created_at": "2024-12-16T10:30:00Z",
  "completed_at": "2024-12-16T10:30:02Z"
}

Job Submitted (Asynchronous)

{
  "success": true,
  "job_id": "job_abc123def456",
  "status": "pending",
  "extracted_data": null,
  "confidence": null,
  "processing_time": null,
  "cost_chf": null,
  "model_used": null,
  "created_at": "2024-12-16T10:30:00Z",
  "completed_at": null
}

Error Response

{
  "success": false,
  "error": {
    "code": "INVALID_FILE_TYPE",
    "message": "The uploaded file type is not supported. Please upload a PDF, Word document, Excel file, or image.",
    "system_message": "Unsupported file type: .txt",
    "type": "validation_error",
    "status": 422,
    "details": {
      "file_type": "txt",
      "supported_types": ["pdf", "docx", "xlsx", "jpg", "png", "gif", "bmp", "tiff"]
    },
    "trace_id": "trace_abc123",
    "timestamp": "2024-12-16T10:30:00Z"
  }
}