Extract structured data from a single document by providing a file and a JSON schema.
file file required
Document file to process. Supported formats: PDF, DOCX, XLSX, JPEG, PNG, GIF, BMP, TIFF. Max size: 50 MB.
schema string required
JSON schema, passed as a string, that defines the structure of the data to extract.
organization_id string required
Your organization ID.
prompt string optional
Custom extraction instructions to guide the AI. Example: "Focus on extracting line items from the table".
model string optional · Defaults to ftg-3.0
Model selection: ftg-3.0, gpt-4o, or gpt-4o-mini.
vision_model string optional · Defaults to gpt-5
Vision analysis model for processing images and PDFs. Used for OCR and visual understanding.
sync boolean optional · Defaults to false
Process synchronously (true) or asynchronously (false).
include_raw_text boolean optional · Defaults to false
Include the raw extracted text in the response.
Returns a job object with the extraction status and detailed confidence metrics. In synchronous mode, the response includes the extracted data matching your schema. In asynchronous mode, it returns a job ID to use for status polling.
import requests
import json
API_URL = "https://api.aitronos.com/v1/documents/extract"
TOKEN = "your_bearer_token_here"
headers = {
"Authorization": f"Bearer {TOKEN}"
}
# Define schema
schema = {
"properties": {
"invoice_number": {"type": "string"},
"date": {"type": "string"},
"total_amount": {"type": "number"},
"vendor_name": {"type": "string"}
},
"required": ["invoice_number", "total_amount"]
}
# Prepare request
files = {
"file": open("invoice.pdf", "rb")
}
data = {
"schema": json.dumps(schema),
"organization_id": "org_abc123",
"sync": "true",
"model": "gpt-4o-mini"
}
# Extract data
response = requests.post(API_URL, headers=headers, files=files, data=data)
result = response.json()
if result['success'] and result['status'] == 'completed':
print(f"Invoice: {result['extracted_data']['invoice_number']}")
print(f"Total: ${result['extracted_data']['total_amount']}")
print(f"Confidence: {result['confidence']:.2%}")
print(f"Cost: CHF {result['cost_chf']:.4f}")
else:
print(f"Error: {result.get('error_message', 'Unknown error')}"){
"success": true,
"job_id": "job_abc123def456",
"status": "completed",
"extracted_data": {
"invoice_number": "INV-2024-001",
"date": "2024-12-16",
"total_amount": 1250.00,
"vendor_name": "Acme Corporation"
},
"confidence": 0.95,
"processing_time": 2.3,
"cost_chf": 0.015,
"model_used": "gpt-4o",
"created_at": "2024-12-16T10:30:00Z",
"completed_at": "2024-12-16T10:30:02Z"
}
{
"success": true,
"job_id": "job_abc123def456",
"status": "pending",
"extracted_data": null,
"confidence": null,
"processing_time": null,
"cost_chf": null,
"model_used": null,
"created_at": "2024-12-16T10:30:00Z",
"completed_at": null
}
{
"success": false,
"error": {
"code": "INVALID_FILE_TYPE",
"message": "The uploaded file type is not supported. Please upload a PDF, Word document, Excel file, or image.",
"system_message": "Unsupported file type: .txt",
"type": "validation_error",
"status": 422,
"details": {
"file_type": "txt",
"supported_types": ["pdf", "docx", "xlsx", "jpg", "png", "gif", "bmp", "tiff"]
},
"trace_id": "trace_abc123",
"timestamp": "2024-12-16T10:30:00Z"
}
}