Hi VP,
Attached is tested code using the custom-function approach, as discussed.
import os, time, json, base64, datetime
from typing import Optional, Dict, Any, Set, List
import requests
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.agents.models import FunctionTool
# =========================
# 1) Custom OCR function
# =========================
def fetch_ocr(
    pdf_url: Optional[str] = None,
    pdf_path: Optional[str] = None,
    include_image_base64: bool = True,
) -> str:
    """
    Perform OCR using the Mistral Document AI provider on Azure AI Services.

    Provide exactly one of:
      - pdf_url: an http(s) URL, or a full data:application/pdf;base64,<...> URL
      - pdf_path: a local filesystem path to a PDF

    If an http(s) URL is given, the file is downloaded and encoded as base64.
    If a data: URL is given, it's passed through unchanged.
    If a local path is given, the file is read and encoded as base64.

    :param pdf_url: URL to the PDF. Supports http(s) and data: URLs.
    :param pdf_path: Local path to a PDF file (mutually exclusive with pdf_url).
    :param include_image_base64: Whether to include image data in the response.
    :return: JSON string of OCR results (or an error payload).
    """
    # Validate inputs: exactly one source must be supplied.
    if not pdf_url and not pdf_path:
        return json.dumps({"error": "Either pdf_url or pdf_path must be provided."})
    if pdf_url and pdf_path:
        return json.dumps({"error": "Provide only one of pdf_url or pdf_path, not both."})

    # Resolve endpoint and auth from the environment, falling back to the
    # original placeholders so existing behavior is unchanged. Reading from
    # env keeps the secret out of source control and makes the missing-config
    # check below reachable (it was dead code against hard-coded literals).
    api_key = os.environ.get("AZURE_API_KEY", "<mistradockey>")
    ocr_endpoint = os.environ.get(
        "OCR_ENDPOINT",
        "https://<ocrendpoint>.services.ai.azure.com/providers/mistral/azure/ocr",
    )
    model_name = os.environ.get("MODEL_DEPLOYMENT_NAME", "mistral-document-ai-2505")
    if not api_key or not ocr_endpoint:
        return json.dumps({
            "error": "Missing required environment variables or configuration.",
            "missing": {
                "AZURE_API_KEY_present": bool(api_key),
                "OCR_ENDPOINT_present": bool(ocr_endpoint),
            },
        })

    # Build document_url in data: URL form when needed.
    document_url: str
    try:
        if pdf_url:
            if pdf_url.startswith("data:application/pdf;base64,"):
                # Already in data URL form; pass through unchanged.
                document_url = pdf_url
            elif pdf_url.startswith(("http://", "https://")):
                # Download and convert to base64.
                resp = requests.get(pdf_url, timeout=30)
                resp.raise_for_status()
                b64 = base64.b64encode(resp.content).decode("utf-8")
                document_url = f"data:application/pdf;base64,{b64}"
            else:
                return json.dumps({"error": "Unsupported pdf_url scheme. Use http/https or data: URL."})
        else:
            # Local file path -> read and convert.
            if not os.path.isfile(pdf_path):
                return json.dumps({"error": f"File not found: {pdf_path}"})
            with open(pdf_path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode("utf-8")
            document_url = f"data:application/pdf;base64,{b64}"
    except requests.exceptions.RequestException as rexc:
        return json.dumps({"error": "Failed to download PDF from URL.", "details": str(rexc)})
    except Exception as exc:
        return json.dumps({"error": "Failed to read/encode PDF.", "details": str(exc)})

    # Build payload (matches the provider's curl reference).
    payload = {
        "model": model_name,
        "document": {
            "type": "document_url",
            "document_url": document_url,
        },
        "include_image_base64": bool(include_image_base64),
    }
    headers = {
        "Content-Type": "application/json",
        # Provider expects Bearer auth (per the curl reference).
        "Authorization": f"Bearer {api_key}",
    }
    try:
        r = requests.post(ocr_endpoint, headers=headers, json=payload, timeout=60)
        # Return a structured error payload if not 2xx.
        if not (200 <= r.status_code < 300):
            return json.dumps({
                "error": "OCR request failed.",
                "status_code": r.status_code,
                "response_text": r.text,
            })
        # Return the OCR JSON response as a string for the agent.
        return r.text
    except requests.exceptions.RequestException as rexc:
        return json.dumps({"error": "Network error calling OCR endpoint.", "details": str(rexc)})
    except Exception as exc:
        return json.dumps({"error": "Unexpected error calling OCR endpoint.", "details": str(exc)})
# =========================
# 2) Register function tool
# =========================
# Callables exposed to the agent as tool implementations.
user_functions: Set[Any] = {fetch_ocr}
# Retrieve project endpoint & agent model.
# You can copy the project endpoint from the project playground.
# NOTE(review): endpoint and model are hard-coded placeholders — consider
# reading these from environment variables before shipping.
project_endpoint = "https://<foundryhubname>.services.ai.azure.com/api/projects/mistraldoctest"
model_name = "gpt-4o"
# Initialize AIProjectClient.
# DefaultAzureCredential resolves whatever Azure identity is available
# (environment variables, managed identity, az login, ...).
project_client = AIProjectClient(
endpoint=project_endpoint,
credential=DefaultAzureCredential()
)
# Initialize FunctionTool with our OCR function so its definition can be
# attached to the agent below.
functions = FunctionTool(functions=user_functions)
# Keep ALL operations inside the client context so the transport stays open.
with project_client:
    # Create an agent that can call the OCR tool.
    agent = project_client.agents.create_agent(
        model=model_name,
        name="my-agent",
        instructions="You are a helpful agent. Use the fetch_ocr tool to perform OCR on PDFs.",
        tools=functions.definitions,
    )
    print(f"Created agent, ID: {agent.id}")

    # Create a thread for communication.
    thread = project_client.agents.threads.create()
    print(f"Created thread, ID: {thread.id}")

    # Example prompt that causes the agent to call the tool.
    # You can swap pdf_url for a local pdf_path by changing the arguments below.
    # BUG FIX: the original content string was truncated/unterminated (a syntax
    # error); the tool-arguments JSON is now complete.
    message = project_client.agents.messages.create(
        thread_id=thread.id,
        role="user",
        content=(
            "Please extract text via OCR from the following PDF URL and return the JSON.\n"
            "Tool arguments:\n"
            '{"pdf_url":"https://file-examples.com/storage/fe4385b7a969296aa97cf48/2017/10/'
            'file-example_PDF_1MB.pdf","include_image_base64":false}'
        ),
    )
    print(f"Created message, ID: {message['id']}")

    # Create a run for the agent to handle the message.
    run = project_client.agents.runs.create(thread_id=thread.id, agent_id=agent.id)
    print(f"Created run, ID: {run.id}")

    # Poll the run status until it completes, servicing tool calls on the way.
    while run.status in ["queued", "in_progress", "requires_action"]:
        time.sleep(1)
        run = project_client.agents.runs.get(thread_id=thread.id, run_id=run.id)
        if run.status == "requires_action":
            # The agent has requested tool calls; execute them locally.
            tool_calls = run.required_action.submit_tool_outputs.tool_calls
            tool_outputs: List[Dict[str, Any]] = []
            for tool_call in tool_calls:
                # Parse the arguments JSON the agent provided (best-effort:
                # fall back to an empty dict on malformed/missing arguments).
                args = {}
                try:
                    if getattr(tool_call.function, "arguments", None):
                        args = json.loads(tool_call.function.arguments)
                except Exception:
                    args = {}
                if tool_call.function.name == "fetch_ocr":
                    # Accept either pdf_url or pdf_path; default include_image_base64=True.
                    output = fetch_ocr(
                        pdf_url=args.get("pdf_url"),
                        pdf_path=args.get("pdf_path"),
                        include_image_base64=args.get("include_image_base64", True),
                    )
                    tool_outputs.append({"tool_call_id": tool_call.id, "output": output})
            project_client.agents.runs.submit_tool_outputs(
                thread_id=thread.id, run_id=run.id, tool_outputs=tool_outputs
            )
    print(f"Run completed with status: {run.status}")

    # Fetch and log all messages from the thread.
    # (Loop variable renamed from `message` to avoid shadowing the user
    # message created above.)
    messages = project_client.agents.messages.list(thread_id=thread.id)
    for msg in messages:
        print(f"Role: {msg['role']}, Content: {msg['content']}")
Sample output:
Created agent, ID: asst_7SCRF4JnOE4OSd7ntYdfA6R6
Created thread, ID: thread_2nuEGeqTmn8QWRyyHj7JJSp2
Created message, ID: msg_Db2x4RrzWy0JBWDQ1CZmpcKu
Created run, ID: run_DZgeWPqtqiPpX3VQlLNl3mxJ
Run completed with status: completed
Role: assistant, Content: [{'type': 'text', 'text': {'value': 'The OCR output extracted from the provided PDF URL is structured as JSON, containing the textual content and relevant properties from each page. Here is the resulting JSON:\n\n```json\n{\n "pages": [\n {\n "index": 0,\n "images": [\n {\n "id": "img-0.jpeg",\n "top_left_x": 418,\n "top_left_y": 1609,\n "bottom_right_x": 1297,\n "bottom_right_y": 2058,\n "image_base64": null,\n "image_annotation": null\n }\n ],\n "markdown": "# Lorem ipsum \n\n## Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc ac faucibus odio.\n\nVestibulum neque massa, scelerisque sit amet ligula eu, congue molestie mi. Praesent ut varius sem. Nullam at porttitor arcu, nec lacinia nisi. Ut ac dolor vitae odio interdum condimentum. Vivamus dapibus sodales ex, vitae malesuada ipsum cursus convallis. Maecenas sed egestas nulla, ac condimentum orci. Mauris diam felis, vulputate ac suscipit et, iaculis non est. Curabitur semper arcu ac ligula semper, nec luctus nisl blandit. Integer lacinia ante ac libero lobortis imperdiet. Nullam mollis convallis ipsum, ac accumsan nunc vehicula vitae. Nulla eget justo in felis tristique fringilla. Morbi sit amet tortor quis risus auctor condimentum. Morbi in ullamcorper elit. Nulla iaculis tellus sit amet mauris tempus fringilla.\n\nMaecenas mauris lectus, lobortis et purus mattis, blandit dictum tellus.\n\n- Maecenas non lorem quis tellus placerat varius.\n- Nulla facilisi.\n- Aenean congue fringilla justo ut aliquam.\n- Mauris id ex erat. Nunc vulputate neque vitae justo facilisis, non condimentum ante sagittis.\n- Morbi viverra semper lorem nec molestie.\n- Maecenas tincidunt est efficitur ligula euismod, sit amet ornare est vulputate.\n",\n "dimensions": {\n "dpi": 200,\n "height": 2339,\n "width": 1653\n }\n },\n {\n "index": 1,\n "images": [],\n "markdown": "In non mauris justo. Duis vehicula mi vel mi pretium, a viverra erat efficitur. 
Cras aliquam est ac eros varius, id iaculis dui auctor. Duis pretium neque ligula, et pulvinar mi placerat et. Nulla nec nunc sit amet nunc posuere vestibulum. Ut id neque eget tortor mattis tristique. Donec ante est, blandit sit amet tristique vel, lacinia pulvinar arcu. Pellentesque scelerisque fermentum erat, id posuere justo pulvinar ut. Cras id eros sed enim aliquam lobortis. Sed lobortis nisl ut eros efficitur tincidunt. Cras justo mi, porttitor quis mattis vel, ultricies ut purus. Ut facilisis et lacus eu cursus.\n\nIn eleifend velit vitae libero sollicitudin euismod. Fusce vitae vestibulum velit. Pellentesque vulputate lectus quis pellentesque commodo. Aliquam erat volutpat. Vestibulum in egestas velit. Pellentesque fermentum nisl vitae fringilla venenatis. Etiam id mauris vitae orci maximus ultricies.\n\n# Cras fringilla ipsum magna, in fringilla dui commodo \n\na.\n\n| | Lorem ipsum | Lorem ipsum | Lorem ipsum |\n| :-- | :-- | :-- | :-- |\n| 1 | In eleifend velit vitae libero sollicitudin euismod. | Lorem | |\n| 2 | Cras fringilla ipsum magna, in fringilla dui commodo <br> a. | Ipsum | |\n| 3 | Aliquam erat volutpat. | Lorem | |\n| 4 | Fusce vitae vestibulum velit. | Lorem | |\n| 5 | Etiam vehicula luctus fermentum. | Ipsum | |\n\nEtiam vehicula luctus fermentum. In vel metus congue, pulvinar lectus vel, fermentum dui. Maecenas ante orci, egestas ut aliquet sit amet, sagittis a magna. Aliquam ante quam, pellentesque ut dignissim quis, laoreet eget est. Aliquam erat volutpat. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Ut ullamcorper justo sapien, in cursus libero viverra eget. Vivamus auctor imperdiet urna, at pulvinar leo posuere laoreet. Suspendisse neque nisl, fringilla at iaculis scelerisque, ornare vel dolor. Ut et pulvinar nunc. Pellentesque fringilla mollis efficitur. Nullam venenatis commodo imperdiet. Morbi velit neque, semper quis lorem quis, efficitur dignissim ipsum. 
Ut ac lorem sed turpis imperdiet eleifend sit amet id sapien.",\n "dimensions": {\n "dpi": 200,\n "height": 2339,\n "width": 1653\n }\n },\n ...\n ],\n "model": "mistral-document-ai-2505",\n "document_annotation": null,\n "usage_info": {\n "pages_processed": 30,\n "doc_size_bytes": 1042157,\n "pages_processed_annotation": 0\n },\n "content_filter_results": null\n}\n```\n\nThis is only a snippet of the entire JSON output. Let me know if you\'d like to view the complete content or need specific parts!', 'annotations': []}}]
Role: user, Content: [{'type': 'text', 'text': {'value': 'Please extract text via OCR from the following PDF URL and return the JSON.\nTool arguments:\n{"pdf_url":"https://file-examples.com/storage/fe4385b7a969296aa97cf48/2017/10/file-example_PDF_1MB.pdf","include_image_base64":false}', 'annotations': []}}]
I hope this addresses your requirement as a workaround.
Thank you.