Document processing is an asynchronous process. Therefore, we need a way to track the status of the processing.
There are 2 ways to track the status of the processing:
WebSockets (recommended)
Polling (alternative)
WebSockets
WebSockets are a more efficient way to track the status of the processing.
A WebSocket is a long-lived connection that allows you to receive updates as they happen, without sending repeated requests.
import asyncio
from typing import Optional

import websockets
from pydantic import BaseModel, ValidationError

BASE_URL = 'ws://dev-api.v2.areal.ai/'

document_id = "your_document_id_here"

# NOTE(review): `Status` is not defined on this page. A plain string alias is
# used here (the C# and Java examples also read the status as a string) --
# confirm against the API schema.
Status = str


class User(BaseModel):
    id: str


# The models are defined before the listener is started so that they exist
# when the first message is validated.
class FileStatusUpdatedMessage(BaseModel):
    """Notification sent when the status of an existing document changes."""

    class Meta(BaseModel):
        upload_session_id: str
        id: str
        user: User

    class Data(BaseModel):
        status: Status

    type: str = 'FILE_STATUS_UPDATED'
    meta: Meta
    data: Data


async def get_status(client: Optional[websockets.WebSocketClientProtocol], document_id: str) -> None:
    """Listen on the status WebSocket and print every status update of one document.

    Args:
        client: Not used; the function opens its own connection. The parameter
            is kept so the signature stays the same; pass ``None``.
        document_id: Id of the document whose updates should be printed.

    The coroutine does not return on its own; it keeps listening until the
    connection is closed or the task is cancelled.
    """
    # BASE_URL already ends with "/", so the path is appended without another one.
    async with websockets.connect(f"{BASE_URL}status/") as ws:
        while True:
            message = await ws.recv()
            try:
                msg = FileStatusUpdatedMessage.model_validate_json(message)
            except ValidationError:
                # a notification with a different shape (e.g. FILE_CREATED has no meta.id)
                continue
            # other notification types can share the same shape (e.g. DOC_PUSH_FINISHED)
            if msg.type != 'FILE_STATUS_UPDATED':
                continue
            # skip if not the document we are interested in
            if msg.meta.id != document_id:
                continue
            print(f"Processing {msg.meta.id} is {msg.data.status}")
            # NOTE: no need to break: when this runs as a background task
            # it can keep listening for other documents


asyncio.run(get_status(None, document_id))
using System;
using System.IO;
using System.Net.WebSockets;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;

// Listens on the status WebSocket and prints every status update of one document.
var baseUrl = "ws://dev-api.v2.areal.ai/";
var documentId = "your_document_id_here";

using var client = new ClientWebSocket();
await client.ConnectAsync(new Uri(baseUrl + "status/"), CancellationToken.None);

var buffer = new byte[1024 * 4];
// One message can arrive in several frames (anything larger than the buffer),
// so the bytes are collected here until EndOfMessage is set.
using var messageBytes = new MemoryStream();

while (client.State == WebSocketState.Open)
{
    var result = await client.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);

    if (result.MessageType == WebSocketMessageType.Close)
    {
        await client.CloseAsync(WebSocketCloseStatus.NormalClosure, "", CancellationToken.None);
        break;
    }

    if (result.MessageType != WebSocketMessageType.Text)
    {
        continue;
    }

    messageBytes.Write(buffer, 0, result.Count);
    if (!result.EndOfMessage)
    {
        continue;
    }

    // Decode only the complete message so multi-byte characters split across frames stay intact.
    var message = Encoding.UTF8.GetString(messageBytes.GetBuffer(), 0, (int)messageBytes.Length);
    messageBytes.SetLength(0);

    using var jsonDoc = JsonDocument.Parse(message);
    var root = jsonDoc.RootElement;

    // skip other notification types (they do not all carry meta.id / data.status)
    if (!root.TryGetProperty("type", out var typeElement) || typeElement.GetString() != "FILE_STATUS_UPDATED")
    {
        continue;
    }

    var meta = root.GetProperty("meta");
    var id = meta.GetProperty("id").GetString();

    // skip if not the document we are interested in
    if (id != documentId)
    {
        continue;
    }

    var status = root.GetProperty("data").GetProperty("status").GetString();
    Console.WriteLine($"Processing {id} is {status}");
    // NOTE: no need to break: when this runs as a background task
    // it can keep listening for other documents
}
importjava.net.URI;importjava.net.http.HttpClient;importjava.net.http.WebSocket;importjava.util.concurrent.CompletionStage;importorg.json.JSONObject;publicclassWebSocketStatusTracking{publicstaticvoidmain(String[]args)throwsException{StringbaseUrl="ws://dev-api.v2.areal.ai/";StringdocumentId="your_document_id_here";HttpClientclient=HttpClient.newHttpClient();WebSocketwebSocket=client.newWebSocketBuilder().buildAsync(URI.create(baseUrl+"status/"),newWebSocket.Listener(){@OverridepublicvoidonOpen(WebSocketwebSocket){System.out.println("WebSocket connection opened");webSocket.request(1);returnWebSocket.Listener.super.onOpen(webSocket);}@OverridepublicCompletionStage<?>onText(WebSocketwebSocket,CharSequencedata,booleanlast){try{JSONObjectmessage=newJSONObject(data.toString());JSONObjectmeta=message.getJSONObject("meta");// skip if not the document we are interested inif(!meta.getString("id").equals(documentId)){webSocket.request(1);returnnull;}JSONObjectmessageData=message.getJSONObject("data");Stringstatus=messageData.getString("status");System.out.println("Processing "+meta.getString("id")+" is "+status);// NOTE: no need to break since if this is background task// and so you can keep listening for other documents}catch(Exceptione){e.printStackTrace();}webSocket.request(1);returnnull;}@OverridepublicvoidonError(WebSocketwebSocket,Throwableerror){System.err.println("WebSocket error: "+error.getMessage());}}).join();// Keep the connection aliveThread.sleep(Long.MAX_VALUE);}}
For other WebSocket notification types, see WebSocket API.
Polling
Continuously poll the status of the processing until it is completed or failed.
You can also implement a custom timeout to avoid waiting forever.
import requests
import time

BASE_URL = 'http://dev-api.v2.areal.ai/api/v2'

# 0. Login - details in Authentication section ... client is authenticated
# 1. Starting the processing ... we get a request_id (which is actually the document_id) -- details in Processing section
# 2. Polling the status of the processing
document_id = "your_document_id_here"


def get_status(client: requests.Session, document_id: str) -> str:
    """Fetch the current processing status of a document.

    Args:
        client: Authenticated session used to send the request.
        document_id: Id of the document to look up.

    Returns:
        The value of the ``status`` field of the document details, or
        ``"unknown"`` when the request fails or the response cannot be read.
    """
    try:
        # Per-request timeout so a hung connection cannot stall the polling loop below.
        response = client.get(f"{BASE_URL}/documents/{document_id}/", timeout=10)
        # Treat HTTP error responses as failures instead of trying to parse them.
        response.raise_for_status()
        document_details = response.json()
        return document_details["status"]
    except Exception as e:
        print(f"Error getting status: {e}")
        return "unknown"


status = get_status(client, document_id)
# NOTE: You can also inspect other fields in document_details as needed

# Number of polling attempts left (one attempt per second).
timeout = 10
while status not in ["completed", "failed"]:
    time.sleep(1)
    timeout -= 1
    if timeout <= 0:
        print("Timeout reached")
        break
    status = get_status(client, document_id)
using System;
using System.Net.Http;
using System.Threading.Tasks;
using System.Text.Json;
using System.Net;
using System.Threading;

var baseUrl = "http://dev-api.v2.areal.ai/api/v2";

// 0. Login - details in Authentication section ... client is authenticated
// 1. Starting the processing ... we get a request_id (which is actually the document_id) -- details in Processing section
// 2. Polling the status of the processing
var documentId = "your_document_id_here";

var handler = new HttpClientHandler
{
    UseCookies = true,
    CookieContainer = new CookieContainer()
};
// Disposed when the program ends; disposing the client also disposes the handler.
using var client = new HttpClient(handler);

string status = await GetStatus(client, baseUrl, documentId);
// NOTE: You can also inspect other fields in document_details as needed

// Number of polling attempts left (one attempt per second).
int timeout = 10;
while (status != "completed" && status != "failed")
{
    await Task.Delay(1000);
    timeout--;
    if (timeout <= 0)
    {
        Console.WriteLine("Timeout reached");
        break;
    }
    status = await GetStatus(client, baseUrl, documentId);
}

// Fetches the document details and returns the value of their "status" field,
// or "unknown" when the request fails or the response cannot be read.
static async Task<string> GetStatus(HttpClient client, string baseUrl, string documentId)
{
    try
    {
        using var response = await client.GetAsync($"{baseUrl}/documents/{documentId}/");
        response.EnsureSuccessStatusCode();
        var content = await response.Content.ReadAsStringAsync();
        // JsonDocument is disposable and must be disposed after use.
        using var jsonDoc = JsonDocument.Parse(content);
        // GetString() yields null for a JSON null; report that as "unknown" too.
        return jsonDoc.RootElement.GetProperty("status").GetString() ?? "unknown";
    }
    catch (Exception e)
    {
        Console.WriteLine($"Error getting status: {e.Message}");
        return "unknown";
    }
}
importjava.net.HttpURLConnection;importjava.net.URL;importjava.io.BufferedReader;importjava.io.InputStreamReader;importjava.net.CookieManager;importjava.net.CookieHandler;importjava.nio.charset.StandardCharsets;importorg.json.JSONObject;publicclassPollingStatusTracking{publicstaticvoidmain(String[]args)throwsException{StringbaseUrl="http://dev-api.v2.areal.ai/api/v2";// 0. Login - details in Authentication section ... client is authenticated// 1. Starting the processing ... we get a request_id (which is actually the document_id) -- details in Processing section// 2. Polling the status of the processingStringdocumentId="your_document_id_here";CookieManagercookieManager=newCookieManager();CookieHandler.setDefault(cookieManager);Stringstatus=getStatus(baseUrl,documentId);// NOTE: You can also inspect other fields in document_details as neededinttimeout=10;while(!status.equals("completed")&&!status.equals("failed")){Thread.sleep(1000);timeout--;if(timeout<=0){System.out.println("Timeout reached");break;}status=getStatus(baseUrl,documentId);}}privatestaticStringgetStatus(StringbaseUrl,StringdocumentId){try{URLurl=newURL(baseUrl+"/documents/"+documentId+"/");HttpURLConnectionconnection=(HttpURLConnection)url.openConnection();connection.setRequestMethod("GET");connection.setDoInput(true);intresponseCode=connection.getResponseCode();if(responseCode==HttpURLConnection.HTTP_OK){BufferedReaderreader=newBufferedReader(newInputStreamReader(connection.getInputStream(),StandardCharsets.UTF_8));StringBuilderresponse=newStringBuilder();Stringline;while((line=reader.readLine())!=null){response.append(line);}reader.close();JSONObjectdocumentDetails=newJSONObject(response.toString());returndocumentDetails.getString("status");}connection.disconnect();}catch(Exceptione){System.out.println("Error getting status: "+e.getMessage());}return"unknown";}}
Meaning of the status
pending: We received your request, but processing has not started yet.
preparing: The document is being classified.
processing: Data extraction is in progress.
completed: Processing is complete and successful.
failed: Processing failed. Please check the error details or try again.
from pydantic import BaseModel
from typing import Optional

# NOTE: `Status` and `SessionDetailListItem` are not defined in this snippet;
# they must already be in scope.


# Defined first because the message models below reference `User`
# inside their class bodies.
class User(BaseModel):
    id: str


# When an existing document's status changes (e.g. processing -> completed)
class FileStatusUpdatedMessage(BaseModel):
    class Meta(BaseModel):
        upload_session_id: str
        id: str
        user: User

    class Data(BaseModel):
        status: Status

    type: str = 'FILE_STATUS_UPDATED'
    meta: Meta
    data: Data


# When a new document is created (e.g. preparing can generate multiple documents)
class FileCreatedMessage(BaseModel):
    class Meta(BaseModel):
        upload_session_id: str
        user: User

    type: str = 'FILE_CREATED'
    meta: Meta
    data: SessionDetailListItem


# When a document is removed
class FileDeletedMessage(BaseModel):
    class Meta(BaseModel):
        upload_session_id: str
        id: str
        user: User

    type: str = 'FILE_DELETED'
    meta: Meta
    # this notification carries no payload
    data: None = None


# When a document is pushed to a gateway (e.g. Encompass DocPush)
class DocPushFinishedMessage(BaseModel):
    class Meta(BaseModel):
        upload_session_id: str
        id: str
        user: User

    class Data(BaseModel):
        status: Status
        error: Optional[str]

    type: str = 'DOC_PUSH_FINISHED'
    meta: Meta
    data: Data
using System;
using System.Text.Json.Serialization;

// NOTE: the nested types are named MetaModel / DataModel because a nested type
// and a property of the same name cannot coexist in one class (compiler error
// CS0102). Property names and JSON names are unchanged.
// `SessionDetailListItem` is not defined in this snippet.

// When an existing document's status changes (e.g. processing -> completed)
public class FileStatusUpdatedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("id")]
        public string Id { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    public class DataModel
    {
        [JsonPropertyName("status")]
        public string Status { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "FILE_STATUS_UPDATED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}

// When a new document is created (e.g. preparing can generate multiple documents)
public class FileCreatedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "FILE_CREATED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public SessionDetailListItem Data { get; set; }
}

// When a document is removed
public class FileDeletedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("id")]
        public string Id { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "FILE_DELETED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    // this notification carries no payload
    [JsonPropertyName("data")]
    public object Data { get; set; } = null;
}

public class User
{
    [JsonPropertyName("id")]
    public string Id { get; set; }
}

// When a document is pushed to a gateway (e.g. Encompass DocPush)
public class DocPushFinishedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("id")]
        public string Id { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    public class DataModel
    {
        [JsonPropertyName("status")]
        public string Status { get; set; }

        [JsonPropertyName("error")]
        public string Error { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "DOC_PUSH_FINISHED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}
importorg.json.JSONObject;// When an existing documents status changes (e.g processing -> completed)classFileStatusUpdatedMessage{publicstaticclassMeta{publicStringupload_session_id;publicStringid;publicUseruser;}publicstaticclassData{publicStringstatus;}publicStringtype="FILE_STATUS_UPDATED";publicMetameta;publicDatadata;}// When a new document is created (e.g. preparing can generate multiple documents)classFileCreatedMessage{publicstaticclassMeta{publicStringupload_session_id;publicUseruser;}publicStringtype="FILE_CREATED";publicMetameta;publicSessionDetailListItemdata;}// When a document is removedclassFileDeletedMessage{publicstaticclassMeta{publicStringupload_session_id;publicStringid;publicUseruser;}publicStringtype="FILE_DELETED";publicMetameta;publicObjectdata=null;}classUser{publicStringid;}// When a document is pushed to a gateway (e.g. Encompass DocPush)classDocPushFinishedMessage{publicstaticclassMeta{publicStringupload_session_id;publicStringid;publicUseruser;}publicstaticclassData{publicStringstatus;publicStringerror;}publicStringtype="DOC_PUSH_FINISHED";publicMetameta;publicDatadata;}
from pydantic import BaseModel
from typing import Optional

# NOTE: `User` and `Status` are not defined in this snippet;
# they must already be in scope.


class CDBalancerStatusUpdatedMessage(BaseModel):
    """When the status of the CDBalancer changes (e.g. preparing -> processing -> completed)."""

    class Meta(BaseModel):
        upload_session_id: str
        id: str
        user: User

    class Data(BaseModel):
        status: Status

    type: str = 'CDBALANCER_STATUS_UPDATED'
    meta: Meta
    data: Data


class CDPullStartedMessage(BaseModel):
    """When a CDPull is started (e.g. Encompass CDPull)."""

    class Meta(BaseModel):
        upload_session_id: str
        user: User

    class Data(BaseModel):
        class Document(BaseModel):
            id: str
            name: str
            status: Status

        document: Document

    type: str = 'CD_PULL_STARTED'
    meta: Meta
    data: Data


class CDPullFinishedMessage(BaseModel):
    """When a CDPull is finished (e.g. Encompass CDPull)."""

    class Meta(BaseModel):
        upload_session_id: str
        user: User

    class Data(BaseModel):
        class Document(BaseModel):
            id: str
            name: str
            status: Status

        document: Optional[Document]
        status: Status
        error: Optional[str]

    type: str = 'CD_PULL_FINISHED'
    meta: Meta
    data: Data


class CDPushFinishedMessage(BaseModel):
    """When a CDPush is finished (e.g. Encompass CDPush)."""

    class Meta(BaseModel):
        upload_session_id: str
        cdbalancer_request_id: str
        gateway_request_id: Optional[str] = None
        user: User

    class Data(BaseModel):
        status: Status
        error: Optional[str]
        stats_message: Optional[str] = None
        stats_status: Optional[str] = None

    type: str = 'CD_PUSH_FINISHED'
    meta: Meta
    data: Data
using System;
using System.Text.Json.Serialization;

// NOTE: the nested types are named MetaModel / DataModel / DocumentModel because
// a nested type and a property of the same name cannot coexist in one class
// (compiler error CS0102). Property names and JSON names are unchanged.
// `User` is not defined in this snippet.

// When the status of the CDBalancer changes (e.g. preparing -> processing -> completed)
public class CDBalancerStatusUpdatedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("id")]
        public string Id { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    public class DataModel
    {
        [JsonPropertyName("status")]
        public string Status { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "CDBALANCER_STATUS_UPDATED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}

// When a CDPull is started (e.g. Encompass CDPull)
public class CDPullStartedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    public class DataModel
    {
        public class DocumentModel
        {
            [JsonPropertyName("id")]
            public string Id { get; set; }

            [JsonPropertyName("name")]
            public string Name { get; set; }

            [JsonPropertyName("status")]
            public string Status { get; set; }
        }

        [JsonPropertyName("document")]
        public DocumentModel Document { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "CD_PULL_STARTED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}

// When a CDPull is finished (e.g. Encompass CDPull)
public class CDPullFinishedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    public class DataModel
    {
        public class DocumentModel
        {
            [JsonPropertyName("id")]
            public string Id { get; set; }

            [JsonPropertyName("name")]
            public string Name { get; set; }

            [JsonPropertyName("status")]
            public string Status { get; set; }
        }

        [JsonPropertyName("document")]
        public DocumentModel Document { get; set; }

        [JsonPropertyName("status")]
        public string Status { get; set; }

        [JsonPropertyName("error")]
        public string Error { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "CD_PULL_FINISHED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}

// When a CDPush is finished (e.g. Encompass CDPush)
public class CDPushFinishedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("cdbalancer_request_id")]
        public string CDBalancerRequestId { get; set; }

        [JsonPropertyName("gateway_request_id")]
        public string GatewayRequestId { get; set; }

        [JsonPropertyName("user")]
        public User User { get; set; }
    }

    public class DataModel
    {
        [JsonPropertyName("status")]
        public string Status { get; set; }

        [JsonPropertyName("error")]
        public string Error { get; set; }

        [JsonPropertyName("stats_message")]
        public string StatsMessage { get; set; }

        [JsonPropertyName("stats_status")]
        public string StatsStatus { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "CD_PUSH_FINISHED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}
// When the status of the CDBalancer changes (e.g. preparing -> processing -> completed)classCDBalancerStatusUpdatedMessage{publicstaticclassMeta{publicStringupload_session_id;publicStringid;publicUseruser;}publicstaticclassData{publicStringstatus;}publicStringtype="CDBALANCER_STATUS_UPDATED";publicMetameta;publicDatadata;}// When a CDPull is started (e.g. Encompass CDPull)classCDPullStartedMessage{publicstaticclassMeta{publicStringupload_session_id;publicUseruser;}publicstaticclassData{publicstaticclassDocument{publicStringid;publicStringname;publicStringstatus;}publicDocumentdocument;}publicStringtype="CD_PULL_STARTED";publicMetameta;publicDatadata;}// When a CDPull is finished (e.g. Encompass CDPull)classCDPullFinishedMessage{publicstaticclassMeta{publicStringupload_session_id;publicUseruser;}publicstaticclassData{publicstaticclassDocument{publicStringid;publicStringname;publicStringstatus;}publicDocumentdocument;publicStringstatus;publicStringerror;}publicStringtype="CD_PULL_FINISHED";publicMetameta;publicDatadata;}// When a CDPush is finished (e.g. Encompass CDPush)classCDPushFinishedMessage{publicstaticclassMeta{publicStringupload_session_id;publicStringcdbalancer_request_id;publicStringgateway_request_id;publicUseruser;}publicstaticclassData{publicStringstatus;publicStringerror;publicStringstats_message;publicStringstats_status;}publicStringtype="CD_PUSH_FINISHED";publicMetameta;publicDatadata;}
from pydantic import BaseModel
from typing import Optional

# NOTE: `TaskSchema` is not defined in this snippet; it must already be in scope.


class CopilotTaskUpdatedMessage(BaseModel):
    """When a copilot task is updated (e.g. pending -> processing -> completed)."""

    class Meta(BaseModel):
        upload_session_id: str
        task_group_id: str

    class Data(BaseModel):
        task: TaskSchema
        error: Optional[str]

    type: str = 'COPILOT_TASK_UPDATED'
    meta: Meta
    data: Data


class CopilotTaskCreatedMessage(BaseModel):
    """When a new copilot task is created."""

    class Meta(BaseModel):
        upload_session_id: str
        task_group_id: str

    class Data(BaseModel):
        task: TaskSchema

    type: str = 'COPILOT_TASK_CREATED'
    meta: Meta
    data: Data


class CopilotTaskDeletedMessage(BaseModel):
    """When a copilot task is deleted."""

    class Meta(BaseModel):
        task_group_id: str

    class Data(BaseModel):
        task_id: str

    type: str = 'COPILOT_TASK_DELETED'
    meta: Meta
    data: Data
using System;
using System.Text.Json.Serialization;

// NOTE: the nested types are named MetaModel / DataModel because a nested type
// and a property of the same name cannot coexist in one class (compiler error
// CS0102). Property names and JSON names are unchanged.
// `TaskSchema` is not defined in this snippet.

// When a copilot task is updated (e.g. pending -> processing -> completed)
public class CopilotTaskUpdatedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("task_group_id")]
        public string TaskGroupId { get; set; }
    }

    public class DataModel
    {
        [JsonPropertyName("task")]
        public TaskSchema Task { get; set; }

        [JsonPropertyName("error")]
        public string Error { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "COPILOT_TASK_UPDATED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}

// When a new copilot task is created
public class CopilotTaskCreatedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("upload_session_id")]
        public string UploadSessionId { get; set; }

        [JsonPropertyName("task_group_id")]
        public string TaskGroupId { get; set; }
    }

    public class DataModel
    {
        [JsonPropertyName("task")]
        public TaskSchema Task { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "COPILOT_TASK_CREATED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}

// When a copilot task is deleted
public class CopilotTaskDeletedMessage
{
    public class MetaModel
    {
        [JsonPropertyName("task_group_id")]
        public string TaskGroupId { get; set; }
    }

    public class DataModel
    {
        [JsonPropertyName("task_id")]
        public string TaskId { get; set; }
    }

    [JsonPropertyName("type")]
    public string Type { get; set; } = "COPILOT_TASK_DELETED";

    [JsonPropertyName("meta")]
    public MetaModel Meta { get; set; }

    [JsonPropertyName("data")]
    public DataModel Data { get; set; }
}
// When a copilot task is updated (e.g. pending -> processing -> completed)classCopilotTaskUpdatedMessage{publicstaticclassMeta{publicStringupload_session_id;publicStringtask_group_id;}publicstaticclassData{publicTaskSchematask;publicStringerror;}publicStringtype="COPILOT_TASK_UPDATED";publicMetameta;publicDatadata;}// When a new copilot task is createdclassCopilotTaskCreatedMessage{publicstaticclassMeta{publicStringupload_session_id;publicStringtask_group_id;}publicstaticclassData{publicTaskSchematask;}publicStringtype="COPILOT_TASK_CREATED";publicMetameta;publicDatadata;}// When a copilot task is deletedclassCopilotTaskDeletedMessage{publicstaticclassMeta{publicStringtask_group_id;}publicstaticclassData{publicStringtask_id;}publicStringtype="COPILOT_TASK_DELETED";publicMetameta;publicDatadata;}