Document AI
OCR
With the OCR tool of ComPDFKit Document AI, you can recognize text in all kinds of images and scanned PDF files. The following examples show how to upload a JPG file and run OCR processing in Java, PHP, C#, Python, and Swift. The recognition result is returned as a JSON file.
Java
// Authenticate with your ComPDFKit public/secret key pair
CPDFClient client = new CPDFClient(publicKey, secretKey);

// Open a Document AI OCR task and keep its id for the calls below
String taskId = client.createTask(CPDFDocumentAIEnum.OCR).getTaskId();

// OCR options: "auto" selects automatic language classification
CPDFOcrParameter ocrOptions = new CPDFOcrParameter();
ocrOptions.setLang("auto");

// Attach the image to the task together with the OCR options
File imageFile = new File("test.jpg");
client.uploadFile(imageFile, taskId, ocrOptions);

// Start processing the uploaded file
client.executeTask(taskId);

// Query the task information
CPDFTaskInfoResult taskInfo = client.getTaskInfo(taskId);
PHP
// Authenticate with your ComPDFKit public/secret key pair
$client = new CPDFClient('public_key', 'secret_key');

// Open a Document AI OCR task and keep its id for the calls below
$createdTask = $client->createTask(CPDFDocumentAI::OCR);
$taskId = $createdTask['taskId'];

// Register the image and set the OCR language ('auto' = automatic classification)
$file = $client->addFile('test.jpg')->setLang('auto');

// Attach the file to the task
$fileInfo = $file->uploadFile($taskId);

// Start processing the uploaded file
$client->executeTask($taskId);

// Query the task information
$taskInfo = $client->getTaskInfo($taskId);
C#
// Authenticate with your ComPDFKit public/secret key pair
CPDFClient client = new CPDFClient(publicKey, secretKey);

// Open a Document AI OCR task and read back its id
CPDFCreateTaskResult createResult = client.CreateTask(CPDFDocumentAIEnum.OCR);
string taskId = createResult.TaskId;

// OCR options: "auto" selects automatic language classification
CPDFOcrParameter ocrOptions = new CPDFOcrParameter { Lang = "auto" };

// Attach the image to the task together with the OCR options
FileInfo imageFile = new FileInfo("test.jpg");
client.UploadFile(imageFile, taskId, ocrOptions);

// Start processing the uploaded file
client.ExecuteTask(taskId);

// Query the task information
CPDFTaskInfoResult taskInfo = client.GetTaskInfo(taskId);
Python
# Authenticate with your ComPDFKit public/secret key pair
client = CPDFClient(public_key, secret_key)

# Open a Document AI OCR task and keep its id for the calls below
task_id = client.create_task(CPDFDocumentAIEnum.OCR).task_id

# OCR options: "auto" selects automatic language classification
ocr_options = CPDFOcrParameter()
ocr_options.lang = "auto"

# Attach the image to the task together with the OCR options
client.upload_file('test.jpg', task_id, ocr_options)

# Start processing the uploaded file
client.execute_task(task_id)

# Query the task information
task_info = client.get_task_info(task_id)
Swift
// Create a client with your ComPDFKit public/secret key pair
let client = CPDFClient(publicKey: public_key, secretKey: secret_key)
Task { @MainActor in
    // Create a DocumentAI OCR task
    let taskModel = await client.createTask(url: CPDFDocumentAI.OCR)
    // Get the task id; stop here if none came back, because every
    // following call needs a valid task id
    guard let taskId = taskModel?.taskId, !taskId.isEmpty else { return }
    // Locate the bundled JPG; stop if the resource is missing instead of
    // sending an upload request with an empty file path
    guard let path = Bundle.main.path(forResource: "test", ofType: "jpg") else { return }
    // Upload the file; "auto" selects automatic language classification
    let uploadFileModel = await client.uploadFile(filepath: path, params: [
        CPDFFileUploadParameterKey.lang.string(): "auto"
    ], taskId: taskId)
    // Execute task
    let _ = await client.processFiles(taskId: taskId)
    // Query TaskInfo
    let taskInfoModel = await client.getTaskInfo(taskId: taskId)
}
lang
: Supported types and definitions
- auto - automatic language classification
- english - English
- chinese - Simplified Chinese
- chinese_tra - Traditional Chinese
- korean - Korean
- japanese - Japanese
- latin - Latin
- devanagari - Devanagari script (used for Sanskrit, Hindi, and related languages)
Result:
File Type | Description |
---|---|
.json | OCR recognition results. |
Content:
Parameter | Description |
---|---|
cost | OCR recognition time. |
boxes | All detected object box positions of the input image. |
text | OCR recognition content. |
rec_scores | OCR text recognition score; the higher the score, the more credible the result. |