Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions apps/docs/components/icons.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4093,6 +4093,23 @@ export function SQSIcon(props: SVGProps<SVGSVGElement>) {
)
}

export function TextractIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg
{...props}
viewBox='10 14 60 52'
version='1.1'
xmlns='http://www.w3.org/2000/svg'
xmlnsXlink='http://www.w3.org/1999/xlink'
>
<path
d='M22.0624102,50 C24.3763895,53.603 28.4103535,56 33.0003125,56 C40.1672485,56 45.9991964,50.168 45.9991964,43 C45.9991964,35.832 40.1672485,30 33.0003125,30 C27.6033607,30 22.9664021,33.307 21.0024196,38 L23.2143999,38 C25.0393836,34.444 28.7363506,32 33.0003125,32 C39.0652583,32 43.9992143,36.935 43.9992143,43 C43.9992143,49.065 39.0652583,54 33.0003125,54 C29.5913429,54 26.5413702,52.441 24.5213882,50 L22.0624102,50 Z M37.0002768,45 L37.0002768,43 L41.9992321,43 C41.9992321,38.038 37.9622682,34 33.0003125,34 C28.0373568,34 23.9993929,38.038 23.9993929,43 L28.9993482,43 L28.9993482,45 L24.2313908,45 C25.1443826,49.002 28.7253507,52 33.0003125,52 C35.1362934,52 37.0992759,51.249 38.6442621,50 L34.0003036,50 L34.0003036,48 L40.4782457,48 C41.0812403,47.102 41.5202364,46.087 41.7682342,45 L37.0002768,45 Z M21.0024196,48 L23.2143999,48 C22.4434068,46.498 22.0004107,44.801 22.0004107,43 C22.0004107,41.959 22.1554093,40.955 22.4264069,40 L20.3634253,40 C20.1344274,40.965 19.9994286,41.966 19.9994286,43 C19.9994286,44.771 20.3584254,46.46 21.0024196,48 L21.0024196,48 Z M19.7434309,50 L17.0004554,50 L17.0004554,48 L18.8744386,48 C18.5344417,47.04 18.2894438,46.038 18.1494451,45 L15.4144695,45 L16.707458,46.293 L15.2924706,47.707 L12.2924974,44.707 C11.9025009,44.316 11.9025009,43.684 12.2924974,43.293 L15.2924706,40.293 L16.707458,41.707 L15.4144695,43 L18.0004464,43 C18.0004464,41.973 18.1044455,40.97 18.3024437,40 L17.0004554,40 L17.0004554,38 L18.8744386,38 C20.9404202,32.184 26.4833707,28 33.0003125,28 C37.427273,28 41.4002375,29.939 44.148213,33 L59.0000804,33 L59.0000804,35 L45.6661994,35 C47.1351863,37.318 47.9991786,40.058 47.9991786,43 L59.0000804,43 L59.0000804,45 L47.8501799,45 C46.8681887,52.327 40.5912447,58 33.0003125,58 C27.2563638,58 22.2624084,54.752 19.7434309,50 L19.7434309,50 Z M37.0002768,39 C37.0002768,38.448 36.5522808,38 36.0002857,38 L29.9993482,38 C29.4473442,38 28.9993482,38.448 28.9993482,39 L28.9993482,41 L31.0003304,41 L31.0003304,40 L32.0003214,40 L32.0003214,43 L31.0003304,43 L31.0003304,45 L35.0002946,45 L35.0002946,43 L34.0003036,43 L34.0003036,40 L35.0002946,40 L35.0002946,41 L37.0002768,41 L37.0002768,39 Z M49.0001696,40 L59.0000804,40 L59.0000804,38 L49.0001696,38 L49.0001696,40 Z M49.0001696,50 L59.0000804,50 L59.0000804,48 L49.0001696,48 L49.0001696,50 Z M57.0000982,27 L60.5850662,27 L57.0000982,23.414 L57.0000982,27 Z M63.7070383,27.293 C63.8940367,27.48 64.0000357,27.735 64.0000357,28 L64.0000357,63 C64.0000357,63.552 63.5520397,64 63.0000446,64 L32.0003304,64 C31.4473264,64 31.0003304,63.552 31.0003304,63 L31.0003304,59 L33.0003125,59 L33.0003125,62 L62.0000536,62 L62.0000536,29 L56.0001071,29 C55.4471121,29 55.0001161,28.552 55.0001161,28 L55.0001161,22 L33.0003125,22 L33.0003125,27 L31.0003304,27 L31.0003304,21 C31.0003304,20.448 31.4473264,20 32.0003304,20 L56.0001071,20 C56.2651048,20 56.5191025,20.105 56.7071008,20.293 L63.7070383,27.293 Z M68,24.166 L68,61 C68,61.552 67.552004,62 67.0000089,62 L65.0000268,62 L65.0000268,60 L66.0000179,60 L66.0000179,24.612 L58.6170838,18 L36.0002857,18 L36.0002857,19 L34.0003036,19 L34.0003036,17 C34.0003036,16.448 34.4472996,16 35.0003036,16 L59.0000804,16 C59.2460782,16 59.483076,16.091 59.6660744,16.255 L67.666003,23.42 C67.8780011,23.61 68,23.881 68,24.166 L68,24.166 Z'
fill='currentColor'
/>
</svg>
)
}

export function McpIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg
Expand Down
2 changes: 2 additions & 0 deletions apps/docs/components/ui/icon-mapping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ import {
SupabaseIcon,
TavilyIcon,
TelegramIcon,
TextractIcon,
TinybirdIcon,
TranslateIcon,
TrelloIcon,
Expand Down Expand Up @@ -237,6 +238,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
supabase: SupabaseIcon,
tavily: TavilyIcon,
telegram: TelegramIcon,
textract: TextractIcon,
tinybird: TinybirdIcon,
translate: TranslateIcon,
trello: TrelloIcon,
Expand Down
1 change: 1 addition & 0 deletions apps/docs/content/docs/en/tools/meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
"supabase",
"tavily",
"telegram",
"textract",
"tinybird",
"translate",
"trello",
Expand Down
120 changes: 120 additions & 0 deletions apps/docs/content/docs/en/tools/textract.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
---
title: AWS Textract
description: Extract text, tables, and forms from documents
---

import { BlockInfoCard } from "@/components/ui/block-info-card"

<BlockInfoCard
type="textract"
color="linear-gradient(135deg, #055F4E 0%, #56C0A7 100%)"
/>

{/* MANUAL-CONTENT-START:intro */}
[AWS Textract](https://aws.amazon.com/textract/) is a powerful AI service from Amazon Web Services designed to automatically extract printed text, handwriting, tables, forms, key-value pairs, and other structured data from scanned documents and images. Textract leverages advanced optical character recognition (OCR) and document analysis to transform documents into actionable data, enabling automation, analytics, compliance, and more.

With AWS Textract, you can:

- **Extract text from images and documents**: Recognize printed text and handwriting in formats such as PDF, JPEG, PNG, or TIFF
- **Detect and extract tables**: Automatically find tables and output their structured content
- **Parse forms and key-value pairs**: Pull structured data from forms, including fields and their corresponding values
- **Identify signatures and layout features**: Detect signatures, geometric layout, and relationships between document elements
- **Customize extraction with queries**: Extract specific fields and answers using query-based extraction (e.g., "What is the invoice number?")

In Sim, the AWS Textract integration empowers your agents to intelligently process documents as part of their workflows. This unlocks automation scenarios such as data entry from invoices, onboarding documents, contracts, receipts, and more. Your agents can extract relevant data, analyze structured forms, and generate summaries or reports directly from document uploads or URLs. By connecting Sim with AWS Textract, you can reduce manual effort, improve data accuracy, and streamline your business processes with robust document understanding.
{/* MANUAL-CONTENT-END */}


## Usage Instructions

Integrate AWS Textract into your workflow to extract text, tables, forms, and key-value pairs from documents. Single-page mode supports JPEG, PNG, and single-page PDF. Multi-page mode supports multi-page PDF and TIFF.



## Tools

### `textract_parser`

Parse documents using AWS Textract OCR and document analysis

#### Input

| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `accessKeyId` | string | Yes | AWS Access Key ID |
| `secretAccessKey` | string | Yes | AWS Secret Access Key |
| `region` | string | Yes | AWS region for Textract service \(e.g., us-east-1\) |
| `processingMode` | string | No | Document type: single-page or multi-page. Defaults to single-page. |
| `filePath` | string | No | URL to a document to be processed \(JPEG, PNG, or single-page PDF\). |
| `s3Uri` | string | No | S3 URI for multi-page processing \(s3://bucket/key\). |
| `fileUpload` | object | No | File upload data from file-upload component |
| `featureTypes` | array | No | Feature types to detect: TABLES, FORMS, QUERIES, SIGNATURES, LAYOUT. If not specified, only text detection is performed. |
| `items` | string | No | Feature type |
| `queries` | array | No | Custom queries to extract specific information. Only used when featureTypes includes QUERIES. |
| `items` | object | No | Query configuration |
| `properties` | string | No | The query text |
| `Text` | string | No | No description |
| `Alias` | string | No | No description |

#### Output

| Parameter | Type | Description |
| --------- | ---- | ----------- |
| `blocks` | array | Array of Block objects containing detected text, tables, forms, and other elements |
| ↳ `BlockType` | string | Type of block \(PAGE, LINE, WORD, TABLE, CELL, KEY_VALUE_SET, etc.\) |
| ↳ `Id` | string | Unique identifier for the block |
| ↳ `Text` | string | Query text |
| ↳ `TextType` | string | Type of text \(PRINTED or HANDWRITING\) |
| ↳ `Confidence` | number | Confidence score \(0-100\) |
| ↳ `Page` | number | Page number |
| ↳ `Geometry` | object | Location and bounding box information |
| ↳ `BoundingBox` | object | Height as ratio of document height |
| ↳ `Height` | number | Height as ratio of document height |
| ↳ `Left` | number | Left position as ratio of document width |
| ↳ `Top` | number | Top position as ratio of document height |
| ↳ `Width` | number | Width as ratio of document width |
| ↳ `Height` | number | Height as ratio of document height |
| ↳ `Left` | number | Left position as ratio of document width |
| ↳ `Top` | number | Top position as ratio of document height |
| ↳ `Width` | number | Width as ratio of document width |
| ↳ `Polygon` | array | Polygon coordinates |
| ↳ `X` | number | X coordinate |
| ↳ `Y` | number | Y coordinate |
| ↳ `X` | number | X coordinate |
| ↳ `Y` | number | Y coordinate |
| ↳ `BoundingBox` | object | Height as ratio of document height |
| ↳ `Height` | number | Height as ratio of document height |
| ↳ `Left` | number | Left position as ratio of document width |
| ↳ `Top` | number | Top position as ratio of document height |
| ↳ `Width` | number | Width as ratio of document width |
| ↳ `Height` | number | Height as ratio of document height |
| ↳ `Left` | number | Left position as ratio of document width |
| ↳ `Top` | number | Top position as ratio of document height |
| ↳ `Width` | number | Width as ratio of document width |
| ↳ `Polygon` | array | Polygon coordinates |
| ↳ `X` | number | X coordinate |
| ↳ `Y` | number | Y coordinate |
| ↳ `X` | number | X coordinate |
| ↳ `Y` | number | Y coordinate |
| ↳ `Relationships` | array | Relationships to other blocks |
| ↳ `Type` | string | Relationship type \(CHILD, VALUE, ANSWER, etc.\) |
| ↳ `Ids` | array | IDs of related blocks |
| ↳ `Type` | string | Relationship type \(CHILD, VALUE, ANSWER, etc.\) |
| ↳ `Ids` | array | IDs of related blocks |
| ↳ `EntityTypes` | array | Entity types for KEY_VALUE_SET \(KEY or VALUE\) |
| ↳ `SelectionStatus` | string | For checkboxes: SELECTED or NOT_SELECTED |
| ↳ `RowIndex` | number | Row index for table cells |
| ↳ `ColumnIndex` | number | Column index for table cells |
| ↳ `RowSpan` | number | Row span for merged cells |
| ↳ `ColumnSpan` | number | Column span for merged cells |
| ↳ `Query` | object | Query information for QUERY blocks |
| ↳ `Text` | string | Query text |
| ↳ `Alias` | string | Query alias |
| ↳ `Pages` | array | Pages to search |
| ↳ `Alias` | string | Query alias |
| ↳ `Pages` | array | Pages to search |
| `documentMetadata` | object | Metadata about the analyzed document |
| ↳ `pages` | number | Number of pages in the document |
| `modelVersion` | string | Version of the Textract model used for processing |


Loading