Skip to content

Commit ecf39c5

Browse files
committed
feat(tools): added textract
1 parent a26a1a9 commit ecf39c5

File tree

14 files changed

+1706
-13
lines changed

14 files changed

+1706
-13
lines changed

apps/docs/components/icons.tsx

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4093,6 +4093,23 @@ export function SQSIcon(props: SVGProps<SVGSVGElement>) {
40934093
)
40944094
}
40954095

4096+
export function TextractIcon(props: SVGProps<SVGSVGElement>) {
4097+
return (
4098+
<svg
4099+
{...props}
4100+
viewBox='10 14 60 52'
4101+
version='1.1'
4102+
xmlns='http://www.w3.org/2000/svg'
4103+
xmlnsXlink='http://www.w3.org/1999/xlink'
4104+
>
4105+
<path
4106+
d='M22.0624102,50 C24.3763895,53.603 28.4103535,56 33.0003125,56 C40.1672485,56 45.9991964,50.168 45.9991964,43 C45.9991964,35.832 40.1672485,30 33.0003125,30 C27.6033607,30 22.9664021,33.307 21.0024196,38 L23.2143999,38 C25.0393836,34.444 28.7363506,32 33.0003125,32 C39.0652583,32 43.9992143,36.935 43.9992143,43 C43.9992143,49.065 39.0652583,54 33.0003125,54 C29.5913429,54 26.5413702,52.441 24.5213882,50 L22.0624102,50 Z M37.0002768,45 L37.0002768,43 L41.9992321,43 C41.9992321,38.038 37.9622682,34 33.0003125,34 C28.0373568,34 23.9993929,38.038 23.9993929,43 L28.9993482,43 L28.9993482,45 L24.2313908,45 C25.1443826,49.002 28.7253507,52 33.0003125,52 C35.1362934,52 37.0992759,51.249 38.6442621,50 L34.0003036,50 L34.0003036,48 L40.4782457,48 C41.0812403,47.102 41.5202364,46.087 41.7682342,45 L37.0002768,45 Z M21.0024196,48 L23.2143999,48 C22.4434068,46.498 22.0004107,44.801 22.0004107,43 C22.0004107,41.959 22.1554093,40.955 22.4264069,40 L20.3634253,40 C20.1344274,40.965 19.9994286,41.966 19.9994286,43 C19.9994286,44.771 20.3584254,46.46 21.0024196,48 L21.0024196,48 Z M19.7434309,50 L17.0004554,50 L17.0004554,48 L18.8744386,48 C18.5344417,47.04 18.2894438,46.038 18.1494451,45 L15.4144695,45 L16.707458,46.293 L15.2924706,47.707 L12.2924974,44.707 C11.9025009,44.316 11.9025009,43.684 12.2924974,43.293 L15.2924706,40.293 L16.707458,41.707 L15.4144695,43 L18.0004464,43 C18.0004464,41.973 18.1044455,40.97 18.3024437,40 L17.0004554,40 L17.0004554,38 L18.8744386,38 C20.9404202,32.184 26.4833707,28 33.0003125,28 C37.427273,28 41.4002375,29.939 44.148213,33 L59.0000804,33 L59.0000804,35 L45.6661994,35 C47.1351863,37.318 47.9991786,40.058 47.9991786,43 L59.0000804,43 L59.0000804,45 L47.8501799,45 C46.8681887,52.327 40.5912447,58 33.0003125,58 C27.2563638,58 22.2624084,54.752 19.7434309,50 L19.7434309,50 Z M37.0002768,39 C37.0002768,38.448 36.5522808,38 36.0002857,38 L29.9993482,38 C29.4473442,38 28.9993482,38.448 28.9993482,39 L28.9993482,41 L31.0003304,41 L31.0003304,40 
L32.0003214,40 L32.0003214,43 L31.0003304,43 L31.0003304,45 L35.0002946,45 L35.0002946,43 L34.0003036,43 L34.0003036,40 L35.0002946,40 L35.0002946,41 L37.0002768,41 L37.0002768,39 Z M49.0001696,40 L59.0000804,40 L59.0000804,38 L49.0001696,38 L49.0001696,40 Z M49.0001696,50 L59.0000804,50 L59.0000804,48 L49.0001696,48 L49.0001696,50 Z M57.0000982,27 L60.5850662,27 L57.0000982,23.414 L57.0000982,27 Z M63.7070383,27.293 C63.8940367,27.48 64.0000357,27.735 64.0000357,28 L64.0000357,63 C64.0000357,63.552 63.5520397,64 63.0000446,64 L32.0003304,64 C31.4473264,64 31.0003304,63.552 31.0003304,63 L31.0003304,59 L33.0003125,59 L33.0003125,62 L62.0000536,62 L62.0000536,29 L56.0001071,29 C55.4471121,29 55.0001161,28.552 55.0001161,28 L55.0001161,22 L33.0003125,22 L33.0003125,27 L31.0003304,27 L31.0003304,21 C31.0003304,20.448 31.4473264,20 32.0003304,20 L56.0001071,20 C56.2651048,20 56.5191025,20.105 56.7071008,20.293 L63.7070383,27.293 Z M68,24.166 L68,61 C68,61.552 67.552004,62 67.0000089,62 L65.0000268,62 L65.0000268,60 L66.0000179,60 L66.0000179,24.612 L58.6170838,18 L36.0002857,18 L36.0002857,19 L34.0003036,19 L34.0003036,17 C34.0003036,16.448 34.4472996,16 35.0003036,16 L59.0000804,16 C59.2460782,16 59.483076,16.091 59.6660744,16.255 L67.666003,23.42 C67.8780011,23.61 68,23.881 68,24.166 L68,24.166 Z'
4107+
fill='currentColor'
4108+
/>
4109+
</svg>
4110+
)
4111+
}
4112+
40964113
export function McpIcon(props: SVGProps<SVGSVGElement>) {
40974114
return (
40984115
<svg

apps/docs/components/ui/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ import {
110110
SupabaseIcon,
111111
TavilyIcon,
112112
TelegramIcon,
113+
TextractIcon,
113114
TinybirdIcon,
114115
TranslateIcon,
115116
TrelloIcon,
@@ -237,6 +238,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
237238
supabase: SupabaseIcon,
238239
tavily: TavilyIcon,
239240
telegram: TelegramIcon,
241+
textract: TextractIcon,
240242
tinybird: TinybirdIcon,
241243
translate: TranslateIcon,
242244
trello: TrelloIcon,

apps/docs/content/docs/en/tools/meta.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
"supabase",
107107
"tavily",
108108
"telegram",
109+
"textract",
109110
"tinybird",
110111
"translate",
111112
"trello",
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
---
2+
title: AWS Textract
3+
description: Extract text, tables, and forms from documents
4+
---
5+
6+
import { BlockInfoCard } from "@/components/ui/block-info-card"
7+
8+
<BlockInfoCard
9+
type="textract"
10+
color="linear-gradient(135deg, #055F4E 0%, #56C0A7 100%)"
11+
/>
12+
13+
{/* MANUAL-CONTENT-START:intro */}
14+
[AWS Textract](https://aws.amazon.com/textract/) is a powerful AI service from Amazon Web Services designed to automatically extract printed text, handwriting, tables, forms, key-value pairs, and other structured data from scanned documents and images. Textract leverages advanced optical character recognition (OCR) and document analysis to transform documents into actionable data, enabling automation, analytics, compliance, and more.
15+
16+
With AWS Textract, you can:
17+
18+
- **Extract text from images and documents**: Recognize printed text and handwriting in formats such as PDF, JPEG, PNG, or TIFF
19+
- **Detect and extract tables**: Automatically find tables and output their structured content
20+
- **Parse forms and key-value pairs**: Pull structured data from forms, including fields and their corresponding values
21+
- **Identify signatures and layout features**: Detect signatures, geometric layout, and relationships between document elements
22+
- **Customize extraction with queries**: Extract specific fields and answers using query-based extraction (e.g., "What is the invoice number?")
23+
24+
In Sim, the AWS Textract integration empowers your agents to intelligently process documents as part of their workflows. This unlocks automation scenarios such as data entry from invoices, onboarding documents, contracts, receipts, and more. Your agents can extract relevant data, analyze structured forms, and generate summaries or reports directly from document uploads or URLs. By connecting Sim with AWS Textract, you can reduce manual effort, improve data accuracy, and streamline your business processes with robust document understanding.
25+
{/* MANUAL-CONTENT-END */}
26+
27+
28+
## Usage Instructions
29+
30+
Integrate AWS Textract into your workflow to extract text, tables, forms, and key-value pairs from documents. Sync mode supports JPEG, PNG, and single-page PDF. Async mode supports multi-page PDF and TIFF via S3.
31+
32+
33+
34+
## Tools
35+
36+
### `textract_parser`
37+
38+
Parse documents using AWS Textract OCR and document analysis
39+
40+
#### Input
41+
42+
| Parameter | Type | Required | Description |
43+
| --------- | ---- | -------- | ----------- |
44+
| `accessKeyId` | string | Yes | AWS Access Key ID |
45+
| `secretAccessKey` | string | Yes | AWS Secret Access Key |
46+
| `region` | string | Yes | AWS region for Textract service \(e.g., us-east-1\) |
47+
| `processingMode` | string | No | Processing mode: single-page \(sync\) or multi-page \(async\). Defaults to single-page. |
48+
| `filePath` | string | No | URL to a document to be processed \(JPEG, PNG, PDF, or TIFF\). Required for sync mode. |
49+
| `s3Uri` | string | No | S3 URI for async processing \(s3://bucket/key\). Required for async mode with S3 input. |
50+
| `fileUpload` | object | No | File upload data from file-upload component |
51+
| `featureTypes` | array | No | Feature types to detect: TABLES, FORMS, QUERIES, SIGNATURES, LAYOUT. If not specified, only text detection is performed. |
52+
| `items` | string | No | Feature type |
53+
| `queries` | array | No | Custom queries to extract specific information. Only used when featureTypes includes QUERIES. |
54+
| `items` | object | No | Query configuration |
55+
| `properties` | string | No | The query text |
56+
| `Text` | string | No | The question to ask of the document \(query text\) |
57+
| `Alias` | string | No | Optional alias used to label the query in the results |
58+
59+
#### Output
60+
61+
| Parameter | Type | Description |
62+
| --------- | ---- | ----------- |
63+
| `blocks` | array | Array of Block objects containing detected text, tables, forms, and other elements |
64+
|`BlockType` | string | Type of block \(PAGE, LINE, WORD, TABLE, CELL, KEY_VALUE_SET, etc.\) |
65+
|`Id` | string | Unique identifier for the block |
66+
|`Text` | string | Text recognized in the block \(the detected word or line\) |
67+
|`TextType` | string | Type of text \(PRINTED or HANDWRITING\) |
68+
|`Confidence` | number | Confidence score \(0-100\) |
69+
|`Page` | number | Page number |
70+
|`Geometry` | object | Location and bounding box information |
71+
|`BoundingBox` | object | Axis-aligned bounding box of the element, expressed as ratios of the document dimensions |
72+
|`Height` | number | Height as ratio of document height |
73+
|`Left` | number | Left position as ratio of document width |
74+
|`Top` | number | Top position as ratio of document height |
75+
|`Width` | number | Width as ratio of document width |
76+
|`Height` | number | Height as ratio of document height |
77+
|`Left` | number | Left position as ratio of document width |
78+
|`Top` | number | Top position as ratio of document height |
79+
|`Width` | number | Width as ratio of document width |
80+
|`Polygon` | array | Polygon coordinates |
81+
|`X` | number | X coordinate |
82+
|`Y` | number | Y coordinate |
83+
|`X` | number | X coordinate |
84+
|`Y` | number | Y coordinate |
85+
|`BoundingBox` | object | Axis-aligned bounding box of the element, expressed as ratios of the document dimensions |
86+
|`Height` | number | Height as ratio of document height |
87+
|`Left` | number | Left position as ratio of document width |
88+
|`Top` | number | Top position as ratio of document height |
89+
|`Width` | number | Width as ratio of document width |
90+
|`Height` | number | Height as ratio of document height |
91+
|`Left` | number | Left position as ratio of document width |
92+
|`Top` | number | Top position as ratio of document height |
93+
|`Width` | number | Width as ratio of document width |
94+
|`Polygon` | array | Polygon coordinates |
95+
|`X` | number | X coordinate |
96+
|`Y` | number | Y coordinate |
97+
|`X` | number | X coordinate |
98+
|`Y` | number | Y coordinate |
99+
|`Relationships` | array | Relationships to other blocks |
100+
|`Type` | string | Relationship type \(CHILD, VALUE, ANSWER, etc.\) |
101+
|`Ids` | array | IDs of related blocks |
102+
|`Type` | string | Relationship type \(CHILD, VALUE, ANSWER, etc.\) |
103+
|`Ids` | array | IDs of related blocks |
104+
|`EntityTypes` | array | Entity types for KEY_VALUE_SET \(KEY or VALUE\) |
105+
|`SelectionStatus` | string | For checkboxes: SELECTED or NOT_SELECTED |
106+
|`RowIndex` | number | Row index for table cells |
107+
|`ColumnIndex` | number | Column index for table cells |
108+
|`RowSpan` | number | Row span for merged cells |
109+
|`ColumnSpan` | number | Column span for merged cells |
110+
|`Query` | object | Query information for QUERY blocks |
111+
|`Text` | string | Query text |
112+
|`Alias` | string | Query alias |
113+
|`Pages` | array | Pages to search |
114+
|`Alias` | string | Query alias |
115+
|`Pages` | array | Pages to search |
116+
| `documentMetadata` | object | Metadata about the analyzed document |
117+
|`pages` | number | Number of pages in the document |
118+
| `modelVersion` | string | Version of the Textract model used for processing |
119+
120+

0 commit comments

Comments
 (0)