diff --git a/configs/AM62DX/AM62DX_linux_toc.txt b/configs/AM62DX/AM62DX_linux_toc.txt index 2ebb80cf2..edc0c4a76 100644 --- a/configs/AM62DX/AM62DX_linux_toc.txt +++ b/configs/AM62DX/AM62DX_linux_toc.txt @@ -62,6 +62,12 @@ linux/Foundational_Components/Kernel/Kernel_Drivers/UART linux/Foundational_Components/Kernel/Kernel_Drivers/UBIFS linux/Foundational_Components/Kernel/Kernel_Drivers/VTM linux/Foundational_Components/Kernel/Kernel_Drivers/Watchdog +linux/Foundational_Components_Machine_Learning +linux/Foundational_Components/Machine_Learning/arm_compute_library +linux/Foundational_Components/Machine_Learning/armnn +linux/Foundational_Components/Machine_Learning/nnstreamer +linux/Foundational_Components/Machine_Learning/onnxrt +linux/Foundational_Components/Machine_Learning/tflite #linux/Foundational_Components_Power_Management diff --git a/source/images/Sitara_machine_learning_stack_diagram.png b/source/images/Sitara_machine_learning_stack_diagram.png index 6fb0fe0bf..8b9b52092 100644 Binary files a/source/images/Sitara_machine_learning_stack_diagram.png and b/source/images/Sitara_machine_learning_stack_diagram.png differ diff --git a/source/linux/Foundational_Components/Machine_Learning/arm_compute_library.rst b/source/linux/Foundational_Components/Machine_Learning/arm_compute_library.rst index 3bfa2de10..43b8ca2fd 100644 --- a/source/linux/Foundational_Components/Machine_Learning/arm_compute_library.rst +++ b/source/linux/Foundational_Components/Machine_Learning/arm_compute_library.rst @@ -10,7 +10,7 @@ Exact list of functions can be found at https://www.arm.com/products/development Supported versions ------------------ - - ARM Compute Library 24.12 + - ARM Compute Library 52.7.0 Arm Compute Library Testing --------------------------- @@ -19,10 +19,10 @@ Arm Compute Libraries, tests, and sample executables are included in the SDK fil .. 
code-block:: console - root@am62xx-evm:~# LD_LIBRARY_PATH=/usr/lib/tests/ /usr/lib/tests/arm_compute_validation - Version = 32bcced2af7feea6969dd1d22e58d0718dc488e3 - CommandLine = /usr/lib/tests/arm_compute_validation - Seed = 3778037091 + root@am62xx-evm:~# LD_LIBRARY_PATH=/usr/bin/arm-compute-library-52.7.0/tests/ /usr/bin/arm-compute-library-52.7.0/tests/arm_compute_validation + Version = c9a1fff898abd5109b759e8e16616519dc758fdd + CommandLine = /usr/bin/arm-compute-library-52.7.0/tests/arm_compute_validation + Seed = 165977448 cpu_has_sve = false cpu_has_sve2 = false cpu_has_svef32mm = false @@ -34,6 +34,7 @@ Arm Compute Libraries, tests, and sample executables are included in the SDK fil cpu_has_bf16 = false cpu_has_dotprod = false cpu_has_i8mm = false + cpu_has_fhm = false CPU0 = A53 CPU1 = A53 CPU2 = A53 @@ -41,15 +42,15 @@ Arm Compute Libraries, tests, and sample executables are included in the SDK fil Iterations = 1 Threads = 1 Dataset mode = PRECOMMIT - Running [0] 'UNIT/CPPScheduler/RethrowException' - Wall clock/Wall clock time: AVG=3466.0000 us + Running [0] 'UNIT/DataTypeUtils/CheckDataTypeIsPrinted@DataType=QSYMM8' + Wall clock/Wall clock time: AVG=3.0000 us .. code-block:: console - root@am62xx-evm:~# /usr/bin/arm-compute-library-24.12/examples/graph_alexnet + root@am62xx-evm:~# /usr/bin/arm-compute-library-52.7.0/examples/graph_alexnet - /usr/bin/arm-compute-library-24.12/examples/graph_alexnet + /usr/bin/arm-compute-library-52.7.0/examples/graph_alexnet Threads : 1 Target : Neon @@ -58,8 +59,8 @@ Arm Compute Libraries, tests, and sample executables are included in the SDK fil Tuner enabled? : false Cache enabled? : false Tuner mode : Normal - Tuner file : - MLGO file : + Tuner file : + MLGO file : Fast math enabled? : false Test passed @@ -69,16 +70,17 @@ Sample NN related executables (using Arm Compute Library only): .. 
code-block:: console - root@am62xx-evm:~# ls /usr/bin/arm-compute-library-24.12/examples/graph_* - graph_alexnet graph_inception_v4 graph_resnext50 graph_vgg19 - graph_deepspeech_v0_4_1 graph_lenet graph_shufflenet graph_vgg_vdsr - graph_edsr graph_mobilenet graph_squeezenet graph_yolov3 - graph_googlenet graph_mobilenet_v2 graph_squeezenet_v1_1 - graph_inception_resnet_v1 graph_resnet12 graph_srcnn955 - graph_inception_resnet_v2 graph_resnet50 graph_ssd_mobilenet - graph_inception_v3 graph_resnet_v2_50 graph_vgg16 + root@am62xx-evm:~# ls /usr/bin/arm-compute-library-52.7.0/examples/graph_* + graph_alexnet graph_lenet graph_squeezenet + graph_deepspeech_v0_4_1 graph_mobilenet graph_squeezenet_v1_1 + graph_edsr graph_mobilenet_v2 graph_srcnn955 + graph_googlenet graph_resnet12 graph_ssd_mobilenet + graph_inception_resnet_v1 graph_resnet50 graph_vgg16 + graph_inception_resnet_v2 graph_resnet_v2_50 graph_vgg19 + graph_inception_v3 graph_resnext50 graph_vgg_vdsr + graph_inception_v4 graph_shufflenet graph_yolov3 .. code-block:: console - root@am62xx-evm:~# ls /usr/bin/arm-compute-library-24.12/examples/neon_* + root@am62xx-evm:~# ls /usr/bin/arm-compute-library-52.7.0/examples/neon_* neon_cnn neon_copy_objects neon_gemm_qasymm8 neon_gemm_s8_f32 neon_permute neon_scale neon_sgemm diff --git a/source/linux/Foundational_Components/Machine_Learning/armnn.rst b/source/linux/Foundational_Components/Machine_Learning/armnn.rst index c22f1c2b7..574bf771e 100644 --- a/source/linux/Foundational_Components/Machine_Learning/armnn.rst +++ b/source/linux/Foundational_Components/Machine_Learning/armnn.rst @@ -23,4 +23,4 @@ in conjunction with the TIDL TensorFlow Lite Delegate. 
Supported versions ------------------ - - Arm NN 24.11 + - Arm NN 26.01 diff --git a/source/linux/Foundational_Components/Machine_Learning/nnstreamer.rst b/source/linux/Foundational_Components/Machine_Learning/nnstreamer.rst index 0f247a7d5..e7c977c88 100644 --- a/source/linux/Foundational_Components/Machine_Learning/nnstreamer.rst +++ b/source/linux/Foundational_Components/Machine_Learning/nnstreamer.rst @@ -12,7 +12,7 @@ https://nnstreamer.ai/ Supported versions ------------------ - - NNStreamer 2.4.2 + - NNStreamer 2.6.0 Testing NNStreamer ------------------ @@ -20,7 +20,7 @@ Testing NNStreamer .. code-block:: console root@am62xx-evm:~# nnstreamer-check - NNStreamer version: 2.4.2 + NNStreamer version: 2.6.0 loaded : TRUE path : /usr/lib/gstreamer-1.0/libnnstreamer.so ... diff --git a/source/linux/Foundational_Components/Machine_Learning/onnxrt.rst b/source/linux/Foundational_Components/Machine_Learning/onnxrt.rst index 5b9da7ab7..45d10440c 100644 --- a/source/linux/Foundational_Components/Machine_Learning/onnxrt.rst +++ b/source/linux/Foundational_Components/Machine_Learning/onnxrt.rst @@ -18,7 +18,7 @@ https://onnxruntime.ai/ Supported version ----------------- - - ONNX Runtime 1.20.1 + - ONNX Runtime 1.23.2 ONNX Runtime test applications ------------------------------ @@ -34,7 +34,7 @@ Running benchmark_model usage: perf_test [options...] model_path [result_file] Options: -m [test_mode]: Specifies the test mode. Value could be 'duration' or 'times'. - Provide 'duration' to run the test for a fix duration, and 'times' to repeated for a certain times. + Provide 'duration' to run the test for a fix duration, and 'times' to repeated for a certain times. -M: Disable memory pattern. -A: Disable memory arena -I: Generate tensor input binding (Free dimensions are treated as 1.) @@ -55,19 +55,19 @@ Running benchmark_model -o [optimization level]: Default is 99 (all). Valid values are 0 (disable), 1 (basic), 2 (extended), 99 (all). 
Please see onnxruntime_c_api.h (enum GraphOptimizationLevel) for the full list of all optimization levels. -u [optimized_model_path]: Specify the optimized model path for saving. - -d [CUDA only][cudnn_conv_algorithm]: Specify CUDNN convolution algorithms: 0(benchmark), 1(heuristic), 2(default). - -q [CUDA only] use separate stream for copy. + -d [CUDA only][cudnn_conv_algorithm]: Specify CUDNN convolution algorithms: 0(benchmark), 1(heuristic), 2(default). + -q [CUDA only] use separate stream for copy. -z: Set denormal as zero. When turning on this option reduces latency dramatically, a model may have denormals. - -C: Specify session configuration entries as key-value pairs: -C "| |" - Refer to onnxruntime_session_options_config_keys.h for valid keys and values. - [Example] -C "session.disable_cpu_ep_fallback|1 ep.context_enable|1" - -i: Specify EP specific runtime options as key value pairs. Different runtime options available are: + -C: Specify session configuration entries as key-value pairs: -C "| |" + Refer to onnxruntime_session_options_config_keys.h for valid keys and values. + [Example] -C "session.disable_cpu_ep_fallback|1 ep.context_enable|1" + -i: Specify EP specific runtime options as key value pairs. Different runtime options available are: [Usage]: -e -i '| |' - [ACL only] [enable_fast_math]: Options: 'true', 'false', default: 'false', + [ACL only] [enable_fast_math]: Options: 'true', 'false', default: 'false', -T [Set intra op thread affinities]: Specify intra op thread affinity string - [Example]: -T 1,2;3,4;5,6 or -T 1-2;3-4;5-6 + [Example]: -T 1,2;3,4;5,6 or -T 1-2;3-4;5-6 Use semicolon to separate configuration between threads. E.g. 1,2;3,4;5,6 specifies affinities for three threads, the first thread will be attached to the first and second logical processor. The number of affinities must be equal to intra_op_num_threads - 1 @@ -84,22 +84,22 @@ Example of running *onnxruntime_perf_test* on target using the pre-installed mob .. 
code-block:: console # /usr/bin/onnxruntime-tests/onnxruntime_perf_test -I -m times -r 8 -e acl -P /usr/bin/onnxruntime-tests/testdata/mobilenet_v3_small_excerpt.onnx - Session creation time cost: 0.0273071 s - First inference time cost: 20 ms - Total inference time cost: 0.14188 s + Session creation time cost: 0.139671 s + First inference time cost: 15 ms + Total inference time cost: 0.126396 s Total inference requests: 8 - Average inference time cost: 17.735 ms - Total inference run time: 0.141991 s - Number of inferences per second: 56.3415 - Avg CPU usage: 98 % - Peak working set size: 35299328 bytes - Avg CPU usage:98 - Peak working set size:35299328 + Average inference time cost: 15.7995 ms + Total inference run time: 0.126518 s + Number of inferences per second: 63.232 + Avg CPU usage: 100 % + Peak working set size: 37994496 bytes + Avg CPU usage:100 + Peak working set size:37994496 Runs:8 - Min Latency: 0.0159831 s - Max Latency: 0.0232702 s - P50 Latency: 0.0167086 s - P90 Latency: 0.0232702 s - P95 Latency: 0.0232702 s - P99 Latency: 0.0232702 s - P999 Latency: 0.0232702 s + Min Latency: 0.00955697 s + Max Latency: 0.0239688 s + P50 Latency: 0.0156388 s + P90 Latency: 0.0239688 s + P95 Latency: 0.0239688 s + P99 Latency: 0.0239688 s + P999 Latency: 0.0239688 s diff --git a/source/linux/Foundational_Components/Machine_Learning/tflite.rst b/source/linux/Foundational_Components/Machine_Learning/tflite.rst index 3a4de7735..9c90bcea1 100644 --- a/source/linux/Foundational_Components/Machine_Learning/tflite.rst +++ b/source/linux/Foundational_Components/Machine_Learning/tflite.rst @@ -18,7 +18,7 @@ It supports on-device inference with low latency and a compact binary size. 
You Features ******** - - TensorFlow Lite v2.18.0 via Yocto - `meta-arago-extras/recipes-framework/tensorflow-lite/tensorflow-lite_2.18.0.bb `__ + - TensorFlow Lite v2.20.0 via Yocto - `meta-arago-extras/recipes-framework/tensorflow-lite/tensorflow-lite_2.20.0.bb `__ - Multithreaded computation with acceleration using Arm Neon SIMD instructions on Cortex-A cores - C++ Library and Python interpreter (supported Python version 3) - TensorFlow Lite Model benchmark Tool (i.e. :command:`benchmark_model`) @@ -89,23 +89,21 @@ The output of the benchmarking application should be similar to: root@am62xx-evm:~# /opt/tensorflow-lite/tools/benchmark_model --graph=/usr/share/oob-demo-assets/models/ssd_mobilenet_v2_coco.tflite --num_threads=4 --use_xnnpack=false INFO: STARTING! INFO: Log parameter values verbosely: [0] - INFO: Num threads: [4] INFO: Graph: [/usr/share/oob-demo-assets/models/ssd_mobilenet_v2_coco.tflite] INFO: Signature to run: [] - INFO: #threads used for CPU inference: [4] INFO: Use xnnpack: [0] INFO: Loaded model /usr/share/oob-demo-assets/models/ssd_mobilenet_v2_coco.tflite INFO: The input model file size (MB): 67.3128 - INFO: Initialized session in 6.418ms. + INFO: Initialized session in 5.579ms. INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds. - INFO: count=1 curr=1041765 + INFO: count=1 curr=1357602 p5=1357602 median=1357602 p95=1357602 INFO: Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds. 
- INFO: count=50 first=977738 curr=964908 min=911877 max=1112273 avg=971535 std=39112 + INFO: count=50 first=1249964 curr=1240143 min=1238588 max=1252566 avg=1.24027e+06 std=2565 p5=1238753 median=1239807 p95=1247415 - INFO: Inference timings in us: Init: 6418, First inference: 1041765, Warmup (avg): 1.04176e+06, Inference (avg): 971535 + INFO: Inference timings in us: Init: 5579, First inference: 1357602, Warmup (avg): 1.3576e+06, Inference (avg): 1.24027e+06 INFO: Note: as the benchmark tool itself affects memory footprint, the following is only APPROXIMATE to the actual memory footprint of the model at runtime. Take the information at your discretion. - INFO: Memory footprint delta from the start of the tool (MB): init=6.14844 overall=109.848 + INFO: Memory footprint delta from the start of the tool (MB): init=6.36328 overall=109.832 Where, @@ -130,26 +128,23 @@ The output of the benchmarking application should be similar to, root@am62xx-evm:~# /opt/tensorflow-lite/tools/benchmark_model --graph=/usr/share/oob-demo-assets/models/ssd_mobilenet_v2_coco.tflite --num_threads=4 --use_xnnpack=true INFO: STARTING! INFO: Log parameter values verbosely: [0] - INFO: Num threads: [4] INFO: Graph: [/usr/share/oob-demo-assets/models/ssd_mobilenet_v2_coco.tflite] INFO: Signature to run: [] - INFO: #threads used for CPU inference: [4] INFO: Use xnnpack: [1] INFO: Loaded model /usr/share/oob-demo-assets/models/ssd_mobilenet_v2_coco.tflite INFO: Created TensorFlow Lite XNNPACK delegate for CPU. INFO: XNNPACK delegate created. INFO: Explicitly applied XNNPACK delegate, and the model graph will be partially executed by the delegate w/ 1 delegate kernels. INFO: The input model file size (MB): 67.3128 - INFO: Initialized session in 592.232ms. + INFO: Initialized session in 614.333ms. INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds. 
- INFO: count=1 curr=633430 - + INFO: count=1 curr=905463 p5=905463 median=905463 p95=905463 INFO: Running benchmark for at least 50 iterations and at least 1 seconds but terminate if exceeding 150 seconds. - INFO: count=50 first=605745 curr=618849 min=568228 max=722188 avg=602943 std=27690 - - INFO: Inference timings in us: Init: 592232, First inference: 633430, Warmup (avg): 633430, Inference (avg): 602943 + INFO: count=50 first=900416 curr=898333 min=898007 max=906121 avg=899641 std=1549 p5=898333 median=899281 p95=904305 + INFO: Inference timings in us: Init: 614333, First inference: 905463, Warmup (avg): 905463, Inference (avg): 899641 INFO: Note: as the benchmark tool itself affects memory footprint, the following is only APPROXIMATE to the actual memory footprint of the model at runtime. Take the information at your discretion. - INFO: Memory footprint delta from the start of the tool (MB): init=133.086 overall=149.531 + INFO: Memory footprint delta from the start of the tool (MB): init=146.363 overall=150.141 + Where, @@ -166,14 +161,14 @@ The following performance numbers are captured with :command:`benchmark_model` o :header: "SOC", "Delegates", "Inference Time (sec)", "Initialization Time (ms)", "Overall Memory Footprint (MB)" :widths: 10, 10, 20, 20, 20 - "AM62X", "CPU only", "0.977168", "6.129", "110.07" - "", "XNNPACK", "0.613474", "593.558", "149.699" - "AM62PX", "CPU only", "0.419261", "4.79", "108.707" - "", "XNNPACK", "0.274756", "1208.04", "149.395" - "AM64X", "CPU only", "1.10675", "144.535", "109.562" - "", "XNNPACK", "0.702809", "601.33", "149.602" - "AM62L", "CPU only", "1.04867", "6.088", "110.129" - "", "XNNPACK", "0.661133", "466.216", "149.703" + "AM62X", "CPU only", "1.24027", "5.579", "109.832" + "", "XNNPACK", "0.899641", "614.333", "150.141" + "AM62PX", "CPU only", "1.23341", "252.390", "111.121" + "", "XNNPACK", "0.875280", "597.639", "150.52" + "AM64X", "CPU only", "1.26429", "135.579", "110.188" + "", "XNNPACK", "0.740743", 
"885.636", "150.484" + "AM62L", "CPU only", "1.3708", "807.076", "111.152" + "", "XNNPACK", "0.930577", "769.145", "150.496" Based on the above data, using the XNNPACK delegate significantly improves inference times across all SoCs, though it generally increases initialization time and overall memory footprint. @@ -185,10 +180,12 @@ Based on the above data, using the XNNPACK delegate significantly improves infer Example Applications ******************** -|__SDK_FULL_NAME__| has integrated opensource components like NNStreamer which can be used for neural network inferencing using the sample tflite models under :file:`/usr/share/oob-demo-assets/models/` -Checkout the Object Detection usecase under :ref:`TI Apps Launcher - User Guide ` +.. ifconfig:: CONFIG_part_variant in ('AM62X', 'AM62LX', 'AM62PX') -Alternatively, if a display is connected, you can run the Object Detection pipeline using this command, + |__SDK_FULL_NAME__| has integrated opensource components like NNStreamer which can be used for neural network inferencing using the sample tflite models under :file:`/usr/share/oob-demo-assets/models/` + Checkout the Object Detection usecase under :ref:`TI Apps Launcher - User Guide ` + + Alternatively, if a display is connected, you can run the Object Detection pipeline using this command, .. ifconfig:: CONFIG_part_variant in ('AM62X', 'AM62LX') @@ -248,6 +245,47 @@ Alternatively, if a display is connected, you can run the Object Detection pipel The above GStreamer pipeline reads an H.264 video file, decodes it, and processes it for object detection using a TensorFlow Lite model, displaying bounding boxes around detected objects. The processed video is then composited and rendered on the screen using the ``kmssink`` element. +.. 
ifconfig:: CONFIG_part_variant in ('AM62DX') + + |__SDK_FULL_NAME__| has integrated opensource components like NNStreamer which can be used for neural network inferencing using the sample TensorFlow Lite models under :file:`/usr/share/oob-demo-assets/models/` + + If an audio input device is connected, you can run the Audio Classification pipeline using this command: + + .. code-block:: console + + gst-launch-1.0 \ + alsasrc ! \ + audioconvert ! \ + audioresample ! \ + audio/x-raw,format=S16LE,channels=1,rate=16000,layout=interleaved ! \ + tensor_converter frames-per-tensor=3900 ! \ + tensor_aggregator \ + frames-in=3900 \ + frames-out=15600 \ + frames-flush=3900 \ + frames-dim=1 ! \ + tensor_transform \ + mode=arithmetic \ + option=typecast:float32,add:0.5,div:32767.5 ! \ + tensor_transform \ + mode=transpose \ + option=1:0:2:3 ! \ + queue \ + leaky=2 \ + max-size-buffers=10 ! \ + tensor_filter \ + framework=tensorflow2-lite \ + model=/usr/share/oob-demo-assets/models/yamnet_audio_classification.tflite \ + custom=Delegate:XNNPACK,NumThreads:2 ! \ + tensor_decoder \ + mode=image_labeling \ + option1=/usr/share/oob-demo-assets/labels/yamnet_label_list.txt ! \ + filesink \ + buffer-mode=2 \ + location=/dev/stdout + + The above GStreamer pipeline captures real-time audio from an ALSA source, converts it to the required format, and processes it for audio event classification using the YAMNet TensorFlow Lite model. The audio data is aggregated into tensors, normalized for machine learning input, and classified to identify various audio events and sounds. The classification results are decoded to human-readable labels and output to stdout. + .. 
attention:: The Example Applications section is not applicable for AM64x diff --git a/source/linux/Foundational_Components_Machine_Learning.rst b/source/linux/Foundational_Components_Machine_Learning.rst index 5028f0fab..3d6ed54ae 100644 --- a/source/linux/Foundational_Components_Machine_Learning.rst +++ b/source/linux/Foundational_Components_Machine_Learning.rst @@ -19,7 +19,7 @@ Machine Learning .. rubric:: `TensorFlow Lite `__ * Open source deep learning runtime for on-device inference. - * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM5x, AM6x Sitara devices). + * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM6x Sitara devices). * Imports Tensorflow Lite models. * Uses TIDL import tool to create TIDL offloadable Tensorflow Lite models, which can be executed via Tensorflow Lite runtime with TIDL acceleration. @@ -27,26 +27,28 @@ Machine Learning .. rubric:: `ONNX Runtime `__ * Open source inference engine available from Arm. - * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM5x, AM6x Sitara devices). + * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM6x Sitara devices). .. rubric:: `Arm NN `__ * Open source inference engine available from Arm. - * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM5x, AM6x Sitara devices). + * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM6x Sitara devices). * Imports ONNX and TensorFlow Lite models. * Provides TensorFlow Lite delegate. - .. rubric:: `RNN Library `__ + .. rubric:: `Arm Compute Library `__ - * Provides Long Short-Term Memory (LSTM) and fully connected layers in a standalone library to allow for rapid prototyping of inference applications that require Recurrent Neural Networks. - * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM5x, AM6x Sitara devices). - * Integrated into TI's Processor SDK Linux in an OOB demo for `Predictive Maintenance `__. + * Open source compute library available from Arm. + * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM6x Sitara devices). 
+ * Provides highly optimized kernels for NEON (Advanced SIMD) and CPU acceleration. + * Used as a backend to accelerate ML frameworks like Arm NN. - .. rubric:: `TI Deep Learning (TIDL) `__ + .. rubric:: `NNStreamer `__ - * Accelerates deep learning inference on C66x DSP cores and/or on Embedded Vision Engine (EVE) subsystems. - * Available on AM57x device only. - * Supports CNN at the moment, and imports Caffe, ONNX, and Tensorflow models. + * Open source framework based on GStreamer for neural network pipelines. + * Runs on all Cortex-A ARM cores (AM3x, AM4x, AM6x Sitara devices). + * Supports many backends such as TensorFlow Lite and Arm NN. + * Enables easy integration of ML inference into streaming pipelines. .. ifconfig:: CONFIG_part_family in ('J7_family') @@ -55,17 +57,20 @@ Machine Learning TI's Processor SDK Linux, free to download and use. Jacinto machine learning today consists of Neo-AI-DLR library. -+--------------------------+-----------+-----------------------+--------------------+--------------------+ -| ML inference Library | Version | Delegate / | Python API | C/C++ API | -| | | Execution provider | | | -+==========================+===========+=======================+====================+====================+ -| TensorFlow Lite | 2.18.0 | CPU, XNNPACK, ARMNN | YES | YES | -+--------------------------+-----------+-----------------------+--------------------+--------------------+ -| ONNX Runtime | 1.20.1 | CPU, ACL | YES | YES | -+--------------------------+-----------+-----------------------+--------------------+--------------------+ -| Arm NN | 1.20.1 | ACL | YES | YES | -+--------------------------+-----------+-----------------------+--------------------+--------------------+ - ++--------------------------+-----------+-------------------------+--------------------+--------------------+ +| ML inference Library | Version | Delegate / | Python API | C/C++ API | +| | | Execution provider | | | 
++==========================+===========+=========================+====================+====================+ +| TensorFlow Lite | 2.20.0 | CPU, XNNPACK, ARMNN | Yes | Yes | ++--------------------------+-----------+-------------------------+--------------------+--------------------+ +| ONNX Runtime | 1.23.2 | CPU, ACL | Yes | Yes | ++--------------------------+-----------+-------------------------+--------------------+--------------------+ +| Arm NN | 26.01 | ACL | Yes | Yes | ++--------------------------+-----------+-------------------------+--------------------+--------------------+ +| Arm Compute Library | 52.7.0 | NA (Backend Library) | Yes | Yes | ++--------------------------+-----------+-------------------------+--------------------+--------------------+ +| NNStreamer | 2.6.0 | NA (Pipeline Framework) | Yes | Yes | ++--------------------------+-----------+-------------------------+--------------------+--------------------+ .. toctree::