diff --git a/.github/workflows/build-cs-steps.yml b/.github/workflows/build-cs-steps.yml
index 9b089bc6..cf680d49 100644
--- a/.github/workflows/build-cs-steps.yml
+++ b/.github/workflows/build-cs-steps.yml
@@ -41,16 +41,41 @@ jobs:
         env:
           NUGET_AUTH_TOKEN: ${{ secrets.AZURE_DEVOPS_PAT }}
 
+      - name: Generate temporary NuGet.config
+        run: |
+          # The repo-level NuGet.config cleared all sources and only included ORT-Nightly.
+          # We generate a temporary one with both nuget.org and ORT-Nightly.
+          # We provide credentials to allow the ORT-Nightly feed to pull from its upstreams.
+          $xml = @"
+          <?xml version="1.0" encoding="utf-8"?>
+          <configuration>
+            <packageSources>
+              <clear />
+              <add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
+              <add key="ORT-Nightly" value="https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json" />
+            </packageSources>
+            <!-- The PAT is supplied via the NUGET_AUTH_TOKEN environment variable set above. -->
+            <packageSourceCredentials>
+              <ORT-Nightly>
+                <add key="Username" value="pat" />
+                <add key="ClearTextPassword" value="%NUGET_AUTH_TOKEN%" />
+              </ORT-Nightly>
+            </packageSourceCredentials>
+          </configuration>
+          "@
+          Set-Content -Path sdk/cs/NuGet.temp.config -Value $xml
+        shell: pwsh
+      # TODO: once the nightly packaging is fixed, add back the commented out lines with /p:FoundryLocalCoreVersion="*-*"
       # /p:FoundryLocalCoreVersion="*-*" to always use nightly version of Foundry Local Core
       - name: Restore dependencies
         run: |
-          # dotnet restore sdk/cs/src/Microsoft.AI.Foundry.Local.csproj /p:UseWinML=${{ inputs.useWinML }} /p:FoundryLocalCoreVersion="*-*" --configfile sdk/cs/NuGet.config
-          dotnet restore sdk/cs/src/Microsoft.AI.Foundry.Local.csproj /p:UseWinML=${{ inputs.useWinML }} --configfile sdk/cs/NuGet.config
+          # Clear the local NuGet cache to avoid bad metadata or corrupted package states.
+          dotnet nuget locals all --clear
+          # Restore using the temporary config file with credentials.
+          dotnet restore sdk/cs/src/Microsoft.AI.Foundry.Local.csproj /p:UseWinML=${{ inputs.useWinML }} --configfile sdk/cs/NuGet.temp.config
 
       - name: Build solution
         run: |
-          # dotnet build sdk/cs/src/Microsoft.AI.Foundry.Local.csproj --no-restore --configuration ${{ inputs.buildConfiguration }} /p:UseWinML=${{ inputs.useWinML }} /p:FoundryLocalCoreVersion="*-*"
           dotnet build sdk/cs/src/Microsoft.AI.Foundry.Local.csproj --no-restore --configuration ${{ inputs.buildConfiguration }} /p:UseWinML=${{ inputs.useWinML }}
 
       # need to use direct git commands to clone from Azure DevOps instead of actions/checkout
@@ -86,6 +111,7 @@ jobs:
       - name: Run Foundry Local Core tests
         run: |
           # dotnet test sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj --verbosity normal /p:UseWinML=${{ inputs.useWinML }} /p:FoundryLocalCoreVersion="*-*"
+          # Use the temporary config file for test restore as well.
           dotnet test sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj --verbosity normal /p:UseWinML=${{ inputs.useWinML }}
 
       - name: Pack NuGet package
diff --git a/.github/workflows/build-js-steps.yml b/.github/workflows/build-js-steps.yml
index a806933c..cbfe356e 100644
--- a/.github/workflows/build-js-steps.yml
+++ b/.github/workflows/build-js-steps.yml
@@ -45,7 +45,7 @@ jobs:
       - name: Format version for JS
         shell: pwsh
         run: |
-          # Release: 0.9.0.41 -> 0.9.0-41
+          # Release: 1.0.0.41 -> 1.0.0-41
           $version = "${{ inputs.version }}"
           $versionParts = $version -split '\.'
           $baseVersion = ($versionParts[0..2]) -join '.'
@@ -84,22 +84,18 @@ jobs:
           Write-Host "`nDirectory contents:"
           Get-ChildItem -Recurse -Depth 2 | ForEach-Object { Write-Host "  $($_.FullName)" }
-
-      - name: npm install (WinML)
-        if: ${{ inputs.useWinML == true }}
+      # The .npmrc points to an Azure Artifacts feed for CFS compliance.
+      # Remove it in CI so npm uses the public registry directly.
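+      # (Deleting it here only affects the CI workspace; the committed .npmrc is unchanged.)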
+ - name: Remove .npmrc (use public registry) + shell: pwsh working-directory: sdk/js - run: npm install --winml + run: | + if (Test-Path .npmrc) { Remove-Item .npmrc -Force; Write-Host "Removed .npmrc" } - - name: npm install (Standard) - if: ${{ inputs.useWinML == false }} + - name: npm install working-directory: sdk/js run: npm install - # Verify that installing new packages doesn't strip custom native binary folders - - name: npm install openai (verify persistence) - working-directory: sdk/js - run: npm install openai - - name: Set package version working-directory: sdk/js run: npm version ${{ env.ProjectVersion }} --no-git-tag-version --allow-same-version @@ -112,21 +108,15 @@ jobs: working-directory: sdk/js run: npm run build - - name: Pack npm package + - name: Pack npm package (WinML) + if: ${{ inputs.useWinML == true }} working-directory: sdk/js - run: npm pack + run: npm run pack:winml - - name: Rename WinML artifact - if: ${{ inputs.useWinML == true }} - shell: pwsh + - name: Pack npm package (Standard) + if: ${{ inputs.useWinML == false }} working-directory: sdk/js - run: | - $tgz = Get-ChildItem *.tgz | Select-Object -First 1 - if ($tgz) { - $newName = $tgz.Name -replace '^foundry-local-sdk-', 'foundry-local-sdk-winml-' - Rename-Item -Path $tgz.FullName -NewName $newName - Write-Host "Renamed $($tgz.Name) to $newName" - } + run: npm run pack - name: Upload npm packages uses: actions/upload-artifact@v4 diff --git a/.github/workflows/build-python-steps.yml b/.github/workflows/build-python-steps.yml new file mode 100644 index 00000000..dc180bb4 --- /dev/null +++ b/.github/workflows/build-python-steps.yml @@ -0,0 +1,110 @@ +name: Build Python SDK + +on: + workflow_call: + inputs: + version: + required: true + type: string + useWinML: + required: false + type: boolean + default: false + platform: + required: false + type: string + default: 'windows' + +permissions: + contents: read + +jobs: + build: + runs-on: ${{ inputs.platform }}-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + clean: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + # Clone test-data-shared from Azure DevOps (models for integration tests) + - name: Checkout test-data-shared from Azure DevOps + shell: pwsh + working-directory: ${{ github.workspace }}/.. + run: | + $pat = "${{ secrets.AZURE_DEVOPS_PAT }}" + $encodedPat = [Convert]::ToBase64String([Text.Encoding]::ASCII.GetBytes(":$pat")) + + git config --global http.https://dev.azure.com.extraheader "AUTHORIZATION: Basic $encodedPat" + + git lfs install + git clone --depth 1 https://dev.azure.com/microsoft/windows.ai.toolkit/_git/test-data-shared test-data-shared + + Write-Host "Clone completed successfully to ${{ github.workspace }}/../test-data-shared" + + - name: Checkout specific commit in test-data-shared + shell: pwsh + working-directory: ${{ github.workspace }}/../test-data-shared + run: | + git checkout 231f820fe285145b7ea4a449b112c1228ce66a41 + if ($LASTEXITCODE -ne 0) { + Write-Error "Git checkout failed." 
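+            # Fail the step explicitly so the job stops on a bad checkout.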
+            exit 1
+          }
+
+      - name: Install build tool
+        run: |
+          python -m pip install build
+
+      - name: Configure pip for Azure Artifacts
+        run: |
+          pip config set global.index-url https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/pypi/simple/
+          pip config set global.extra-index-url https://pypi.org/simple/
+          pip config set global.pre true
+
+      - name: Set package version
+        working-directory: sdk/python
+        run: echo '__version__ = "${{ inputs.version }}"' > src/version.py
+
+      - name: Build wheel (Cross-Platform)
+        if: ${{ inputs.useWinML == false }}
+        working-directory: sdk/python
+        run: python -m build --wheel --outdir dist/
+
+      - name: Build wheel (WinML)
+        if: ${{ inputs.useWinML == true }}
+        working-directory: sdk/python
+        run: python -m build --wheel -C winml=true --outdir dist/
+
+      - name: Install built wheel
+        working-directory: sdk/python
+        shell: pwsh
+        run: |
+          $wheel = (Get-ChildItem dist/*.whl | Select-Object -First 1).FullName
+          pip install $wheel
+
+      - name: Install test dependencies
+        # Quote the version specifiers so the shell does not treat '>' as redirection.
+        run: pip install coverage "pytest>=7.0.0" "pytest-timeout>=2.1.0"
+
+      - name: Run tests
+        working-directory: sdk/python
+        run: python -m pytest test/ -v
+
+      - name: Upload Python packages
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-sdk-${{ inputs.platform }}${{ inputs.useWinML == true && '-winml' || '' }}
+          path: sdk/python/dist/*
+
+      - name: Upload flcore logs
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: python-sdk-${{ inputs.platform }}${{ inputs.useWinML == true && '-winml' || '' }}-logs
+          path: sdk/python/logs/**
diff --git a/.github/workflows/build-rust-steps.yml b/.github/workflows/build-rust-steps.yml
index 7649acaa..f007b7ee 100644
--- a/.github/workflows/build-rust-steps.yml
+++ b/.github/workflows/build-rust-steps.yml
@@ -28,7 +28,7 @@ jobs:
     working-directory: sdk/rust
 
     env:
-      CARGO_FEATURES: ${{ inputs.useWinML && '--features winml' || '' }}
+      CARGO_FEATURES: ${{ inputs.useWinML && '--features winml,nightly' || '--features nightly' }}
 
     steps:
     - name: Checkout repository
@@ -46,6 +46,18 @@ jobs:
       with:
         workspaces: sdk/rust -> target
 
+      # The .cargo/config.toml redirects crates-io to an Azure Artifacts feed
+      # for CFS compliance. Remove the redirect in CI so cargo can fetch from
+      # crates.io directly without Azure DevOps auth.
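+      # With the redirect removed, cargo falls back to its built-in crates.io source.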
+ - name: Use crates.io directly + shell: pwsh + working-directory: sdk/rust + run: | + if (Test-Path .cargo/config.toml) { + Remove-Item .cargo/config.toml + Write-Host "Removed .cargo/config.toml crates-io redirect" + } + - name: Checkout test-data-shared from Azure DevOps if: ${{ inputs.run-integration-tests }} shell: pwsh diff --git a/.github/workflows/foundry-local-sdk-build.yml b/.github/workflows/foundry-local-sdk-build.yml index 9ac5fe04..048a5a59 100644 --- a/.github/workflows/foundry-local-sdk-build.yml +++ b/.github/workflows/foundry-local-sdk-build.yml @@ -17,62 +17,30 @@ permissions: contents: read jobs: - build-cs-windows: - uses: ./.github/workflows/build-cs-steps.yml - with: - version: '0.9.0.${{ github.run_number }}' - platform: 'windows' - secrets: inherit - build-js-windows: - uses: ./.github/workflows/build-js-steps.yml - with: - version: '0.9.0.${{ github.run_number }}' - platform: 'windows' - secrets: inherit - build-rust-windows: - uses: ./.github/workflows/build-rust-steps.yml - with: - platform: 'windows' - run-integration-tests: true - secrets: inherit - - build-cs-windows-WinML: - uses: ./.github/workflows/build-cs-steps.yml - with: - version: '0.9.0.${{ github.run_number }}' - platform: 'windows' - useWinML: true - secrets: inherit - build-js-windows-WinML: - uses: ./.github/workflows/build-js-steps.yml - with: - version: '0.9.0.${{ github.run_number }}' - platform: 'windows' - useWinML: true - secrets: inherit - build-rust-windows-WinML: - uses: ./.github/workflows/build-rust-steps.yml - with: - platform: 'windows' - useWinML: true - run-integration-tests: true - secrets: inherit - + # Windows build/test moved to .pipelines/foundry-local-packaging.yml and runs in ADO + # MacOS ARM64 not supported in ADO, need to use GitHub Actions build-cs-macos: uses: ./.github/workflows/build-cs-steps.yml with: - version: '0.9.0.${{ github.run_number }}' + version: '1.0.0.${{ github.run_number }}' platform: 'macos' secrets: inherit build-js-macos: uses: ./.github/workflows/build-js-steps.yml with: - version: '0.9.0.${{ github.run_number }}' + version: '1.0.0.${{ github.run_number }}' + platform: 'macos' + secrets: inherit + build-python-macos: + uses: ./.github/workflows/build-python-steps.yml + with: + version: '1.0.0.${{ github.run_number }}' platform: 'macos' secrets: inherit build-rust-macos: uses: ./.github/workflows/build-rust-steps.yml with: + version: '1.0.0.${{ github.run_number }}' platform: 'macos' run-integration-tests: true secrets: inherit \ No newline at end of file diff --git a/.github/workflows/samples-integration-test.yml b/.github/workflows/samples-integration-test.yml new file mode 100644 index 00000000..a61fb5d4 --- /dev/null +++ b/.github/workflows/samples-integration-test.yml @@ -0,0 +1,260 @@ +name: Samples Build Check + +on: + pull_request: + paths: + - 'samples/**' + - '.github/workflows/samples-integration-test.yml' + push: + paths: + - 'samples/**' + - '.github/workflows/samples-integration-test.yml' + branches: + - main + workflow_dispatch: + +permissions: + contents: read + +jobs: + # ── Python Samples ────────────────────────────────────────────────── + python-samples: + runs-on: ${{ matrix.platform }}-latest + strategy: + fail-fast: false + matrix: + platform: [windows, macos] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + clean: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Configure pip for Azure Artifacts + run: | + pip config set global.index-url 
https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ + pip config set global.extra-index-url https://pypi.org/simple/ + pip config set global.pre true + + - name: Build and install SDK from source + working-directory: sdk/python + shell: pwsh + run: | + python -m pip install build + echo '__version__ = "0.0.0-dev"' > src/version.py + python -m build --wheel --outdir dist/ + $wheel = (Get-ChildItem dist/*.whl | Select-Object -First 1).FullName + pip install $wheel + + - name: Install sample dependencies + shell: pwsh + run: | + Get-ChildItem samples/python/*/requirements.txt -ErrorAction SilentlyContinue | ForEach-Object { + Write-Host "Installing dependencies for $($_.Directory.Name)..." + pip install -r $_.FullName + } + + - name: Syntax check Python samples + shell: pwsh + run: | + $failed = @() + $samples = Get-ChildItem samples/python/*/src/app.py -ErrorAction SilentlyContinue + foreach ($sample in $samples) { + $name = $sample.Directory.Parent.Name + Write-Host "=== Checking: $name ===" + python -m py_compile $sample.FullName + if ($LASTEXITCODE -ne 0) { + Write-Host "FAILED: $name" + $failed += $name + } else { + Write-Host "OK: $name" + } + } + if ($failed.Count -gt 0) { + Write-Error "Failed syntax checks: $($failed -join ', ')" + exit 1 + } + + # ── JavaScript Samples ────────────────────────────────────────────── + js-samples: + runs-on: ${{ matrix.platform }}-latest + strategy: + fail-fast: false + matrix: + platform: [windows, macos] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + clean: true + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '20.x' + + - name: Setup .NET SDK for NuGet authentication + uses: actions/setup-dotnet@v5 + with: + dotnet-version: '10.0.x' + + - name: Build SDK from source + working-directory: sdk/js + run: | + npm install + npm run build + npm link + + - name: Syntax check JS samples + shell: pwsh + run: | + $failed = @() + # Find all sample app.js files (either in root or src/) + $samples = @() + $samples += Get-ChildItem samples/js/*/app.js -ErrorAction SilentlyContinue + $samples += Get-ChildItem samples/js/*/src/app.js -ErrorAction SilentlyContinue + foreach ($sample in $samples) { + $dir = if ($sample.Directory.Name -eq 'src') { $sample.Directory.Parent } else { $sample.Directory } + $name = $dir.Name + Write-Host "=== Checking: $name ===" + # Link SDK and install dependencies + Push-Location $dir.FullName + npm link foundry-local-sdk 2>$null + if (Test-Path "package.json") { npm install 2>$null } + Pop-Location + # Syntax check + node --check $sample.FullName 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-Host "FAILED: $name" + $failed += $name + } else { + Write-Host "OK: $name" + } + } + if ($failed.Count -gt 0) { + Write-Error "Failed syntax checks: $($failed -join ', ')" + exit 1 + } + + # ── C# Samples ───────────────────────────────────────────────────── + cs-samples: + runs-on: ${{ matrix.platform }}-latest + strategy: + fail-fast: false + matrix: + platform: [windows, macos] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + clean: true + + - name: Setup .NET SDK + uses: actions/setup-dotnet@v5 + with: + dotnet-version: | + 8.0.x + 10.0.x + + - name: Build SDK from source + shell: pwsh + run: | + # Build cross-platform SDK package + # Note: /p:TreatWarningsAsErrors=false avoids failing on SDK doc warnings + dotnet pack sdk/cs/src/Microsoft.AI.Foundry.Local.csproj ` + -o local-packages ` + /p:Version=1.0.0-rc1 ` + 
/p:IsPacking=true ` + /p:TreatWarningsAsErrors=false ` + --configuration Release + + # Build WinML SDK package (Windows only) + if ($IsWindows) { + dotnet pack sdk/cs/src/Microsoft.AI.Foundry.Local.csproj ` + -o local-packages ` + /p:Version=1.0.0-rc1 ` + /p:UseWinML=true ` + /p:IsPacking=true ` + /p:TreatWarningsAsErrors=false ` + --configuration Release + } + + Write-Host "Local packages:" + Get-ChildItem local-packages/*.nupkg | ForEach-Object { Write-Host " $($_.Name)" } + + - name: Build C# samples + shell: pwsh + run: | + $failed = @() + $projects = Get-ChildItem samples/cs -Recurse -Filter "*.csproj" + foreach ($proj in $projects) { + $name = $proj.BaseName + Write-Host "`n=== Building: $name ===" + dotnet build $proj.FullName --configuration Debug 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-Host "BUILD FAILED: $name" + $failed += $name + } else { + Write-Host "BUILD PASSED: $name" + } + } + if ($failed.Count -gt 0) { + Write-Error "Failed builds: $($failed -join ', ')" + exit 1 + } + + # ── Rust Samples ──────────────────────────────────────────────────── + rust-samples: + runs-on: ${{ matrix.platform }}-latest + strategy: + fail-fast: false + matrix: + platform: [windows, macos] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + clean: true + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: clippy + + - name: Cache cargo dependencies + uses: Swatinem/rust-cache@v2 + with: + workspaces: samples/rust -> target + + - name: Use crates.io directly + shell: pwsh + run: | + # Remove crates-io redirect in SDK (points to Azure Artifacts) + $configPath = "sdk/rust/.cargo/config.toml" + if (Test-Path $configPath) { + Remove-Item $configPath + Write-Host "Removed sdk/rust/.cargo/config.toml" + } + # Remove crates-io redirect in samples + $configPath = "samples/rust/.cargo/config.toml" + if (Test-Path $configPath) { + Remove-Item $configPath + Write-Host "Removed samples/rust/.cargo/config.toml" + } + + - name: Build Rust samples workspace + working-directory: samples/rust + run: cargo build --workspace + + - name: Clippy check + working-directory: samples/rust + run: cargo clippy --workspace -- -D warnings diff --git a/.gitignore b/.gitignore index 8d088525..552012ec 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,6 @@ bin/ obj/ /src/cs/samples/ConsoleClient/test.http logs/ + +# Local NuGet packages built from source +local-packages/ diff --git a/.pipelines/foundry-local-packaging.yml b/.pipelines/foundry-local-packaging.yml new file mode 100644 index 00000000..c871cdf1 --- /dev/null +++ b/.pipelines/foundry-local-packaging.yml @@ -0,0 +1,807 @@ +# Foundry Local Packaging Pipeline +# +# Builds Foundry Local Core from neutron-server (windows.ai.toolkit project), +# then packages the C# and JS SDKs from this repo using the built Core. +# +# Produces artifacts: flc-nuget, flc-nuget-winml, flc-wheels, flc-wheels-winml, +# cs-sdk, cs-sdk-winml, js-sdk, js-sdk-winml, python-sdk, python-sdk-winml, +# rust-sdk, rust-sdk-winml + +pr: +- main +- releases/* + +name: $(Date:yyyyMMdd).$(Rev:r) + +parameters: +- name: version + displayName: 'Package version' + type: string + default: '1.0.0' +- name: prereleaseId + displayName: 'Pre-release identifier (e.g. rc1, beta).' 
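+  # 'none' (or empty) skips the pre-release suffix; non-release builds then get a '-dev.<timestamp>' suffix.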
+ type: string + default: 'none' +- name: isRelease + displayName: 'Release build' + type: boolean + default: false +- name: neutronServerBranch + displayName: 'Foundry Local Core branch (windows.ai.toolkit/neutron-server)' + type: string + default: 'dev/FoundryLocalCore/main' + +variables: +- group: FoundryLocal-ESRP-Signing + +resources: + repositories: + - repository: neutron-server + type: git + name: windows.ai.toolkit/neutron-server + endpoint: AIFoundryLocal-WindowsAIToolkit-SC + ref: refs/heads/${{ parameters.neutronServerBranch }} + - repository: test-data-shared + type: git + name: windows.ai.toolkit/test-data-shared + endpoint: AIFoundryLocal-WindowsAIToolkit-SC + lfs: true + ref: refs/heads/main + - repository: 1ESPipelineTemplates + type: git + name: 1ESPipelineTemplates/1ESPipelineTemplates + ref: refs/tags/release + +extends: + template: v1/1ES.Official.PipelineTemplate.yml@1ESPipelineTemplates + parameters: + settings: + networkIsolationPolicy: Permissive + pool: + # default all windows jobs, individual jobs override + name: onnxruntime-Win-CPU-2022 + os: windows + sdl: + binskim: + break: false + scanOutputDirectoryOnly: true + sourceRepositoriesToScan: + include: + - repository: neutron-server + - repository: test-data-shared + stages: + # ── Build & Test FLC ── + - stage: build_core + displayName: 'Build & Test FLC' + jobs: + - job: flc_win_x64 + displayName: 'FLC win-x64' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-win-x64' + targetPath: '$(Build.ArtifactStagingDirectory)/native' + steps: + - checkout: neutron-server + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-core-steps.yml@self + parameters: + flavor: win-x64 + platform: x64 + + - job: flc_win_arm64 + displayName: 'FLC win-arm64' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-win-arm64' + targetPath: '$(Build.ArtifactStagingDirectory)/native' + steps: + - checkout: neutron-server + clean: true + - template: .pipelines/templates/build-core-steps.yml@self + parameters: + flavor: win-arm64 + platform: arm64 + + - job: flc_linux_x64 + displayName: 'FLC linux-x64' + pool: + name: onnxruntime-Ubuntu2404-AMD-CPU + os: linux + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-linux-x64' + targetPath: '$(Build.ArtifactStagingDirectory)/native' + steps: + - checkout: neutron-server + clean: true + - template: .pipelines/templates/build-core-steps.yml@self + parameters: + flavor: linux-x64 + platform: x64 + + - job: flc_osx_arm64 + displayName: 'FLC osx-arm64' + pool: + name: Azure Pipelines + vmImage: 'macOS-14' + os: macOS + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-osx-arm64' + targetPath: '$(Build.ArtifactStagingDirectory)/native' + steps: + - checkout: neutron-server + clean: true + - template: .pipelines/templates/build-core-steps.yml@self + parameters: + flavor: osx-arm64 + platform: arm64 + + # ── Package FLC ── + - stage: package_core + displayName: 'Package FLC' + dependsOn: build_core + jobs: + - job: package_flc + displayName: 'Package FLC' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-nuget' + targetPath: '$(Build.ArtifactStagingDirectory)/flc-nuget' + - output: pipelineArtifact + artifactName: 'flc-wheels' + targetPath: 
'$(Build.ArtifactStagingDirectory)/flc-wheels' + steps: + - checkout: neutron-server + clean: true + - task: DownloadPipelineArtifact@2 + inputs: + buildType: current + artifactName: 'flc-win-x64' + targetPath: '$(Pipeline.Workspace)/flc-win-x64' + - task: DownloadPipelineArtifact@2 + inputs: + buildType: current + artifactName: 'flc-win-arm64' + targetPath: '$(Pipeline.Workspace)/flc-win-arm64' + - task: DownloadPipelineArtifact@2 + inputs: + buildType: current + artifactName: 'flc-linux-x64' + targetPath: '$(Pipeline.Workspace)/flc-linux-x64' + - task: DownloadPipelineArtifact@2 + inputs: + buildType: current + artifactName: 'flc-osx-arm64' + targetPath: '$(Pipeline.Workspace)/flc-osx-arm64' + - task: PowerShell@2 + displayName: 'List downloaded platform artifacts' + inputs: + targetType: inline + script: | + foreach ($name in @('flc-win-x64','flc-win-arm64','flc-linux-x64','flc-osx-arm64')) { + $dir = "$(Pipeline.Workspace)/$name" + Write-Host "Contents of ${dir}:" + if (Test-Path $dir) { Get-ChildItem $dir -Recurse | ForEach-Object { Write-Host $_.FullName } } + else { Write-Host " (directory not found)" } + } + - template: .pipelines/templates/package-core-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: false + platforms: + - name: win-x64 + artifactName: flc-win-x64 + - name: win-arm64 + artifactName: flc-win-arm64 + - name: linux-x64 + artifactName: flc-linux-x64 + - name: osx-arm64 + artifactName: flc-osx-arm64 + + # ── Build C# SDK ── + - stage: build_cs + displayName: 'Build C# SDK' + dependsOn: package_core + jobs: + - job: cs_sdk + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget' + targetPath: '$(Pipeline.Workspace)/flc-nuget' + outputs: + - output: pipelineArtifact + artifactName: 'cs-sdk' + targetPath: '$(Build.ArtifactStagingDirectory)/cs-sdk' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-cs-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: false + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget' + + # ── Build JS SDK ── + - stage: build_js + displayName: 'Build JS SDK' + dependsOn: package_core + jobs: + - job: js_sdk + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget' + targetPath: '$(Pipeline.Workspace)/flc-nuget' + outputs: + - output: pipelineArtifact + artifactName: 'js-sdk' + targetPath: '$(Build.ArtifactStagingDirectory)/js-sdk' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-js-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: false + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget' + + # ── Build Python SDK ── + - stage: build_python + displayName: 'Build Python SDK' + dependsOn: package_core + jobs: + - job: python_sdk + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-wheels' + targetPath: '$(Pipeline.Workspace)/flc-wheels' + outputs: + - output: 
pipelineArtifact + artifactName: 'python-sdk' + targetPath: '$(Build.ArtifactStagingDirectory)/python-sdk' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-python-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: false + flcWheelsDir: '$(Pipeline.Workspace)/flc-wheels' + + # ── Build Rust SDK ── + - stage: build_rust + displayName: 'Build Rust SDK' + dependsOn: package_core + jobs: + - job: rust_sdk + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget' + targetPath: '$(Pipeline.Workspace)/flc-nuget' + outputs: + - output: pipelineArtifact + artifactName: 'rust-sdk' + targetPath: '$(Build.ArtifactStagingDirectory)/rust-sdk' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-rust-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: false + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget' + + # ── Test C# SDK (win-x64) ── + - stage: test_cs + displayName: 'Test C# SDK' + dependsOn: build_cs + jobs: + - job: test_cs_win_x64 + displayName: 'Test C# (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget' + targetPath: '$(Pipeline.Workspace)/flc-nuget' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-cs-steps.yml@self + parameters: + version: ${{ parameters.version }} + isWinML: false + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget' + + # TODO: Add macOS (osx-arm64) test job when a macOS ARM64 pool is available. + # TODO: Add Linux (linux-x64) test job when Linux onnxruntime dependency is stabilized. + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. + + # ── Test JS SDK (win-x64) ── + - stage: test_js + displayName: 'Test JS SDK' + dependsOn: build_js + jobs: + - job: test_js_win_x64 + displayName: 'Test JS (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget' + targetPath: '$(Pipeline.Workspace)/flc-nuget' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-js-steps.yml@self + parameters: + version: ${{ parameters.version }} + isWinML: false + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget' + + # TODO: Add macOS (osx-arm64) test job when a macOS ARM64 pool is available. + # TODO: Add Linux (linux-x64) test job when Linux onnxruntime dependency is stabilized. + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. 
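+  # Each test stage re-downloads the packaged FLC artifact so tests run against the pipeline-built Core.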
+ + # ── Test Python SDK (win-x64) ── + - stage: test_python + displayName: 'Test Python SDK' + dependsOn: build_python + jobs: + - job: test_python_win_x64 + displayName: 'Test Python (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-wheels' + targetPath: '$(Pipeline.Workspace)/flc-wheels' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-python-steps.yml@self + parameters: + version: ${{ parameters.version }} + isWinML: false + flcWheelsDir: '$(Pipeline.Workspace)/flc-wheels' + + # TODO: Add macOS (osx-arm64) test job when a macOS ARM64 pool is available. + # TODO: Add Linux (linux-x64) test job when Linux onnxruntime dependency is stabilized. + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. + + # ── Test Rust SDK (win-x64) ── + - stage: test_rust + displayName: 'Test Rust SDK' + dependsOn: build_rust + jobs: + - job: test_rust_win_x64 + displayName: 'Test Rust (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget' + targetPath: '$(Pipeline.Workspace)/flc-nuget' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-rust-steps.yml@self + parameters: + isWinML: false + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget' + + # TODO: Add macOS (osx-arm64) test job when a macOS ARM64 pool is available. + # TODO: Add Linux (linux-x64) test job when Linux onnxruntime dependency is stabilized. + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. + + # ── Build & Test FLC (WinML) ── + - stage: build_core_winml + displayName: 'Build & Test FLC WinML' + dependsOn: [] + jobs: + - job: flc_winml_win_x64 + displayName: 'FLC win-x64 (WinML)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-winml-win-x64' + targetPath: '$(Build.ArtifactStagingDirectory)/native' + steps: + - checkout: neutron-server + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-core-steps.yml@self + parameters: + flavor: win-x64 + platform: x64 + isWinML: true + + - job: flc_winml_win_arm64 + displayName: 'FLC win-arm64 (WinML)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-winml-win-arm64' + targetPath: '$(Build.ArtifactStagingDirectory)/native' + steps: + - checkout: neutron-server + clean: true + - template: .pipelines/templates/build-core-steps.yml@self + parameters: + flavor: win-arm64 + platform: arm64 + isWinML: true + + # ── Package FLC (WinML) ── + - stage: package_core_winml + displayName: 'Package FLC WinML' + dependsOn: build_core_winml + jobs: + - job: package_flc_winml + displayName: 'Package FLC (WinML)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + outputs: + - output: pipelineArtifact + artifactName: 'flc-nuget-winml' + targetPath: '$(Build.ArtifactStagingDirectory)/flc-nuget' + - output: pipelineArtifact + artifactName: 'flc-wheels-winml' + targetPath: '$(Build.ArtifactStagingDirectory)/flc-wheels' + steps: + - checkout: neutron-server + clean: true + - task: DownloadPipelineArtifact@2 + inputs: + buildType: current + artifactName: 'flc-winml-win-x64' + targetPath: 
'$(Pipeline.Workspace)/flc-winml-win-x64' + - task: DownloadPipelineArtifact@2 + inputs: + buildType: current + artifactName: 'flc-winml-win-arm64' + targetPath: '$(Pipeline.Workspace)/flc-winml-win-arm64' + - task: PowerShell@2 + displayName: 'List downloaded WinML platform artifacts' + inputs: + targetType: inline + script: | + foreach ($name in @('flc-winml-win-x64','flc-winml-win-arm64')) { + $dir = "$(Pipeline.Workspace)/$name" + Write-Host "Contents of ${dir}:" + if (Test-Path $dir) { Get-ChildItem $dir -Recurse | ForEach-Object { Write-Host $_.FullName } } + else { Write-Host " (directory not found)" } + } + - template: .pipelines/templates/package-core-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: true + platforms: + - name: win-x64 + artifactName: flc-winml-win-x64 + - name: win-arm64 + artifactName: flc-winml-win-arm64 + + # ── Build C# SDK (WinML) ── + - stage: build_cs_winml + displayName: 'Build C# SDK WinML' + dependsOn: package_core_winml + jobs: + - job: cs_sdk_winml + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget-winml' + targetPath: '$(Pipeline.Workspace)/flc-nuget-winml' + outputs: + - output: pipelineArtifact + artifactName: 'cs-sdk-winml' + targetPath: '$(Build.ArtifactStagingDirectory)/cs-sdk-winml' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-cs-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: true + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget-winml' + outputDir: '$(Build.ArtifactStagingDirectory)/cs-sdk-winml' + + # ── Build JS SDK (WinML) ── + - stage: build_js_winml + displayName: 'Build JS SDK WinML' + dependsOn: package_core_winml + jobs: + - job: js_sdk_winml + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget-winml' + targetPath: '$(Pipeline.Workspace)/flc-nuget-winml' + outputs: + - output: pipelineArtifact + artifactName: 'js-sdk-winml' + targetPath: '$(Build.ArtifactStagingDirectory)/js-sdk' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-js-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: true + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget-winml' + + # ── Build Python SDK (WinML) ── + - stage: build_python_winml + displayName: 'Build Python SDK WinML' + dependsOn: package_core_winml + jobs: + - job: python_sdk_winml + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-wheels-winml' + targetPath: '$(Pipeline.Workspace)/flc-wheels-winml' + outputs: + - output: pipelineArtifact + artifactName: 'python-sdk-winml' + targetPath: '$(Build.ArtifactStagingDirectory)/python-sdk-winml' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-python-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ 
parameters.prereleaseId }} + isWinML: true + flcWheelsDir: '$(Pipeline.Workspace)/flc-wheels-winml' + outputDir: '$(Build.ArtifactStagingDirectory)/python-sdk-winml' + + # ── Build Rust SDK (WinML) ── + - stage: build_rust_winml + displayName: 'Build Rust SDK WinML' + dependsOn: package_core_winml + jobs: + - job: rust_sdk_winml + displayName: 'Build' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget-winml' + targetPath: '$(Pipeline.Workspace)/flc-nuget-winml' + outputs: + - output: pipelineArtifact + artifactName: 'rust-sdk-winml' + targetPath: '$(Build.ArtifactStagingDirectory)/rust-sdk-winml' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/build-rust-steps.yml@self + parameters: + version: ${{ parameters.version }} + isRelease: ${{ parameters.isRelease }} + prereleaseId: ${{ parameters.prereleaseId }} + isWinML: true + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget-winml' + outputDir: '$(Build.ArtifactStagingDirectory)/rust-sdk-winml' + + # ── Test C# SDK WinML (win-x64) ── + - stage: test_cs_winml + displayName: 'Test C# SDK WinML' + dependsOn: build_cs_winml + jobs: + - job: test_cs_winml_win_x64 + displayName: 'Test C# WinML (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget-winml' + targetPath: '$(Pipeline.Workspace)/flc-nuget-winml' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-cs-steps.yml@self + parameters: + version: ${{ parameters.version }} + isWinML: true + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget-winml' + + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. + + # ── Test JS SDK WinML (win-x64) ── + - stage: test_js_winml + displayName: 'Test JS SDK WinML' + dependsOn: build_js_winml + jobs: + - job: test_js_winml_win_x64 + displayName: 'Test JS WinML (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget-winml' + targetPath: '$(Pipeline.Workspace)/flc-nuget-winml' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-js-steps.yml@self + parameters: + version: ${{ parameters.version }} + isWinML: true + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget-winml' + + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. + + # ── Test Python SDK WinML (win-x64) ── + - stage: test_python_winml + displayName: 'Test Python SDK WinML' + dependsOn: build_python_winml + jobs: + - job: test_python_winml_win_x64 + displayName: 'Test Python WinML (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-wheels-winml' + targetPath: '$(Pipeline.Workspace)/flc-wheels-winml' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-python-steps.yml@self + parameters: + version: ${{ parameters.version }} + isWinML: true + flcWheelsDir: '$(Pipeline.Workspace)/flc-wheels-winml' + + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. 
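+  # WinML is Windows-only, so the WinML variants build and test only on Windows pools.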
+ + # ── Test Rust SDK WinML (win-x64) ── + - stage: test_rust_winml + displayName: 'Test Rust SDK WinML' + dependsOn: build_rust_winml + jobs: + - job: test_rust_winml_win_x64 + displayName: 'Test Rust WinML (win-x64)' + pool: + name: onnxruntime-Win-CPU-2022 + os: windows + templateContext: + inputs: + - input: pipelineArtifact + artifactName: 'flc-nuget-winml' + targetPath: '$(Pipeline.Workspace)/flc-nuget-winml' + steps: + - checkout: self + clean: true + - checkout: test-data-shared + lfs: true + - template: .pipelines/templates/test-rust-steps.yml@self + parameters: + isWinML: true + flcNugetDir: '$(Pipeline.Workspace)/flc-nuget-winml' + + # TODO: Add Windows ARM64 (win-arm64) test job when a Windows ARM64 pool is available. + diff --git a/.pipelines/templates/build-core-steps.yml b/.pipelines/templates/build-core-steps.yml new file mode 100644 index 00000000..9f024c42 --- /dev/null +++ b/.pipelines/templates/build-core-steps.yml @@ -0,0 +1,194 @@ +# Steps to build a single Foundry Local Core native AOT binary. +# Parameterized by flavor (RID) and platform (arch). +# The parent job must checkout 'neutron-server'. +parameters: +- name: flavor + type: string # e.g. win-x64, linux-x64, osx-arm64 +- name: platform + type: string # e.g. x64, arm64 +- name: isWinML + type: boolean + default: false + +steps: +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + # Multi-checkout places repos in subdirectories; single checkout places contents at root + $multiCheckout = "$(Build.SourcesDirectory)/neutron-server" + if (Test-Path $multiCheckout) { + $nsRoot = $multiCheckout + } else { + $nsRoot = "$(Build.SourcesDirectory)" + } + Write-Host "##vso[task.setvariable variable=nsRoot]$nsRoot" + Write-Host "neutron-server root: $nsRoot" + +- task: UseDotNet@2 + displayName: 'Use .NET SDK from global.json' + inputs: + packageType: sdk + useGlobalJson: true + workingDirectory: '$(nsRoot)' + +- task: PowerShell@2 + displayName: 'Override nuget.config' + inputs: + targetType: inline + script: | + $nugetConfig = @" + + + + + + + + + + + + + + + + + + "@ + Set-Content -Path "$(nsRoot)/nuget.config" -Value $nugetConfig + Write-Host "Updated nuget.config to use nuget.org, ORT-Nightly, and Neutron with mappings" + +- ${{ if eq(parameters.isWinML, true) }}: + - task: DotNetCoreCLI@2 + displayName: 'Restore FLC Core ${{ parameters.flavor }} (WinML)' + inputs: + command: restore + projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + feedsToUse: config + nugetConfigPath: '$(nsRoot)/nuget.config' + + - task: DotNetCoreCLI@2 + displayName: 'Build FLC Core ${{ parameters.flavor }} (WinML)' + inputs: + command: build + projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' + arguments: '--no-restore -r ${{ parameters.flavor }} -f net9.0-windows10.0.26100.0 /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + + - task: DotNetCoreCLI@2 + displayName: 'Publish FLC AOT ${{ parameters.flavor }} (WinML)' + inputs: + command: publish + projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' + arguments: '--no-restore --no-build -r ${{ parameters.flavor }} -f net9.0-windows10.0.26100.0 /p:Platform=${{ parameters.platform }} /p:Configuration=Release 
/p:PublishAot=true /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + publishWebProjects: false + zipAfterPublish: false + + - ${{ if eq(parameters.flavor, 'win-x64') }}: + - task: DotNetCoreCLI@2 + displayName: 'Restore FLC Tests ${{ parameters.flavor }} (WinML)' + inputs: + command: restore + projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + feedsToUse: config + nugetConfigPath: '$(nsRoot)/nuget.config' + + - task: DotNetCoreCLI@2 + displayName: 'Build FLC Tests ${{ parameters.flavor }} (WinML)' + inputs: + command: build + projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' + arguments: '--no-restore -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:NetTargetFramework=net9.0-windows10.0.26100.0 /p:UseWinML=true' + + - task: DotNetCoreCLI@2 + displayName: 'Test FLC ${{ parameters.flavor }} (WinML)' + inputs: + command: test + projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' + arguments: '--no-build --configuration Release -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }}' + +- ${{ if eq(parameters.isWinML, false) }}: + - task: DotNetCoreCLI@2 + displayName: 'Restore FLC Core ${{ parameters.flavor }}' + inputs: + command: restore + projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:TargetFramework=net9.0' + feedsToUse: config + nugetConfigPath: '$(nsRoot)/nuget.config' + + - task: DotNetCoreCLI@2 + displayName: 'Build FLC Core ${{ parameters.flavor }}' + inputs: + command: build + projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' + arguments: '--no-restore -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release' + + - ${{ if eq(parameters.flavor, 'win-x64') }}: + - task: DotNetCoreCLI@2 + displayName: 'Restore FLC Tests ${{ parameters.flavor }}' + inputs: + command: restore + projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' + restoreArguments: '-r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release /p:TargetFramework=net9.0' + feedsToUse: config + nugetConfigPath: '$(nsRoot)/nuget.config' + + - task: DotNetCoreCLI@2 + displayName: 'Build FLC Tests ${{ parameters.flavor }}' + inputs: + command: build + projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' + arguments: '--no-restore -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }} /p:IncludeWebService=true /p:Configuration=Release' + + - task: DotNetCoreCLI@2 + displayName: 'Test FLC ${{ parameters.flavor }}' + inputs: + command: test + projects: '$(nsRoot)/test/FoundryLocalCore/Core/FoundryLocalCore.Tests.csproj' + arguments: '--no-build --configuration Release -r ${{ parameters.flavor }} /p:Platform=${{ parameters.platform }}' + + - task: DotNetCoreCLI@2 + displayName: 'Publish FLC AOT ${{ parameters.flavor }}' + inputs: + command: publish + projects: '$(nsRoot)/src/FoundryLocalCore/Core/Core.csproj' + arguments: '--no-restore --no-build -r ${{ parameters.flavor }} /p:Platform=${{ 
parameters.platform }} /p:Configuration=Release /p:PublishAot=true /p:TargetFramework=net9.0' + publishWebProjects: false + zipAfterPublish: false + +# Cleanup non-binary files +- task: PowerShell@2 + displayName: 'Cleanup publish artifacts' + inputs: + targetType: inline + script: | + Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -Include "*.json", "*.xml" | + Remove-Item -Force + +# Stage the native binary for the artifact +- task: PowerShell@2 + displayName: 'Stage ${{ parameters.flavor }} binary' + inputs: + targetType: inline + script: | + $destDir = "$(Build.ArtifactStagingDirectory)/native" + New-Item -ItemType Directory -Path $destDir -Force | Out-Null + # WinML publishes additional files (e.g. WindowsAppRuntime Bootstrapper DLLs) + # beyond Microsoft.AI.Foundry.Local.Core.*. + $isWinML = "${{ parameters.isWinML }}" -eq "True" + if ($isWinML) { + Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | + Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" -or $_.Name -eq "Microsoft.WindowsAppRuntime.Bootstrap.dll" } | + Copy-Item -Destination $destDir -Force + } else { + Get-ChildItem "$(nsRoot)/artifacts/publish" -Recurse -File | + Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | + Copy-Item -Destination $destDir -Force + } + Write-Host "Staged binaries:" + Get-ChildItem $destDir | ForEach-Object { Write-Host " $($_.Name)" } + diff --git a/.pipelines/templates/build-cs-steps.yml b/.pipelines/templates/build-cs-steps.yml new file mode 100644 index 00000000..978c2fff --- /dev/null +++ b/.pipelines/templates/build-cs-steps.yml @@ -0,0 +1,191 @@ +# Steps to build, sign, and pack the C# SDK NuGet package. +# When test-data-shared is checked out alongside self, ADO places repos under +# $(Build.SourcesDirectory)/. The self repo is 'Foundry-Local'. 
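+# The 'Set source paths' step below derives repoRoot and testDataDir from that layout.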
+parameters:
+- name: version
+  type: string
+- name: isRelease
+  type: boolean
+  default: false
+- name: isWinML
+  type: boolean
+  default: false
+- name: flcNugetDir
+  type: string
+  displayName: 'Path to directory containing the FLC .nupkg'
+- name: outputDir
+  type: string
+  default: '$(Build.ArtifactStagingDirectory)/cs-sdk'
+  displayName: 'Path to directory for the packed SDK'
+- name: prereleaseId
+  type: string
+  default: ''
+
+steps:
+# Set paths for multi-repo checkout
+- task: PowerShell@2
+  displayName: 'Set source paths'
+  inputs:
+    targetType: inline
+    script: |
+      $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local"
+      $testDataDir = "$(Build.SourcesDirectory)/test-data-shared"
+      Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot"
+      Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir"
+
+- task: UseDotNet@2
+  displayName: 'Use .NET 9 SDK'
+  inputs:
+    packageType: sdk
+    version: '9.0.x'
+
+# Compute package version
+- task: PowerShell@2
+  displayName: 'Set package version'
+  inputs:
+    targetType: inline
+    script: |
+      $v = "${{ parameters.version }}"
+      $preId = "${{ parameters.prereleaseId }}"
+      if ($preId -ne '' -and $preId -ne 'none') {
+        $v = "$v-$preId"
+      } elseif ("${{ parameters.isRelease }}" -ne "True") {
+        $ts = Get-Date -Format "yyyyMMddHHmm"
+        $v = "$v-dev.$ts"
+      }
+      Write-Host "##vso[task.setvariable variable=packageVersion]$v"
+      Write-Host "Package version: $v"
+
+# List downloaded artifact for debugging
+- task: PowerShell@2
+  displayName: 'List downloaded FLC artifact'
+  inputs:
+    targetType: inline
+    script: |
+      Write-Host "Contents of ${{ parameters.flcNugetDir }}:"
+      Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName }
+
+# Create a temporary NuGet.config that includes the local FLC feed
+- task: PowerShell@2
+  displayName: 'Create NuGet.config with local FLC feed'
+  inputs:
+    targetType: inline
+    script: |
+      $nugetConfig = @"
+      <?xml version="1.0" encoding="utf-8"?>
+      <configuration>
+        <packageSources>
+          <clear />
+          <add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
+          <add key="ORT-Nightly" value="https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json" />
+          <add key="flc-local" value="${{ parameters.flcNugetDir }}" />
+        </packageSources>
+      </configuration>
+      "@
+      # Determine the FLC version from the .nupkg filename
+      $nupkg = Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse -Filter "Microsoft.AI.Foundry.Local.Core*.nupkg" -Exclude "*.snupkg" | Select-Object -First 1
+      if (-not $nupkg) { throw "No FLC .nupkg found in ${{ parameters.flcNugetDir }}" }
+      $flcVer = $nupkg.BaseName -replace '^Microsoft\.AI\.Foundry\.Local\.Core(\.WinML)?\.', ''
+      Write-Host "##vso[task.setvariable variable=resolvedFlcVersion]$flcVer"
+      Write-Host "Resolved FLC version: $flcVer"
+
+      # Point the local NuGet feed at the directory that actually contains the .nupkg
+      $flcFeedDir = $nupkg.DirectoryName
+      $nugetConfig = $nugetConfig -replace [regex]::Escape("${{ parameters.flcNugetDir }}"), $flcFeedDir
+      $configPath = "$(Build.ArtifactStagingDirectory)/NuGet.config"
+      Set-Content -Path $configPath -Value $nugetConfig
+      Write-Host "##vso[task.setvariable variable=customNugetConfig]$configPath"
+      Write-Host "Local FLC feed directory: $flcFeedDir"
+
+- task: NuGetAuthenticate@1
+  displayName: 'Authenticate NuGet feeds'
+
+- task: PowerShell@2
+  displayName: 'Restore SDK'
+  inputs:
+    targetType: inline
+    script: |
+      $proj = "$(repoRoot)/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj"
+      if (-not (Test-Path $proj)) { throw "Project not found: $proj" }
+      dotnet restore $proj `
+        --configfile "$(customNugetConfig)" `
+        /p:UseWinML=${{ parameters.isWinML }}
+      if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
+
+- task: PowerShell@2
+  displayName: 'Build SDK'
+  inputs:
+    targetType: inline
+    script: |
+      dotnet build 
"$(repoRoot)/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj" ` + --no-restore --configuration Release ` + /p:UseWinML=${{ parameters.isWinML }} + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +# Discover target framework directory +- task: PowerShell@2 + displayName: 'Find target framework' + inputs: + targetType: inline + script: | + $base = "$(repoRoot)/sdk/cs/src/bin/Release" + # The SDK targets net9.0 (standard) or net9.0-windows10.0.26100.0 (WinML). + # Find whichever TFM directory was produced by the build. + $tfmDir = Get-ChildItem $base -Directory | Select-Object -First 1 + if (-not $tfmDir) { throw "No target framework directory found under $base" } + Write-Host "##vso[task.setvariable variable=TargetFramework]$($tfmDir.Name)" + Write-Host "Target framework: $($tfmDir.Name)" + +# Sign DLLs +- task: SFP.build-tasks.custom-build-task-1.EsrpCodeSigning@5 + displayName: 'Sign SDK DLLs' + inputs: + ConnectedServiceName: 'OnnxrunTimeCodeSign_20240611' + UseMSIAuthentication: true + AppRegistrationClientId: '$(esrpClientId)' + AppRegistrationTenantId: '$(esrpTenantId)' + EsrpClientId: '$(esrpClientId)' + AuthAKVName: '$(esrpAkvName)' + AuthSignCertName: '$(esrpSignCertName)' + FolderPath: '$(repoRoot)/sdk/cs/src/bin/Release/$(TargetFramework)' + Pattern: '*.dll' + SessionTimeout: 90 + ServiceEndpointUrl: 'https://api.esrp.microsoft.com/api/v2' + MaxConcurrency: 25 + signConfigType: inlineSignParams + inlineOperation: | + [{"keyCode":"CP-230012","operationSetCode":"SigntoolSign","parameters":[{"parameterName":"OpusName","parameterValue":"Microsoft"},{"parameterName":"OpusInfo","parameterValue":"http://www.microsoft.com"},{"parameterName":"PageHash","parameterValue":"/NPH"},{"parameterName":"FileDigest","parameterValue":"/fd sha256"},{"parameterName":"TimeStamp","parameterValue":"/tr \"http://rfc3161.gtm.corp.microsoft.com/TSS/HttpTspServer\" /td sha256"}],"toolName":"signtool.exe","toolVersion":"6.2.9304.0"}] + +# Pack NuGet +- task: PowerShell@2 + displayName: 'Pack NuGet' + inputs: + targetType: inline + script: | + dotnet pack "$(repoRoot)/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj" ` + --no-build --no-restore --configuration Release ` + --output "${{ parameters.outputDir }}" ` + /p:PackageVersion=$(packageVersion) ` + /p:UseWinML=${{ parameters.isWinML }} ` + /p:IncludeSymbols=true ` + /p:SymbolPackageFormat=snupkg + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +# Sign NuGet package +- task: SFP.build-tasks.custom-build-task-1.EsrpCodeSigning@5 + displayName: 'Sign SDK NuGet package' + inputs: + ConnectedServiceName: 'OnnxrunTimeCodeSign_20240611' + UseMSIAuthentication: true + AppRegistrationClientId: '$(esrpClientId)' + AppRegistrationTenantId: '$(esrpTenantId)' + EsrpClientId: '$(esrpClientId)' + AuthAKVName: '$(esrpAkvName)' + AuthSignCertName: '$(esrpSignCertName)' + FolderPath: '${{ parameters.outputDir }}' + Pattern: '*.nupkg' + SessionTimeout: 90 + ServiceEndpointUrl: 'https://api.esrp.microsoft.com/api/v2' + MaxConcurrency: 25 + signConfigType: inlineSignParams + inlineOperation: | + [{"keyCode":"CP-401405","operationSetCode":"NuGetSign","parameters":[],"toolName":"sign","toolVersion":"6.2.9304.0"},{"keyCode":"CP-401405","operationSetCode":"NuGetVerify","parameters":[],"toolName":"sign","toolVersion":"6.2.9304.0"}] diff --git a/.pipelines/templates/build-js-steps.yml b/.pipelines/templates/build-js-steps.yml new file mode 100644 index 00000000..e288bbce --- /dev/null +++ b/.pipelines/templates/build-js-steps.yml @@ -0,0 +1,156 @@ +# Steps to build and pack the JS 
SDK. +# When test-data-shared is checked out alongside self, ADO places repos under +# $(Build.SourcesDirectory)/. The self repo is 'Foundry-Local'. +parameters: +- name: version + type: string +- name: isRelease + type: boolean + default: false +- name: isWinML + type: boolean + default: false +- name: flcNugetDir + type: string + default: '' + displayName: 'Path to directory containing the FLC .nupkg (for tests)' +- name: prereleaseId + type: string + default: '' +steps: +# Set paths for multi-repo checkout +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local" + $testDataDir = "$(Build.SourcesDirectory)/test-data-shared" + Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot" + Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir" + Write-Host "Repo root: $repoRoot" + Write-Host "Test data: $testDataDir" + +- task: PowerShell@2 + displayName: 'List downloaded FLC artifact' + condition: and(succeeded(), ne('${{ parameters.flcNugetDir }}', '')) + inputs: + targetType: inline + script: | + Write-Host "Contents of ${{ parameters.flcNugetDir }}:" + Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName } + +- task: NodeTool@0 + displayName: 'Use Node.js 20' + inputs: + versionSpec: '20.x' + +# Compute version +- task: PowerShell@2 + displayName: 'Set package version' + inputs: + targetType: inline + script: | + $v = "${{ parameters.version }}" + $preId = "${{ parameters.prereleaseId }}" + if ($preId -ne '' -and $preId -ne 'none') { + $v = "$v-$preId" + } elseif ("${{ parameters.isRelease }}" -ne "True") { + $ts = Get-Date -Format "yyyyMMddHHmm" + $v = "$v-dev.$ts" + } + Write-Host "##vso[task.setvariable variable=packageVersion]$v" + +# Install dependencies including native binaries (FLC, ORT, GenAI) from NuGet feeds +- task: Npm@1 + displayName: 'npm install' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'install' + +# Overwrite the FLC native binary with the one we just built +- task: PowerShell@2 + displayName: 'Overwrite FLC with pipeline-built binary' + condition: and(succeeded(), ne('${{ parameters.flcNugetDir }}', '')) + inputs: + targetType: inline + script: | + $os = 'win32' + $arch = if ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq 'Arm64') { 'arm64' } else { 'x64' } + $platformKey = "$os-$arch" + $rid = if ($arch -eq 'arm64') { 'win-arm64' } else { 'win-x64' } + + # Detect macOS/Linux + if ($IsLinux) { + $os = 'linux' + $platformKey = "$os-$arch" + $rid = "linux-$arch" + } elseif ($IsMacOS) { + $os = 'darwin' + $platformKey = "$os-$arch" + $rid = "osx-$arch" + } + + $nupkg = Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse -Filter "Microsoft.AI.Foundry.Local.Core*.nupkg" -Exclude "*.snupkg" | Select-Object -First 1 + if (-not $nupkg) { throw "No FLC .nupkg found in ${{ parameters.flcNugetDir }}" } + + # Extract the NuGet package (it's a zip) + $extractDir = "$(Build.ArtifactStagingDirectory)/flc-extract" + $zip = [System.IO.Path]::ChangeExtension($nupkg.FullName, ".zip") + Copy-Item $nupkg.FullName $zip -Force + Expand-Archive -Path $zip -DestinationPath $extractDir -Force + + # Overwrite FLC binary in the npm-installed location + $destDir = "$(repoRoot)/sdk/js/packages/@foundry-local-core/$platformKey" + $nativeDir = "$extractDir/runtimes/$rid/native" + if (Test-Path $nativeDir) { + Get-ChildItem $nativeDir -File | ForEach-Object { + Copy-Item 
$_.FullName -Destination "$destDir/$($_.Name)" -Force + Write-Host "Overwrote $($_.Name) with pipeline-built version" + } + } else { + Write-Warning "No native binaries found at $nativeDir for RID $rid" + } + + Write-Host "Final binaries in $destDir`:" + Get-ChildItem $destDir | ForEach-Object { Write-Host " $($_.Name)" } + +- task: Npm@1 + displayName: 'npm version' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'version $(packageVersion) --no-git-tag-version --allow-same-version' + +- task: Npm@1 + displayName: 'npm build' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'run build' + +- ${{ if eq(parameters.isWinML, true) }}: + - task: Npm@1 + displayName: 'npm run pack:winml' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'run pack:winml' + +- ${{ else }}: + - task: Npm@1 + displayName: 'npm run pack' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'run pack' + +- task: PowerShell@2 + displayName: 'Stage artifact' + inputs: + targetType: inline + script: | + $destDir = "$(Build.ArtifactStagingDirectory)/js-sdk" + New-Item -ItemType Directory -Path $destDir -Force | Out-Null + Copy-Item "$(repoRoot)/sdk/js/*.tgz" "$destDir/" diff --git a/.pipelines/templates/build-python-steps.yml b/.pipelines/templates/build-python-steps.yml new file mode 100644 index 00000000..8ab4d8d1 --- /dev/null +++ b/.pipelines/templates/build-python-steps.yml @@ -0,0 +1,152 @@ +# Steps to build and pack the Python SDK wheel. +# When test-data-shared is checked out alongside self, ADO places repos under +# $(Build.SourcesDirectory)/. The self repo is 'Foundry-Local'. +parameters: +- name: version + type: string +- name: isRelease + type: boolean + default: false +- name: isWinML + type: boolean + default: false +- name: flcWheelsDir + type: string + displayName: 'Path to directory containing the FLC wheels (for overriding foundry-local-core)' +- name: outputDir + type: string + default: '$(Build.ArtifactStagingDirectory)/python-sdk' + displayName: 'Path to directory for the built wheel' +- name: prereleaseId + type: string + default: '' +steps: +# Set paths for multi-repo checkout +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local" + $testDataDir = "$(Build.SourcesDirectory)/test-data-shared" + Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot" + Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir" + +- task: UsePythonVersion@0 + displayName: 'Use Python 3.12' + inputs: + versionSpec: '3.12' + +# List downloaded FLC wheels for debugging +- task: PowerShell@2 + displayName: 'List downloaded FLC wheels' + condition: and(succeeded(), ne('${{ parameters.flcWheelsDir }}', '')) + inputs: + targetType: inline + script: | + Write-Host "Contents of ${{ parameters.flcWheelsDir }}:" + Get-ChildItem "${{ parameters.flcWheelsDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName } + +# Compute package version +- task: PowerShell@2 + displayName: 'Set package version' + inputs: + targetType: inline + script: | + $v = "${{ parameters.version }}" + $preId = "${{ parameters.prereleaseId }}" + if ($preId -ne '' -and $preId -ne 'none') { + $v = "$v-$preId" + } elseif ("${{ parameters.isRelease }}" -ne "True") { + $ts = Get-Date -Format "yyyyMMddHHmm" + $v = "$v-dev.$ts" + } + Write-Host "##vso[task.setvariable variable=packageVersion]$v" + Write-Host "Package version: $v" + +# 
Configure pip to use ORT-Nightly feed (plus PyPI as fallback) +- task: PowerShell@2 + displayName: 'Configure pip for Azure Artifacts' + inputs: + targetType: inline + script: | + pip config set global.index-url https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ + pip config set global.extra-index-url https://pypi.org/simple/ + pip config set global.pre true + +# Install the build tool +- script: python -m pip install build + displayName: 'Install build tool' + +# Write version file +- task: PowerShell@2 + displayName: 'Set SDK version' + inputs: + targetType: inline + script: | + Set-Content -Path "$(repoRoot)/sdk/python/src/version.py" -Value '__version__ = "$(packageVersion)"' + +# Install the FLC wheels from the pipeline if provided, so the build +# backend picks up the freshly-built foundry-local-core instead of +# pulling a stale one from the feed. +- task: PowerShell@2 + displayName: 'Pre-install pipeline-built FLC wheel' + condition: and(succeeded(), ne('${{ parameters.flcWheelsDir }}', '')) + inputs: + targetType: inline + script: | + # Determine platform wheel tag for the current machine + $arch = if ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq 'Arm64') { 'arm64' } else { 'amd64' } + if ($IsLinux) { $platTag = "manylinux*x86_64" } + elseif ($IsMacOS) { $platTag = "macosx*$arch" } + else { $platTag = "win_$arch" } + + $filter = if ("${{ parameters.isWinML }}" -eq "True") { "foundry_local_core_winml*$platTag.whl" } else { "foundry_local_core-*$platTag.whl" } + $wheel = Get-ChildItem "${{ parameters.flcWheelsDir }}" -Recurse -Filter $filter | Select-Object -First 1 + if ($wheel) { + Write-Host "Installing pipeline-built FLC wheel: $($wheel.FullName)" + pip install $($wheel.FullName) + } else { + Write-Warning "No FLC wheel found matching $filter in ${{ parameters.flcWheelsDir }}" + } + +- script: pip install onnxruntime-core==1.24.3 onnxruntime-genai-core==0.12.1 + displayName: 'Install ORT native packages' + +- script: pip install "pydantic>=2.0.0" "requests>=2.32.4" "openai>=2.24.0" + displayName: 'Install pure python dependencies' + +# Build wheel — standard or WinML variant +# The wheel retains all dependencies in its metadata so end users get +# native packages installed automatically. CI uses --no-deps to avoid +# re-downloading packages that were pre-installed from pipeline builds. 
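For local debugging, the build-and-install flow the comment above describes (and which the steps below perform) can be reproduced outside the pipeline. A minimal sketch, assuming the current directory is `sdk/python` and the `build` package is installed:

```powershell
# Local repro of the CI wheel flow sketched above (assumptions: current
# directory is sdk/python and the `build` package is installed).
python -m build --wheel --outdir dist/        # append `-C winml=true` for the WinML variant
$wheel = (Get-ChildItem dist/*.whl | Select-Object -First 1).FullName
# CI passes --no-deps because native packages were pre-installed from pipeline
# builds; end users run a plain `pip install` so the wheel's dependency
# metadata pulls them in automatically.
pip install --no-deps $wheel
```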
+- ${{ if eq(parameters.isWinML, true) }}: + - script: python -m build --wheel -C winml=true --outdir dist/ + displayName: 'Build wheel (WinML)' + workingDirectory: $(repoRoot)/sdk/python + +- ${{ else }}: + - script: python -m build --wheel --outdir dist/ + displayName: 'Build wheel' + workingDirectory: $(repoRoot)/sdk/python + +# Install the built wheel +- task: PowerShell@2 + displayName: 'Install built wheel' + inputs: + targetType: inline + script: | + $wheel = (Get-ChildItem "$(repoRoot)/sdk/python/dist/*.whl" | Select-Object -First 1).FullName + pip install --no-deps $wheel + +# Stage output +- task: PowerShell@2 + displayName: 'Stage wheel artifact' + inputs: + targetType: inline + script: | + $destDir = "${{ parameters.outputDir }}" + New-Item -ItemType Directory -Path $destDir -Force | Out-Null + Copy-Item "$(repoRoot)/sdk/python/dist/*" "$destDir/" + Write-Host "Staged wheels:" + Get-ChildItem $destDir | ForEach-Object { Write-Host " $($_.Name)" } diff --git a/.pipelines/templates/build-rust-steps.yml b/.pipelines/templates/build-rust-steps.yml new file mode 100644 index 00000000..efccfaa4 --- /dev/null +++ b/.pipelines/templates/build-rust-steps.yml @@ -0,0 +1,207 @@ +# Steps to build and package the Rust SDK crate. +# When test-data-shared is checked out alongside self, ADO places repos under +# $(Build.SourcesDirectory)/. The self repo is 'Foundry-Local'. +parameters: +- name: version + type: string +- name: isRelease + type: boolean + default: false +- name: prereleaseId + type: string + default: '' +- name: isWinML + type: boolean + default: false +- name: flcNugetDir + type: string + displayName: 'Path to directory containing the FLC .nupkg' +- name: outputDir + type: string + default: '$(Build.ArtifactStagingDirectory)/rust-sdk' + displayName: 'Path to directory for the packaged crate' +steps: +# Set paths for multi-repo checkout +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local" + $testDataDir = "$(Build.SourcesDirectory)/test-data-shared" + Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot" + Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir" + +# Compute package version and patch Cargo.toml +- task: PowerShell@2 + displayName: 'Set crate version' + inputs: + targetType: inline + script: | + $v = "${{ parameters.version }}" + $preId = "${{ parameters.prereleaseId }}" + if ($preId -ne '' -and $preId -ne 'none') { + $v = "$v-$preId" + } elseif ("${{ parameters.isRelease }}" -ne "True") { + $ts = Get-Date -Format "yyyyMMddHHmm" + $v = "$v-dev.$ts" + } + Write-Host "Crate version: $v" + + # Patch Cargo.toml version field + $cargoPath = "$(repoRoot)/sdk/rust/Cargo.toml" + $content = Get-Content $cargoPath -Raw + $content = $content -replace '(?m)^version\s*=\s*"[^"]+"', "version = `"$v`"" + Set-Content -Path $cargoPath -Value $content + Write-Host "Patched Cargo.toml with version $v" + +# List downloaded FLC artifact for debugging +- task: PowerShell@2 + displayName: 'List downloaded FLC artifact' + inputs: + targetType: inline + script: | + Write-Host "Contents of ${{ parameters.flcNugetDir }}:" + Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName } + +# Extract FLC native binaries from the pipeline-built .nupkg so that +# build.rs finds them already present and skips downloading from the feed. 
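The extraction step below relies on a .nupkg being an ordinary zip archive. A minimal standalone sketch of the same trick, with hypothetical file names; the copy-to-.zip dance exists because Expand-Archive rejects archives whose extension is not .zip:

```powershell
# Minimal standalone sketch of the extraction below (file names hypothetical).
# A .nupkg is an ordinary zip archive, but Expand-Archive rejects archives
# without a .zip extension, hence the copy-to-.zip step before extracting.
$nupkg = 'Microsoft.AI.Foundry.Local.Core.1.0.0.nupkg'
$zip   = [System.IO.Path]::ChangeExtension($nupkg, '.zip')
Copy-Item $nupkg $zip -Force
Expand-Archive -Path $zip -DestinationPath 'flc-extract' -Force
Get-ChildItem 'flc-extract/runtimes/win-x64/native' -File
```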
+- task: PowerShell@2 + displayName: 'Extract FLC native binaries for Rust build' + inputs: + targetType: inline + script: | + $nupkg = Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse -Filter "Microsoft.AI.Foundry.Local.Core*.nupkg" -Exclude "*.snupkg" | Select-Object -First 1 + if (-not $nupkg) { throw "No FLC .nupkg found in ${{ parameters.flcNugetDir }}" } + Write-Host "Found NuGet package: $($nupkg.FullName)" + + $extractDir = "$(Build.ArtifactStagingDirectory)/flc-extract-rust" + $zip = [System.IO.Path]::ChangeExtension($nupkg.FullName, ".zip") + Copy-Item $nupkg.FullName $zip -Force + Expand-Archive -Path $zip -DestinationPath $extractDir -Force + + # Determine RID for this agent + $arch = if ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq 'Arm64') { 'arm64' } else { 'x64' } + if ($IsLinux) { + $rid = "linux-$arch" + } elseif ($IsMacOS) { + $rid = "osx-$arch" + } else { + $rid = "win-$arch" + } + + $nativeDir = "$extractDir/runtimes/$rid/native" + if (-not (Test-Path $nativeDir)) { throw "No native binaries found at $nativeDir for RID $rid" } + + # Stage them where build.rs can discover them + $flcNativeDir = "$(Build.ArtifactStagingDirectory)/flc-native-rust" + New-Item -ItemType Directory -Path $flcNativeDir -Force | Out-Null + Get-ChildItem $nativeDir -File | Copy-Item -Destination $flcNativeDir -Force + Write-Host "##vso[task.setvariable variable=flcNativeDir]$flcNativeDir" + Write-Host "Extracted FLC native binaries to $flcNativeDir`:" + Get-ChildItem $flcNativeDir | ForEach-Object { Write-Host " $($_.Name)" } + +# Install Rust toolchain +- task: PowerShell@2 + displayName: 'Install Rust toolchain' + inputs: + targetType: inline + script: | + if ($IsWindows -or (-not $IsLinux -and -not $IsMacOS)) { + Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe + .\rustup-init.exe -y --default-toolchain stable --profile minimal -c clippy,rustfmt + Remove-Item rustup-init.exe + $cargoPath = "$env:USERPROFILE\.cargo\bin" + } else { + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal -c clippy,rustfmt + $cargoPath = "$env:HOME/.cargo/bin" + } + Write-Host "##vso[task.prependpath]$cargoPath" + +# The .cargo/config.toml redirects crates-io to an Azure Artifacts feed +# for CFS compliance. Remove the redirect in CI so cargo can fetch from +# crates.io directly without Azure DevOps auth. 
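For context on what the removal step below undoes: a crates-io source replacement in `.cargo/config.toml` has roughly the following shape. The feed name and URL here are placeholders, not the repo's actual values:

```powershell
# Illustrative only: the feed name and URL are placeholders, not the repo's
# actual values. A crates-io source replacement in .cargo/config.toml has
# roughly this shape; deleting the file (as below) restores the default
# crates.io source.
$redirect = @'
[registries]
cfs-feed = { index = "sparse+https://pkgs.dev.azure.com/<org>/_packaging/<feed>/Cargo/index/" }

[source.crates-io]
replace-with = "cfs-feed"
'@
Set-Content -Path 'sdk/rust/.cargo/config.toml' -Value $redirect
```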
+- task: PowerShell@2 + displayName: 'Use crates.io directly' + inputs: + targetType: inline + script: | + $configPath = "$(repoRoot)/sdk/rust/.cargo/config.toml" + if (Test-Path $configPath) { + Remove-Item $configPath + Write-Host "Removed .cargo/config.toml crates-io redirect" + } + +- task: PowerShell@2 + displayName: 'Check formatting' + inputs: + targetType: inline + script: | + Set-Location "$(repoRoot)/sdk/rust" + cargo fmt --all -- --check + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +- task: PowerShell@2 + displayName: 'Run clippy' + inputs: + targetType: inline + script: | + Set-Location "$(repoRoot)/sdk/rust" + $features = if ("${{ parameters.isWinML }}" -eq "True") { "--features winml" } else { "" } + Invoke-Expression "cargo clippy --all-targets $features -- -D warnings" + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +- task: PowerShell@2 + displayName: 'Build' + inputs: + targetType: inline + script: | + Set-Location "$(repoRoot)/sdk/rust" + $features = if ("${{ parameters.isWinML }}" -eq "True") { "--features winml" } else { "" } + Invoke-Expression "cargo build $features" + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +# Overwrite the FLC core binary in cargo's OUT_DIR with the pipeline-built +# version so that integration tests use the freshly-built FLC. build.rs +# sets FOUNDRY_NATIVE_DIR to OUT_DIR, which the SDK checks at runtime. +- task: PowerShell@2 + displayName: 'Overwrite FLC binary with pipeline-built version' + inputs: + targetType: inline + script: | + # Find cargo's OUT_DIR for the foundry-local-sdk build script + $outDir = Get-ChildItem "$(repoRoot)/sdk/rust/target/debug/build" -Directory -Filter "foundry-local-sdk-*" -Recurse | + Where-Object { Test-Path "$($_.FullName)/out" } | + ForEach-Object { "$($_.FullName)/out" } | + Select-Object -First 1 + if (-not $outDir) { throw "Could not find cargo OUT_DIR for foundry-local-sdk" } + Write-Host "Cargo OUT_DIR: $outDir" + + # Copy pipeline-built FLC native binaries over the downloaded ones + Get-ChildItem "$(flcNativeDir)" -File -Filter "Microsoft.AI.Foundry.Local.Core.*" | ForEach-Object { + Copy-Item $_.FullName -Destination "$outDir/$($_.Name)" -Force + Write-Host "Overwrote $($_.Name) with pipeline-built version" + } + +# --allow-dirty allows packaging with uncommitted changes (build.rs modifies generated files) +- task: PowerShell@2 + displayName: 'Package crate' + inputs: + targetType: inline + script: | + Set-Location "$(repoRoot)/sdk/rust" + $features = if ("${{ parameters.isWinML }}" -eq "True") { "--features winml" } else { "" } + Invoke-Expression "cargo package $features --allow-dirty" + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +# Stage output +- task: PowerShell@2 + displayName: 'Stage crate artifact' + inputs: + targetType: inline + script: | + $destDir = "${{ parameters.outputDir }}" + New-Item -ItemType Directory -Path $destDir -Force | Out-Null + Copy-Item "$(repoRoot)/sdk/rust/target/package/*.crate" "$destDir/" + Write-Host "Staged crates:" + Get-ChildItem $destDir | ForEach-Object { Write-Host " $($_.Name)" } diff --git a/.pipelines/templates/package-core-steps.yml b/.pipelines/templates/package-core-steps.yml new file mode 100644 index 00000000..960b6cd3 --- /dev/null +++ b/.pipelines/templates/package-core-steps.yml @@ -0,0 +1,256 @@ +# Steps to collect per-platform FLC native binaries, organize into NuGet layout, +# pack + sign the NuGet package, and build Python wheels (wheel package name and +# platforms depend on the isWinML parameter). 
The parent job must download all +# platform artifacts and checkout neutron-server. +parameters: +- name: version + type: string +- name: isRelease + type: boolean + default: false +- name: isWinML + type: boolean + default: false +- name: prereleaseId + type: string + default: '' +- name: platforms + type: object # list of { name, artifactName } + +steps: +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $nsRoot = "$(Build.SourcesDirectory)" + Write-Host "##vso[task.setvariable variable=nsRoot]$nsRoot" + +- task: PowerShell@2 + displayName: 'Organize native binaries' + inputs: + targetType: inline + script: | + $unifiedPath = "$(Build.ArtifactStagingDirectory)/unified" + New-Item -ItemType Directory -Path $unifiedPath -Force | Out-Null + + $platformsJson = @' + ${{ convertToJson(parameters.platforms) }} + '@ + $platforms = $platformsJson | ConvertFrom-Json + + foreach ($p in $platforms) { + $srcDir = "$(Pipeline.Workspace)/$($p.artifactName)" + Write-Host "Looking for artifacts at: $srcDir" + if (-not (Test-Path $srcDir)) { + throw "Artifact directory $srcDir does not exist. All platform artifacts must be present to produce a complete NuGet package." + } + $destDir = "$unifiedPath/runtimes/$($p.name)/native" + New-Item -ItemType Directory -Path $destDir -Force | Out-Null + # WinML artifacts include WindowsAppRuntime Bootstrapper DLLs in addition + # to Microsoft.AI.Foundry.Local.Core.*. + $isWinML = "${{ parameters.isWinML }}" -eq "True" + if ($isWinML) { + Get-ChildItem $srcDir -File | + Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" -or $_.Name -eq "Microsoft.WindowsAppRuntime.Bootstrap.dll" } | + Copy-Item -Destination $destDir -Force + } else { + Get-ChildItem $srcDir -File | Where-Object { $_.Name -like "Microsoft.AI.Foundry.Local.Core.*" } | + Copy-Item -Destination $destDir -Force + } + Write-Host "Copied $($p.name) binaries to $destDir" + } + + # Copy build integration files from neutron-server + $nsRoot = "$(nsRoot)" + foreach ($dir in @("build", "buildTransitive")) { + $src = "$nsRoot/src/FoundryLocalCore/Core/$dir" + if (Test-Path $src) { + Copy-Item -Path $src -Destination "$unifiedPath/$dir" -Recurse -Force + } + } + $license = "$nsRoot/src/FoundryLocalCore/Core/LICENSE.txt" + if (Test-Path $license) { + Copy-Item $license "$unifiedPath/LICENSE.txt" -Force + } + +# Compute version +- task: PowerShell@2 + displayName: 'Set FLC package version' + inputs: + targetType: inline + script: | + $v = "${{ parameters.version }}" + $preId = "${{ parameters.prereleaseId }}" + if ($preId -ne '' -and $preId -ne 'none') { + $v = "$v-$preId" + } elseif ("${{ parameters.isRelease }}" -ne "True") { + $ts = Get-Date -Format "yyyyMMddHHmm" + $commitId = "$(Build.SourceVersion)".Substring(0, 8) + $v = "$v-dev-$ts-$commitId" + } + Write-Host "##vso[task.setvariable variable=flcVersion]$v" + Write-Host "FLC version: $v" + +# Pack NuGet +- task: PowerShell@2 + displayName: 'Pack FLC NuGet' + inputs: + targetType: inline + script: | + $nsRoot = "$(nsRoot)" + [xml]$propsXml = Get-Content "$nsRoot/Directory.Packages.props" + $pg = $propsXml.Project.PropertyGroup + + $outDir = "$(Build.ArtifactStagingDirectory)/flc-nuget" + New-Item -ItemType Directory -Path $outDir -Force | Out-Null + + if ("${{ parameters.isWinML }}" -eq "True") { + $nuspec = "$nsRoot/src/FoundryLocalCore/Core/WinMLNuget.nuspec" + $id = "Microsoft.AI.Foundry.Local.Core.WinML" + $ortVer = $pg.OnnxRuntimeFoundryVersionForWinML + $genaiVer = $pg.OnnxRuntimeGenAIWinML + 
$winAppSdkVer = $pg.WinAppSdkVersion + $props = "id=$id;version=$(flcVersion);commitId=$(Build.SourceVersion);OnnxRuntimeFoundryVersion=$ortVer;OnnxRuntimeGenAIWinML=$genaiVer;WinAppSdkVersion=$winAppSdkVer" + } else { + $nuspec = "$nsRoot/src/FoundryLocalCore/Core/NativeNuget.nuspec" + $id = "Microsoft.AI.Foundry.Local.Core" + $ortVer = $pg.OnnxRuntimeFoundryVersion + $genaiVer = $pg.OnnxRuntimeGenAIFoundryVersion + $props = "id=$id;version=$(flcVersion);commitId=$(Build.SourceVersion);OnnxRuntimeFoundryVersion=$ortVer;OnnxRuntimeGenAIFoundryVersion=$genaiVer" + } + + $nugetArgs = @( + 'pack', $nuspec, + '-OutputDirectory', $outDir, + '-BasePath', "$(Build.ArtifactStagingDirectory)/unified", + '-Properties', $props, + '-Symbols', '-SymbolPackageFormat', 'snupkg' + ) + Write-Host "Running: nuget $($nugetArgs -join ' ')" + & nuget $nugetArgs + if ($LASTEXITCODE -ne 0) { throw "NuGet pack failed" } + +# Sign NuGet package +- task: SFP.build-tasks.custom-build-task-1.EsrpCodeSigning@5 + displayName: 'Sign FLC NuGet package' + inputs: + ConnectedServiceName: 'OnnxrunTimeCodeSign_20240611' + UseMSIAuthentication: true + AppRegistrationClientId: '$(esrpClientId)' + AppRegistrationTenantId: '$(esrpTenantId)' + EsrpClientId: '$(esrpClientId)' + AuthAKVName: '$(esrpAkvName)' + AuthSignCertName: '$(esrpSignCertName)' + FolderPath: '$(Build.ArtifactStagingDirectory)/flc-nuget' + Pattern: '*.nupkg' + SessionTimeout: 90 + ServiceEndpointUrl: 'https://api.esrp.microsoft.com/api/v2' + MaxConcurrency: 25 + signConfigType: inlineSignParams + inlineOperation: | + [{"keyCode":"CP-401405","operationSetCode":"NuGetSign","parameters":[],"toolName":"sign","toolVersion":"6.2.9304.0"},{"keyCode":"CP-401405","operationSetCode":"NuGetVerify","parameters":[],"toolName":"sign","toolVersion":"6.2.9304.0"}] + +# Build Python wheels from the NuGet package +- task: PowerShell@2 + displayName: 'Build foundry_local_core Python Wheels' + inputs: + targetType: inline + script: | + $stagingDir = "$(Build.ArtifactStagingDirectory)/flc-wheels" + New-Item -ItemType Directory -Path $stagingDir -Force | Out-Null + + $isWinML = "${{ parameters.isWinML }}" -eq "True" + + # Find and extract the NuGet package (.nupkg is a zip archive) + $nupkgFilter = if ($isWinML) { "Microsoft.AI.Foundry.Local.Core.WinML*.nupkg" } else { "Microsoft.AI.Foundry.Local.Core*.nupkg" } + $nupkg = Get-ChildItem "$(Build.ArtifactStagingDirectory)/flc-nuget" -Filter $nupkgFilter | Where-Object { $_.Name -notlike "*.snupkg" } | Select-Object -First 1 + if (-not $nupkg) { throw "No FLC .nupkg found matching $nupkgFilter" } + Write-Host "Found NuGet package: $($nupkg.Name)" + + $extractDir = "$(Build.ArtifactStagingDirectory)/flc-extracted" + $nupkgZip = [System.IO.Path]::ChangeExtension($nupkg.FullName, ".zip") + Copy-Item -Path $nupkg.FullName -Destination $nupkgZip -Force + Expand-Archive -Path $nupkgZip -DestinationPath $extractDir -Force + + # Convert NuGet version to PEP 440 + # NuGet: 1.0.0-dev-202603271723-bb400310 → PEP 440: 1.0.0.dev202603271723 + # The commit hash is dropped because .devN requires N to be a pure integer. 
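The conversion the comment above describes, extracted into a hypothetical helper with the same branching as the inline code that follows:

```powershell
# Hypothetical helper with the same branching as the inline conversion below.
function Convert-NuGetVersionToPep440 {
    param([string]$NuGetVersion)
    $parts = $NuGetVersion -split '-'
    if ($parts.Count -ge 3 -and $parts[1] -eq 'dev') { return "$($parts[0]).dev$($parts[2])" }
    if ($parts.Count -eq 2) { return "$($parts[0])$($parts[1])" }
    return $parts[0]
}
Convert-NuGetVersionToPep440 '1.0.0-dev-202603271723-bb400310'  # -> 1.0.0.dev202603271723
Convert-NuGetVersionToPep440 '1.0.0-rc1'                        # -> 1.0.0rc1
Convert-NuGetVersionToPep440 '1.0.0'                            # -> 1.0.0
```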
+ $nupkgVersion = $nupkg.BaseName -replace '^Microsoft\.AI\.Foundry\.Local\.Core(\.WinML)?\.', '' + $parts = $nupkgVersion -split '-' + $pyVersion = if ($parts.Count -ge 3 -and $parts[1] -eq 'dev') { "$($parts[0]).dev$($parts[2])" } + elseif ($parts.Count -eq 2) { "$($parts[0])$($parts[1])" } + else { $parts[0] } + Write-Host "Python package version: $pyVersion" + + $packageName = if ($isWinML) { "foundry_local_core_winml" } else { "foundry_local_core" } + + if ($isWinML) { + $platforms = @( + @{rid="win-x64"; pyKey="bin"; tag="win_amd64"}, + @{rid="win-arm64"; pyKey="bin"; tag="win_arm64"} + ) + } else { + $platforms = @( + @{rid="win-x64"; pyKey="bin"; tag="win_amd64"}, + @{rid="win-arm64"; pyKey="bin"; tag="win_arm64"}, + @{rid="linux-x64"; pyKey="bin"; tag="manylinux_2_28_x86_64"}, + @{rid="osx-arm64"; pyKey="bin"; tag="macosx_11_0_arm64"} + ) + } + + foreach ($p in $platforms) { + $nativeSrc = "$extractDir/runtimes/$($p.rid)/native" + if (-not (Test-Path $nativeSrc)) { + Write-Warning "No native binaries found for $($p.rid) — skipping." + continue + } + + $wheelRoot = "$(Build.ArtifactStagingDirectory)/wheels-build/flc_wheel_$($p.tag)" + $pkgDir = "$wheelRoot/$packageName" + New-Item -ItemType Directory -Path "$pkgDir/$($p.pyKey)" -Force | Out-Null + "" | Set-Content -Encoding ascii "$pkgDir/__init__.py" + Get-ChildItem $nativeSrc -File | Copy-Item -Destination "$pkgDir/$($p.pyKey)" + + $normalizedName = $packageName.Replace('_', '-') + $wheelTag = "py3-none-$($p.tag)" + $distInfoName = "$packageName-$pyVersion" + $wheelName = "$distInfoName-$wheelTag.whl" + $distInfoDir = "$wheelRoot/$distInfoName.dist-info" + New-Item -ItemType Directory -Path $distInfoDir -Force | Out-Null + + $utf8NoBom = [System.Text.UTF8Encoding]::new($false) + + [System.IO.File]::WriteAllText("$distInfoDir/WHEEL", + "Wheel-Version: 1.0`nGenerator: custom`nRoot-Is-Purelib: false`nTag: $wheelTag`n", $utf8NoBom) + + [System.IO.File]::WriteAllText("$distInfoDir/METADATA", + "Metadata-Version: 2.1`nName: $normalizedName`nVersion: $pyVersion`n", $utf8NoBom) + + $recordLines = Get-ChildItem $wheelRoot -Recurse -File | ForEach-Object { + $rel = $_.FullName.Substring($wheelRoot.Length + 1).Replace('\', '/') + $raw = (Get-FileHash $_.FullName -Algorithm SHA256).Hash + $bytes = [byte[]]::new($raw.Length / 2) + for ($i = 0; $i -lt $raw.Length; $i += 2) { $bytes[$i/2] = [Convert]::ToByte($raw.Substring($i, 2), 16) } + $b64 = [Convert]::ToBase64String($bytes) -replace '\+','-' -replace '/','_' -replace '=','' + "$rel,sha256=$b64,$($_.Length)" + } + $recordContent = ($recordLines + "$distInfoName.dist-info/RECORD,,") -join "`n" + [System.IO.File]::WriteAllText("$distInfoDir/RECORD", $recordContent, $utf8NoBom) + + $wheelPath = "$stagingDir/$wheelName" + Add-Type -AssemblyName System.IO.Compression.FileSystem + $zip = [System.IO.Compression.ZipFile]::Open($wheelPath, 'Create') + try { + Get-ChildItem $wheelRoot -Recurse -File | ForEach-Object { + $rel = $_.FullName.Substring($wheelRoot.Length + 1).Replace('\', '/') + [System.IO.Compression.ZipFileExtensions]::CreateEntryFromFile($zip, $_.FullName, $rel) | Out-Null + } + } finally { + $zip.Dispose() + } + Write-Host "Created wheel: $wheelName" + } + + Write-Host "`nAll wheels:" + Get-ChildItem $stagingDir -Filter "*.whl" | ForEach-Object { Write-Host " $($_.Name)" } diff --git a/.pipelines/templates/test-cs-steps.yml b/.pipelines/templates/test-cs-steps.yml new file mode 100644 index 00000000..f7dc1aff --- /dev/null +++ b/.pipelines/templates/test-cs-steps.yml @@ -0,0 +1,116 @@ 
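Before the test templates that follow, one detail of the hand-rolled wheel assembly above deserves a standalone check: the RECORD hash format. Per the wheel spec it is sha256 in URL-safe base64 with the padding stripped, exactly as the loop above computes. A minimal sketch (the scratch file name is hypothetical):

```powershell
# Self-contained check of the RECORD hash encoding used above. Per the wheel
# spec, each entry is `path,sha256=<urlsafe-base64-without-padding>,<size>`.
$path = Join-Path ([System.IO.Path]::GetTempPath()) 'record-demo.txt'   # hypothetical scratch file
Set-Content -Path $path -Value 'hello' -NoNewline
$raw   = (Get-FileHash $path -Algorithm SHA256).Hash
$bytes = [byte[]]::new($raw.Length / 2)
for ($i = 0; $i -lt $raw.Length; $i += 2) { $bytes[$i / 2] = [Convert]::ToByte($raw.Substring($i, 2), 16) }
$b64 = [Convert]::ToBase64String($bytes) -replace '\+', '-' -replace '/', '_' -replace '=', ''
"record-demo.txt,sha256=$b64,$((Get-Item $path).Length)"
```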
+# Lightweight test-only steps for the C# SDK. +# Builds from source and runs tests — no signing or NuGet packing. +parameters: +- name: version + type: string +- name: isWinML + type: boolean + default: false +- name: flcNugetDir + type: string + displayName: 'Path to directory containing the FLC .nupkg' + +steps: +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local" + $testDataDir = "$(Build.SourcesDirectory)/test-data-shared" + Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot" + Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir" + +- task: UseDotNet@2 + displayName: 'Use .NET 9 SDK' + inputs: + packageType: sdk + version: '9.0.x' + +- task: PowerShell@2 + displayName: 'List downloaded FLC artifact' + inputs: + targetType: inline + script: | + Write-Host "Contents of ${{ parameters.flcNugetDir }}:" + Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName } + +- ${{ if eq(parameters.isWinML, true) }}: + - task: PowerShell@2 + displayName: 'Install Windows App SDK Runtime' + inputs: + targetType: 'inline' + script: | + $installerUrl = "https://aka.ms/windowsappsdk/1.8/latest/windowsappruntimeinstall-x64.exe" + $installerPath = "$env:TEMP\windowsappruntimeinstall.exe" + + Write-Host "Downloading Windows App SDK Runtime installer from $installerUrl..." + Invoke-WebRequest -Uri $installerUrl -OutFile $installerPath + + Write-Host "Installing Windows App SDK Runtime..." + & $installerPath --quiet --force + + if ($LASTEXITCODE -ne 0) { + Write-Error "Installation failed with exit code $LASTEXITCODE" + exit 1 + } + + Write-Host "Windows App SDK Runtime installed successfully." + errorActionPreference: 'stop' + +- task: PowerShell@2 + displayName: 'Create NuGet.config with local FLC feed' + inputs: + targetType: inline + script: | + $nugetConfig = @" + + + + + + + + + "@ + $nupkg = Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse -Filter "Microsoft.AI.Foundry.Local.Core*.nupkg" -Exclude "*.snupkg" | Select-Object -First 1 + if (-not $nupkg) { throw "No FLC .nupkg found in ${{ parameters.flcNugetDir }}" } + $flcVer = $nupkg.BaseName -replace '^Microsoft\.AI\.Foundry\.Local\.Core(\.WinML)?\.', '' + Write-Host "##vso[task.setvariable variable=resolvedFlcVersion]$flcVer" + + $flcFeedDir = $nupkg.DirectoryName + $nugetConfig = $nugetConfig -replace [regex]::Escape("${{ parameters.flcNugetDir }}"), $flcFeedDir + $configPath = "$(Build.ArtifactStagingDirectory)/NuGet.config" + Set-Content -Path $configPath -Value $nugetConfig + Write-Host "##vso[task.setvariable variable=customNugetConfig]$configPath" + +- task: NuGetAuthenticate@1 + displayName: 'Authenticate NuGet feeds' + +- task: PowerShell@2 + displayName: 'Restore & build tests' + inputs: + targetType: inline + script: | + dotnet restore "$(repoRoot)/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj" ` + --configfile "$(customNugetConfig)" ` + /p:UseWinML=${{ parameters.isWinML }} ` + /p:FoundryLocalCoreVersion=$(resolvedFlcVersion) + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + + dotnet build "$(repoRoot)/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj" ` + --no-restore --configuration Release ` + /p:UseWinML=${{ parameters.isWinML }} + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +- task: PowerShell@2 + displayName: 'Run SDK tests' + inputs: + targetType: inline + script: | + dotnet test 
"$(repoRoot)/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj" ` + --no-build --configuration Release ` + /p:UseWinML=${{ parameters.isWinML }} + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + env: + TF_BUILD: 'true' diff --git a/.pipelines/templates/test-js-steps.yml b/.pipelines/templates/test-js-steps.yml new file mode 100644 index 00000000..41ef7f62 --- /dev/null +++ b/.pipelines/templates/test-js-steps.yml @@ -0,0 +1,121 @@ +# Lightweight test-only steps for the JS SDK. +# Builds from source and runs tests — no npm pack or artifact staging. +parameters: +- name: version + type: string +- name: isWinML + type: boolean + default: false +- name: flcNugetDir + type: string + displayName: 'Path to directory containing the FLC .nupkg' + +steps: +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local" + $testDataDir = "$(Build.SourcesDirectory)/test-data-shared" + Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot" + Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir" + +- ${{ if eq(parameters.isWinML, true) }}: + - task: PowerShell@2 + displayName: 'Install Windows App SDK Runtime' + inputs: + targetType: 'inline' + script: | + $installerUrl = "https://aka.ms/windowsappsdk/1.8/latest/windowsappruntimeinstall-x64.exe" + $installerPath = "$env:TEMP\windowsappruntimeinstall.exe" + + Write-Host "Downloading Windows App SDK Runtime installer from $installerUrl..." + Invoke-WebRequest -Uri $installerUrl -OutFile $installerPath + + Write-Host "Installing Windows App SDK Runtime..." + & $installerPath --quiet --force + + if ($LASTEXITCODE -ne 0) { + Write-Error "Installation failed with exit code $LASTEXITCODE" + exit 1 + } + + Write-Host "Windows App SDK Runtime installed successfully." 
+ errorActionPreference: 'stop' + +- task: PowerShell@2 + displayName: 'List downloaded FLC artifact' + inputs: + targetType: inline + script: | + Write-Host "Contents of ${{ parameters.flcNugetDir }}:" + Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName } + +- task: NodeTool@0 + displayName: 'Use Node.js 20' + inputs: + versionSpec: '20.x' + +- task: Npm@1 + displayName: 'npm install' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'install' + +# Overwrite the FLC native binary with the pipeline-built one +- task: PowerShell@2 + displayName: 'Overwrite FLC with pipeline-built binary' + inputs: + targetType: inline + script: | + $os = 'win32' + $arch = if ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq 'Arm64') { 'arm64' } else { 'x64' } + $platformKey = "$os-$arch" + $rid = if ($arch -eq 'arm64') { 'win-arm64' } else { 'win-x64' } + + if ($IsLinux) { + $os = 'linux' + $platformKey = "$os-$arch" + $rid = "linux-$arch" + } elseif ($IsMacOS) { + $os = 'darwin' + $platformKey = "$os-$arch" + $rid = "osx-$arch" + } + + $nupkg = Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse -Filter "Microsoft.AI.Foundry.Local.Core*.nupkg" -Exclude "*.snupkg" | Select-Object -First 1 + if (-not $nupkg) { throw "No FLC .nupkg found in ${{ parameters.flcNugetDir }}" } + + $extractDir = "$(Build.ArtifactStagingDirectory)/flc-extract" + $zip = [System.IO.Path]::ChangeExtension($nupkg.FullName, ".zip") + Copy-Item $nupkg.FullName $zip -Force + Expand-Archive -Path $zip -DestinationPath $extractDir -Force + + $destDir = "$(repoRoot)/sdk/js/packages/@foundry-local-core/$platformKey" + $nativeDir = "$extractDir/runtimes/$rid/native" + if (Test-Path $nativeDir) { + Get-ChildItem $nativeDir -File | ForEach-Object { + Copy-Item $_.FullName -Destination "$destDir/$($_.Name)" -Force + Write-Host "Overwrote $($_.Name) with pipeline-built version" + } + } else { + Write-Warning "No native binaries found at $nativeDir for RID $rid" + } + +- task: Npm@1 + displayName: 'npm build' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'run build' + +- task: Npm@1 + displayName: 'npm test' + inputs: + command: custom + workingDir: $(repoRoot)/sdk/js + customCommand: 'test' + env: + TF_BUILD: 'true' diff --git a/.pipelines/templates/test-python-steps.yml b/.pipelines/templates/test-python-steps.yml new file mode 100644 index 00000000..1da74ee2 --- /dev/null +++ b/.pipelines/templates/test-python-steps.yml @@ -0,0 +1,132 @@ +# Lightweight test-only steps for the Python SDK. +# Builds from source and runs tests — no artifact staging. 
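The overwrite step in the JS tests above bridges two platform naming schemes: the npm native packages live under `@foundry-local-core/<os>-<arch>` (Node.js `process.platform` names), while the .nupkg stores binaries per NuGet RID. A standalone sketch of that mapping, mirroring the step's logic rather than introducing any new API:

```powershell
# Standalone sketch of the platform mapping used by the JS overwrite step above.
$arch = if ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq 'Arm64') { 'arm64' } else { 'x64' }
$os   = if ($IsLinux) { 'linux' } elseif ($IsMacOS) { 'darwin' } else { 'win32' }
$platformKey = "$os-$arch"           # npm package directory, e.g. win32-x64
$rid = switch ($os) {                # NuGet runtime identifier, e.g. win-x64
    'linux'  { "linux-$arch" }
    'darwin' { "osx-$arch" }
    default  { "win-$arch" }
}
"npm dir: packages/@foundry-local-core/$platformKey  <-  nupkg: runtimes/$rid/native"
```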
+parameters: +- name: version + type: string +- name: isWinML + type: boolean + default: false +- name: flcWheelsDir + type: string + displayName: 'Path to directory containing the FLC wheels' + +steps: +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local" + $testDataDir = "$(Build.SourcesDirectory)/test-data-shared" + Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot" + Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir" + +- ${{ if eq(parameters.isWinML, true) }}: + - task: PowerShell@2 + displayName: 'Install Windows App SDK Runtime' + inputs: + targetType: 'inline' + script: | + $installerUrl = "https://aka.ms/windowsappsdk/1.8/latest/windowsappruntimeinstall-x64.exe" + $installerPath = "$env:TEMP\windowsappruntimeinstall.exe" + + Write-Host "Downloading Windows App SDK Runtime installer from $installerUrl..." + Invoke-WebRequest -Uri $installerUrl -OutFile $installerPath + + Write-Host "Installing Windows App SDK Runtime..." + & $installerPath --quiet --force + + if ($LASTEXITCODE -ne 0) { + Write-Error "Installation failed with exit code $LASTEXITCODE" + exit 1 + } + + Write-Host "Windows App SDK Runtime installed successfully." + errorActionPreference: 'stop' + +- task: UsePythonVersion@0 + displayName: 'Use Python 3.12' + inputs: + versionSpec: '3.12' + +- task: PowerShell@2 + displayName: 'List downloaded FLC wheels' + condition: and(succeeded(), ne('${{ parameters.flcWheelsDir }}', '')) + inputs: + targetType: inline + script: | + Write-Host "Contents of ${{ parameters.flcWheelsDir }}:" + Get-ChildItem "${{ parameters.flcWheelsDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName } + +- task: PowerShell@2 + displayName: 'Configure pip for Azure Artifacts' + inputs: + targetType: inline + script: | + pip config set global.index-url https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ + pip config set global.extra-index-url https://pypi.org/simple/ + pip config set global.pre true + +- script: python -m pip install build + displayName: 'Install build tool' + +- task: PowerShell@2 + displayName: 'Set SDK version' + inputs: + targetType: inline + script: | + Set-Content -Path "$(repoRoot)/sdk/python/src/version.py" -Value '__version__ = "${{ parameters.version }}"' + +- task: PowerShell@2 + displayName: 'Pre-install pipeline-built FLC wheel' + condition: and(succeeded(), ne('${{ parameters.flcWheelsDir }}', '')) + inputs: + targetType: inline + script: | + # Determine platform wheel tag for the current machine + $arch = if ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq 'Arm64') { 'arm64' } else { 'amd64' } + if ($IsLinux) { $platTag = "manylinux*x86_64" } + elseif ($IsMacOS) { $platTag = "macosx*$arch" } + else { $platTag = "win_$arch" } + + $filter = if ("${{ parameters.isWinML }}" -eq "True") { "foundry_local_core_winml*$platTag.whl" } else { "foundry_local_core-*$platTag.whl" } + $wheel = Get-ChildItem "${{ parameters.flcWheelsDir }}" -Recurse -Filter $filter | Select-Object -First 1 + if ($wheel) { + Write-Host "Installing pipeline-built FLC wheel: $($wheel.FullName)" + pip install $($wheel.FullName) + } else { + Write-Warning "No FLC wheel found matching $filter" + } + +- script: pip install onnxruntime-core==1.24.3 onnxruntime-genai-core==0.12.1 + displayName: 'Install ORT native packages' + +- script: pip install "pydantic>=2.0.0" "requests>=2.32.4" "openai>=2.24.0" + displayName: 
'Install pure python dependencies' + +- ${{ if not(parameters.isWinML) }}: + - script: python -m build --wheel --outdir dist/ + displayName: 'Build wheel' + workingDirectory: $(repoRoot)/sdk/python + +- ${{ if parameters.isWinML }}: + - script: python -m build --wheel -C winml=true --outdir dist/ + displayName: 'Build wheel (WinML)' + workingDirectory: $(repoRoot)/sdk/python + +- task: PowerShell@2 + displayName: 'Install built wheel' + inputs: + targetType: inline + script: | + $wheel = (Get-ChildItem "$(repoRoot)/sdk/python/dist/*.whl" | Select-Object -First 1).FullName + pip install --no-deps $wheel + +- script: pip install coverage "pytest>=7.0.0" "pytest-timeout>=2.1.0" + displayName: 'Install test dependencies' + +- script: python -m pytest test/ -v + displayName: 'Run tests' + workingDirectory: $(repoRoot)/sdk/python + env: + TF_BUILD: 'true' diff --git a/.pipelines/templates/test-rust-steps.yml b/.pipelines/templates/test-rust-steps.yml new file mode 100644 index 00000000..31bfd75e --- /dev/null +++ b/.pipelines/templates/test-rust-steps.yml @@ -0,0 +1,159 @@ +# Lightweight test-only steps for the Rust SDK. +# Builds from source and runs tests — no cargo package or artifact staging. +parameters: +- name: isWinML + type: boolean + default: false +- name: flcNugetDir + type: string + displayName: 'Path to directory containing the FLC .nupkg' + +steps: +- task: PowerShell@2 + displayName: 'Set source paths' + inputs: + targetType: inline + script: | + $repoRoot = "$(Build.SourcesDirectory)/Foundry-Local" + $testDataDir = "$(Build.SourcesDirectory)/test-data-shared" + Write-Host "##vso[task.setvariable variable=repoRoot]$repoRoot" + Write-Host "##vso[task.setvariable variable=testDataDir]$testDataDir" + +- ${{ if eq(parameters.isWinML, true) }}: + - task: PowerShell@2 + displayName: 'Install Windows App SDK Runtime' + inputs: + targetType: 'inline' + script: | + $installerUrl = "https://aka.ms/windowsappsdk/1.8/latest/windowsappruntimeinstall-x64.exe" + $installerPath = "$env:TEMP\windowsappruntimeinstall.exe" + + Write-Host "Downloading Windows App SDK Runtime installer from $installerUrl..." + Invoke-WebRequest -Uri $installerUrl -OutFile $installerPath + + Write-Host "Installing Windows App SDK Runtime..." + & $installerPath --quiet --force + + if ($LASTEXITCODE -ne 0) { + Write-Error "Installation failed with exit code $LASTEXITCODE" + exit 1 + } + + Write-Host "Windows App SDK Runtime installed successfully."
+ errorActionPreference: 'stop' + +- task: PowerShell@2 + displayName: 'List downloaded FLC artifact' + inputs: + targetType: inline + script: | + Write-Host "Contents of ${{ parameters.flcNugetDir }}:" + Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse | ForEach-Object { Write-Host $_.FullName } + +# Extract FLC native binaries from the pipeline-built .nupkg +- task: PowerShell@2 + displayName: 'Extract FLC native binaries' + inputs: + targetType: inline + script: | + $nupkg = Get-ChildItem "${{ parameters.flcNugetDir }}" -Recurse -Filter "Microsoft.AI.Foundry.Local.Core*.nupkg" -Exclude "*.snupkg" | Select-Object -First 1 + if (-not $nupkg) { throw "No FLC .nupkg found in ${{ parameters.flcNugetDir }}" } + + $extractDir = "$(Build.ArtifactStagingDirectory)/flc-extract-rust" + $zip = [System.IO.Path]::ChangeExtension($nupkg.FullName, ".zip") + Copy-Item $nupkg.FullName $zip -Force + Expand-Archive -Path $zip -DestinationPath $extractDir -Force + + $arch = if ([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture -eq 'Arm64') { 'arm64' } else { 'x64' } + if ($IsLinux) { + $rid = "linux-$arch" + } elseif ($IsMacOS) { + $rid = "osx-$arch" + } else { + $rid = "win-$arch" + } + + $nativeDir = "$extractDir/runtimes/$rid/native" + if (-not (Test-Path $nativeDir)) { throw "No native binaries found at $nativeDir for RID $rid" } + + $flcNativeDir = "$(Build.ArtifactStagingDirectory)/flc-native-rust" + New-Item -ItemType Directory -Path $flcNativeDir -Force | Out-Null + Get-ChildItem $nativeDir -File | Copy-Item -Destination $flcNativeDir -Force + Write-Host "##vso[task.setvariable variable=flcNativeDir]$flcNativeDir" + Write-Host "Extracted FLC native binaries for $rid" + +- task: PowerShell@2 + displayName: 'Install Rust toolchain' + inputs: + targetType: inline + script: | + if ($IsWindows -or (-not $IsLinux -and -not $IsMacOS)) { + Invoke-WebRequest -Uri https://win.rustup.rs/x86_64 -OutFile rustup-init.exe + .\rustup-init.exe -y --default-toolchain stable --profile minimal -c clippy,rustfmt + Remove-Item rustup-init.exe + $cargoPath = "$env:USERPROFILE\.cargo\bin" + } else { + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal -c clippy,rustfmt + $cargoPath = "$env:HOME/.cargo/bin" + } + Write-Host "##vso[task.prependpath]$cargoPath" + +- task: PowerShell@2 + displayName: 'Use crates.io directly' + inputs: + targetType: inline + script: | + $configPath = "$(repoRoot)/sdk/rust/.cargo/config.toml" + if (Test-Path $configPath) { + Remove-Item $configPath + Write-Host "Removed .cargo/config.toml crates-io redirect" + } + +- task: PowerShell@2 + displayName: 'Build' + inputs: + targetType: inline + script: | + Set-Location "$(repoRoot)/sdk/rust" + $features = if ("${{ parameters.isWinML }}" -eq "True") { "--features winml" } else { "" } + Invoke-Expression "cargo build $features" + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +# Overwrite FLC binary with pipeline-built version +- task: PowerShell@2 + displayName: 'Overwrite FLC binary with pipeline-built version' + inputs: + targetType: inline + script: | + $outDir = Get-ChildItem "$(repoRoot)/sdk/rust/target/debug/build" -Directory -Filter "foundry-local-sdk-*" -Recurse | + Where-Object { Test-Path "$($_.FullName)/out" } | + ForEach-Object { "$($_.FullName)/out" } | + Select-Object -First 1 + if (-not $outDir) { throw "Could not find cargo OUT_DIR for foundry-local-sdk" } + + Get-ChildItem "$(flcNativeDir)" -File -Filter "Microsoft.AI.Foundry.Local.Core.*" | 
ForEach-Object { + Copy-Item $_.FullName -Destination "$outDir/$($_.Name)" -Force + Write-Host "Overwrote $($_.Name) with pipeline-built version" + } + +- task: PowerShell@2 + displayName: 'Run unit tests' + inputs: + targetType: inline + script: | + Set-Location "$(repoRoot)/sdk/rust" + $features = if ("${{ parameters.isWinML }}" -eq "True") { "--features winml" } else { "" } + Invoke-Expression "cargo test --lib $features" + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + +- task: PowerShell@2 + displayName: 'Run integration tests' + inputs: + targetType: inline + script: | + Set-Location "$(repoRoot)/sdk/rust" + $features = if ("${{ parameters.isWinML }}" -eq "True") { "--features winml" } else { "" } + Invoke-Expression "cargo test --tests $features -- --include-ignored --test-threads=1 --nocapture" + if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE } + env: + TF_BUILD: 'true' diff --git a/README.md b/README.md index 14c53229..07bc9b4d 100644 --- a/README.md +++ b/README.md @@ -232,9 +232,9 @@ const result = await audioClient.transcribe('recording.wav'); console.log('Transcription:', result.text); // Or stream in real-time -await audioClient.transcribeStreaming('recording.wav', (chunk) => { +for await (const chunk of audioClient.transcribeStreaming('recording.wav')) { process.stdout.write(chunk.text); -}); +} await whisperModel.unload(); ``` diff --git a/samples/cs/GettingStarted/Directory.Packages.props b/samples/cs/Directory.Packages.props similarity index 68% rename from samples/cs/GettingStarted/Directory.Packages.props rename to samples/cs/Directory.Packages.props index 2d91a9fe..21384b45 100644 --- a/samples/cs/GettingStarted/Directory.Packages.props +++ b/samples/cs/Directory.Packages.props @@ -5,9 +5,12 @@ 1.23.2 - - + + + + + diff --git a/samples/cs/GettingStarted/README.md b/samples/cs/GettingStarted/README.md deleted file mode 100644 index afe6e88d..00000000 --- a/samples/cs/GettingStarted/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# 🚀 Getting started with the Foundry Local C# SDK - -There are two NuGet packages for the Foundry Local SDK - a WinML and a cross-platform package - that have *exactly* the same API surface but are optimised for different platforms: - -- **Windows**: Uses the `Microsoft.AI.Foundry.Local.WinML` package that is specific to Windows applications. The WinML package uses Windows Machine Learning to deliver optimal performance and user experience on Windows devices. -- **Cross-Platform**: Use the `Microsoft.AI.Foundry.Local` package that can be used for cross-platform applications (Windows, Linux, macOS). - -> [!TIP] -> Whilst you can use either package on Windows, we recommend using the WinML package for Windows applications to take advantage of the Windows ML framework for optimal performance and user experience. Your end users will benefit with: -> - a wider range of hardware acceleration options that are automatically managed by Windows ML. -> - a smaller application package size because downloading hardware-specific libraries occurs at application runtime rather than bundled with your application. - -Both the WinML and cross-platform packages provide the same APIs, so you can easily switch between the two packages if you need to target multiple platforms. The samples include the following projects: - -- **HelloFoundryLocalSdk**: A simple console application that initializes the Foundry Local SDK, downloads a model, loads it and does chat completions. 
-- **FoundryLocalWebServer**: A simple console application that shows how to set up a local OpenAI-compliant web server using the Foundry Local SDK. -- **AudioTranscriptionExample**: A simple console application that demonstrates how to use the Foundry Local SDK for audio transcription tasks. -- **ModelManagementExample**: A simple console application that demonstrates how to manage models - such as variant selection and updates - using the Foundry Local SDK. -- **ToolCallingFoundryLocalSdk**: A simple console application that initializes the Foundry Local SDK, downloads a model, loads it and does tool calling with chat completions. -- **ToolCallingFoundryLocalWebServer**: A simple console application that shows how to set up a local OpenAI-compliant web server with tool calling using the Foundry Local SDK. - -## Running the samples - -1. Clone the Foundry Local repository from GitHub. - ```bash - git clone https://github.com/microsoft/Foundry-Local.git - ``` -2. Open and run the samples. - - **Windows:** - 1. Open the `Foundry-Local/samples/cs/GettingStarted/windows/FoundrySamplesWinML.sln` solution in Visual Studio or your preferred IDE. - 1. If you're using Visual Studio, run any of the sample projects (e.g., `HelloFoundryLocalSdk`) by selecting the project in the Solution Explorer and selecting the **Start** button (or pressing **F5**). - - Alternatively, you can run the projects using the .NET CLI. For x64 (update the `` as needed): - ```bash - cd Foundry-Local/samples/cs/GettingStarted/windows - dotnet run --project /.csproj -r:win-x64 - ``` - or for ARM64: - ```bash - ```bash - cd Foundry-Local/samples/cs/GettingStarted/windows - dotnet run --project /.csproj -r:win-arm64 - ``` - - - **macOS or Linux:** - 1. Open the `Foundry-Local/samples/cs/GettingStarted/cross-platform/FoundrySamplesXPlatform.sln` solution in Visual Studio Code or your preferred IDE. - 1. 
Run the project using the .NET CLI (update the `` and `` as needed): - ```bash - cd Foundry-Local/samples/cs/GettingStarted/cross-platform - dotnet run --project /.csproj -r: - ``` - For example, to run the `HelloFoundryLocalSdk` project on macOS (Apple Silicon), use the following command: - - ```bash - cd Foundry-Local/samples/cs/GettingStarted/cross-platform - dotnet run --project HelloFoundryLocalSdk/HelloFoundryLocalSdk.csproj -r:osx-arm64 - ``` - - diff --git a/samples/cs/GettingStarted/cross-platform/AudioTranscriptionExample/AudioTranscriptionExample.csproj b/samples/cs/GettingStarted/cross-platform/AudioTranscriptionExample/AudioTranscriptionExample.csproj deleted file mode 100644 index 02eefb31..00000000 --- a/samples/cs/GettingStarted/cross-platform/AudioTranscriptionExample/AudioTranscriptionExample.csproj +++ /dev/null @@ -1,39 +0,0 @@ - - - - Exe - net9.0 - enable - enable - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - - - - - - - - - - PreserveNewest - - - - - diff --git a/samples/cs/GettingStarted/cross-platform/FoundryLocalWebServer/FoundryLocalWebServer.csproj b/samples/cs/GettingStarted/cross-platform/FoundryLocalWebServer/FoundryLocalWebServer.csproj deleted file mode 100644 index 672e8726..00000000 --- a/samples/cs/GettingStarted/cross-platform/FoundryLocalWebServer/FoundryLocalWebServer.csproj +++ /dev/null @@ -1,33 +0,0 @@ - - - - Exe - net9.0 - enable - enable - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - - - - - - - - - diff --git a/samples/cs/GettingStarted/cross-platform/FoundrySamplesXPlatform.sln b/samples/cs/GettingStarted/cross-platform/FoundrySamplesXPlatform.sln deleted file mode 100644 index a51c62d6..00000000 --- a/samples/cs/GettingStarted/cross-platform/FoundrySamplesXPlatform.sln +++ /dev/null @@ -1,53 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.14.36705.20 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HelloFoundryLocalSdk", "HelloFoundryLocalSdk\HelloFoundryLocalSdk.csproj", "{785AAE8A-8CD6-4916-B858-29B8A7EF8FF2}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalSdk", "ToolCallingFoundryLocalSdk\ToolCallingFoundryLocalSdk.csproj", "{2F99B88E-BE58-4ED6-A71E-60B6EE955D1B}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{8EC462FD-D22E-90A8-E5CE-7E832BA40C5D}" - ProjectSection(SolutionItems) = preProject - ..\Directory.Packages.props = ..\Directory.Packages.props - ..\nuget.config = ..\nuget.config - EndProjectSection -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FoundryLocalWebServer", "FoundryLocalWebServer\FoundryLocalWebServer.csproj", "{D1D6C453-3088-4D8D-B320-24D718601C26}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalWebServer", "ToolCallingFoundryLocalWebServer\ToolCallingFoundryLocalWebServer.csproj", "{B59762E0-B699-4F80-B2B6-8BC5751A4620}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AudioTranscriptionExample", "AudioTranscriptionExample\AudioTranscriptionExample.csproj", "{2FAD8210-8AEB-4063-9C61-57B7AD26772D}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ModelManagementExample", "ModelManagementExample\ModelManagementExample.csproj", "{AAD0233C-9FDD-46A7-9428-2F72BC76D38E}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - 
Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {785AAE8A-8CD6-4916-B858-29B8A7EF8FF2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {785AAE8A-8CD6-4916-B858-29B8A7EF8FF2}.Debug|Any CPU.Build.0 = Debug|Any CPU - {785AAE8A-8CD6-4916-B858-29B8A7EF8FF2}.Release|Any CPU.ActiveCfg = Release|Any CPU - {785AAE8A-8CD6-4916-B858-29B8A7EF8FF2}.Release|Any CPU.Build.0 = Release|Any CPU - {D1D6C453-3088-4D8D-B320-24D718601C26}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {D1D6C453-3088-4D8D-B320-24D718601C26}.Debug|Any CPU.Build.0 = Debug|Any CPU - {D1D6C453-3088-4D8D-B320-24D718601C26}.Release|Any CPU.ActiveCfg = Release|Any CPU - {D1D6C453-3088-4D8D-B320-24D718601C26}.Release|Any CPU.Build.0 = Release|Any CPU - {2FAD8210-8AEB-4063-9C61-57B7AD26772D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {2FAD8210-8AEB-4063-9C61-57B7AD26772D}.Debug|Any CPU.Build.0 = Debug|Any CPU - {2FAD8210-8AEB-4063-9C61-57B7AD26772D}.Release|Any CPU.ActiveCfg = Release|Any CPU - {2FAD8210-8AEB-4063-9C61-57B7AD26772D}.Release|Any CPU.Build.0 = Release|Any CPU - {AAD0233C-9FDD-46A7-9428-2F72BC76D38E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {AAD0233C-9FDD-46A7-9428-2F72BC76D38E}.Debug|Any CPU.Build.0 = Debug|Any CPU - {AAD0233C-9FDD-46A7-9428-2F72BC76D38E}.Release|Any CPU.ActiveCfg = Release|Any CPU - {AAD0233C-9FDD-46A7-9428-2F72BC76D38E}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {9FC1F302-B28C-4CAB-8ABA-24FA9EBBED6F} - EndGlobalSection -EndGlobal diff --git a/samples/cs/GettingStarted/cross-platform/HelloFoundryLocalSdk/HelloFoundryLocalSdk.csproj b/samples/cs/GettingStarted/cross-platform/HelloFoundryLocalSdk/HelloFoundryLocalSdk.csproj deleted file mode 100644 index bb8df514..00000000 --- a/samples/cs/GettingStarted/cross-platform/HelloFoundryLocalSdk/HelloFoundryLocalSdk.csproj +++ /dev/null @@ -1,32 +0,0 @@ - - - - Exe - net9.0 - enable - enable - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - - - - - - - - diff --git a/samples/cs/GettingStarted/cross-platform/ModelManagementExample/ModelManagementExample.csproj b/samples/cs/GettingStarted/cross-platform/ModelManagementExample/ModelManagementExample.csproj deleted file mode 100644 index 70af7023..00000000 --- a/samples/cs/GettingStarted/cross-platform/ModelManagementExample/ModelManagementExample.csproj +++ /dev/null @@ -1,33 +0,0 @@ - - - - Exe - net9.0 - enable - enable - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - - - - - - - - - diff --git a/samples/cs/GettingStarted/cross-platform/ToolCallingFoundryLocalSdk/ToolCallingFoundryLocalSdk.csproj b/samples/cs/GettingStarted/cross-platform/ToolCallingFoundryLocalSdk/ToolCallingFoundryLocalSdk.csproj deleted file mode 100644 index aa2b5400..00000000 --- a/samples/cs/GettingStarted/cross-platform/ToolCallingFoundryLocalSdk/ToolCallingFoundryLocalSdk.csproj +++ /dev/null @@ -1,31 +0,0 @@ - - - - Exe - net9.0 - enable - enable - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - - - - - - - diff --git a/samples/cs/GettingStarted/cross-platform/ToolCallingFoundryLocalWebServer/ToolCallingFoundryLocalWebServer.csproj b/samples/cs/GettingStarted/cross-platform/ToolCallingFoundryLocalWebServer/ToolCallingFoundryLocalWebServer.csproj deleted file mode 100644 index dcaeb80d..00000000 --- 
a/samples/cs/GettingStarted/cross-platform/ToolCallingFoundryLocalWebServer/ToolCallingFoundryLocalWebServer.csproj +++ /dev/null @@ -1,32 +0,0 @@ - - - - Exe - net9.0 - enable - enable - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - - - - - - - - diff --git a/samples/cs/GettingStarted/nuget.config b/samples/cs/GettingStarted/nuget.config deleted file mode 100644 index 5cf1e78e..00000000 --- a/samples/cs/GettingStarted/nuget.config +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/samples/cs/GettingStarted/windows/AudioTranscriptionExample/AudioTranscriptionExample.csproj b/samples/cs/GettingStarted/windows/AudioTranscriptionExample/AudioTranscriptionExample.csproj deleted file mode 100644 index 98219697..00000000 --- a/samples/cs/GettingStarted/windows/AudioTranscriptionExample/AudioTranscriptionExample.csproj +++ /dev/null @@ -1,36 +0,0 @@ - - - - Exe - enable - enable - - net9.0-windows10.0.26100 - false - ARM64;x64 - None - false - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - - - PreserveNewest - - - - \ No newline at end of file diff --git a/samples/cs/GettingStarted/windows/FoundryLocalWebServer/FoundryLocalWebServer.csproj b/samples/cs/GettingStarted/windows/FoundryLocalWebServer/FoundryLocalWebServer.csproj deleted file mode 100644 index f08a2b4a..00000000 --- a/samples/cs/GettingStarted/windows/FoundryLocalWebServer/FoundryLocalWebServer.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - Exe - enable - enable - - net9.0-windows10.0.26100 - false - ARM64;x64 - None - false - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - \ No newline at end of file diff --git a/samples/cs/GettingStarted/windows/FoundrySamplesWinML.sln b/samples/cs/GettingStarted/windows/FoundrySamplesWinML.sln deleted file mode 100644 index 10a0d851..00000000 --- a/samples/cs/GettingStarted/windows/FoundrySamplesWinML.sln +++ /dev/null @@ -1,71 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.14.36705.20 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HelloFoundryLocalSdk", "HelloFoundryLocalSdk\HelloFoundryLocalSdk.csproj", "{72ABF21E-2BFD-412A-9039-A594B392F00C}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalSdk", "ToolCallingFoundryLocalSdk\ToolCallingFoundryLocalSdk.csproj", "{93C21DF0-17D5-4927-9507-C10A79359E7D}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FoundryLocalWebServer", "FoundryLocalWebServer\FoundryLocalWebServer.csproj", "{77026F3A-25E0-40AB-B941-2A6252E13A35}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalWebServer", "ToolCallingFoundryLocalWebServer\ToolCallingFoundryLocalWebServer.csproj", "{5A8536E2-04B6-4F06-80B1-1018069DF73F}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AudioTranscriptionExample", "AudioTranscriptionExample\AudioTranscriptionExample.csproj", "{80F60523-40E1-4743-A256-974B21A9C6AB}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{8EC462FD-D22E-90A8-E5CE-7E832BA40C5D}" - ProjectSection(SolutionItems) = preProject - ..\Directory.Packages.props = ..\Directory.Packages.props - ..\nuget.config = ..\nuget.config - EndProjectSection -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ModelManagementExample", "ModelManagementExample\ModelManagementExample.csproj", 
"{6BBA4217-6798-4629-AF27-6526FCC5FA5B}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|ARM64 = Debug|ARM64 - Debug|x64 = Debug|x64 - Release|ARM64 = Release|ARM64 - Release|x64 = Release|x64 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Debug|ARM64.ActiveCfg = Debug|ARM64 - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Debug|ARM64.Build.0 = Debug|ARM64 - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Debug|x64.ActiveCfg = Debug|x64 - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Debug|x64.Build.0 = Debug|x64 - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Release|ARM64.ActiveCfg = Release|ARM64 - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Release|ARM64.Build.0 = Release|ARM64 - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Release|x64.ActiveCfg = Release|x64 - {72ABF21E-2BFD-412A-9039-A594B392F00C}.Release|x64.Build.0 = Release|x64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Debug|ARM64.ActiveCfg = Debug|ARM64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Debug|ARM64.Build.0 = Debug|ARM64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Debug|x64.ActiveCfg = Debug|x64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Debug|x64.Build.0 = Debug|x64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Release|ARM64.ActiveCfg = Release|ARM64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Release|ARM64.Build.0 = Release|ARM64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Release|x64.ActiveCfg = Release|x64 - {77026F3A-25E0-40AB-B941-2A6252E13A35}.Release|x64.Build.0 = Release|x64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Debug|ARM64.ActiveCfg = Debug|ARM64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Debug|ARM64.Build.0 = Debug|ARM64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Debug|x64.ActiveCfg = Debug|x64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Debug|x64.Build.0 = Debug|x64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Release|ARM64.ActiveCfg = Release|ARM64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Release|ARM64.Build.0 = Release|ARM64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Release|x64.ActiveCfg = Release|x64 - {80F60523-40E1-4743-A256-974B21A9C6AB}.Release|x64.Build.0 = Release|x64 - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Debug|ARM64.ActiveCfg = Debug|Any CPU - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Debug|ARM64.Build.0 = Debug|Any CPU - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Debug|x64.ActiveCfg = Debug|x64 - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Debug|x64.Build.0 = Debug|x64 - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Release|ARM64.ActiveCfg = Release|Any CPU - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Release|ARM64.Build.0 = Release|Any CPU - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Release|x64.ActiveCfg = Release|x64 - {6BBA4217-6798-4629-AF27-6526FCC5FA5B}.Release|x64.Build.0 = Release|x64 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {17462B72-2BD9-446A-8E57-E313251686D9} - EndGlobalSection -EndGlobal diff --git a/samples/cs/GettingStarted/windows/HelloFoundryLocalSdk/HelloFoundryLocalSdk.csproj b/samples/cs/GettingStarted/windows/HelloFoundryLocalSdk/HelloFoundryLocalSdk.csproj deleted file mode 100644 index 23d2ee91..00000000 --- a/samples/cs/GettingStarted/windows/HelloFoundryLocalSdk/HelloFoundryLocalSdk.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - Exe - enable - enable - - net9.0-windows10.0.26100 - false - ARM64;x64 - None - false - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - \ No newline at end of file diff 
--git a/samples/cs/GettingStarted/windows/ModelManagementExample/ModelManagementExample.csproj b/samples/cs/GettingStarted/windows/ModelManagementExample/ModelManagementExample.csproj deleted file mode 100644 index bc4afe67..00000000 --- a/samples/cs/GettingStarted/windows/ModelManagementExample/ModelManagementExample.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - Exe - enable - enable - - net9.0-windows10.0.26100 - false - ARM64;x64 - None - false - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - \ No newline at end of file diff --git a/samples/cs/GettingStarted/windows/ToolCallingFoundryLocalSdk/ToolCallingFoundryLocalSdk.csproj b/samples/cs/GettingStarted/windows/ToolCallingFoundryLocalSdk/ToolCallingFoundryLocalSdk.csproj deleted file mode 100644 index de209c13..00000000 --- a/samples/cs/GettingStarted/windows/ToolCallingFoundryLocalSdk/ToolCallingFoundryLocalSdk.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - Exe - enable - enable - - net9.0-windows10.0.26100 - false - ARM64;x64 - None - false - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - \ No newline at end of file diff --git a/samples/cs/GettingStarted/windows/ToolCallingFoundryLocalWebServer/ToolCallingFoundryLocalWebServer.csproj b/samples/cs/GettingStarted/windows/ToolCallingFoundryLocalWebServer/ToolCallingFoundryLocalWebServer.csproj deleted file mode 100644 index 9101d778..00000000 --- a/samples/cs/GettingStarted/windows/ToolCallingFoundryLocalWebServer/ToolCallingFoundryLocalWebServer.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - Exe - enable - enable - - net9.0-windows10.0.26100 - false - ARM64;x64 - None - false - - - - $(NETCoreSdkRuntimeIdentifier) - - - - - - - - - - - - - - \ No newline at end of file diff --git a/samples/cs/README.md b/samples/cs/README.md new file mode 100644 index 00000000..1847bb8e --- /dev/null +++ b/samples/cs/README.md @@ -0,0 +1,43 @@ +# 🚀 Foundry Local C# Samples + +These samples demonstrate how to use the Foundry Local C# SDK. Each sample uses a **unified project file** that automatically detects your operating system and selects the optimal NuGet package: + +- **Windows**: Uses `Microsoft.AI.Foundry.Local.WinML` for hardware acceleration via Windows ML. +- **macOS / Linux**: Uses `Microsoft.AI.Foundry.Local` for cross-platform support. + +Both packages provide the same APIs, so the same source code works on all platforms. + +## Samples + +| Sample | Description | +|---|---| +| [native-chat-completions](native-chat-completions/) | Initialize the SDK, download a model, and run chat completions. | +| [audio-transcription-example](audio-transcription-example/) | Transcribe audio files using the Foundry Local SDK. | +| [foundry-local-web-server](foundry-local-web-server/) | Set up a local OpenAI-compliant web server. | +| [tool-calling-foundry-local-sdk](tool-calling-foundry-local-sdk/) | Use tool calling with native chat completions. | +| [tool-calling-foundry-local-web-server](tool-calling-foundry-local-web-server/) | Use tool calling with the local web server. | +| [model-management-example](model-management-example/) | Manage models, variant selection, and updates. | +| [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive chat assistant (tutorial). | +| [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). | +| [tutorial-tool-calling](tutorial-tool-calling/) | Create a tool-calling assistant (tutorial). 
| +| [tutorial-voice-to-text](tutorial-voice-to-text/) | Transcribe and summarize audio (tutorial). | + +## Running a sample + +1. Clone the repository: + ```bash + git clone https://github.com/microsoft/Foundry-Local.git + cd Foundry-Local/samples/cs + ``` + +2. Open and run a sample: + ```bash + cd native-chat-completions + dotnet run + ``` + + The unified project file automatically selects the correct SDK package for your platform. + +> [!TIP] +> On Windows, we recommend using the WinML package (selected automatically) for optimal performance. Your users benefit from a wider range of hardware acceleration options and a smaller application package size. + diff --git a/samples/cs/GettingStarted/src/Shared/Utils.cs b/samples/cs/Shared/Utils.cs similarity index 100% rename from samples/cs/GettingStarted/src/Shared/Utils.cs rename to samples/cs/Shared/Utils.cs diff --git a/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj b/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj new file mode 100644 index 00000000..bd42e38b --- /dev/null +++ b/samples/cs/audio-transcription-example/AudioTranscriptionExample.csproj @@ -0,0 +1,55 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + PreserveNewest + + + + + + + + + diff --git a/samples/cs/audio-transcription-example/AudioTranscriptionExample.sln b/samples/cs/audio-transcription-example/AudioTranscriptionExample.sln new file mode 100644 index 00000000..46fb73d9 --- /dev/null +++ b/samples/cs/audio-transcription-example/AudioTranscriptionExample.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AudioTranscriptionExample", "AudioTranscriptionExample.csproj", "{11616852-BB4F-4B60-9FAC-D94E2688BB30}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|Any CPU.Build.0 = Debug|ARM64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x64.ActiveCfg = Debug|x64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x64.Build.0 = Debug|x64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x86.ActiveCfg = Debug|ARM64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Debug|x86.Build.0 = Debug|ARM64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|Any CPU.ActiveCfg = Release|ARM64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|Any CPU.Build.0 = Release|ARM64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x64.ActiveCfg = Release|x64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x64.Build.0 = Release|x64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x86.ActiveCfg = Release|ARM64 + {11616852-BB4F-4B60-9FAC-D94E2688BB30}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs 
b/samples/cs/audio-transcription-example/Program.cs similarity index 76% rename from samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs rename to samples/cs/audio-transcription-example/Program.cs index be1db5db..ac5689c1 100644 --- a/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs +++ b/samples/cs/audio-transcription-example/Program.cs @@ -1,5 +1,9 @@ -using Microsoft.AI.Foundry.Local; +// +// +using Microsoft.AI.Foundry.Local; +// +// var config = new Configuration { AppName = "foundry_local_samples", @@ -16,9 +20,11 @@ // EP packages include dependencies and may be large. // Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); +// +// // Get the model catalog var catalog = await mgr.GetCatalogAsync(); @@ -44,15 +50,17 @@ await model.DownloadAsync(progress => Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); Console.WriteLine("done."); +// -// Get a chat client +// +// Get an audio client var audioClient = await model.GetAudioClientAsync(); - +audioClient.Settings.Language = "en"; // Get a transcription with streaming outputs -Console.WriteLine("Transcribing audio with streaming output:"); -var audioFile = Path.Combine(AppContext.BaseDirectory, "Recording.mp3"); +var audioFile = args.Length > 0 ? args[0] : Path.Combine(AppContext.BaseDirectory, "Recording.mp3"); +Console.WriteLine($"Transcribing audio with streaming output: {Path.GetFileName(audioFile)}"); var response = audioClient.TranscribeAudioStreamingAsync(audioFile, CancellationToken.None); await foreach (var chunk in response) { @@ -61,7 +69,11 @@ await model.DownloadAsync(progress => } Console.WriteLine(); +// +// // Tidy up - unload the model -await model.UnloadAsync(); \ No newline at end of file +await model.UnloadAsync(); +// +// \ No newline at end of file diff --git a/samples/cs/GettingStarted/src/AudioTranscriptionExample/Recording.mp3 b/samples/cs/audio-transcription-example/Recording.mp3 similarity index 100% rename from samples/cs/GettingStarted/src/AudioTranscriptionExample/Recording.mp3 rename to samples/cs/audio-transcription-example/Recording.mp3 diff --git a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj new file mode 100644 index 00000000..fe890be2 --- /dev/null +++ b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.csproj @@ -0,0 +1,52 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/foundry-local-web-server/FoundryLocalWebServer.sln b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.sln new file mode 100644 index 00000000..91d7e953 --- /dev/null +++ b/samples/cs/foundry-local-web-server/FoundryLocalWebServer.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FoundryLocalWebServer", "FoundryLocalWebServer.csproj", "{2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}" +EndProject +Global + 
GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|Any CPU.Build.0 = Debug|ARM64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x64.ActiveCfg = Debug|x64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x64.Build.0 = Debug|x64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x86.ActiveCfg = Debug|ARM64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Debug|x86.Build.0 = Debug|ARM64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|Any CPU.ActiveCfg = Release|ARM64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|Any CPU.Build.0 = Release|ARM64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x64.ActiveCfg = Release|x64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x64.Build.0 = Release|x64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x86.ActiveCfg = Release|ARM64 + {2DEC84E5-8530-45AF-B26D-EC78A6A7D6E7}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs b/samples/cs/foundry-local-web-server/Program.cs similarity index 89% rename from samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs rename to samples/cs/foundry-local-web-server/Program.cs index f50ac1b0..9225ad7d 100644 --- a/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs +++ b/samples/cs/foundry-local-web-server/Program.cs @@ -1,7 +1,11 @@ -using Microsoft.AI.Foundry.Local; +// +// +using Microsoft.AI.Foundry.Local; using OpenAI; using System.ClientModel; +// +// var config = new Configuration { AppName = "foundry_local_samples", @@ -22,9 +26,11 @@ // EP packages include dependencies and may be large. // Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. 
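// As a sketch (assuming the same callback overload used in the native-chat-completions sample
// further below), registration progress can also be reported per EP instead of with a spinner:
// await mgr.DownloadAndRegisterEpsAsync((epName, percent) => Console.Write($"\r{epName} {percent,6:F1}%"));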
-await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); +// +// // Get the model catalog var catalog = await mgr.GetCatalogAsync(); @@ -46,8 +52,10 @@ await model.DownloadAsync(progress => Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); Console.WriteLine("done."); +// +// // Start the web service Console.Write($"Starting web service on {config.Web.Urls}..."); await mgr.StartWebServiceAsync(); @@ -79,4 +87,6 @@ await model.DownloadAsync(progress => // Tidy up // Stop the web service and unload model await mgr.StopWebServiceAsync(); -await model.UnloadAsync(); \ No newline at end of file +await model.UnloadAsync(); +// +// \ No newline at end of file diff --git a/samples/cs/model-management-example/ModelManagementExample.csproj b/samples/cs/model-management-example/ModelManagementExample.csproj new file mode 100644 index 00000000..4d948c56 --- /dev/null +++ b/samples/cs/model-management-example/ModelManagementExample.csproj @@ -0,0 +1,48 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/model-management-example/ModelManagementExample.sln b/samples/cs/model-management-example/ModelManagementExample.sln new file mode 100644 index 00000000..f255391b --- /dev/null +++ b/samples/cs/model-management-example/ModelManagementExample.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ModelManagementExample", "ModelManagementExample.csproj", "{9316B939-946C-4956-A4E7-9410017FD319}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9316B939-946C-4956-A4E7-9410017FD319}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {9316B939-946C-4956-A4E7-9410017FD319}.Debug|Any CPU.Build.0 = Debug|ARM64 + {9316B939-946C-4956-A4E7-9410017FD319}.Debug|x64.ActiveCfg = Debug|x64 + {9316B939-946C-4956-A4E7-9410017FD319}.Debug|x64.Build.0 = Debug|x64 + {9316B939-946C-4956-A4E7-9410017FD319}.Debug|x86.ActiveCfg = Debug|ARM64 + {9316B939-946C-4956-A4E7-9410017FD319}.Debug|x86.Build.0 = Debug|ARM64 + {9316B939-946C-4956-A4E7-9410017FD319}.Release|Any CPU.ActiveCfg = Release|ARM64 + {9316B939-946C-4956-A4E7-9410017FD319}.Release|Any CPU.Build.0 = Release|ARM64 + {9316B939-946C-4956-A4E7-9410017FD319}.Release|x64.ActiveCfg = Release|x64 + {9316B939-946C-4956-A4E7-9410017FD319}.Release|x64.Build.0 = Release|x64 + {9316B939-946C-4956-A4E7-9410017FD319}.Release|x86.ActiveCfg = Release|ARM64 + {9316B939-946C-4956-A4E7-9410017FD319}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs b/samples/cs/model-management-example/Program.cs similarity index 80% rename from samples/cs/GettingStarted/src/ModelManagementExample/Program.cs rename to 
samples/cs/model-management-example/Program.cs index 2b6fe2e8..a34d2737 100644 --- a/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs +++ b/samples/cs/model-management-example/Program.cs @@ -16,11 +16,8 @@ var mgr = FoundryLocalManager.Instance; -// Ensure that any Execution Provider (EP) downloads run and are completed. -// EP packages include dependencies and may be large. -// Download is only required again if a new version of the EP is released. -// For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +// Download and register all execution providers. +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); // Model catalog operations @@ -51,39 +48,35 @@ // Get a model using an alias from the catalog var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); -// `model.SelectedVariant` indicates which variant will be used by default. -// // Models in Model.Variants are ordered by priority, with the highest priority first. // The first downloaded model is selected by default. // The highest priority is selected if no models have been downloaded. // If the selected variant is not the highest priority, it means that Foundry Local // has found a locally cached variant for you to improve performance (removing the need to download). Console.WriteLine("\nThe default selected model variant is: " + model.Id); -if (model.SelectedVariant != model.Variants.First()) +if (model.Id != model.Variants.First().Id) { - Debug.Assert(await model.SelectedVariant.IsCachedAsync()); + Debug.Assert(await model.IsCachedAsync()); Console.WriteLine("The model variant was selected due to being locally cached."); } -// OPTIONAL: `model` can be used directly and `model.SelectedVariant` will be used as the default. -// You can explicitly select or use a specific ModelVariant if you want more control -// over the device and/or execution provider used. -// Model and ModelVariant can be used interchangeably in methods such as -// DownloadAsync, LoadAsync, UnloadAsync and GetChatClientAsync. +// OPTIONAL: `model` can be used directly with its currently selected variant. +// You can explicitly select (`model.SelectVariant`) or use a specific variant from `model.Variants` +// if you want more control over the device and/or execution provider used. // // Choices: // - Use a model variant directly from the catalog if you know the variant Id // - `var modelVariant = await catalog.GetModelVariantAsync("qwen2.5-0.5b-instruct-generic-gpu:3")` // // - Get the model variant from IModel.Variants // - `var modelVariant = model.Variants.First(v => v.Id == "qwen2.5-0.5b-instruct-generic-cpu:4")` // - `var modelVariant = model.Variants.First(v => v.Info.Runtime?.DeviceType == DeviceType.GPU)` // - optional: update selected variant in `model` using `model.SelectVariant(modelVariant);` if you wish to use // `model` in your code. // For this example we explicitly select the CPU variant, and call SelectVariant so all the following example code -// uses the `model` instance. +// uses the `model` instance. It would be equally valid to use `modelVariant` directly.
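// A sketch of an alternative selection (assumption: it uses only the APIs shown above), preferring a
// GPU variant and falling back to the highest-priority variant when no GPU variant exists:
// var preferred = model.Variants.FirstOrDefault(v => v.Info.Runtime?.DeviceType == DeviceType.GPU)
//                 ?? model.Variants.First();
// model.SelectVariant(preferred);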
Console.WriteLine("Selecting CPU variant of model"); var modelVariant = model.Variants.First(v => v.Info.Runtime?.DeviceType == DeviceType.CPU); model.SelectVariant(modelVariant); diff --git a/samples/cs/native-chat-completions/NativeChatCompletions.csproj b/samples/cs/native-chat-completions/NativeChatCompletions.csproj new file mode 100644 index 00000000..4d948c56 --- /dev/null +++ b/samples/cs/native-chat-completions/NativeChatCompletions.csproj @@ -0,0 +1,48 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/native-chat-completions/NativeChatCompletions.sln b/samples/cs/native-chat-completions/NativeChatCompletions.sln new file mode 100644 index 00000000..a127bfba --- /dev/null +++ b/samples/cs/native-chat-completions/NativeChatCompletions.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NativeChatCompletions", "NativeChatCompletions.csproj", "{A53372CE-F7E1-4F09-B186-77F76E388659}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A53372CE-F7E1-4F09-B186-77F76E388659}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Debug|Any CPU.Build.0 = Debug|ARM64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Debug|x64.ActiveCfg = Debug|x64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Debug|x64.Build.0 = Debug|x64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Debug|x86.ActiveCfg = Debug|ARM64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Debug|x86.Build.0 = Debug|ARM64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Release|Any CPU.ActiveCfg = Release|ARM64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Release|Any CPU.Build.0 = Release|ARM64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Release|x64.ActiveCfg = Release|x64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Release|x64.Build.0 = Release|x64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Release|x86.ActiveCfg = Release|ARM64 + {A53372CE-F7E1-4F09-B186-77F76E388659}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs b/samples/cs/native-chat-completions/Program.cs similarity index 57% rename from samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs rename to samples/cs/native-chat-completions/Program.cs index 52efe410..033786b1 100644 --- a/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs +++ b/samples/cs/native-chat-completions/Program.cs @@ -1,6 +1,10 @@ -using Microsoft.AI.Foundry.Local; +// +// +using Microsoft.AI.Foundry.Local; using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; +// +// CancellationToken ct = new CancellationToken(); var config = new Configuration @@ -15,13 +19,47 @@ var mgr = FoundryLocalManager.Instance; -// Ensure that any Execution Provider (EP) downloads run and are completed. +// Discover available execution providers and their registration status. 
+var eps = mgr.DiscoverEps(); +int maxNameLen = 30; +Console.WriteLine("Available execution providers:"); +Console.WriteLine($" {"Name".PadRight(maxNameLen)} Registered"); +Console.WriteLine($" {new string('─', maxNameLen)} {"──────────"}"); +foreach (var ep in eps) +{ + Console.WriteLine($" {ep.Name.PadRight(maxNameLen)} {ep.IsRegistered}"); +} + +// Download and register all execution providers with per-EP progress. // EP packages include dependencies and may be large. // Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +Console.WriteLine("\nDownloading execution providers:"); +if (eps.Length > 0) +{ + string currentEp = ""; + await mgr.DownloadAndRegisterEpsAsync((epName, percent) => + { + if (epName != currentEp) + { + if (currentEp != "") + { + Console.WriteLine(); + } + currentEp = epName; + } + Console.Write($"\r {epName.PadRight(maxNameLen)} {percent,6:F1}%"); + }); + Console.WriteLine(); +} +else +{ + Console.WriteLine("No execution providers to download."); +} +// +// // Get the model catalog var catalog = await mgr.GetCatalogAsync(); @@ -43,7 +81,9 @@ await model.DownloadAsync(progress => Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); Console.WriteLine("done."); +// +// // Get a chat client var chatClient = await model.GetChatClientAsync(); @@ -62,6 +102,10 @@ await model.DownloadAsync(progress => Console.Out.Flush(); } Console.WriteLine(); +// +// // Tidy up - unload the model -await model.UnloadAsync(); \ No newline at end of file +await model.UnloadAsync(); +// +// \ No newline at end of file diff --git a/samples/cs/nuget.config b/samples/cs/nuget.config new file mode 100644 index 00000000..9913c715 --- /dev/null +++ b/samples/cs/nuget.config @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs b/samples/cs/tool-calling-foundry-local-sdk/Program.cs similarity index 93% rename from samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs rename to samples/cs/tool-calling-foundry-local-sdk/Program.cs index 3cdf3d38..8ac96369 100644 --- a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs +++ b/samples/cs/tool-calling-foundry-local-sdk/Program.cs @@ -1,9 +1,13 @@ -using Microsoft.AI.Foundry.Local; +// +// +using Microsoft.AI.Foundry.Local; using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; using Betalgo.Ranul.OpenAI.ObjectModels.SharedModels; using System.Text.Json; +// +// CancellationToken ct = new CancellationToken(); var config = new Configuration @@ -22,9 +26,11 @@ // EP packages include dependencies and may be large. // Download is only required again if a new version of the EP is released. // For cross platform builds there is no dynamic EP download and this will return immediately. 
-await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); +// +// // Get the model catalog var catalog = await mgr.GetCatalogAsync(); @@ -48,6 +54,7 @@ await model.DownloadAsync(progress => Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); Console.WriteLine("done."); +// // Get a chat client @@ -63,6 +70,7 @@ await model.DownloadAsync(progress => ]; +// // Prepare tools List tools = [ @@ -86,8 +94,10 @@ await model.DownloadAsync(progress => } } ]; +// +// // Get a streaming chat completion response var toolCallResponses = new List(); Console.WriteLine("Chat completion response:"); @@ -150,7 +160,11 @@ await model.DownloadAsync(progress => Console.Out.Flush(); } Console.WriteLine(); +// +// // Tidy up - unload the model -await model.UnloadAsync(); \ No newline at end of file +await model.UnloadAsync(); +// +// \ No newline at end of file diff --git a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj new file mode 100644 index 00000000..4d948c56 --- /dev/null +++ b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.csproj @@ -0,0 +1,48 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.sln b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.sln new file mode 100644 index 00000000..adbf5ea2 --- /dev/null +++ b/samples/cs/tool-calling-foundry-local-sdk/ToolCallingFoundryLocalSdk.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalSdk", "ToolCallingFoundryLocalSdk.csproj", "{7B40637D-D7E3-4A95-9B57-8D0EF84C8532}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|Any CPU.Build.0 = Debug|ARM64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x64.ActiveCfg = Debug|x64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x64.Build.0 = Debug|x64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x86.ActiveCfg = Debug|ARM64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Debug|x86.Build.0 = Debug|ARM64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|Any CPU.ActiveCfg = Release|ARM64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|Any CPU.Build.0 = Release|ARM64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x64.ActiveCfg = Release|x64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x64.Build.0 = Release|x64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x86.ActiveCfg = Release|ARM64 + {7B40637D-D7E3-4A95-9B57-8D0EF84C8532}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + 
EndGlobalSection +EndGlobal diff --git a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs b/samples/cs/tool-calling-foundry-local-web-server/Program.cs similarity index 92% rename from samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs rename to samples/cs/tool-calling-foundry-local-web-server/Program.cs index 6d6937fd..48ee6c6f 100644 --- a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs +++ b/samples/cs/tool-calling-foundry-local-web-server/Program.cs @@ -1,4 +1,5 @@ -using Microsoft.AI.Foundry.Local; +// +using Microsoft.AI.Foundry.Local; using OpenAI; using OpenAI.Chat; using System.ClientModel; @@ -20,11 +21,8 @@ var mgr = FoundryLocalManager.Instance; -// Ensure that any Execution Provider (EP) downloads run and are completed. -// EP packages include dependencies and may be large. -// Download is only required again if a new version of the EP is released. -// For cross platform builds there is no dynamic EP download and this will return immediately. -await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); +// Download and register all execution providers. +await Utils.RunWithSpinner("Registering execution providers", mgr.DownloadAndRegisterEpsAsync()); // Get the model catalog @@ -178,4 +176,5 @@ await model.DownloadAsync(progress => // Tidy up // Stop the web service and unload model await mgr.StopWebServiceAsync(); -await model.UnloadAsync(); \ No newline at end of file +await model.UnloadAsync(); +// \ No newline at end of file diff --git a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj new file mode 100644 index 00000000..fe890be2 --- /dev/null +++ b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.csproj @@ -0,0 +1,52 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.sln b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.sln new file mode 100644 index 00000000..7d1568e1 --- /dev/null +++ b/samples/cs/tool-calling-foundry-local-web-server/ToolCallingFoundryLocalWebServer.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ToolCallingFoundryLocalWebServer", "ToolCallingFoundryLocalWebServer.csproj", "{F9BD2479-A235-4BBF-A722-DF180A076143}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|Any CPU.Build.0 = Debug|ARM64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x64.ActiveCfg = Debug|x64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x64.Build.0 = Debug|x64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x86.ActiveCfg = 
Debug|ARM64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Debug|x86.Build.0 = Debug|ARM64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Release|Any CPU.ActiveCfg = Release|ARM64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Release|Any CPU.Build.0 = Release|ARM64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x64.ActiveCfg = Release|x64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x64.Build.0 = Release|x64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x86.ActiveCfg = Release|ARM64 + {F9BD2479-A235-4BBF-A722-DF180A076143}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/tutorial-chat-assistant/Program.cs b/samples/cs/tutorial-chat-assistant/Program.cs new file mode 100644 index 00000000..10e9a63b --- /dev/null +++ b/samples/cs/tutorial-chat-assistant/Program.cs @@ -0,0 +1,101 @@ +// +// +using Microsoft.AI.Foundry.Local; +using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; +using Microsoft.Extensions.Logging; +// + +// +CancellationToken ct = CancellationToken.None; + +var config = new Configuration +{ + AppName = "foundry_local_samples", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information +}; + +using var loggerFactory = LoggerFactory.Create(builder => +{ + builder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information); +}); +var logger = loggerFactory.CreateLogger(); + +// Initialize the singleton instance +await FoundryLocalManager.CreateAsync(config, logger); +var mgr = FoundryLocalManager.Instance; + +// Select and load a model from the catalog +var catalog = await mgr.GetCatalogAsync(); +var model = await catalog.GetModelAsync("qwen2.5-0.5b") + ?? throw new Exception("Model not found"); + +await model.DownloadAsync(progress => +{ + Console.Write($"\rDownloading model: {progress:F2}%"); + if (progress >= 100f) Console.WriteLine(); +}); + +await model.LoadAsync(); +Console.WriteLine("Model loaded and ready."); + +// Get a chat client +var chatClient = await model.GetChatClientAsync(); +// + +// +// Start the conversation with a system prompt +var messages = new List +{ + new ChatMessage + { + Role = "system", + Content = "You are a helpful, friendly assistant. Keep your responses " + + "concise and conversational. If you don't know something, say so." + } +}; +// + +Console.WriteLine("\nChat assistant ready! Type 'quit' to exit.\n"); + +// +while (true) +{ + Console.Write("You: "); + var userInput = Console.ReadLine(); + if (string.IsNullOrWhiteSpace(userInput) || + userInput.Equals("quit", StringComparison.OrdinalIgnoreCase) || + userInput.Equals("exit", StringComparison.OrdinalIgnoreCase)) + { + break; + } + + // Add the user's message to conversation history + messages.Add(new ChatMessage { Role = "user", Content = userInput }); + + // + // Stream the response token by token + Console.Write("Assistant: "); + var fullResponse = string.Empty; + var streamingResponse = chatClient.CompleteChatStreamingAsync(messages, ct); + await foreach (var chunk in streamingResponse) + { + var content = chunk.Choices[0].Message.Content; + if (!string.IsNullOrEmpty(content)) + { + Console.Write(content); + Console.Out.Flush(); + fullResponse += content; + } + } + Console.WriteLine("\n"); + // + + // Add the complete response to conversation history + messages.Add(new ChatMessage { Role = "assistant", Content = fullResponse }); +} +// + +// Clean up - unload the model +await model.UnloadAsync(); +Console.WriteLine("Model unloaded. 
Goodbye!"); +// diff --git a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj new file mode 100644 index 00000000..a3533047 --- /dev/null +++ b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.csproj @@ -0,0 +1,50 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.sln b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.sln new file mode 100644 index 00000000..a9c77e16 --- /dev/null +++ b/samples/cs/tutorial-chat-assistant/TutorialChatAssistant.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialChatAssistant", "TutorialChatAssistant.csproj", "{5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|Any CPU.Build.0 = Debug|ARM64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x64.ActiveCfg = Debug|x64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x64.Build.0 = Debug|x64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x86.ActiveCfg = Debug|ARM64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Debug|x86.Build.0 = Debug|ARM64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|Any CPU.ActiveCfg = Release|ARM64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|Any CPU.Build.0 = Release|ARM64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x64.ActiveCfg = Release|x64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x64.Build.0 = Release|x64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x86.ActiveCfg = Release|ARM64 + {5D5778BD-B40A-4D9E-BC2F-65AD50EE6F94}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/tutorial-document-summarizer/Program.cs b/samples/cs/tutorial-document-summarizer/Program.cs new file mode 100644 index 00000000..bc5546f6 --- /dev/null +++ b/samples/cs/tutorial-document-summarizer/Program.cs @@ -0,0 +1,109 @@ +// +// +using Microsoft.AI.Foundry.Local; +using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; +using Microsoft.Extensions.Logging; +// + +// +CancellationToken ct = CancellationToken.None; + +var config = new Configuration +{ + AppName = "foundry_local_samples", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information +}; + +using var loggerFactory = LoggerFactory.Create(builder => +{ + builder.SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information); +}); +var logger = loggerFactory.CreateLogger(); + +// Initialize the singleton instance +await FoundryLocalManager.CreateAsync(config, logger); +var mgr = FoundryLocalManager.Instance; + +// Select and load a model from the catalog +var catalog = await mgr.GetCatalogAsync(); +var model = await 
catalog.GetModelAsync("qwen2.5-0.5b") + ?? throw new Exception("Model not found"); + +await model.DownloadAsync(progress => +{ + Console.Write($"\rDownloading model: {progress:F2}%"); + if (progress >= 100f) Console.WriteLine(); +}); + +await model.LoadAsync(); +Console.WriteLine("Model loaded and ready.\n"); + +// Get a chat client +var chatClient = await model.GetChatClientAsync(); +// + +// +var systemPrompt = + "Summarize the following document into concise bullet points. " + + "Focus on the key points and main ideas."; + +// +var target = args.Length > 0 ? args[0] : "document.txt"; +// + +if (Directory.Exists(target)) +{ + await SummarizeDirectoryAsync(chatClient, target, systemPrompt, ct); +} +else +{ + Console.WriteLine($"--- {Path.GetFileName(target)} ---"); + await SummarizeFileAsync(chatClient, target, systemPrompt, ct); +} +// + +// Clean up +await model.UnloadAsync(); +Console.WriteLine("\nModel unloaded. Done!"); + +async Task SummarizeFileAsync( + dynamic client, + string filePath, + string prompt, + CancellationToken token) +{ + var fileContent = await File.ReadAllTextAsync(filePath, token); + var messages = new List + { + new ChatMessage { Role = "system", Content = prompt }, + new ChatMessage { Role = "user", Content = fileContent } + }; + + var response = await client.CompleteChatAsync(messages, token); + Console.WriteLine(response.Choices[0].Message.Content); +} + +async Task SummarizeDirectoryAsync( + dynamic client, + string directory, + string prompt, + CancellationToken token) +{ + var txtFiles = Directory.GetFiles(directory, "*.txt") + .OrderBy(f => f) + .ToArray(); + + if (txtFiles.Length == 0) + { + Console.WriteLine($"No .txt files found in {directory}"); + return; + } + + foreach (var txtFile in txtFiles) + { + Console.WriteLine($"--- {Path.GetFileName(txtFile)} ---"); + await SummarizeFileAsync(client, txtFile, prompt, token); + Console.WriteLine(); + } +} +// diff --git a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj new file mode 100644 index 00000000..a3533047 --- /dev/null +++ b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.csproj @@ -0,0 +1,50 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.sln b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.sln new file mode 100644 index 00000000..7d7a0fc9 --- /dev/null +++ b/samples/cs/tutorial-document-summarizer/TutorialDocumentSummarizer.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialDocumentSummarizer", "TutorialDocumentSummarizer.csproj", "{6868D03F-BD8E-46ED-9A5B-95346A3810A4}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + 
{6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|Any CPU.Build.0 = Debug|ARM64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x64.ActiveCfg = Debug|x64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x64.Build.0 = Debug|x64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x86.ActiveCfg = Debug|ARM64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Debug|x86.Build.0 = Debug|ARM64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|Any CPU.ActiveCfg = Release|ARM64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|Any CPU.Build.0 = Release|ARM64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x64.ActiveCfg = Release|x64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x64.Build.0 = Release|x64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x86.ActiveCfg = Release|ARM64 + {6868D03F-BD8E-46ED-9A5B-95346A3810A4}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/tutorial-tool-calling/Program.cs b/samples/cs/tutorial-tool-calling/Program.cs new file mode 100644 index 00000000..74f137db --- /dev/null +++ b/samples/cs/tutorial-tool-calling/Program.cs @@ -0,0 +1,228 @@ +// +// +using System.Text.Json; +using Microsoft.AI.Foundry.Local; +using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; +using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; +using Betalgo.Ranul.OpenAI.ObjectModels.SharedModels; +using Microsoft.Extensions.Logging; +// + +CancellationToken ct = CancellationToken.None; + +// +// --- Tool definitions --- +List tools = +[ + new ToolDefinition + { + Type = "function", + Function = new FunctionDefinition() + { + Name = "get_weather", + Description = "Get the current weather for a location", + Parameters = new PropertyDefinition() + { + Type = "object", + Properties = new Dictionary() + { + { "location", new PropertyDefinition() { Type = "string", Description = "The city or location" } }, + { "unit", new PropertyDefinition() { Type = "string", Description = "Temperature unit (celsius or fahrenheit)" } } + }, + Required = ["location"] + } + } + }, + new ToolDefinition + { + Type = "function", + Function = new FunctionDefinition() + { + Name = "calculate", + Description = "Perform a math calculation", + Parameters = new PropertyDefinition() + { + Type = "object", + Properties = new Dictionary() + { + { "expression", new PropertyDefinition() { Type = "string", Description = "The math expression to evaluate" } } + }, + Required = ["expression"] + } + } + } +]; + +// --- Tool implementations --- +string ExecuteTool(string functionName, JsonElement arguments) +{ + switch (functionName) + { + case "get_weather": + var location = arguments.GetProperty("location") + .GetString() ?? "unknown"; + var unit = arguments.TryGetProperty("unit", out var u) + ? u.GetString() ?? "celsius" + : "celsius"; + var temp = unit == "celsius" ? 22 : 72; + return JsonSerializer.Serialize(new + { + location, + temperature = temp, + unit, + condition = "Sunny" + }); + + case "calculate": + var expression = arguments.GetProperty("expression") + .GetString() ?? 
""; + try + { + var result = new System.Data.DataTable() + .Compute(expression, null); + return JsonSerializer.Serialize(new + { + expression, + result = result?.ToString() + }); + } + catch (Exception ex) + { + return JsonSerializer.Serialize(new + { + error = ex.Message + }); + } + + default: + return JsonSerializer.Serialize(new + { + error = $"Unknown function: {functionName}" + }); + } +} +// + +// +// --- Main application --- +var config = new Configuration +{ + AppName = "foundry_local_samples", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information +}; + +using var loggerFactory = LoggerFactory.Create(builder => +{ + builder.SetMinimumLevel( + Microsoft.Extensions.Logging.LogLevel.Information + ); +}); +var logger = loggerFactory.CreateLogger(); + +await FoundryLocalManager.CreateAsync(config, logger); +var mgr = FoundryLocalManager.Instance; + +var catalog = await mgr.GetCatalogAsync(); +var model = await catalog.GetModelAsync("qwen2.5-0.5b") + ?? throw new Exception("Model not found"); + +await model.DownloadAsync(progress => +{ + Console.Write($"\rDownloading model: {progress:F2}%"); + if (progress >= 100f) Console.WriteLine(); +}); + +await model.LoadAsync(); +Console.WriteLine("Model loaded and ready."); + +var chatClient = await model.GetChatClientAsync(); +chatClient.Settings.ToolChoice = ToolChoice.Auto; + +var messages = new List +{ + new ChatMessage + { + Role = "system", + Content = "You are a helpful assistant with access to tools. " + + "Use them when needed to answer questions accurately." + } +}; +// + +// +Console.WriteLine("\nTool-calling assistant ready! Type 'quit' to exit.\n"); + +while (true) +{ + Console.Write("You: "); + var userInput = Console.ReadLine(); + if (string.IsNullOrWhiteSpace(userInput) || + userInput.Equals("quit", StringComparison.OrdinalIgnoreCase) || + userInput.Equals("exit", StringComparison.OrdinalIgnoreCase)) + { + break; + } + + messages.Add(new ChatMessage + { + Role = "user", + Content = userInput + }); + + var response = await chatClient.CompleteChatAsync( + messages, tools, ct + ); + + var choice = response.Choices[0].Message; + + if (choice.ToolCalls is { Count: > 0 }) + { + messages.Add(choice); + + foreach (var toolCall in choice.ToolCalls) + { + var toolArgs = JsonDocument.Parse( + toolCall.FunctionCall.Arguments + ).RootElement; + Console.WriteLine( + $" Tool call: {toolCall.FunctionCall.Name}({toolArgs})" + ); + + var result = ExecuteTool( + toolCall.FunctionCall.Name, toolArgs + ); + messages.Add(new ChatMessage + { + Role = "tool", + ToolCallId = toolCall.Id, + Content = result + }); + } + + var finalResponse = await chatClient.CompleteChatAsync( + messages, tools, ct + ); + var answer = finalResponse.Choices[0].Message.Content ?? ""; + messages.Add(new ChatMessage + { + Role = "assistant", + Content = answer + }); + Console.WriteLine($"Assistant: {answer}\n"); + } + else + { + var answer = choice.Content ?? ""; + messages.Add(new ChatMessage + { + Role = "assistant", + Content = answer + }); + Console.WriteLine($"Assistant: {answer}\n"); + } +} + +await model.UnloadAsync(); +Console.WriteLine("Model unloaded. 
Goodbye!"); +// +// diff --git a/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj b/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj new file mode 100644 index 00000000..a3533047 --- /dev/null +++ b/samples/cs/tutorial-tool-calling/TutorialToolCalling.csproj @@ -0,0 +1,50 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/tutorial-tool-calling/TutorialToolCalling.sln b/samples/cs/tutorial-tool-calling/TutorialToolCalling.sln new file mode 100644 index 00000000..6a86331b --- /dev/null +++ b/samples/cs/tutorial-tool-calling/TutorialToolCalling.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialToolCalling", "TutorialToolCalling.csproj", "{155923AB-A0C6-447D-A46A-7C8318D31596}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|Any CPU.Build.0 = Debug|ARM64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x64.ActiveCfg = Debug|x64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x64.Build.0 = Debug|x64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x86.ActiveCfg = Debug|ARM64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Debug|x86.Build.0 = Debug|ARM64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Release|Any CPU.ActiveCfg = Release|ARM64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Release|Any CPU.Build.0 = Release|ARM64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x64.ActiveCfg = Release|x64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x64.Build.0 = Release|x64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x86.ActiveCfg = Release|ARM64 + {155923AB-A0C6-447D-A46A-7C8318D31596}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/cs/tutorial-voice-to-text/Program.cs b/samples/cs/tutorial-voice-to-text/Program.cs new file mode 100644 index 00000000..976b44e4 --- /dev/null +++ b/samples/cs/tutorial-voice-to-text/Program.cs @@ -0,0 +1,104 @@ +// +// +using Microsoft.AI.Foundry.Local; +using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; +using Microsoft.Extensions.Logging; +using System.Text; +// + +// +CancellationToken ct = CancellationToken.None; + +var config = new Configuration +{ + AppName = "foundry_local_samples", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information +}; + +using var loggerFactory = LoggerFactory.Create(builder => +{ + builder.SetMinimumLevel( + Microsoft.Extensions.Logging.LogLevel.Information + ); +}); +var logger = loggerFactory.CreateLogger(); + +// Initialize the singleton instance +await FoundryLocalManager.CreateAsync(config, logger); +var mgr = FoundryLocalManager.Instance; +var catalog = await mgr.GetCatalogAsync(); +// + +// +// Load the speech-to-text model +var speechModel = await catalog.GetModelAsync("whisper-tiny") + ?? 
throw new Exception("Speech model not found");
+
+await speechModel.DownloadAsync(progress =>
+{
+    Console.Write($"\rDownloading speech model: {progress:F2}%");
+    if (progress >= 100f) Console.WriteLine();
+});
+
+await speechModel.LoadAsync();
+Console.WriteLine("Speech model loaded.");
+
+// Transcribe the audio file
+var audioClient = await speechModel.GetAudioClientAsync();
+var transcriptionText = new StringBuilder();
+
+Console.WriteLine("\nTranscription:");
+var audioResponse = audioClient
+    .TranscribeAudioStreamingAsync("meeting-notes.wav", ct);
+await foreach (var chunk in audioResponse)
+{
+    Console.Write(chunk.Text);
+    transcriptionText.Append(chunk.Text);
+}
+Console.WriteLine();
+
+// Unload the speech model to free memory
+await speechModel.UnloadAsync();
+//
+
+//
+// Load the chat model for summarization
+var chatModel = await catalog.GetModelAsync("qwen2.5-0.5b")
+    ?? throw new Exception("Chat model not found");
+
+await chatModel.DownloadAsync(progress =>
+{
+    Console.Write($"\rDownloading chat model: {progress:F2}%");
+    if (progress >= 100f) Console.WriteLine();
+});
+
+await chatModel.LoadAsync();
+Console.WriteLine("Chat model loaded.");
+
+// Summarize the transcription into organized notes
+var chatClient = await chatModel.GetChatClientAsync();
+var messages = new List<ChatMessage>
+{
+    new ChatMessage
+    {
+        Role = "system",
+        Content = "You are a note-taking assistant. Summarize " +
+                  "the following transcription into organized, " +
+                  "concise notes with bullet points."
+    },
+    new ChatMessage
+    {
+        Role = "user",
+        Content = transcriptionText.ToString()
+    }
+};
+
+var chatResponse = await chatClient.CompleteChatAsync(messages, ct);
+var summary = chatResponse.Choices[0].Message.Content;
+Console.WriteLine($"\nSummary:\n{summary}");
+
+// Clean up
+await chatModel.UnloadAsync();
+Console.WriteLine("\nDone. 
Models unloaded."); +// +// diff --git a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj new file mode 100644 index 00000000..a3533047 --- /dev/null +++ b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.csproj @@ -0,0 +1,50 @@ + + + + Exe + enable + enable + + + + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + + net9.0 + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.sln b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.sln new file mode 100644 index 00000000..ae2a2b39 --- /dev/null +++ b/samples/cs/tutorial-voice-to-text/TutorialVoiceToText.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TutorialVoiceToText", "TutorialVoiceToText.csproj", "{C12663C3-AB3F-4652-BC43-A92E43602ACC}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|Any CPU.ActiveCfg = Debug|ARM64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|Any CPU.Build.0 = Debug|ARM64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x64.ActiveCfg = Debug|x64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x64.Build.0 = Debug|x64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x86.ActiveCfg = Debug|ARM64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Debug|x86.Build.0 = Debug|ARM64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|Any CPU.ActiveCfg = Release|ARM64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|Any CPU.Build.0 = Release|ARM64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x64.ActiveCfg = Release|x64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x64.Build.0 = Release|x64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x86.ActiveCfg = Release|ARM64 + {C12663C3-AB3F-4652-BC43-A92E43602ACC}.Release|x86.Build.0 = Release|ARM64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/samples/js/audio-transcription-example/app.js b/samples/js/audio-transcription-example/app.js index fe441d1b..c2517ec7 100644 --- a/samples/js/audio-transcription-example/app.js +++ b/samples/js/audio-transcription-example/app.js @@ -1,14 +1,20 @@ +// +// import { FoundryLocalManager } from 'foundry-local-sdk'; +// // Initialize the Foundry Local SDK console.log('Initializing Foundry Local SDK...'); +// const manager = FoundryLocalManager.create({ appName: 'foundry_local_samples', logLevel: 'info' }); +// console.log('✓ SDK initialized successfully'); +// // Get the model object const modelAlias = 'whisper-tiny'; // Using an available model from the list above let model = await manager.catalog.getModel(modelAlias); @@ -25,29 +31,36 @@ console.log('\n✓ Model downloaded'); console.log(`\nLoading model ${modelAlias}...`); await model.load(); console.log('✓ Model loaded'); +// +// // Create audio client console.log('\nCreating audio client...'); const audioClient = model.createAudioClient(); console.log('✓ Audio client created'); // Example 
audio transcription
-console.log('\nTesting audio transcription...');
-const transcription = await audioClient.transcribe('./Recording.mp3');
+const audioFile = process.argv[2] || './Recording.mp3';
+console.log(`\nTranscribing ${audioFile}...`);
+const transcription = await audioClient.transcribe(audioFile);
 console.log('\nAudio transcription result:');
 console.log(transcription.text);
 console.log('✓ Audio transcription completed');
 
-// Same example but with streaming transcription using callback
+// Same example but with streaming transcription using async iteration
 console.log('\nTesting streaming audio transcription...');
-await audioClient.transcribeStreaming('./Recording.mp3', (result) => {
+for await (const result of audioClient.transcribeStreaming(audioFile)) {
   // Output the intermediate transcription results as they are received without line ending
   process.stdout.write(result.text);
-});
+}
 console.log('\n✓ Streaming transcription completed');
+//
+
+//
 // Unload the model
 console.log('Unloading model...');
 await model.unload();
 console.log(`✓ Model unloaded`);
+//
+//
diff --git a/samples/js/chat-and-audio-foundry-local/.npmrc b/samples/js/chat-and-audio-foundry-local/.npmrc
new file mode 100644
index 00000000..114ea2a4
--- /dev/null
+++ b/samples/js/chat-and-audio-foundry-local/.npmrc
@@ -0,0 +1,2 @@
+registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/
+always-auth=true
diff --git a/samples/js/chat-and-audio-foundry-local/src/app.js b/samples/js/chat-and-audio-foundry-local/src/app.js
index b3084816..50bc195f 100644
--- a/samples/js/chat-and-audio-foundry-local/src/app.js
+++ b/samples/js/chat-and-audio-foundry-local/src/app.js
@@ -11,7 +11,7 @@ const WHISPER_MODEL = "whisper-tiny";
 async function main() {
   console.log("Initializing Foundry Local SDK...");
   const manager = FoundryLocalManager.create({
-    appName: "ChatAndAudioSample",
+    appName: "foundry_local_samples",
     logLevel: "info",
   });
 
@@ -76,22 +76,19 @@ async function main() {
   // Summarize the transcription
   console.log("Generating summary...\n");
-  await chatClient.completeStreamingChat(
-    [
-      {
-        role: "system",
-        content:
-          "You are a helpful assistant. 
Summarize the following transcribed audio and extract key themes and action items.", + }, + { role: "user", content: transcription.text }, + ])) { + const content = chunk.choices?.[0]?.message?.content; + if (content) { + process.stdout.write(content); } - ); + } console.log("\n"); // --- Clean up --- diff --git a/samples/js/copilot-sdk-foundry-local/.npmrc b/samples/js/copilot-sdk-foundry-local/.npmrc new file mode 100644 index 00000000..114ea2a4 --- /dev/null +++ b/samples/js/copilot-sdk-foundry-local/.npmrc @@ -0,0 +1,2 @@ +registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/ +always-auth=true diff --git a/samples/js/electron-chat-application/.npmrc b/samples/js/electron-chat-application/.npmrc new file mode 100644 index 00000000..114ea2a4 --- /dev/null +++ b/samples/js/electron-chat-application/.npmrc @@ -0,0 +1,2 @@ +registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/ +always-auth=true diff --git a/samples/js/langchain-integration-example/app.js b/samples/js/langchain-integration-example/app.js index 94e0afdc..9e4b7b60 100644 --- a/samples/js/langchain-integration-example/app.js +++ b/samples/js/langchain-integration-example/app.js @@ -1,17 +1,22 @@ +// +// import { ChatOpenAI } from "@langchain/openai"; import { ChatPromptTemplate } from "@langchain/core/prompts"; import { FoundryLocalManager } from 'foundry-local-sdk'; +// // Initialize the Foundry Local SDK console.log('Initializing Foundry Local SDK...'); const endpointUrl = 'http://localhost:5764'; +// const manager = FoundryLocalManager.create({ appName: 'foundry_local_samples', logLevel: 'info', webServiceUrls: endpointUrl }); +// console.log('✓ SDK initialized successfully'); // Get the model object @@ -35,6 +40,7 @@ console.log('\nStarting web service...'); manager.startWebService(); console.log('✓ Web service started'); +// // Configure ChatOpenAI to use your locally-running model const llm = new ChatOpenAI({ @@ -61,7 +67,9 @@ const prompt = ChatPromptTemplate.fromMessages([ // Build a simple chain by connecting the prompt to the language model const chain = prompt.pipe(llm); +// +// const input = "I love to code."; console.log(`Translating '${input}' to French...`); @@ -76,9 +84,11 @@ await chain.invoke({ }).catch(err => { console.error("Error:", err); }); +// // Tidy up console.log('Unloading model and stopping web service...'); await model.unload(); manager.stopWebService(); -console.log(`✓ Model unloaded and web service stopped`); \ No newline at end of file +console.log(`✓ Model unloaded and web service stopped`); +// \ No newline at end of file diff --git a/samples/js/native-chat-completions/app.js b/samples/js/native-chat-completions/app.js index af566ef7..9e34c90f 100644 --- a/samples/js/native-chat-completions/app.js +++ b/samples/js/native-chat-completions/app.js @@ -1,14 +1,50 @@ +// +// import { FoundryLocalManager } from 'foundry-local-sdk'; +// // Initialize the Foundry Local SDK console.log('Initializing Foundry Local SDK...'); +// const manager = FoundryLocalManager.create({ appName: 'foundry_local_samples', logLevel: 'info' }); +// console.log('✓ SDK initialized successfully'); +// Discover available execution providers and their registration status. 
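+// Note: each entry returned by discoverEps() carries a name and an
+// isRegistered flag, which the table below prints.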
+const eps = manager.discoverEps(); +const maxNameLen = 30; +console.log('\nAvailable execution providers:'); +console.log(` ${'Name'.padEnd(maxNameLen)} Registered`); +console.log(` ${'─'.repeat(maxNameLen)} ──────────`); +for (const ep of eps) { + console.log(` ${ep.name.padEnd(maxNameLen)} ${ep.isRegistered}`); +} + +// Download and register all execution providers with per-EP progress. +// EP packages include dependencies and may be large. +// Download is only required again if a new version of the EP is released. +console.log('\nDownloading execution providers:'); +if (eps.length > 0) { + let currentEp = ''; + await manager.downloadAndRegisterEps((epName, percent) => { + if (epName !== currentEp) { + if (currentEp !== '') { + process.stdout.write('\n'); + } + currentEp = epName; + } + process.stdout.write(`\r ${epName.padEnd(maxNameLen)} ${percent.toFixed(1).padStart(5)}%`); + }); + process.stdout.write('\n'); +} else { + console.log('No execution providers to download.'); +} + +// // Get the model object const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above const model = await manager.catalog.getModel(modelAlias); @@ -24,7 +60,9 @@ console.log('\n✓ Model downloaded'); console.log(`\nLoading model ${modelAlias}...`); await model.load(); console.log('✓ Model loaded'); +// +// // Create chat client console.log('\nCreating chat client...'); const chatClient = model.createChatClient(); @@ -38,22 +76,27 @@ const completion = await chatClient.completeChat([ console.log('\nChat completion result:'); console.log(completion.choices[0]?.message?.content); +// +// // Example streaming completion console.log('\nTesting streaming completion...'); -await chatClient.completeStreamingChat( - [{ role: 'user', content: 'Write a short poem about programming.' }], - (chunk) => { - const content = chunk.choices?.[0]?.message?.content; - if (content) { - process.stdout.write(content); - } +for await (const chunk of chatClient.completeStreamingChat( + [{ role: 'user', content: 'Write a short poem about programming.' }] +)) { + const content = chunk.choices?.[0]?.message?.content; + if (content) { + process.stdout.write(content); } -); +} console.log('\n'); +// +// // Unload the model console.log('Unloading model...'); await model.unload(); console.log(`✓ Model unloaded`); +// +// \ No newline at end of file diff --git a/samples/js/tool-calling-foundry-local/.npmrc b/samples/js/tool-calling-foundry-local/.npmrc new file mode 100644 index 00000000..114ea2a4 --- /dev/null +++ b/samples/js/tool-calling-foundry-local/.npmrc @@ -0,0 +1,2 @@ +registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/ +always-auth=true diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js index f11eacdd..f92464ee 100644 --- a/samples/js/tool-calling-foundry-local/src/app.js +++ b/samples/js/tool-calling-foundry-local/src/app.js @@ -1,8 +1,11 @@ +// // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// import { OpenAI } from "openai"; import { FoundryLocalManager } from "foundry-local-sdk"; +// // By using an alias, the most suitable model will be downloaded // to your end-user's device. @@ -10,22 +13,27 @@ import { FoundryLocalManager } from "foundry-local-sdk"; // following command in your terminal: `foundry model list`. 
const alias = "qwen2.5-0.5b";
+//
 
 function multiplyNumbers(first, second) {
   return first * second;
 }
+//
 
 async function runToolCallingExample() {
   let manager = null;
   let model = null;
 
   try {
+    //
     console.log("Initializing Foundry Local SDK...");
     manager = FoundryLocalManager.create({
-      appName: "FoundryLocalSample",
+      appName: "foundry_local_samples",
       serviceEndpoint: "http://localhost:5000",
       logLevel: "info"
     });
+    //
+
+    //
     const catalog = manager.catalog;
     model = await catalog.getModel(alias);
     if (!model) {
@@ -47,7 +55,9 @@ async function runToolCallingExample() {
       baseURL: `${endpoint.replace(/\/$/, "")}/v1`,
       apiKey: "local"
     });
+    //
+
+    //
     // Prepare messages
     const messages = [
       {
@@ -154,7 +164,9 @@ async function runToolCallingExample() {
     }
 
     console.log();
+    //
   } finally {
+    //
     if (model) {
       try {
         if (await model.isLoaded()) {
@@ -172,6 +184,7 @@ async function runToolCallingExample() {
       console.warn("Cleanup warning while stopping service:", cleanupError);
     }
   }
+  //
 }
 
@@ -179,3 +192,4 @@ await runToolCallingExample().catch((error) => {
   console.error("Error running sample:", error);
   process.exitCode = 1;
 });
+//
diff --git a/samples/js/tutorial-chat-assistant/app.js b/samples/js/tutorial-chat-assistant/app.js
new file mode 100644
index 00000000..9a5a430c
--- /dev/null
+++ b/samples/js/tutorial-chat-assistant/app.js
@@ -0,0 +1,84 @@
+//
+//
+import { FoundryLocalManager } from 'foundry-local-sdk';
+import * as readline from 'readline';
+//
+
+//
+// Initialize the Foundry Local SDK
+const manager = FoundryLocalManager.create({
+    appName: 'foundry_local_samples',
+    logLevel: 'info'
+});
+
+// Select and load a model from the catalog
+const model = await manager.catalog.getModel('qwen2.5-0.5b');
+
+await model.download((progress) => {
+    process.stdout.write(`\rDownloading model: ${progress.toFixed(2)}%`);
+});
+console.log('\nModel downloaded.');
+
+await model.load();
+console.log('Model loaded and ready.');
+
+// Create a chat client
+const chatClient = model.createChatClient();
+//
+
+//
+// Start the conversation with a system prompt
+const messages = [
+    {
+        role: 'system',
+        content: 'You are a helpful, friendly assistant. Keep your responses ' +
+                 'concise and conversational. If you don\'t know something, say so.'
+    }
+];
+//
+
+// Set up readline for console input
+const rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout
+});
+
+const askQuestion = (prompt) => new Promise((resolve) => rl.question(prompt, resolve));
+
+console.log('\nChat assistant ready! Type \'quit\' to exit.\n');
+
+//
+while (true) {
+    const userInput = await askQuestion('You: ');
+    if (userInput.trim().toLowerCase() === 'quit' ||
+        userInput.trim().toLowerCase() === 'exit') {
+        break;
+    }
+
+    // Add the user's message to conversation history
+    messages.push({ role: 'user', content: userInput });
+
+    //
+    // Stream the response token by token
+    process.stdout.write('Assistant: ');
+    let fullResponse = '';
+    for await (const chunk of chatClient.completeStreamingChat(messages)) {
+        const content = chunk.choices?.[0]?.message?.content;
+        if (content) {
+            process.stdout.write(content);
+            fullResponse += content;
+        }
+    }
+    console.log('\n');
+    //
+
+    // Add the complete response to conversation history
+    messages.push({ role: 'assistant', content: fullResponse });
+}
+//
+
+// Clean up - unload the model
+await model.unload();
+console.log('Model unloaded. 
Goodbye!'); +rl.close(); +// diff --git a/samples/js/tutorial-chat-assistant/package.json b/samples/js/tutorial-chat-assistant/package.json new file mode 100644 index 00000000..3e2393ce --- /dev/null +++ b/samples/js/tutorial-chat-assistant/package.json @@ -0,0 +1,9 @@ +{ + "name": "tutorial-chat-assistant", + "version": "1.0.0", + "type": "module", + "main": "app.js", + "dependencies": { + "foundry-local-sdk": "*" + } +} diff --git a/samples/js/tutorial-document-summarizer/app.js b/samples/js/tutorial-document-summarizer/app.js new file mode 100644 index 00000000..f43e204d --- /dev/null +++ b/samples/js/tutorial-document-summarizer/app.js @@ -0,0 +1,84 @@ +// +// +import { FoundryLocalManager } from 'foundry-local-sdk'; +import { readFileSync, readdirSync, statSync } from 'fs'; +import { join, basename } from 'path'; +// + +async function summarizeFile(chatClient, filePath, systemPrompt) { + const content = readFileSync(filePath, 'utf-8'); + const messages = [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: content } + ]; + + const response = await chatClient.completeChat(messages); + console.log(response.choices[0]?.message?.content); +} + +async function summarizeDirectory(chatClient, directory, systemPrompt) { + const txtFiles = readdirSync(directory) + .filter(f => f.endsWith('.txt')) + .sort(); + + if (txtFiles.length === 0) { + console.log(`No .txt files found in ${directory}`); + return; + } + + for (const fileName of txtFiles) { + console.log(`--- ${fileName} ---`); + await summarizeFile(chatClient, join(directory, fileName), systemPrompt); + console.log(); + } +} + +// +// Initialize the Foundry Local SDK +const manager = FoundryLocalManager.create({ + appName: 'foundry_local_samples', + logLevel: 'info' +}); + +// Select and load a model from the catalog +const model = await manager.catalog.getModel('qwen2.5-0.5b'); + +await model.download((progress) => { + process.stdout.write(`\rDownloading model: ${progress.toFixed(2)}%`); +}); +console.log('\nModel downloaded.'); + +await model.load(); +console.log('Model loaded and ready.\n'); + +// Create a chat client +const chatClient = model.createChatClient(); +// + +// +const systemPrompt = + 'Summarize the following document into concise bullet points. ' + + 'Focus on the key points and main ideas.'; + +// +const target = process.argv[2] || 'document.txt'; +// + +try { + const stats = statSync(target); + if (stats.isDirectory()) { + await summarizeDirectory(chatClient, target, systemPrompt); + } else { + console.log(`--- ${basename(target)} ---`); + await summarizeFile(chatClient, target, systemPrompt); + } +} catch { + console.log(`--- ${basename(target)} ---`); + await summarizeFile(chatClient, target, systemPrompt); +} +// + +// Clean up +await model.unload(); +console.log('\nModel unloaded. 
Done!'); +// diff --git a/samples/js/tutorial-document-summarizer/package.json b/samples/js/tutorial-document-summarizer/package.json new file mode 100644 index 00000000..c3c62321 --- /dev/null +++ b/samples/js/tutorial-document-summarizer/package.json @@ -0,0 +1,9 @@ +{ + "name": "tutorial-document-summarizer", + "version": "1.0.0", + "type": "module", + "main": "app.js", + "dependencies": { + "foundry-local-sdk": "*" + } +} diff --git a/samples/js/tutorial-tool-calling/app.js b/samples/js/tutorial-tool-calling/app.js new file mode 100644 index 00000000..efdd710c --- /dev/null +++ b/samples/js/tutorial-tool-calling/app.js @@ -0,0 +1,186 @@ +// +// +import { FoundryLocalManager } from 'foundry-local-sdk'; +import * as readline from 'readline'; +// + +// +// --- Tool definitions --- +const tools = [ + { + type: 'function', + function: { + name: 'get_weather', + description: 'Get the current weather for a location', + parameters: { + type: 'object', + properties: { + location: { + type: 'string', + description: 'The city or location' + }, + unit: { + type: 'string', + enum: ['celsius', 'fahrenheit'], + description: 'Temperature unit' + } + }, + required: ['location'] + } + } + }, + { + type: 'function', + function: { + name: 'calculate', + description: 'Perform a math calculation', + parameters: { + type: 'object', + properties: { + expression: { + type: 'string', + description: + 'The math expression to evaluate' + } + }, + required: ['expression'] + } + } + } +]; + +// --- Tool implementations --- +function getWeather(location, unit = 'celsius') { + return { + location, + temperature: unit === 'celsius' ? 22 : 72, + unit, + condition: 'Sunny' + }; +} + +function calculate(expression) { + // Input is validated against a strict allowlist of numeric/math characters, + // making this safe from code injection in this tutorial context. + const allowed = /^[0-9+\-*/(). ]+$/; + if (!allowed.test(expression)) { + return { error: 'Invalid expression' }; + } + try { + const result = Function( + `"use strict"; return (${expression})` + )(); + return { expression, result }; + } catch (err) { + return { error: err.message }; + } +} + +const toolFunctions = { + get_weather: (args) => getWeather(args.location, args.unit), + calculate: (args) => calculate(args.expression) +}; +// + +// +async function processToolCalls(messages, response, chatClient) { + let choice = response.choices[0]?.message; + + while (choice?.tool_calls?.length > 0) { + messages.push(choice); + + for (const toolCall of choice.tool_calls) { + const functionName = toolCall.function.name; + const args = JSON.parse(toolCall.function.arguments); + console.log( + ` Tool call: ${functionName}` + + `(${JSON.stringify(args)})` + ); + + const result = toolFunctions[functionName](args); + messages.push({ + role: 'tool', + tool_call_id: toolCall.id, + content: JSON.stringify(result) + }); + } + + response = await chatClient.completeChat( + messages, { tools } + ); + choice = response.choices[0]?.message; + } + + return choice?.content ?? 
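+        // Fall back to an empty string when the model produced no content.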
''; +} +// + +// +// --- Main application --- +const manager = FoundryLocalManager.create({ + appName: 'foundry_local_samples', + logLevel: 'info' +}); + +const model = await manager.catalog.getModel('qwen2.5-0.5b'); + +await model.download((progress) => { + process.stdout.write( + `\rDownloading model: ${progress.toFixed(2)}%` + ); +}); +console.log('\nModel downloaded.'); + +await model.load(); +console.log('Model loaded and ready.'); + +const chatClient = model.createChatClient(); + +const messages = [ + { + role: 'system', + content: + 'You are a helpful assistant with access to tools. ' + + 'Use them when needed to answer questions accurately.' + } +]; + +const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout +}); + +const askQuestion = (prompt) => + new Promise((resolve) => rl.question(prompt, resolve)); + +console.log( + '\nTool-calling assistant ready! Type \'quit\' to exit.\n' +); + +while (true) { + const userInput = await askQuestion('You: '); + if ( + userInput.trim().toLowerCase() === 'quit' || + userInput.trim().toLowerCase() === 'exit' + ) { + break; + } + + messages.push({ role: 'user', content: userInput }); + + const response = await chatClient.completeChat( + messages, { tools } + ); + const answer = await processToolCalls( + messages, response, chatClient + ); + + messages.push({ role: 'assistant', content: answer }); + console.log(`Assistant: ${answer}\n`); +} + +await model.unload(); +console.log('Model unloaded. Goodbye!'); +rl.close(); +// +// diff --git a/samples/js/tutorial-tool-calling/package.json b/samples/js/tutorial-tool-calling/package.json new file mode 100644 index 00000000..07337434 --- /dev/null +++ b/samples/js/tutorial-tool-calling/package.json @@ -0,0 +1,9 @@ +{ + "name": "tutorial-tool-calling", + "version": "1.0.0", + "type": "module", + "main": "app.js", + "dependencies": { + "foundry-local-sdk": "*" + } +} diff --git a/samples/js/tutorial-voice-to-text/app.js b/samples/js/tutorial-voice-to-text/app.js new file mode 100644 index 00000000..08074100 --- /dev/null +++ b/samples/js/tutorial-voice-to-text/app.js @@ -0,0 +1,78 @@ +// +// +import { FoundryLocalManager } from 'foundry-local-sdk'; +import { fileURLToPath } from 'url'; +import path from 'path'; +// + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +// +// Initialize the Foundry Local SDK +const manager = FoundryLocalManager.create({ + appName: 'foundry_local_samples', + logLevel: 'info' +}); +// + +// +// Load the speech-to-text model +const speechModel = await manager.catalog.getModel('whisper-tiny'); +await speechModel.download((progress) => { + process.stdout.write( + `\rDownloading speech model: ${progress.toFixed(2)}%` + ); +}); +console.log('\nSpeech model downloaded.'); + +await speechModel.load(); +console.log('Speech model loaded.'); + +// Transcribe the audio file +const audioClient = speechModel.createAudioClient(); +const transcription = await audioClient.transcribe( + path.join(__dirname, 'meeting-notes.wav') +); +console.log(`\nTranscription:\n${transcription.text}`); + +// Unload the speech model to free memory +await speechModel.unload(); +// + +// +// Load the chat model for summarization +const chatModel = await manager.catalog.getModel('qwen2.5-0.5b'); +await chatModel.download((progress) => { + process.stdout.write( + `\rDownloading chat model: ${progress.toFixed(2)}%` + ); +}); +console.log('\nChat model downloaded.'); + +await chatModel.load(); +console.log('Chat model loaded.'); + +// Summarize the transcription into 
organized notes +const chatClient = chatModel.createChatClient(); +const messages = [ + { + role: 'system', + content: 'You are a note-taking assistant. Summarize ' + + 'the following transcription into organized, ' + + 'concise notes with bullet points.' + }, + { + role: 'user', + content: transcription.text + } +]; + +const response = await chatClient.completeChat(messages); +const summary = response.choices[0]?.message?.content; +console.log(`\nSummary:\n${summary}`); + +// Clean up +await chatModel.unload(); +console.log('\nDone. Models unloaded.'); +// +// diff --git a/samples/js/tutorial-voice-to-text/package.json b/samples/js/tutorial-voice-to-text/package.json new file mode 100644 index 00000000..55f2ea83 --- /dev/null +++ b/samples/js/tutorial-voice-to-text/package.json @@ -0,0 +1,9 @@ +{ + "name": "tutorial-voice-to-text", + "version": "1.0.0", + "type": "module", + "main": "app.js", + "dependencies": { + "foundry-local-sdk": "*" + } +} diff --git a/samples/js/web-server-example/app.js b/samples/js/web-server-example/app.js index 5e97edfc..b03bf9df 100644 --- a/samples/js/web-server-example/app.js +++ b/samples/js/web-server-example/app.js @@ -1,18 +1,24 @@ +// +// import { FoundryLocalManager } from 'foundry-local-sdk'; import { OpenAI } from 'openai'; +// // Initialize the Foundry Local SDK console.log('Initializing Foundry Local SDK...'); const endpointUrl = 'http://localhost:5764'; +// const manager = FoundryLocalManager.create({ appName: 'foundry_local_samples', logLevel: 'info', webServiceUrls: endpointUrl }); +// console.log('✓ SDK initialized successfully'); +// // Get the model object const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above const model = await manager.catalog.getModel(modelAlias); @@ -28,7 +34,9 @@ console.log('\n✓ Model downloaded'); console.log(`\nLoading model ${modelAlias}...`); await model.load(); console.log('✓ Model loaded'); +// +// // Start the web service console.log('\nStarting web service...'); manager.startWebService(); @@ -52,9 +60,11 @@ const response = await openai.chat.completions.create({ }); console.log(response.choices[0].message.content); +// // Tidy up console.log('Unloading model and stopping web service...'); await model.unload(); manager.stopWebService(); console.log(`✓ Model unloaded and web service stopped`); +// diff --git a/samples/python/audio-transcription/Recording.mp3 b/samples/python/audio-transcription/Recording.mp3 new file mode 100644 index 00000000..deb38418 Binary files /dev/null and b/samples/python/audio-transcription/Recording.mp3 differ diff --git a/samples/python/audio-transcription/requirements.txt b/samples/python/audio-transcription/requirements.txt new file mode 100644 index 00000000..c79aa6dd --- /dev/null +++ b/samples/python/audio-transcription/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk diff --git a/samples/python/audio-transcription/src/app.py b/samples/python/audio-transcription/src/app.py new file mode 100644 index 00000000..20f9be04 --- /dev/null +++ b/samples/python/audio-transcription/src/app.py @@ -0,0 +1,39 @@ +# +# +import sys +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +# +# Initialize the Foundry Local SDK +config = Configuration(app_name="foundry_local_samples") +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance + +# Load the whisper model for speech-to-text +model = manager.catalog.get_model("whisper-tiny") +model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", 
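+        # (flush=True below makes the "\r" progress line update in place)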
+ flush=True, + ) +) +print() +model.load() +print("Model loaded.") +# + +# +# Get the audio client and transcribe +audio_client = model.get_audio_client() +audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3" +result = audio_client.transcribe(audio_file) +print("Transcription:") +print(result.text) +# + +# Clean up +model.unload() +# diff --git a/samples/python/functioncalling/README.md b/samples/python/functioncalling/README.md deleted file mode 100644 index 71048eae..00000000 --- a/samples/python/functioncalling/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Foundry Local Function Calling Configuration Guide - -This guide walks you through enabling function calling support in Foundry Local with Phi-4-mini. - -## Prerequisites - -- Foundry Local version 0.5.100 or higher -- Access to modify model configuration files - -## Setup Instructions - -### Step 1: Install Foundry Local - -Ensure you have Foundry Local version 0.5.100 or higher installed on your system. - -### Step 2: Configure Phi-4-mini Chat Template - -Replace the existing **inference_model.json** file for Phi-4-mini with the following configuration: - -```json -{ - "Name": "Phi-4-mini-instruct-generic-cpu", - "PromptTemplate": { - "system": "<|system|>{Content}<|tool|>{Tool}<|/tool|><|end|>", - "user": "<|user|>{Content}<|end|>", - "assistant": "<|assistant|>{Content}<|end|>", - "tool": "<|tool|>{Tool}<|/tool|>", - "prompt": "<|system|> You are a helpful assistant with these tools. If you decide to call functions:\n* prefix function calls with functools marker (no closing marker required)\n* all function calls should be generated in a single JSON list formatted as functools[{\"name\": [function name], \"arguments\": [function arguments as JSON]}, ...]\n * follow the provided JSON schema. Do not hallucinate arguments or values. Do not blindly copy values from the provided samples\n * respect the argument type formatting. E.g., if the type is number and format is float, write value 7 as 7.0\n * make sure you pick the right functions that match the user intent<|end|><|user|>{Content}<|end|><|assistant|>" - } -} -``` - -### Step 3: Restart Foundry Service - -Execute the following command in your terminal to restart the Foundry service: - -```bash -foundry service restart -``` - -### Step 4: Test the Configuration - -Run the provided [Notebook](./fl_tools..ipynb) to test and validate the function calling functionality. 
- -## Related Resources - -- **Test Notebook**: [fl_tools.ipynb](./fl_tools..ipynb) - -## Notes - -- The configuration enables proper function calling syntax with the `functools` marker -- Ensure all JSON formatting rules are followed when the model generates function calls -- The system prompt includes specific instructions for proper function argument handling \ No newline at end of file diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb deleted file mode 100644 index 0f9c76ed..00000000 --- a/samples/python/functioncalling/fl_tools.ipynb +++ /dev/null @@ -1,362 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "1fd99963", - "metadata": {}, - "source": [ - "# Function Calling Examples with Foundry Local\n", - "\n", - "This notebook demonstrates how to use function calling capabilities with Foundry Local SDK and OpenAI API.\n", - "\n", - "## Package Installation\n", - "\n", - "Install the OpenAI package for API communication:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "537d41f7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: openai in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (1.93.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (4.9.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (0.28.1)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (0.10.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (2.11.7)\n", - "Requirement already satisfied: sniffio in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (1.3.1)\n", - "Requirement already satisfied: tqdm>4 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (4.67.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.11 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from openai) (4.14.0)\n", - "Requirement already satisfied: idna>=2.8 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from anyio<5,>=3.5.0->openai) (3.10)\n", - "Requirement already satisfied: certifi in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (2025.6.15)\n", - "Requirement already satisfied: httpcore==1.* in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (1.0.9)\n", - "Requirement already satisfied: h11>=0.16 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.16.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n", - "Requirement already satisfied: 
pydantic-core==2.33.2 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (2.33.2)\n", - "Requirement already satisfied: typing-inspection>=0.4.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.4.1)\n", - "Requirement already satisfied: colorama in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from tqdm>4->openai) (0.4.6)\n" - ] - } - ], - "source": [ - "! pip install openai" - ] - }, - { - "cell_type": "markdown", - "id": "6ee98d72", - "metadata": {}, - "source": [ - "Install the Foundry Local SDK for local model management:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "810ac3f4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: foundry-local-sdk in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (0.3.1)\n", - "Requirement already satisfied: httpx in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from foundry-local-sdk) (0.28.1)\n", - "Requirement already satisfied: pydantic>=2.0.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from foundry-local-sdk) (2.11.7)\n", - "Requirement already satisfied: tqdm in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from foundry-local-sdk) (4.67.1)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from pydantic>=2.0.0->foundry-local-sdk) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.33.2 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from pydantic>=2.0.0->foundry-local-sdk) (2.33.2)\n", - "Requirement already satisfied: typing-extensions>=4.12.2 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from pydantic>=2.0.0->foundry-local-sdk) (4.14.0)\n", - "Requirement already satisfied: typing-inspection>=0.4.0 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from pydantic>=2.0.0->foundry-local-sdk) (0.4.1)\n", - "Requirement already satisfied: anyio in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpx->foundry-local-sdk) (4.9.0)\n", - "Requirement already satisfied: certifi in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpx->foundry-local-sdk) (2025.6.15)\n", - "Requirement already satisfied: httpcore==1.* in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpx->foundry-local-sdk) (1.0.9)\n", - "Requirement already satisfied: idna in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpx->foundry-local-sdk) (3.10)\n", - "Requirement already satisfied: h11>=0.16 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from httpcore==1.*->httpx->foundry-local-sdk) (0.16.0)\n", - "Requirement already satisfied: sniffio>=1.1 in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from anyio->httpx->foundry-local-sdk) (1.3.1)\n", - "Requirement already satisfied: colorama in c:\\users\\kinfeylo\\appdata\\local\\miniforge3\\envs\\pydev\\lib\\site-packages (from tqdm->foundry-local-sdk) (0.4.6)\n" - ] - } - ], - 
"source": [ - "! pip install foundry-local-sdk" - ] - }, - { - "cell_type": "markdown", - "id": "abe07aeb", - "metadata": {}, - "source": [ - "## Setup and Configuration\n", - "\n", - "Import the FoundryLocalManager for managing local models:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b21785a2", - "metadata": {}, - "outputs": [], - "source": [ - "from foundry_local import FoundryLocalManager" - ] - }, - { - "cell_type": "markdown", - "id": "9335da67", - "metadata": {}, - "source": [ - "Define the model alias that will be used throughout this example:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "503f23fa", - "metadata": {}, - "outputs": [], - "source": [ - "alias = \"phi-4-mini\"" - ] - }, - { - "cell_type": "markdown", - "id": "5a9b1ecf", - "metadata": {}, - "source": [ - "Create a FoundryLocalManager instance using the specified model alias:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "804611d5", - "metadata": {}, - "outputs": [], - "source": [ - "manager = FoundryLocalManager(alias)" - ] - }, - { - "cell_type": "markdown", - "id": "c81e6f38", - "metadata": {}, - "source": [ - "Import the OpenAI library for API interactions:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1ab277b9", - "metadata": {}, - "outputs": [], - "source": [ - "import openai" - ] - }, - { - "cell_type": "markdown", - "id": "e8a95a1b", - "metadata": {}, - "source": [ - "Create an OpenAI client using the local endpoint and API key from the manager:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e837fa0a", - "metadata": {}, - "outputs": [], - "source": [ - "client = openai.OpenAI(\n", - " base_url=manager.endpoint,\n", - " api_key=manager.api_key # API key is not required for local usage\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "ac745238", - "metadata": {}, - "source": [ - "## Function Definitions\n", - "\n", - "Define the available tools/functions for the AI model. 
This includes flight booking and hotel booking functions:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "bea7d21e", - "metadata": {}, - "outputs": [], - "source": [ - "tool_list = '[{\"name\": \"booking_flight_tickets\", \"description\": \"booking flights\", \"parameters\": {\"origin_airport_code\": {\"description\": \"The name of Departure airport code\", \"type\": \"string\"}, \"destination_airport_code\": {\"description\": \"The name of Destination airport code\", \"type\": \"string\"}, \"departure_date\": {\"description\": \"The date of outbound flight\", \"type\": \"string\"}, \"return_date\": {\"description\": \"The date of return flight\", \"type\": \"string\"}}}, {\"name\": \"booking_hotels\", \"description\": \"booking hotel\", \"parameters\": {\"destination\": {\"description\": \"The name of the city\", \"type\": \"string\"}, \"check_in_date\": {\"description\": \"The date of check in\", \"type\": \"string\"}, \"checkout_date\": {\"description\": \"The date of check out\", \"type\": \"string\"}}}]'" - ] - }, - { - "cell_type": "markdown", - "id": "3d255cf1", - "metadata": {}, - "source": [ - "## Parallel Function Support" - ] - }, - { - "cell_type": "markdown", - "id": "c98886fc", - "metadata": {}, - "source": [ - "Create a chat completion request that will trigger multiple function calls (flight booking and hotel booking) in parallel:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "8e616290", - "metadata": {}, - "outputs": [], - "source": [ - "stream = client.chat.completions.create(\n", - " model=manager.get_model_info(alias).id,\n", - " messages=[{\"role\": \"user\", \"content\": \"book flight ticket from Beijing to Paris(using airport code) in 2025-12-04 to 2025-12-10 , then book hotel from 2025-12-04 to 2025-12-10 in Paris\"}],\n", - " tools=[{\"name\": \"booking_flight_tickets\", \"description\": \"booking flights\", \"parameters\": {\"origin_airport_code\": {\"description\": \"The name of Departure airport code\", \"type\": \"string\"}, \"destination_airport_code\": {\"description\": \"The name of Destination airport code\", \"type\": \"string\"}, \"departure_date\": {\"description\": \"The date of outbound flight\", \"type\": \"string\"}, \"return_date\": {\"description\": \"The date of return flight\", \"type\": \"string\"}}}, {\"name\": \"booking_hotels\", \"description\": \"booking hotel\", \"parameters\": {\"destination\": {\"description\": \"The name of the city\", \"type\": \"string\"}, \"check_in_date\": {\"description\": \"The date of check in\", \"type\": \"string\"}, \"checkout_date\": {\"description\": \"The date of check out\", \"type\": \"string\"}}}],\n", - " temperature=0.00001,\n", - " max_tokens=4096,\n", - " top_p = 1.0,\n", - " stream=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "3b8840e8", - "metadata": {}, - "source": [ - "Process and display the streaming response from the model:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "7f96f7c2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "functools[{\"name\": \"booking_flight_tickets\", \"arguments\": {\"origin_airport_code\": \"PEK\", \"destination_airport_code\": \"CDG\", \"departure_date\": \"2025-12-04\", \"return_date\": \"2025-12-10\"}}, {\"name\": \"booking_hotels\", \"arguments\": {\"destination\": \"Paris\", \"check_in_date\": \"2025-12-04\", \"checkout_date\": \"2025-12-10\"}}]" - ] - } - ], - "source": [ - "for chunk in stream:\n", - " if 
chunk.choices[0].delta.content is not None:\n", - " print(chunk.choices[0].delta.content, end=\"\", flush=True)" - ] - }, - { - "cell_type": "markdown", - "id": "3589be65", - "metadata": {}, - "source": [ - "## Single Function Support" - ] - }, - { - "cell_type": "markdown", - "id": "def59c2b", - "metadata": {}, - "source": [ - "Create a chat completion request for a single function call (weather inquiry). Note: This example shows a different format for defining tools:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "2d5ed823", - "metadata": {}, - "outputs": [], - "source": [ - "stream = client.chat.completions.create(\n", - " model=manager.get_model_info(alias).id,\n", - " messages=[{\"role\": \"user\", \"content\": \"What is the weather today in Paris?\"}],\n", - " tools=[\n", - " {\n", - " \"function\": {\n", - " \"name\": \"get_current_weather\",\n", - " \"arguments\": {\n", - " \"format\": \"celsius\",\n", - " \"location\": \"Paris\"\n", - " }\n", - " }\n", - " }\n", - " ],\n", - " temperature=0.00001,\n", - " max_tokens=4096,\n", - " top_p = 1.0,\n", - " stream=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "2907e462", - "metadata": {}, - "source": [ - "Process and display the streaming response for the single function call:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "e8ef8b66", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "functools[{\"name\": \"get_current_weather\", \"arguments\": {\"format\": \"celsius\", \"location\": \"Paris\"}}]" - ] - } - ], - "source": [ - "for chunk in stream:\n", - " if chunk.choices[0].delta.content is not None:\n", - " print(chunk.choices[0].delta.content, end=\"\", flush=True)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "pydev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/samples/python/hello-foundry-local/README.md b/samples/python/hello-foundry-local/README.md deleted file mode 100644 index c7753a88..00000000 --- a/samples/python/hello-foundry-local/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Sample: Hello Foundry Local! - -This is a simple example of how to use the Foundry Local SDK to run a model locally and make requests to it. The example demonstrates how to set up the SDK, initialize a model, and make a request to the model. - -Install the Foundry Local SDK and OpenAI packages using pip: - -```bash -pip install foundry-local-sdk openai -``` - -> [!TIP] -> We recommend using a virtual environment to manage your Python packages using `venv` or `conda` to avoid conflicts with other packages. - -Run the application using Python: - -```bash -python src/app.py -``` diff --git a/samples/python/hello-foundry-local/src/app.py b/samples/python/hello-foundry-local/src/app.py deleted file mode 100644 index 8bd21c62..00000000 --- a/samples/python/hello-foundry-local/src/app.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -import openai -from foundry_local import FoundryLocalManager - -# By using an alias, the most suitable model will be downloaded -# to your end-user's device. 
-alias = "qwen2.5-coder-0.5b" - -# Create a FoundryLocalManager instance. This will start the Foundry -# Local service if it is not already running and load the specified model. -manager = FoundryLocalManager(alias) - -# The remaining code uses the OpenAI Python SDK to interact with the local model. - -# Configure the client to use the local Foundry service -client = openai.OpenAI( - base_url=manager.endpoint, - api_key=manager.api_key, # API key is not required for local usage -) - -# Set the model to use and generate a streaming response -stream = client.chat.completions.create( - model=manager.get_model_info(alias).id, - messages=[{"role": "user", "content": "What is the golden ratio?"}], - stream=True, -) - -# Print the streaming response -for chunk in stream: - if chunk.choices[0].delta.content is not None: - print(chunk.choices[0].delta.content, end="", flush=True) diff --git a/samples/python/langchain-integration/requirements.txt b/samples/python/langchain-integration/requirements.txt new file mode 100644 index 00000000..0ded700a --- /dev/null +++ b/samples/python/langchain-integration/requirements.txt @@ -0,0 +1,4 @@ +foundry-local-sdk +openai +langchain-openai +langchain-core diff --git a/samples/python/langchain-integration/src/app.py b/samples/python/langchain-integration/src/app.py new file mode 100644 index 00000000..1dd00224 --- /dev/null +++ b/samples/python/langchain-integration/src/app.py @@ -0,0 +1,59 @@ +# +# +from foundry_local_sdk import Configuration, FoundryLocalManager +from langchain_openai import ChatOpenAI +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.output_parsers import StrOutputParser +# + +# +# Initialize the Foundry Local SDK +config = Configuration(app_name="foundry_local_samples") +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance + +# Load a model +model = manager.catalog.get_model("qwen2.5-0.5b") +model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", + flush=True, + ) +) +print() +model.load() +print("Model loaded.") + +# Start the web service to expose an OpenAI-compatible endpoint +manager.start_web_service() +base_url = f"{manager.urls[0]}/v1" +# + +# +# Create a LangChain ChatOpenAI instance pointing to the local endpoint +llm = ChatOpenAI( + base_url=base_url, + api_key="none", + model=model.id, +) +# + +# +# Create a translation chain +prompt = ChatPromptTemplate.from_messages([ + ("system", "You are a translator. Translate the following text to {language}. 
Only output the translation, nothing else."), + ("user", "{text}") +]) + +chain = prompt | llm | StrOutputParser() + +# Run the chain +result = chain.invoke({"language": "Spanish", "text": "Hello, how are you today?"}) +print(f"Translation: {result}") +# + +# Clean up +model.unload() +manager.stop_web_service() +# diff --git a/samples/python/native-chat-completions/requirements.txt b/samples/python/native-chat-completions/requirements.txt new file mode 100644 index 00000000..c79aa6dd --- /dev/null +++ b/samples/python/native-chat-completions/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk diff --git a/samples/python/native-chat-completions/src/app.py b/samples/python/native-chat-completions/src/app.py new file mode 100644 index 00000000..ca087b77 --- /dev/null +++ b/samples/python/native-chat-completions/src/app.py @@ -0,0 +1,54 @@ +# +# +import asyncio +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +async def main(): + # + # Initialize the Foundry Local SDK + config = Configuration(app_name="foundry_local_samples") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + + # Select and load a model from the catalog + model = manager.catalog.get_model("qwen2.5-0.5b") + model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", + flush=True, + ) + ) + print() + model.load() + print("Model loaded and ready.") + + # Get a chat client + client = model.get_chat_client() + # + + # + # Create the conversation messages + messages = [ + {"role": "user", "content": "What is the golden ratio?"} + ] + + # Stream the response token by token + print("Assistant: ", end="", flush=True) + for chunk in client.complete_streaming_chat(messages): + content = chunk.choices[0].delta.content + if content: + print(content, end="", flush=True) + print() + # + + # Clean up + model.unload() + print("Model unloaded.") + + +if __name__ == "__main__": + asyncio.run(main()) +# diff --git a/samples/python/summarize/.vscode/launch.json b/samples/python/summarize/.vscode/launch.json deleted file mode 100644 index 62c83dcf..00000000 --- a/samples/python/summarize/.vscode/launch.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "version": "0.2.0", - "configurations": [ - - { - "name": "Python Debugger: Current File with Arguments", - "type": "debugpy", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal", - "args": "\"The quick brown fox jumps over the lazy dog, packing my box with five dozen liquor jugs, and then the dog chased the fox around the corner of the house.\" --text" - } - ] -} diff --git a/samples/python/summarize/README.md b/samples/python/summarize/README.md deleted file mode 100644 index 9fa753d1..00000000 --- a/samples/python/summarize/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Text Summarizer - -A simple command-line utility that uses Foundry Local to generate summaries of text files or direct text input. - -## Setup - -1. Install the required dependencies: - ```bash - pip install -r requirements.txt - ``` - -## Usage - -The utility can be used in two ways: - -1. Summarize a text file: - ```bash - python summarize.py path/to/your/file.txt - ``` - -2. Summarize direct text input: - ```bash - python summarize.py "Your text to summarize here" --text - ``` - -You can also specify which model to use with the `--model` parameter: - ```bash - python summarize.py path/to/your/file.txt --model "your-model-alias" - ``` - -If the specified model is not found, the script will use the first available model. 
- -## Requirements - -- Python 3.6 or higher -- Foundry Local Service -- Required Python packages (see requirements.txt) - diff --git a/samples/python/summarize/requirements.txt b/samples/python/summarize/requirements.txt deleted file mode 100644 index 7b37f256..00000000 --- a/samples/python/summarize/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -openai>=1.0.0 -python-dotenv>=0.19.0 -foundry-local-sdk>=0.3.1 diff --git a/samples/python/summarize/summarize.py b/samples/python/summarize/summarize.py deleted file mode 100644 index c2b00ba7..00000000 --- a/samples/python/summarize/summarize.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import argparse -from openai import OpenAI -from foundry_local import FoundryLocalManager - - -def read_file_content(file_path): - """Read content from a file.""" - try: - with open(file_path, "r", encoding="utf-8") as file: - return file.read() - except Exception as e: - print(f"Error reading file: {e}") - sys.exit(1) - - -def get_summary(text, client, model_name): - """Get summary from OpenAI API.""" - try: - response = client.chat.completions.create( - model=model_name, - messages=[ - { - "role": "system", - "content": "You are a helpful assistant that summarizes text. Provide a concise summary.", - }, - {"role": "user", "content": f"Please summarize the following text:\n\n{text}"}, - ], - ) - return response.choices[0].message.content - except Exception as e: - print(f"Error getting summary from OpenAI: {e}") - sys.exit(1) - - -def main(): - parser = argparse.ArgumentParser(description="Summarize text from a file or string using OpenAI.") - parser.add_argument("input", help="File path or text string to summarize") - parser.add_argument("--text", action="store_true", help="Treat input as direct text instead of a file path") - parser.add_argument("--model", help="Model alias to use for summarization") - args = parser.parse_args() - - fl_manager = FoundryLocalManager() - - fl_manager.start_service() - - model_list = fl_manager.list_cached_models() - - if not model_list: - print("No downloaded models available") - sys.exit(1) - - # Select model based on alias or use first one - if args.model: - selected_model = next((model for model in model_list if model.alias == args.model), None) - if selected_model: - model_name = selected_model.id - else: - model_name = model_list[0].id - print(f"Model alias '{args.model}' not found, using default model: {model_name}") - else: - model_name = model_list[0].id - - print(f"Using model: {model_name}") - - # Initialize OpenAI client - client = OpenAI(base_url=fl_manager.endpoint, api_key=fl_manager.api_key) - - # Get input text - if args.text: - text = args.input - else: - text = read_file_content(args.input) - - # Get and print summary - summary = get_summary(text, client, model_name) - print("\nSummary:") - print("-" * 50) - print(summary) - print("-" * 50) - - -if __name__ == "__main__": - main() diff --git a/samples/python/tool-calling/requirements.txt b/samples/python/tool-calling/requirements.txt new file mode 100644 index 00000000..c79aa6dd --- /dev/null +++ b/samples/python/tool-calling/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk diff --git a/samples/python/tool-calling/src/app.py b/samples/python/tool-calling/src/app.py new file mode 100644 index 00000000..ac00b023 --- /dev/null +++ b/samples/python/tool-calling/src/app.py @@ -0,0 +1,182 @@ +# +# +import asyncio +import json +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +# +# --- Tool definitions --- +tools = 
[ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city or location" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "Temperature unit" + } + }, + "required": ["location"] + } + } + }, + { + "type": "function", + "function": { + "name": "calculate", + "description": "Perform a math calculation", + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": ( + "The math expression to evaluate" + ) + } + }, + "required": ["expression"] + } + } + } +] + + +# --- Tool implementations --- +def get_weather(location, unit="celsius"): + """Simulate a weather lookup.""" + return { + "location": location, + "temperature": 22 if unit == "celsius" else 72, + "unit": unit, + "condition": "Sunny" + } + + +def calculate(expression): + """Evaluate a math expression safely.""" + allowed = set("0123456789+-*/(). ") + if not all(c in allowed for c in expression): + return {"error": "Invalid expression"} + try: + result = eval(expression) + return {"expression": expression, "result": result} + except Exception as e: + return {"error": str(e)} + + +tool_functions = { + "get_weather": get_weather, + "calculate": calculate +} +# + + +# +def process_tool_calls(messages, response, client): + """Handle tool calls in a loop until the model produces a final answer.""" + choice = response.choices[0].message + + while choice.tool_calls: + # Convert the assistant message to a dict for the SDK + assistant_msg = { + "role": "assistant", + "content": choice.content, + "tool_calls": [ + { + "id": tc.id, + "type": tc.type, + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, + }, + } + for tc in choice.tool_calls + ], + } + messages.append(assistant_msg) + + for tool_call in choice.tool_calls: + function_name = tool_call.function.name + arguments = json.loads(tool_call.function.arguments) + print(f" Tool call: {function_name}({arguments})") + + # Execute the function and add the result + func = tool_functions[function_name] + result = func(**arguments) + messages.append({ + "role": "tool", + "tool_call_id": tool_call.id, + "content": json.dumps(result) + }) + + # Send the updated conversation back + response = client.complete_chat(messages, tools=tools) + choice = response.choices[0].message + + return choice.content +# + + +# +async def main(): + # Initialize the Foundry Local SDK + config = Configuration(app_name="foundry_local_samples") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + + # Select and load a model + model = manager.catalog.get_model("qwen2.5-0.5b") + model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", + flush=True + ) + ) + print() + model.load() + print("Model loaded and ready.") + + # Get a chat client + client = model.get_chat_client() + + # Conversation with a system prompt + messages = [ + { + "role": "system", + "content": "You are a helpful assistant with access to tools. " + "Use them when needed to answer questions accurately." + }, + { + "role": "user", + "content": "What is the weather in Seattle and what is 42 * 17?" 
+ } + ] + + print("Sending request with tools...") + response = client.complete_chat(messages, tools=tools) + answer = process_tool_calls(messages, response, client) + + print(f"\nAssistant: {answer}") + + # Clean up + model.unload() + print("Model unloaded.") +# + + +if __name__ == "__main__": + asyncio.run(main()) +# diff --git a/samples/python/tutorial-chat-assistant/requirements.txt b/samples/python/tutorial-chat-assistant/requirements.txt new file mode 100644 index 00000000..c79aa6dd --- /dev/null +++ b/samples/python/tutorial-chat-assistant/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk diff --git a/samples/python/tutorial-chat-assistant/src/app.py b/samples/python/tutorial-chat-assistant/src/app.py new file mode 100644 index 00000000..05fa0bcc --- /dev/null +++ b/samples/python/tutorial-chat-assistant/src/app.py @@ -0,0 +1,71 @@ +# +# +import asyncio +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +async def main(): + # + # Initialize the Foundry Local SDK + config = Configuration(app_name="foundry_local_samples") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + + # Select and load a model from the catalog + model = manager.catalog.get_model("qwen2.5-0.5b") + model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True)) + print() + model.load() + print("Model loaded and ready.") + + # Get a chat client + client = model.get_chat_client() + # + + # + # Start the conversation with a system prompt + messages = [ + { + "role": "system", + "content": "You are a helpful, friendly assistant. Keep your responses " + "concise and conversational. If you don't know something, say so." + } + ] + # + + print("\nChat assistant ready! Type 'quit' to exit.\n") + + # + while True: + user_input = input("You: ") + if user_input.strip().lower() in ("quit", "exit"): + break + + # Add the user's message to conversation history + messages.append({"role": "user", "content": user_input}) + + # + # Stream the response token by token + print("Assistant: ", end="", flush=True) + full_response = "" + for chunk in client.complete_streaming_chat(messages): + content = chunk.choices[0].message.content + if content: + print(content, end="", flush=True) + full_response += content + print("\n") + # + + # Add the complete response to conversation history + messages.append({"role": "assistant", "content": full_response}) + # + + # Clean up - unload the model + model.unload() + print("Model unloaded. 
Goodbye!") + + +if __name__ == "__main__": + asyncio.run(main()) +# diff --git a/samples/python/tutorial-document-summarizer/requirements.txt b/samples/python/tutorial-document-summarizer/requirements.txt new file mode 100644 index 00000000..c79aa6dd --- /dev/null +++ b/samples/python/tutorial-document-summarizer/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk diff --git a/samples/python/tutorial-document-summarizer/src/app.py b/samples/python/tutorial-document-summarizer/src/app.py new file mode 100644 index 00000000..3a62fe24 --- /dev/null +++ b/samples/python/tutorial-document-summarizer/src/app.py @@ -0,0 +1,78 @@ +# +# +import asyncio +import sys +from pathlib import Path +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +async def summarize_file(client, file_path, system_prompt): + """Summarize a single file and print the result.""" + content = Path(file_path).read_text(encoding="utf-8") + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": content} + ] + response = client.complete_chat(messages) + print(response.choices[0].message.content) + + +async def summarize_directory(client, directory, system_prompt): + """Summarize all .txt files in a directory.""" + txt_files = sorted(Path(directory).glob("*.txt")) + + if not txt_files: + print(f"No .txt files found in {directory}") + return + + for txt_file in txt_files: + print(f"--- {txt_file.name} ---") + await summarize_file(client, txt_file, system_prompt) + print() + + +async def main(): + # + # Initialize the Foundry Local SDK + config = Configuration(app_name="foundry_local_samples") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + + # Select and load a model from the catalog + model = manager.catalog.get_model("qwen2.5-0.5b") + model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True)) + print() + model.load() + print("Model loaded and ready.\n") + + # Get a chat client + client = model.get_chat_client() + # + + # + system_prompt = ( + "Summarize the following document into concise bullet points. " + "Focus on the key points and main ideas." + ) + + # + target = sys.argv[1] if len(sys.argv) > 1 else "document.txt" + target_path = Path(target) + # + + if target_path.is_dir(): + await summarize_directory(client, target_path, system_prompt) + else: + print(f"--- {target_path.name} ---") + await summarize_file(client, target_path, system_prompt) + # + + # Clean up + model.unload() + print("\nModel unloaded. 
Done!") + + +if __name__ == "__main__": + asyncio.run(main()) +# diff --git a/samples/python/tutorial-tool-calling/requirements.txt b/samples/python/tutorial-tool-calling/requirements.txt new file mode 100644 index 00000000..c79aa6dd --- /dev/null +++ b/samples/python/tutorial-tool-calling/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk diff --git a/samples/python/tutorial-tool-calling/src/app.py b/samples/python/tutorial-tool-calling/src/app.py new file mode 100644 index 00000000..b26085f6 --- /dev/null +++ b/samples/python/tutorial-tool-calling/src/app.py @@ -0,0 +1,187 @@ +# +# +import asyncio +import json +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +# +# --- Tool definitions --- +tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city or location" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "Temperature unit" + } + }, + "required": ["location"] + } + } + }, + { + "type": "function", + "function": { + "name": "calculate", + "description": "Perform a math calculation", + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": ( + "The math expression to evaluate" + ) + } + }, + "required": ["expression"] + } + } + } +] + + +# --- Tool implementations --- +def get_weather(location, unit="celsius"): + """Simulate a weather lookup.""" + return { + "location": location, + "temperature": 22 if unit == "celsius" else 72, + "unit": unit, + "condition": "Sunny" + } + + +def calculate(expression): + """Evaluate a math expression safely.""" + allowed = set("0123456789+-*/(). 
") + if not all(c in allowed for c in expression): + return {"error": "Invalid expression"} + try: + result = eval(expression) + return {"expression": expression, "result": result} + except Exception as e: + return {"error": str(e)} + + +tool_functions = { + "get_weather": get_weather, + "calculate": calculate +} +# + + +# +def process_tool_calls(messages, response, client): + """Handle tool calls in a loop until the model produces a final answer.""" + choice = response.choices[0].message + + while choice.tool_calls: + # Convert the assistant message to a dict for the SDK + assistant_msg = { + "role": "assistant", + "content": choice.content, + "tool_calls": [ + { + "id": tc.id, + "type": tc.type, + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, + }, + } + for tc in choice.tool_calls + ], + } + messages.append(assistant_msg) + + for tool_call in choice.tool_calls: + function_name = tool_call.function.name + arguments = json.loads(tool_call.function.arguments) + print(f" Tool call: {function_name}({arguments})") + + # Execute the function and add the result + func = tool_functions[function_name] + result = func(**arguments) + messages.append({ + "role": "tool", + "tool_call_id": tool_call.id, + "content": json.dumps(result) + }) + + # Send the updated conversation back + response = client.complete_chat(messages, tools=tools) + choice = response.choices[0].message + + return choice.content +# + + +# +async def main(): + # Initialize the Foundry Local SDK + config = Configuration(app_name="foundry_local_samples") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + + # Select and load a model + model = manager.catalog.get_model("qwen2.5-0.5b") + model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", + flush=True + ) + ) + print() + model.load() + print("Model loaded and ready.") + + # Get a chat client + client = model.get_chat_client() + + # Conversation with a system prompt + messages = [ + { + "role": "system", + "content": "You are a helpful assistant with access to tools. " + "Use them when needed to answer questions accurately." + } + ] + + print("\nTool-calling assistant ready! Type 'quit' to exit.\n") + + while True: + user_input = input("You: ") + if user_input.strip().lower() in ("quit", "exit"): + break + + messages.append({"role": "user", "content": user_input}) + + response = client.complete_chat(messages, tools=tools) + answer = process_tool_calls(messages, response, client) + + messages.append({"role": "assistant", "content": answer}) + print(f"Assistant: {answer}\n") + + # Clean up + model.unload() + print("Model unloaded. 
Goodbye!") +# + + +if __name__ == "__main__": + asyncio.run(main()) +# diff --git a/samples/python/tutorial-voice-to-text/requirements.txt b/samples/python/tutorial-voice-to-text/requirements.txt new file mode 100644 index 00000000..c79aa6dd --- /dev/null +++ b/samples/python/tutorial-voice-to-text/requirements.txt @@ -0,0 +1 @@ +foundry-local-sdk diff --git a/samples/python/tutorial-voice-to-text/src/app.py b/samples/python/tutorial-voice-to-text/src/app.py new file mode 100644 index 00000000..4174e5ac --- /dev/null +++ b/samples/python/tutorial-voice-to-text/src/app.py @@ -0,0 +1,78 @@ +# +# +import asyncio +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +async def main(): + # + # Initialize the Foundry Local SDK + config = Configuration(app_name="foundry_local_samples") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + # + + # + # Load the speech-to-text model + speech_model = manager.catalog.get_model("whisper-tiny") + speech_model.download( + lambda progress: print( + f"\rDownloading speech model: {progress:.2f}%", + end="", + flush=True, + ) + ) + print() + speech_model.load() + print("Speech model loaded.") + + # Transcribe the audio file + audio_client = speech_model.get_audio_client() + transcription = audio_client.transcribe("meeting-notes.wav") + print(f"\nTranscription:\n{transcription.text}") + + # Unload the speech model to free memory + speech_model.unload() + # + + # + # Load the chat model for summarization + chat_model = manager.catalog.get_model("qwen2.5-0.5b") + chat_model.download( + lambda progress: print( + f"\rDownloading chat model: {progress:.2f}%", + end="", + flush=True, + ) + ) + print() + chat_model.load() + print("Chat model loaded.") + + # Summarize the transcription into organized notes + client = chat_model.get_chat_client() + messages = [ + { + "role": "system", + "content": "You are a note-taking assistant. " + "Summarize the following transcription " + "into organized, concise notes with " + "bullet points.", + }, + {"role": "user", "content": transcription.text}, + ] + + response = client.complete_chat(messages) + summary = response.choices[0].message.content + print(f"\nSummary:\n{summary}") + + # Clean up + chat_model.unload() + print("\nDone. 
Models unloaded.") + # + + +if __name__ == "__main__": + asyncio.run(main()) +# diff --git a/samples/python/web-server/requirements.txt b/samples/python/web-server/requirements.txt new file mode 100644 index 00000000..5a0f14ae --- /dev/null +++ b/samples/python/web-server/requirements.txt @@ -0,0 +1,2 @@ +foundry-local-sdk +openai diff --git a/samples/python/web-server/src/app.py b/samples/python/web-server/src/app.py new file mode 100644 index 00000000..dc554ad9 --- /dev/null +++ b/samples/python/web-server/src/app.py @@ -0,0 +1,59 @@ +# +# +import openai +from foundry_local_sdk import Configuration, FoundryLocalManager +# + +# +# Initialize the Foundry Local SDK +config = Configuration(app_name="foundry_local_samples") +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance + +# Load a model +model = manager.catalog.get_model("qwen2.5-0.5b") +model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", + flush=True, + ) +) +print() +model.load() +print("Model loaded.") + +# Start the web service to expose an OpenAI-compatible REST endpoint +manager.start_web_service() +base_url = f"{manager.urls[0]}/v1" +# + +# +# Use the OpenAI SDK to connect to the local REST endpoint +client = openai.OpenAI( + base_url=base_url, + api_key="none", +) +# + +# +# Make a chat completion request via the REST API +response = client.chat.completions.create( + model=model.id, + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the golden ratio?"} + ], + stream=True, +) + +for chunk in response: + if chunk.choices[0].delta.content is not None: + print(chunk.choices[0].delta.content, end="", flush=True) +print() +# + +# Clean up +model.unload() +manager.stop_web_service() +# diff --git a/samples/rag/README.md b/samples/rag/README.md deleted file mode 100644 index 2225fd01..00000000 --- a/samples/rag/README.md +++ /dev/null @@ -1,206 +0,0 @@ -# Foundry Local RAG Implementation Guide - -## Overview - -This guide demonstrates how to build a complete offline RAG (Retrieval-Augmented Generation) solution using Foundry Local, combining local embedding models with vector search capabilities for enhanced AI inference on edge devices. - -## Prerequisites - -- **Qdrant**: Local vector database installation -- **.NET 8+**: Runtime environment -- **.NET Interactive Notebook**: For development and testing -- **Foundry Local 0.5.100+**: Local AI model execution platform - -### Hardware Considerations - -- **CPU-only environments**: Use Qwen2.5-0.5b model for optimal performance -- **GPU environments**: Can leverage more powerful models through ONNX Runtime providers - -## What is RAG? - -RAG (Retrieval-Augmented Generation) combines information retrieval with text generation to provide contextually relevant responses. In this implementation, we create a fully offline RAG system that: - -1. **Embeds documents** using local embedding models -2. **Stores vectors** in Qdrant for efficient similarity search -3. **Retrieves relevant context** based on user queries -4. **Generates responses** using Foundry Local's language models - -## Local Embedding Model Setup - -For a complete offline RAG solution, we use ONNX-based embedding models that run locally alongside Foundry Local. The recommended model is JinaAI's [jina-embeddings-v2-base-en](https://huggingface.co/jinaai/jina-embeddings-v2-base-en). - -### Required Files - -Download and place these files in a `./jina/` directory: - -1. 
**ONNX Model**: [model.onnx](https://huggingface.co/jinaai/jina-embeddings-v2-base-en/resolve/main/model.onnx) -2. **Vocabulary**: [vocab.txt](https://huggingface.co/jinaai/jina-embeddings-v2-base-en/resolve/main/vocab.txt) - -## Building RAG with Semantic Kernel - -### 1. Core Dependencies - -```csharp -#r "nuget: Microsoft.SemanticKernel, 1.60.0" -#r "nuget: Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha" -#r "nuget: Microsoft.SemanticKernel.Connectors.Qdrant, 1.60.0-preview" -#r "nuget: Qdrant.Client, 1.14.1" -``` - -### 2. Kernel Configuration - -```csharp -var builder = Kernel.CreateBuilder(); - -// Local embedding model -builder.AddBertOnnxEmbeddingGenerator("./jina/model.onnx", "./jina/vocab.txt"); - -// Foundry Local chat completion -builder.AddOpenAIChatCompletion( - "qwen2.5-0.5b-instruct-generic-gpu", - new Uri("http://localhost:5273/v1"), - apiKey: "", - serviceId: "qwen2.5-0.5b"); - -var kernel = builder.Build(); -``` - -### 3. Vector Store Service - -The `VectorStoreService` class manages interactions with Qdrant: - -```csharp -public class VectorStoreService -{ - private readonly QdrantClient _client; - private readonly string _collectionName; - - public async Task InitializeAsync(int vectorSize = 768) - { - // Create collection if it doesn't exist - await _client.CreateCollectionAsync(_collectionName, new VectorParams - { - Size = (ulong)vectorSize, - Distance = Distance.Cosine - }); - } - - public async Task UpsertAsync(string id, ReadOnlyMemory embedding, - Dictionary metadata) - { - // Store document chunks with embeddings - } - - public async Task> SearchAsync(ReadOnlyMemory queryEmbedding, - int limit = 3) - { - // Perform similarity search - } -} -``` - -### 4. Document Ingestion - -The `DocumentIngestionService` processes documents into searchable chunks: - -```csharp -public class DocumentIngestionService -{ - public async Task IngestDocumentAsync(string documentPath, string documentId) - { - var content = await File.ReadAllTextAsync(documentPath); - var chunks = ChunkText(content, 300, 60); // 300 words, 60 word overlap - - foreach (var chunk in chunks) - { - var embedding = await _embeddingService.GenerateAsync(chunk); - await _vectorStoreService.UpsertAsync( - id: Guid.NewGuid().ToString(), - embedding: embedding.Vector, - metadata: new Dictionary - { - ["document_id"] = documentId, - ["text"] = chunk, - ["document_path"] = documentPath - }); - } - } -} -``` - -### 5. RAG Query Service - -The `RagQueryService` combines retrieval and generation: - -```csharp -public class RagQueryService -{ - public async Task QueryAsync(string question) - { - // 1. Generate query embedding - var queryEmbedding = await _embeddingService.GenerateAsync(question); - - // 2. Search for relevant chunks - var searchResults = await _vectorStoreService.SearchAsync( - queryEmbedding.Vector, limit: 5); - - // 3. Build context from retrieved chunks - var context = string.Join("", searchResults - .Select(r => r.Payload["text"].ToString())); - - // 4. Generate response using context - var prompt = $"Question: {question}\nContext: {context}"; - var chatHistory = new ChatHistory(); - chatHistory.AddSystemMessage( - "You are a helpful assistant that answers questions based on the provided context."); - chatHistory.AddUserMessage(prompt); - - // 5. 
Stream response from Foundry Local - var fullMessage = string.Empty; - await foreach (var chatUpdate in _chatService.GetStreamingChatMessageContentsAsync(chatHistory)) - { - if (chatUpdate.Content?.Length > 0) - fullMessage += chatUpdate.Content; - } - - return fullMessage ?? "I couldn't generate a response."; - } -} -``` - -## Usage Example - -```csharp -// Initialize services -var vectorStoreService = new VectorStoreService("http://localhost:6334", "", "demodocs"); -await vectorStoreService.InitializeAsync(); - -var documentIngestionService = new DocumentIngestionService(embeddingService, vectorStoreService); -var ragQueryService = new RagQueryService(embeddingService, chatService, vectorStoreService); - -// Ingest a document -await documentIngestionService.IngestDocumentAsync("./foundry-local-architecture.md", "doc1"); - -// Query the RAG system -var answer = await ragQueryService.QueryAsync("What's Foundry Local?"); -Console.WriteLine(answer); -``` - -## Architecture Benefits - -1. **Complete Offline Operation**: No external API dependencies -2. **Edge-Optimized**: Runs efficiently on local hardware -3. **Scalable Vector Search**: Qdrant provides high-performance similarity search -4. **Flexible Model Support**: ONNX Runtime supports multiple hardware providers -5. **Streaming Responses**: Real-time response generation - -## Performance Considerations - -- **Chunk Size**: 300 words with 60-word overlap balances context and performance -- **Vector Dimensions**: 768-dimensional embeddings from jina-embeddings-v2 -- **Search Limit**: Retrieve top 5 most relevant chunks for context -- **Memory Management**: TTL-based model caching in Foundry Local - -This implementation provides a robust foundation for building production-ready RAG applications that run entirely on local infrastructure while maintaining high performance and accuracy. - -***Note***Go to [demo](./rag_foundrylocal_demo.ipynb) diff --git a/samples/rag/foundry-local-architecture.md b/samples/rag/foundry-local-architecture.md deleted file mode 100644 index 6b04f790..00000000 --- a/samples/rag/foundry-local-architecture.md +++ /dev/null @@ -1,116 +0,0 @@ -# Foundry Local Architecture - -Foundry Local is designed to enable efficient, secure, and scalable AI model inference directly on local devices. This article explains the key components of the Foundry Local architecture and how they interact to deliver AI capabilities. - -The benefits of Foundry Local include: - -- **Low Latency**: By running models locally, Foundry Local minimizes the time it takes to process requests and return results. -- **Data Privacy**: Sensitive data can be processed locally without sending it to the cloud, ensuring compliance with data protection regulations. -- **Flexibility**: Foundry Local supports a wide range of hardware configurations, allowing users to choose the best setup for their needs. -- **Scalability**: Foundry Local can be deployed on various devices, from personal computers to powerful servers, making it suitable for different use cases. -- **Cost-Effectiveness**: Running models locally can reduce costs associated with cloud computing, especially for high-volume applications. -- **Offline Capabilities**: Foundry Local can operate without an internet connection, making it ideal for remote or disconnected environments. -- **Integration with Existing Workflows**: Foundry Local can be easily integrated into existing development and deployment workflows, allowing for a smooth transition to local inference. 
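The chunking parameters called out above (300-word chunks with a 60-word overlap) are easy to reproduce in any stack. A minimal Python sketch of the same sliding window, assuming plain whitespace tokenization; `chunk_text` is an illustrative helper name, not part of any SDK:

```python
def chunk_text(text: str, chunk_size: int = 300, overlap: int = 60) -> list[str]:
    """Split text into word-based chunks; consecutive chunks share `overlap` words."""
    words = text.split()
    chunks: list[str] = []
    step = chunk_size - overlap
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        if start + chunk_size >= len(words):
            break
    return chunks
```

The overlap keeps sentences that straddle a chunk boundary retrievable from both neighbouring chunks, at the cost of roughly 20% extra storage.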
- -## Key Components - -The key components of the Foundry Local architecture are articulated in the following diagram: - -![Foundry Local Architecture Diagram](../media/architecture/foundry-local-arch.png) - -### Foundry Local Service - -The Foundry Local Service is an OpenAI compatible REST server that provides a standardized interface for interacting with the inference engine and model management. Developers can use this API to send requests, run models, and retrieve results programmatically. - -- **Endpoint**: `http://localhost:PORT/v1` - - Note: The port is dynamically assigned, so check the logs for the correct port. -- **Use Cases**: - - Integrating Foundry Local with custom applications. - - Running models via HTTP requests. - -### ONNX Runtime - -The ONNX runtime is a core component responsible for running AI models. It uses optimized ONNX models to perform inference efficiently on local hardware, such as CPUs, GPUs, or NPUs. - -**Features**: - -- Supports multiple hardware providers (for example: NVIDIA, AMD, Intel) and devices (for example: NPUs, CPUs, GPUs). -- Provides a unified interface for running models on different hardware platforms. -- Best-in-class performance. -- Supports quantized models for faster inference. - -### Model Management - -Foundry Local provides robust tools for managing AI models, ensuring that they're readily available for inference and easy to maintain. Model management is handled through the **Model Cache** and the **Command-Line Interface (CLI)**. - -#### Model Cache - -The model cache is a local storage system where AI models are downloaded and stored. It ensures that models are available for inference without requiring repeated downloads. The cache can be managed using the Foundry CLI or REST API. - -- **Purpose**: Reduces latency by storing models locally. -- **Management Commands**: - - `foundry cache list`: Lists all models stored in the local cache. - - `foundry cache remove `: Deletes a specific model from the cache. - - `foundry cache cd `: Changes the directory where models are stored. - -#### Model Lifecycle - -1. **Download**: Models are downloaded from the Azure AI Foundry model catalog to local disk. -2. **Load**: Models are loaded into the Foundry Local service (and therefore memory) for inference. You can set a TTL (time-to-live) for how long the model should remain in memory (the default is 10 minutes). -3. **Run**: Models are inferenced. -4. **Unload**: Models can be unloaded from the inference engine to free up resources. -5. **Delete**: Models can be deleted from the local cache to free up disk space. - -#### Model Compilation using Olive - -Before models can be used with Foundry Local, they must be compiled and optimized in the [ONNX](https://onnx.ai) format. Microsoft provides a selection of published models in the Azure AI Foundry Model Catalog that are already optimized for Foundry Local. However, you aren't limited to those models - by using [Olive](https://microsoft.github.io/Olive/). Olive is a powerful framework for preparing AI models for efficient inference. It converts models into the ONNX format, optimizes their graph structure, and applies techniques like quantization to improve performance on local hardware. - -**💡 TIP**: To learn more about compiling models for Foundry Local, read [Compile Hugging Face models for Foundry Local](../how-to/compile-models-for-foundry-local.md). 
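Because the Foundry Local Service speaks the OpenAI REST protocol, any OpenAI client can exercise a loaded model directly. A minimal sketch with the `openai` Python package; the port (5273) and model id are assumptions carried over from the RAG guide above, since the service assigns its port dynamically:

```python
from openai import OpenAI

# The port is assigned dynamically; 5273 mirrors the RAG guide in this document.
client = OpenAI(base_url="http://localhost:5273/v1", api_key="none")

response = client.chat.completions.create(
    model="qwen2.5-0.5b-instruct-generic-gpu",
    messages=[{"role": "user", "content": "What is Foundry Local?"}],
)
print(response.choices[0].message.content)
```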
- -### Hardware Abstraction Layer - -The hardware abstraction layer ensures that Foundry Local can run on various devices by abstracting the underlying hardware. To optimize performance based on the available hardware, Foundry Local supports: - -- **multiple _execution providers_**, such as NVIDIA CUDA, AMD, Qualcomm, Intel. -- **multiple _device types_**, such as CPU, GPU, NPU. - -### Developer Experiences - -The Foundry Local architecture is designed to provide a seamless developer experience, enabling easy integration and interaction with AI models. - -Developers can choose from various interfaces to interact with the system, including: - -#### Command-Line Interface (CLI) - -The Foundry CLI is a powerful tool for managing models, the inference engine, and the local cache. - -**Examples**: - -- `foundry model list`: Lists all available models in the local cache. -- `foundry model run `: Runs a model. -- `foundry service status`: Checks the status of the service. - -**💡 TIP**: To learn more about the CLI commands, read [Foundry Local CLI Reference](../reference/reference-cli.md). - -#### Inferencing SDK Integration - -Foundry Local supports integration with various SDKs, such as the OpenAI SDK, enabling developers to use familiar programming interfaces to interact with the local inference engine. - -- **Supported SDKs**: Python, JavaScript, C#, and more. - -**💡 TIP**: To learn more about integrating with inferencing SDKs, read [Integrate Foundry Local with Inferencing SDKs](../how-to/integrate-with-inference-sdks.md). - -#### AI Toolkit for Visual Studio Code - -The AI Toolkit for Visual Studio Code provides a user-friendly interface for developers to interact with Foundry Local. It allows users to run models, manage the local cache, and visualize results directly within the IDE. - -- **Features**: - - Model management: Download, load, and run models from within the IDE. - - Interactive console: Send requests and view responses in real-time. - - Visualization tools: Graphical representation of model performance and results. - -## Next Steps - -- [Get started with Foundry Local](../get-started.md) -- [Integrate with Inference SDKs](../how-to/integrate-with-inference-sdks.md) -- [Foundry Local CLI Reference](../reference/reference-cli.md) diff --git a/samples/rag/rag_foundrylocal_demo.ipynb b/samples/rag/rag_foundrylocal_demo.ipynb deleted file mode 100644 index d12cd5d1..00000000 --- a/samples/rag/rag_foundrylocal_demo.ipynb +++ /dev/null @@ -1,1042 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "6729525b", - "metadata": {}, - "source": [ - "# Foundry Local RAG Implementation Guide\n", - "\n", - "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using Foundry Local with Semantic Kernel, ONNX embeddings, and Qdrant vector database.\n", - "\n", - "## Package Installation\n", - "\n", - "First, we install the required NuGet packages for Semantic Kernel and related components." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "22f573fa", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Installed Packages
  • Microsoft.SemanticKernel, 1.60.0
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#r \"nuget: Microsoft.SemanticKernel, 1.60.0\"" - ] - }, - { - "cell_type": "markdown", - "id": "87f0b48a", - "metadata": {}, - "source": [ - "### Install Microsoft Semantic Kernel Core Package\n", - "\n", - "Installing the main Semantic Kernel package which provides the core functionality for building AI applications." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "2beb6393", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Installed Packages
  • Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#r \"nuget: Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha\"" - ] - }, - { - "cell_type": "markdown", - "id": "41c548be", - "metadata": {}, - "source": [ - "### Install Semantic Kernel ONNX Connector\n", - "\n", - "Installing the ONNX connector package which enables using ONNX models for embeddings generation in Semantic Kernel." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "bc62e7be", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Installed Packages
  • Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#r \"nuget: Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha\"" - ] - }, - { - "cell_type": "markdown", - "id": "70bff756", - "metadata": {}, - "source": [ - "### Duplicate ONNX Connector Installation\n", - "\n", - "Note: This is a duplicate installation of the ONNX connector package (same as the previous cell)." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Installed Packages
  • Microsoft.SemanticKernel.Connectors.Qdrant, 1.60.0-preview
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#r \"nuget: Microsoft.SemanticKernel.Connectors.Qdrant, 1.60.0-preview\"" - ] - }, - { - "cell_type": "markdown", - "id": "d21d8590", - "metadata": {}, - "source": [ - "### Install Semantic Kernel Qdrant Connector\n", - "\n", - "Installing the Qdrant connector package to enable vector database operations with Semantic Kernel." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Installed Packages
  • qdrant.client, 1.14.1
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#r \"nuget: Qdrant.Client, 1.14.1\"" - ] - }, - { - "cell_type": "markdown", - "id": "a887bc53", - "metadata": {}, - "source": [ - "### Install Qdrant Client\n", - "\n", - "Installing the official Qdrant client library for direct communication with the Qdrant vector database." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "6ab040e4", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "using Microsoft.SemanticKernel;" - ] - }, - { - "cell_type": "markdown", - "id": "d7ab7920", - "metadata": {}, - "source": [ - "## Setup and Configuration\n", - "\n", - "### Import Semantic Kernel\n", - "\n", - "Importing the core Semantic Kernel namespace to access the main functionality." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "e4c08e21", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var builder = Kernel.CreateBuilder();" - ] - }, - { - "cell_type": "markdown", - "id": "c93e70fc", - "metadata": {}, - "source": [ - "### Create Kernel Builder\n", - "\n", - "Creating a kernel builder instance which will be used to configure and build the Semantic Kernel with various services." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a0eb9fc", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var embeddModelPath = \"Your Jinaai jina-embeddings-v2-base-en onnx model path\";\n", - "var embedVocab = \"Your Jinaai ina-embeddings-v2-base-en vocab file path\";" - ] - }, - { - "cell_type": "markdown", - "id": "9cf4ae93", - "metadata": {}, - "source": [ - "### Define Embedding Model Paths\n", - "\n", - "Setting up file paths for the JINA embedding model files - the ONNX model file and vocabulary file needed for text embeddings." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "f48625de", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "builder.AddBertOnnxEmbeddingGenerator(embeddModelPath, embedVocab);\n", - "builder.AddOpenAIChatCompletion(\"qwen2.5-0.5b-instruct-generic-gpu\", new Uri(\"http://localhost:5273/v1\"), apiKey: \"\", serviceId: \"qwen2.5-0.5b\");" - ] - }, - { - "cell_type": "markdown", - "id": "d6cf5a34", - "metadata": {}, - "source": [ - "### Configure AI Services\n", - "\n", - "Adding the BERT ONNX embedding generator and OpenAI-compatible chat completion service to the kernel builder. The chat service connects to a local Foundry Local instance running the Qwen2.5 model." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "e5efe8c9", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var kernel = builder.Build();" - ] - }, - { - "cell_type": "markdown", - "id": "58c210d5", - "metadata": {}, - "source": [ - "### Build the Kernel\n", - "\n", - "Building the final kernel instance with all configured services (embedding generator and chat completion service)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "using Microsoft.SemanticKernel.Embeddings;\n", - "using Microsoft.SemanticKernel.ChatCompletion;\n", - "using Microsoft.Extensions.AI;\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "fb43e167", - "metadata": {}, - "source": [ - "### Import Additional Required Namespaces\n", - "\n", - "Importing namespaces for embeddings, chat completion, and Microsoft Extensions AI functionality." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "using System.Net.Http;" - ] - }, - { - "cell_type": "markdown", - "id": "5f690259", - "metadata": {}, - "source": [ - "### Import HTTP Client\n", - "\n", - "Importing System.Net.Http for HTTP communication capabilities." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "\n", - "using Microsoft.SemanticKernel.Memory;\n", - "using Microsoft.SemanticKernel.Connectors.Qdrant;" - ] - }, - { - "cell_type": "markdown", - "id": "376b8ade", - "metadata": {}, - "source": [ - "### Import Memory and Vector Database Connectors\n", - "\n", - "Importing Semantic Kernel memory functionality and Qdrant connector for vector database operations." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "using Qdrant.Client;\n", - "using Qdrant.Client.Grpc;" - ] - }, - { - "cell_type": "markdown", - "id": "326b2cda", - "metadata": {}, - "source": [ - "### Import Qdrant Client Libraries\n", - "\n", - "Importing the Qdrant client and gRPC libraries for direct communication with the Qdrant vector database." 
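The collection bootstrap that `VectorStoreService.InitializeAsync` performs below can be expressed the same way with the official `qdrant-client` Python package. A sketch assuming a local Qdrant on its default HTTP port (6333) and the 768-dimensional vectors used throughout this guide:

```python
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

client = QdrantClient(url="http://localhost:6333")

# Mirror InitializeAsync: create the collection only if it does not exist yet.
try:
    client.get_collection("demodocs")
except Exception:
    client.create_collection(
        collection_name="demodocs",
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )
```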
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "\n", - "public class VectorStoreService\n", - "{\n", - " private readonly QdrantClient _client;\n", - " private readonly string _collectionName;\n", - "\n", - " public VectorStoreService(string endpoint, string apiKey, string collectionName)\n", - " {\n", - " _client = new QdrantClient(new Uri(endpoint));\n", - " _collectionName = collectionName;\n", - " }\n", - "\n", - " public async Task InitializeAsync(int vectorSize = 768)\n", - " {\n", - " try\n", - " {\n", - " await _client.GetCollectionInfoAsync(_collectionName);\n", - " }\n", - " catch\n", - " {\n", - " await _client.CreateCollectionAsync(_collectionName, new VectorParams\n", - " {\n", - " Size = (ulong)vectorSize,\n", - " Distance = Distance.Cosine\n", - " });\n", - " }\n", - " }\n", - "\n", - " public async Task UpsertAsync(string id, ReadOnlyMemory embedding, Dictionary metadata)\n", - " {\n", - " var point = new PointStruct\n", - " {\n", - " Id = new PointId { Uuid = id },\n", - " Vectors = embedding.ToArray(),\n", - " Payload = { }\n", - " };\n", - "\n", - " foreach (var kvp in metadata)\n", - " {\n", - " point.Payload[kvp.Key] = kvp.Value switch\n", - " {\n", - " string s => s,\n", - " int i => i,\n", - " bool b => b,\n", - " _ => kvp.Value.ToString() ?? string.Empty\n", - " };\n", - " }\n", - "\n", - " await _client.UpsertAsync(_collectionName, new[] { point });\n", - " }\n", - "\n", - " public async Task> SearchAsync(ReadOnlyMemory queryEmbedding, int limit = 3)\n", - " {\n", - " var searchResult = await _client.SearchAsync(_collectionName, queryEmbedding.ToArray(), limit: (ulong)limit);\n", - " return searchResult.ToList();\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "499f7d8f", - "metadata": {}, - "source": [ - "## Service Classes\n", - "\n", - "### Vector Store Service Class\n", - "\n", - "This class provides a wrapper around the Qdrant client to handle vector database operations including:\n", - "- Collection initialization with proper vector configuration\n", - "- Upserting vectors with metadata\n", - "- Searching for similar vectors using cosine similarity" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "\n", - "public class RagQueryService\n", - "{\n", - " private readonly IEmbeddingGenerator> _embeddingService;\n", - " private readonly IChatCompletionService _chatService;\n", - " private readonly VectorStoreService _vectorStoreService;\n", - "\n", - " public RagQueryService(\n", - " IEmbeddingGenerator> embeddingService,\n", - " IChatCompletionService chatService,\n", - " VectorStoreService vectorStoreService)\n", - " {\n", - " _embeddingService = embeddingService;\n", - " _chatService = chatService;\n", - " _vectorStoreService = vectorStoreService;\n", - " }\n", - "\n", - " public async Task QueryAsync(string question)\n", - " {\n", - " // return question; // For now, just return the question as a placeholder\n", - " var queryEmbeddingResult = await _embeddingService.GenerateAsync(question);\n", - "// Console.WriteLine(question);\n", - " var queryEmbedding = queryEmbeddingResult.Vector;\n", - " var searchResults = await _vectorStoreService.SearchAsync(queryEmbedding, limit: 5);\n", - "\n", - " 
string str_context = \"\";\n", - " foreach (var result in searchResults)\n", - " {\n", - " if (result.Payload.TryGetValue(\"text\", out var text))\n", - " {\n", - " str_context += text.ToString();\n", - " }\n", - " }\n", - " var prompt = $@\"According to the question {question},, optimize and simplify the content. {str_context}\";\n", - "\n", - "\n", - " var chatHistory = new ChatHistory();\n", - " chatHistory.AddSystemMessage(\"You are a helpful assistant that answers questions based on the provided context.\");\n", - " chatHistory.AddUserMessage(prompt);\n", - "\n", - " var fullMessage = string.Empty;\n", - "\n", - " await foreach (var chatUpdate in _chatService.GetStreamingChatMessageContentsAsync(chatHistory, cancellationToken: default))\n", - " { \n", - " if (chatUpdate.Content is { Length: > 0 })\n", - " {\n", - " fullMessage += chatUpdate.Content;\n", - " }\n", - " }\n", - " return fullMessage ?? \"I couldn't generate a response.\";\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "4fc8eee3", - "metadata": {}, - "source": [ - "### RAG Query Service Class\n", - "\n", - "This service implements the core RAG (Retrieval-Augmented Generation) functionality:\n", - "1. Converts user questions into embeddings\n", - "2. Searches for relevant context from the vector database\n", - "3. Combines the retrieved context with the user question\n", - "4. Generates responses using the chat completion service" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "using System.IO;" - ] - }, - { - "cell_type": "markdown", - "id": "04b2e2e9", - "metadata": {}, - "source": [ - "### Import File I/O\n", - "\n", - "Importing System.IO for file reading operations needed for document ingestion." 
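The streaming pattern `RagQueryService` uses above, accumulating partial chunks into one string instead of printing them, carries over directly to the `openai` Python client. A sketch under the same endpoint and model-name assumptions as earlier, with `prompt` standing in for the context-augmented question:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:5273/v1", api_key="none")
prompt = "Question: What's Foundry Local?\nContext: ..."  # built from retrieved chunks

full_message = ""
stream = client.chat.completions.create(
    model="qwen2.5-0.5b-instruct-generic-gpu",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant that answers questions "
                       "based on the provided context.",
        },
        {"role": "user", "content": prompt},
    ],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        full_message += delta
```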
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "\n", - "public class DocumentIngestionService\n", - "{\n", - " private readonly IEmbeddingGenerator> _embeddingService;\n", - " private readonly VectorStoreService _vectorStoreService;\n", - "\n", - " public DocumentIngestionService(IEmbeddingGenerator> embeddingService, VectorStoreService vectorStoreService)\n", - " {\n", - " _embeddingService = embeddingService;\n", - " _vectorStoreService = vectorStoreService;\n", - " }\n", - "\n", - " public async Task IngestDocumentAsync(string documentPath, string documentId)\n", - " {\n", - " var content = await File.ReadAllTextAsync(documentPath);\n", - " var chunks = ChunkText(content, 300, 60);\n", - "\n", - " for (int i = 0; i < chunks.Count; i++)\n", - " {\n", - " var chunk = chunks[i];\n", - " var embeddingResult = await _embeddingService.GenerateAsync(chunk);\n", - " var embedding = embeddingResult.Vector;\n", - " \n", - " await _vectorStoreService.UpsertAsync(\n", - " id: Guid.NewGuid().ToString(),\n", - " embedding: embedding,\n", - " metadata: new Dictionary\n", - " {\n", - " [\"document_id\"] = documentId,\n", - " [\"chunk_index\"] = i,\n", - " [\"text\"] = chunk,\n", - " [\"document_path\"] = documentPath\n", - " }\n", - " );\n", - " }\n", - " }\n", - "\n", - " private List ChunkText(string text, int chunkSize, int overlap)\n", - " {\n", - " var chunks = new List();\n", - " var words = text.Split(' ', StringSplitOptions.RemoveEmptyEntries);\n", - " \n", - " for (int i = 0; i < words.Length; i += chunkSize - overlap)\n", - " {\n", - " var chunkWords = words.Skip(i).Take(chunkSize).ToArray();\n", - " var chunk = string.Join(\" \", chunkWords);\n", - " chunks.Add(chunk);\n", - " \n", - " if (i + chunkSize >= words.Length)\n", - " break;\n", - " }\n", - " \n", - " return chunks;\n", - " }\n", - "}" - ] - }, - { - "cell_type": "markdown", - "id": "8a5845c7", - "metadata": {}, - "source": [ - "### Document Ingestion Service Class\n", - "\n", - "This service handles the process of ingesting documents into the vector database:\n", - "1. Reads document content from files\n", - "2. Splits text into chunks with configurable size and overlap\n", - "3. Generates embeddings for each chunk\n", - "4. Stores chunks with embeddings and metadata in the vector database" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "\n", - "using Microsoft.SemanticKernel.ChatCompletion;" - ] - }, - { - "cell_type": "markdown", - "id": "1828967e", - "metadata": {}, - "source": [ - "### Additional Chat Completion Import\n", - "\n", - "Additional import for chat completion functionality (note: this might be a duplicate import)." 
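The ingestion flow above (read, chunk, embed, upsert with metadata) also ports cleanly to Python. A sketch using `qdrant-client` that reuses `chunk_text` from the earlier sketch; `generate_embedding` is a placeholder for whatever local embedding model you wire in, and it must return 768 floats to match the collection:

```python
import uuid
from pathlib import Path

from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct


def generate_embedding(text: str) -> list[float]:
    """Placeholder: call a local embedding model here (768-dim output)."""
    raise NotImplementedError


def ingest_document(client: QdrantClient, path: str, document_id: str) -> None:
    content = Path(path).read_text(encoding="utf-8")
    for index, chunk in enumerate(chunk_text(content, 300, 60)):
        client.upsert(
            collection_name="demodocs",
            points=[
                PointStruct(
                    id=str(uuid.uuid4()),
                    vector=generate_embedding(chunk),
                    payload={
                        "document_id": document_id,
                        "chunk_index": index,
                        "text": chunk,
                        "document_path": path,
                    },
                )
            ],
        )
```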
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var chatService = kernel.GetRequiredService(serviceKey: \"qwen2.5-0.5b\");\n", - "var embeddingService = kernel.GetRequiredService>>();" - ] - }, - { - "cell_type": "markdown", - "id": "8dfafaac", - "metadata": {}, - "source": [ - "## Initialize Services\n", - "\n", - "### Get Services from Kernel\n", - "\n", - "Retrieving the chat completion service and embedding generator from the configured kernel using their service keys." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var vectorStoreService = new VectorStoreService(\n", - " \"http://localhost:6334\",\n", - " \"\",\n", - " \"demodocs\");\n", - "\n", - "await vectorStoreService.InitializeAsync();" - ] - }, - { - "cell_type": "markdown", - "id": "9b29fd60", - "metadata": {}, - "source": [ - "### Create and Initialize Vector Store Service\n", - "\n", - "Creating a VectorStoreService instance pointing to a local Qdrant instance and initializing the collection for storing document embeddings." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var documentIngestionService = new DocumentIngestionService(embeddingService, vectorStoreService);\n", - "var ragQueryService = new RagQueryService(embeddingService, chatService, vectorStoreService);" - ] - }, - { - "cell_type": "markdown", - "id": "fb5a4751", - "metadata": {}, - "source": [ - "### Create Service Instances\n", - "\n", - "Creating instances of the DocumentIngestionService and RagQueryService with the necessary dependencies (embedding service, chat service, and vector store service)." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var filePath = \"./foundry-local-architecture.md\";\n", - "var fileID = \"3\";" - ] - }, - { - "cell_type": "markdown", - "id": "07b13842", - "metadata": {}, - "source": [ - "## Document Ingestion Demo\n", - "\n", - "### Define Document Information\n", - "\n", - "Setting up the file path and document ID for the Foundry Local architecture document that will be ingested into the vector database." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "await documentIngestionService.IngestDocumentAsync(filePath, fileID);" - ] - }, - { - "cell_type": "markdown", - "id": "d2c08b5e", - "metadata": {}, - "source": [ - "### Ingest Document into Vector Database\n", - "\n", - "Processing the Foundry Local architecture document by reading its content, chunking it, generating embeddings for each chunk, and storing them in the vector database with metadata." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var question = \"What's Foundry Local?\";" - ] - }, - { - "cell_type": "markdown", - "id": "e26a25d4", - "metadata": {}, - "source": [ - "## RAG Query Demo\n", - "\n", - "### Define Query Question\n", - "\n", - "Setting up a test question to demonstrate the RAG functionality - asking about what Foundry Local is." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "var answer = await ragQueryService.QueryAsync(question);" - ] - }, - { - "cell_type": "markdown", - "id": "59a1803c", - "metadata": {}, - "source": [ - "### Execute RAG Query\n", - "\n", - "Running the RAG query which will:\n", - "1. Convert the question to embeddings\n", - "2. Search for relevant context in the vector database\n", - "3. Combine retrieved context with the question\n", - "4. Generate a response using the chat completion service" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - " Here's a simplified version of the text:\n", - "\n", - "---\n", - "\n", - "**Title:** Introduction to Foundry Local\n", - "\n", - "**Overview:** Foundry Local is a design focused on optimizing AI model inference on local devices. This guide explores the core components of Foundry Local and their interactions.\n", - "\n", - "**Key Components**:\n", - "- Built-in System Platform (OSX)\n", - "- REST Server Framework (API)\n", - "- Local Execution Provider\n", - "- Model Manager\n", - "- Cloud Connectivity Framework\n", - "\n", - "### Foundry Local Services Overview\n", - "\n", - "- Endpoint: http://localhost:PORT/v1 \n", - "- Use Case: Run Models Locally, Access the Local Executor.\n", - "- ONNX Runtime: Utilizes optimized ONNX models to support local inference.\n", - "\n", - "### ONNX Runtime\n", - "\n", - "- Supported by Multiple Providers: NVIDIA, AMD, Intel (supported by OSLC).\n", - "- Provides Unified Interface for All Providers.\n", - "\n", - "### Model Management\n", - "- Model Cache (local storage): Automatically generated when models are downloaded from the OSX platform.\n", - "- TTL for Memory Storage: Determines how long models" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "answer" - ] - }, - { - "cell_type": "markdown", - "id": "be4a335e", - "metadata": {}, - "source": [ - "### Display RAG Response\n", - "\n", - "Displaying the final answer generated by the RAG system, which should contain information about Foundry Local based on the ingested document." 
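The query path this demo walks through (embed the question, fetch the top five chunks, hand the concatenated context to the chat endpoint) looks like this in Python, reusing the `generate_embedding` placeholder and the client assumptions from the sketches above:

```python
from openai import OpenAI
from qdrant_client import QdrantClient


def answer_question(qdrant: QdrantClient, llm: OpenAI, question: str) -> str:
    hits = qdrant.search(
        collection_name="demodocs",
        query_vector=generate_embedding(question),
        limit=5,
    )
    context = "".join(hit.payload["text"] for hit in hits)
    response = llm.chat.completions.create(
        model="qwen2.5-0.5b-instruct-generic-gpu",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that answers questions "
                           "based on the provided context.",
            },
            {"role": "user", "content": f"Question: {question}\nContext: {context}"},
        ],
    )
    return response.choices[0].message.content or "I couldn't generate a response."
```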
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".NET (C#)", - "language": "C#", - "name": ".net-csharp" - }, - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelInfo": { - "defaultKernelName": "csharp", - "items": [ - { - "aliases": [], - "name": "csharp" - } - ] - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/samples/rust/.cargo/config.toml b/samples/rust/.cargo/config.toml new file mode 100644 index 00000000..84c57445 --- /dev/null +++ b/samples/rust/.cargo/config.toml @@ -0,0 +1,7 @@ +[registries] + +[source.crates-io] +replace-with = "ORT-Nightly" + +[source.ORT-Nightly] +registry = "sparse+https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/Cargo/index/" diff --git a/samples/rust/Cargo.toml b/samples/rust/Cargo.toml index bdc9ee44..42d1293f 100644 --- a/samples/rust/Cargo.toml +++ b/samples/rust/Cargo.toml @@ -4,5 +4,9 @@ members = [ "tool-calling-foundry-local", "native-chat-completions", "audio-transcription-example", + "tutorial-chat-assistant", + "tutorial-document-summarizer", + "tutorial-tool-calling", + "tutorial-voice-to-text", ] resolver = "2" diff --git a/samples/rust/audio-transcription-example/Recording.mp3 b/samples/rust/audio-transcription-example/Recording.mp3 new file mode 100644 index 00000000..deb38418 Binary files /dev/null and b/samples/rust/audio-transcription-example/Recording.mp3 differ diff --git a/samples/rust/audio-transcription-example/src/main.rs b/samples/rust/audio-transcription-example/src/main.rs index 6f9b3e9e..c326006f 100644 --- a/samples/rust/audio-transcription-example/src/main.rs +++ b/samples/rust/audio-transcription-example/src/main.rs @@ -1,11 +1,14 @@ +// // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +// use std::env; use std::io::{self, Write}; use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; use tokio_stream::StreamExt; +// const ALIAS: &str = "whisper-tiny"; @@ -14,16 +17,18 @@ async fn main() -> Result<(), Box> { println!("Audio Transcription Example"); println!("===========================\n"); - // Accept an audio file path as a CLI argument. - let audio_path = env::args().nth(1).unwrap_or_else(|| { - eprintln!("Usage: cargo run -- "); - std::process::exit(1); - }); + // Accept an optional audio file path as a CLI argument, defaulting to Recording.mp3. + let audio_path = env::args() + .nth(1) + .unwrap_or_else(|| "Recording.mp3".to_string()); // ── 1. Initialise the manager ──────────────────────────────────────── + // let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?; + // - // ── 2. Pick the whisper model and ensure it is downloaded ──────────── + // ── 2. Pick the whispermodel and ensure it is downloaded ──────────── + // let model = manager.catalog().get_model(ALIAS).await?; println!("Model: {} (id: {})", model.alias(), model.id()); @@ -41,8 +46,10 @@ async fn main() -> Result<(), Box> { println!("Loading model..."); model.load().await?; println!("✓ Model loaded\n"); + // - // ── 3. Create an audio client ──────────────────────────────────────── + // + // ── 3. Create an audio client──────────────────────────────────────── let audio_client = model.create_audio_client(); // ── 4. Non-streaming transcription ─────────────────────────────────── @@ -60,11 +67,15 @@ async fn main() -> Result<(), Box> { io::stdout().flush().ok(); } println!("\n"); + // // ── 6. 
 // ── 6. Unload the model──────────────────────────────────────────────
+ //
 println!("Unloading model...");
 model.unload().await?;
 println!("Done.");
+ //
 Ok(())
 }
+//
diff --git a/samples/rust/foundry-local-webserver/src/main.rs b/samples/rust/foundry-local-webserver/src/main.rs
index d8cf0b44..492cbbc1 100644
--- a/samples/rust/foundry-local-webserver/src/main.rs
+++ b/samples/rust/foundry-local-webserver/src/main.rs
@@ -1,3 +1,4 @@
+//
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
@@ -8,20 +9,25 @@
 //! when you want to use the OpenAI REST API directly or integrate with tools
 //! that expect an OpenAI-compatible endpoint.
+//
 use std::io::{self, Write};
 use serde_json::json;
 use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
+//
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
 // ── 1. Initialise the SDK ────────────────────────────────────────────
+ //
 println!("Initializing Foundry Local SDK...");
 let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
 println!("✓ SDK initialized");
+ //
 // ── 2. Download and load a model ─────────────────────────────────────
+ //
 let model_alias = "qwen2.5-0.5b";
 let model = manager.catalog().get_model(model_alias).await?;
@@ -39,8 +45,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 print!("Loading model {model_alias}...");
 model.load().await?;
 println!("done.");
+ //
- // ── 3. Start the web service ─────────────────────────────────────────
+ //
+ // ── 3. Start the web service ─────────────────────────────────────────
 print!("Starting web service...");
 manager.start_web_service().await?;
 println!("done.");
@@ -90,6 +98,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 }
 }
 println!();
+ //
 // ── 5. Clean up ──────────────────────────────────────────────────────
 println!("\nStopping web service...");
@@ -101,3 +110,4 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 println!("✓ Done.");
 Ok(())
 }
+//
diff --git a/samples/rust/native-chat-completions/src/main.rs b/samples/rust/native-chat-completions/src/main.rs
index 2e2d2d23..04d09372 100644
--- a/samples/rust/native-chat-completions/src/main.rs
+++ b/samples/rust/native-chat-completions/src/main.rs
@@ -1,6 +1,8 @@
+//
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
+//
 use std::io::{self, Write};
 use foundry_local_sdk::{
@@ -8,6 +10,7 @@ use foundry_local_sdk::{
 ChatCompletionRequestUserMessage, FoundryLocalConfig, FoundryLocalManager,
 };
 use tokio_stream::StreamExt;
+//
 const ALIAS: &str = "qwen2.5-0.5b";
@@ -17,9 +20,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 println!("=======================\n");
 // ── 1. Initialise the manager ────────────────────────────────────────
+ //
 let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
+ //
- // ── 2. Pick a model and ensure it is downloaded ──────────────────────
+ // ── 2. Pick a model and ensure it is downloaded ──────────────────────
+ //
 let model = manager.catalog().get_model(ALIAS).await?;
 println!("Model: {} (id: {})", model.alias(), model.id());
@@ -37,13 +43,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 println!("Loading model...");
 model.load().await?;
 println!("✓ Model loaded\n");
+ //
- // ── 3. Create a chat client ──────────────────────────────────────────
+ // ── 3. Create a chat client ──────────────────────────────────────────
+ //
 let client = model.create_chat_client()
 .temperature(0.7)
 .max_tokens(256);
+ //
- // ── 4. Non-streaming chat completion ─────────────────────────────────
+ // ── 4. Non-streaming chat completion ─────────────────────────────────
+ //
 let messages: Vec<ChatCompletionRequestMessage> = vec![
 ChatCompletionRequestSystemMessage::from("You are a helpful assistant.").into(),
 ChatCompletionRequestUserMessage::from("What is Rust's ownership model?").into(),
@@ -56,8 +66,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 println!("Assistant: {content}");
 }
 }
+ //
- // ── 5. Streaming chat completion ─────────────────────────────────────
+ // ── 5. Streaming chat completion ─────────────────────────────────────
+ //
 let stream_messages: Vec<ChatCompletionRequestMessage> = vec![
 ChatCompletionRequestSystemMessage::from("You are a helpful assistant.").into(),
 ChatCompletionRequestUserMessage::from("Explain the borrow checker in two sentences.")
 .into(),
@@ -79,11 +91,15 @@
 }
 }
 println!("\n");
+ //
- // ── 6. Unload the model──────────────────────────────────────────────
+ // ── 6. Unload the model──────────────────────────────────────────────
+ //
 println!("Unloading model...");
 model.unload().await?;
 println!("Done.");
+ //
 Ok(())
 }
+//
diff --git a/samples/rust/tool-calling-foundry-local/src/main.rs b/samples/rust/tool-calling-foundry-local/src/main.rs
index 9a144500..1ccda1e8 100644
--- a/samples/rust/tool-calling-foundry-local/src/main.rs
+++ b/samples/rust/tool-calling-foundry-local/src/main.rs
@@ -1,6 +1,8 @@
+//
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
+//
 use std::io::{self, Write};
 use serde_json::{json, Value};
@@ -11,11 +13,13 @@ use foundry_local_sdk::{
 ChatCompletionRequestToolMessage, ChatCompletionRequestUserMessage, ChatCompletionTools,
 ChatToolChoice, FinishReason, FoundryLocalConfig, FoundryLocalManager,
 };
+//
 // By using an alias, the most suitable model variant will be downloaded
 // to your end-user's device.
 const ALIAS: &str = "qwen2.5-0.5b";
+//
 /// A simple tool that multiplies two numbers.
 fn multiply_numbers(first: f64, second: f64) -> f64 {
 first * second
@@ -33,6 +37,7 @@ fn invoke_tool(name: &str, args: &Value) -> String {
 _ => format!("Unknown tool: {name}"),
 }
 }
+//
 /// Accumulated state from a streaming response that contains tool calls.
 #[derive(Default)]
@@ -49,9 +54,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 println!("===============================\n");
 // ── 1. Initialise the manager ────────────────────────────────────────
+ //
 let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
+ //
- // ── 2. Load a model ──────────────────────────────────────────────────
+ // ── 2. Load a model ──────────────────────────────────────────────────
+ //
 let model = manager.catalog().get_model(ALIAS).await?;
 println!("Model: {} (id: {})", model.alias(), model.id());
@@ -69,12 +77,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 println!("Loading model...");
 model.load().await?;
 println!("✓ Model loaded\n");
+ //
- // ── 3. Create a chat client with tool_choice = required ──────────────
+ // ── 3. Create a chat client with tool_choice = required ──────────────
 let client = model.create_chat_client()
 .max_tokens(512)
 .tool_choice(ChatToolChoice::Required);
+ //
 // Define the multiply_numbers tool.
 let tools: Vec<ChatCompletionTools> = serde_json::from_value(json!([{
 "type": "function",
 "function": {
 "name": "multiply_numbers",
@@ -97,7 +107,9 @@
 }
 }
 }]))?;
+ //
+
+ //
 // Prepare the initial conversation.
let mut messages: Vec = vec![ ChatCompletionRequestSystemMessage::from( @@ -210,11 +222,15 @@ async fn main() -> Result<(), Box> { } } println!("\n"); + // // ── 7. Clean up────────────────────────────────────────────────────── + // println!("Unloading model..."); model.unload().await?; println!("Done."); + // Ok(()) } +// diff --git a/samples/rust/tutorial-chat-assistant/Cargo.toml b/samples/rust/tutorial-chat-assistant/Cargo.toml new file mode 100644 index 00000000..83c7d237 --- /dev/null +++ b/samples/rust/tutorial-chat-assistant/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "tutorial-chat-assistant" +version = "0.1.0" +edition = "2021" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust" } +tokio = { version = "1", features = ["full"] } +tokio-stream = "0.1" +anyhow = "1" +serde_json = "1" diff --git a/samples/rust/tutorial-chat-assistant/src/main.rs b/samples/rust/tutorial-chat-assistant/src/main.rs new file mode 100644 index 00000000..6b0b587b --- /dev/null +++ b/samples/rust/tutorial-chat-assistant/src/main.rs @@ -0,0 +1,102 @@ +// +// +use foundry_local_sdk::{ + ChatCompletionRequestMessage, + ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage, + FoundryLocalConfig, FoundryLocalManager, +}; +use std::io::{self, BufRead, Write}; +use tokio_stream::StreamExt; +// + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // + // Initialize the Foundry Local SDK + let manager = FoundryLocalManager::create(FoundryLocalConfig::new("chat-assistant"))?; + + // Select and load a model from the catalog + let model = manager.catalog().get_model("qwen2.5-0.5b").await?; + + if !model.is_cached().await? { + println!("Downloading model..."); + model + .download(Some(|progress: &str| { + print!("\r {progress}"); + io::stdout().flush().ok(); + })) + .await?; + println!(); + } + + model.load().await?; + println!("Model loaded and ready."); + + // Create a chat client + let client = model.create_chat_client().temperature(0.7).max_tokens(512); + // + + // + // Start the conversation with a system prompt + let mut messages: Vec = vec![ + ChatCompletionRequestSystemMessage::from( + "You are a helpful, friendly assistant. Keep your responses \ + concise and conversational. If you don't know something, say so.", + ) + .into(), + ]; + // + + println!("\nChat assistant ready! 
Type 'quit' to exit.\n"); + + let stdin = io::stdin(); + // + loop { + print!("You: "); + io::stdout().flush()?; + + let mut input = String::new(); + stdin.lock().read_line(&mut input)?; + let input = input.trim(); + + if input.eq_ignore_ascii_case("quit") || input.eq_ignore_ascii_case("exit") { + break; + } + + // Add the user's message to conversation history + messages.push(ChatCompletionRequestUserMessage::from(input).into()); + + // + // Stream the response token by token + print!("Assistant: "); + io::stdout().flush()?; + let mut full_response = String::new(); + let mut stream = client.complete_streaming_chat(&messages, None).await?; + while let Some(chunk) = stream.next().await { + let chunk = chunk?; + if let Some(choice) = chunk.choices.first() { + if let Some(ref content) = choice.delta.content { + print!("{content}"); + io::stdout().flush()?; + full_response.push_str(content); + } + } + } + println!("\n"); + // + + // Add the complete response to conversation history + let assistant_msg: ChatCompletionRequestMessage = serde_json::from_value( + serde_json::json!({"role": "assistant", "content": full_response}), + )?; + messages.push(assistant_msg); + } + // + + // Clean up - unload the model + model.unload().await?; + println!("Model unloaded. Goodbye!"); + + Ok(()) +} +// diff --git a/samples/rust/tutorial-document-summarizer/Cargo.toml b/samples/rust/tutorial-document-summarizer/Cargo.toml new file mode 100644 index 00000000..cdf77fb7 --- /dev/null +++ b/samples/rust/tutorial-document-summarizer/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "tutorial-document-summarizer" +version = "0.1.0" +edition = "2021" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust" } +tokio = { version = "1", features = ["full"] } +tokio-stream = "0.1" +anyhow = "1" diff --git a/samples/rust/tutorial-document-summarizer/src/main.rs b/samples/rust/tutorial-document-summarizer/src/main.rs new file mode 100644 index 00000000..9ade2e77 --- /dev/null +++ b/samples/rust/tutorial-document-summarizer/src/main.rs @@ -0,0 +1,157 @@ +// +// +use foundry_local_sdk::{ + ChatCompletionRequestMessage, + ChatCompletionRequestSystemMessage, + ChatCompletionRequestUserMessage, FoundryLocalConfig, + FoundryLocalManager, +}; +use std::io::{self, Write}; +use std::path::Path; +use std::{env, fs}; +// + +async fn summarize_file( + client: &foundry_local_sdk::openai::ChatClient, + file_path: &Path, + system_prompt: &str, +) -> anyhow::Result<()> { + let content = fs::read_to_string(file_path)?; + let messages: Vec = vec![ + ChatCompletionRequestSystemMessage::from(system_prompt) + .into(), + ChatCompletionRequestUserMessage::from(content.as_str()) + .into(), + ]; + + let response = + client.complete_chat(&messages, None).await?; + let summary = response.choices[0] + .message + .content + .as_deref() + .unwrap_or(""); + println!("{}", summary); + Ok(()) +} + +async fn summarize_directory( + client: &foundry_local_sdk::openai::ChatClient, + directory: &Path, + system_prompt: &str, +) -> anyhow::Result<()> { + let mut txt_files: Vec<_> = fs::read_dir(directory)? 
+ .filter_map(|entry| entry.ok()) + .filter(|entry| { + entry + .path() + .extension() + .map(|ext| ext == "txt") + .unwrap_or(false) + }) + .collect(); + + txt_files.sort_by_key(|e| e.path()); + + if txt_files.is_empty() { + println!( + "No .txt files found in {}", + directory.display() + ); + return Ok(()); + } + + for entry in &txt_files { + let file_name = entry.file_name(); + println!( + "--- {} ---", + file_name.to_string_lossy() + ); + summarize_file( + client, + &entry.path(), + system_prompt, + ) + .await?; + println!(); + } + + Ok(()) +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // + // Initialize the Foundry Local SDK + let manager = FoundryLocalManager::create( + FoundryLocalConfig::new("doc-summarizer"), + )?; + + // Select and load a model from the catalog + let model = manager + .catalog() + .get_model("qwen2.5-0.5b") + .await?; + + if !model.is_cached().await? { + println!("Downloading model..."); + model + .download(Some(|progress: &str| { + print!("\r {progress}"); + io::stdout().flush().ok(); + })) + .await?; + println!(); + } + + model.load().await?; + println!("Model loaded and ready.\n"); + + // Create a chat client + let client = model + .create_chat_client() + .temperature(0.7) + .max_tokens(512); + // + + // + let system_prompt = "Summarize the following document \ + into concise bullet points. Focus on the key \ + points and main ideas."; + + // + let target = env::args() + .nth(1) + .unwrap_or_else(|| "document.txt".to_string()); + let target_path = Path::new(&target); + // + + if target_path.is_dir() { + summarize_directory( + &client, + target_path, + system_prompt, + ) + .await?; + } else { + let file_name = target_path + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_else(|| target.clone()); + println!("--- {} ---", file_name); + summarize_file( + &client, + target_path, + system_prompt, + ) + .await?; + } + // + + // Clean up + model.unload().await?; + println!("\nModel unloaded. 
Done!"); + + Ok(()) +} +// diff --git a/samples/rust/tutorial-tool-calling/Cargo.toml b/samples/rust/tutorial-tool-calling/Cargo.toml new file mode 100644 index 00000000..2de3d740 --- /dev/null +++ b/samples/rust/tutorial-tool-calling/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "tutorial-tool-calling" +version = "0.1.0" +edition = "2021" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust" } +tokio = { version = "1", features = ["full"] } +tokio-stream = "0.1" +anyhow = "1" +serde_json = "1" diff --git a/samples/rust/tutorial-tool-calling/src/main.rs b/samples/rust/tutorial-tool-calling/src/main.rs new file mode 100644 index 00000000..f4476643 --- /dev/null +++ b/samples/rust/tutorial-tool-calling/src/main.rs @@ -0,0 +1,330 @@ +// +// +use foundry_local_sdk::{ + ChatCompletionRequestMessage, + ChatCompletionRequestSystemMessage, + ChatCompletionRequestToolMessage, + ChatCompletionRequestUserMessage, + ChatCompletionMessageToolCalls, + ChatCompletionTools, ChatToolChoice, + FoundryLocalConfig, FoundryLocalManager, +}; +use serde_json::{json, Value}; +use std::io::{self, BufRead, Write}; +// + +// +// --- Tool implementations --- +fn execute_tool( + name: &str, + arguments: &Value, +) -> Value { + match name { + "get_weather" => { + let location = arguments["location"] + .as_str() + .unwrap_or("unknown"); + let unit = arguments["unit"] + .as_str() + .unwrap_or("celsius"); + let temp = if unit == "celsius" { 22 } else { 72 }; + json!({ + "location": location, + "temperature": temp, + "unit": unit, + "condition": "Sunny" + }) + } + "calculate" => { + let expression = arguments["expression"] + .as_str() + .unwrap_or(""); + let is_valid = expression + .chars() + .all(|c| "0123456789+-*/(). ".contains(c)); + if !is_valid { + return json!({"error": "Invalid expression"}); + } + match eval_expression(expression) { + Ok(result) => json!({ + "expression": expression, + "result": result + }), + Err(e) => json!({"error": e}), + } + } + _ => json!({"error": format!("Unknown function: {}", name)}), + } +} + +fn eval_expression(expr: &str) -> Result { + let expr = expr.replace(' ', ""); + let chars: Vec = expr.chars().collect(); + let mut pos = 0; + let result = parse_add(&chars, &mut pos)?; + if pos < chars.len() { + return Err("Unexpected character".to_string()); + } + Ok(result) +} + +fn parse_add( + chars: &[char], + pos: &mut usize, +) -> Result { + let mut result = parse_mul(chars, pos)?; + while *pos < chars.len() + && (chars[*pos] == '+' || chars[*pos] == '-') + { + let op = chars[*pos]; + *pos += 1; + let right = parse_mul(chars, pos)?; + result = if op == '+' { + result + right + } else { + result - right + }; + } + Ok(result) +} + +fn parse_mul( + chars: &[char], + pos: &mut usize, +) -> Result { + let mut result = parse_atom(chars, pos)?; + while *pos < chars.len() + && (chars[*pos] == '*' || chars[*pos] == '/') + { + let op = chars[*pos]; + *pos += 1; + let right = parse_atom(chars, pos)?; + result = if op == '*' { + result * right + } else { + result / right + }; + } + Ok(result) +} + +fn parse_atom( + chars: &[char], + pos: &mut usize, +) -> Result { + if *pos < chars.len() && chars[*pos] == '(' { + *pos += 1; + let result = parse_add(chars, pos)?; + if *pos < chars.len() && chars[*pos] == ')' { + *pos += 1; + } + return Ok(result); + } + let start = *pos; + while *pos < chars.len() + && (chars[*pos].is_ascii_digit() || chars[*pos] == '.') + { + *pos += 1; + } + if start == *pos { + return Err("Expected number".to_string()); + } + let num_str: String = 
chars[start..*pos].iter().collect(); + num_str.parse::().map_err(|e| e.to_string()) +} +// + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // + // --- Tool definitions --- + let tools: Vec = serde_json::from_value(json!([ + { + "type": "function", + "function": { + "name": "get_weather", + "description": + "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": + "The city or location" + }, + "unit": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "Temperature unit" + } + }, + "required": ["location"] + } + } + }, + { + "type": "function", + "function": { + "name": "calculate", + "description": "Perform a math calculation", + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": + "The math expression to evaluate" + } + }, + "required": ["expression"] + } + } + } + ]))?; + // + + // + // Initialize the Foundry Local SDK + let manager = FoundryLocalManager::create( + FoundryLocalConfig::new("tool-calling-app"), + )?; + + // Select and load a model + let model = manager + .catalog() + .get_model("qwen2.5-0.5b") + .await?; + + if !model.is_cached().await? { + println!("Downloading model..."); + model + .download(Some(|progress: &str| { + print!("\r {progress}"); + io::stdout().flush().ok(); + })) + .await?; + println!(); + } + + model.load().await?; + println!("Model loaded and ready."); + + // Create a chat client + let client = model + .create_chat_client() + .temperature(0.7) + .max_tokens(512) + .tool_choice(ChatToolChoice::Auto); + + // Conversation with a system prompt + let mut messages: Vec = vec![ + ChatCompletionRequestSystemMessage::from( + "You are a helpful assistant with access to tools. \ + Use them when needed to answer questions accurately.", + ) + .into(), + ]; + // + + // + println!( + "\nTool-calling assistant ready! 
Type 'quit' to exit.\n" + ); + + let stdin = io::stdin(); + loop { + print!("You: "); + io::stdout().flush()?; + + let mut input = String::new(); + stdin.lock().read_line(&mut input)?; + let input = input.trim(); + + if input.eq_ignore_ascii_case("quit") + || input.eq_ignore_ascii_case("exit") + { + break; + } + + messages.push( + ChatCompletionRequestUserMessage::from(input).into(), + ); + + let mut response = client + .complete_chat(&messages, Some(&tools)) + .await?; + + // Process tool calls in a loop + while response.choices[0].message.tool_calls.is_some() { + let tool_calls = response.choices[0] + .message + .tool_calls + .as_ref() + .unwrap(); + + // Append the assistant's tool_calls message via JSON + let assistant_msg: ChatCompletionRequestMessage = + serde_json::from_value(json!({ + "role": "assistant", + "content": null, + "tool_calls": tool_calls, + }))?; + messages.push(assistant_msg); + + for tc_enum in tool_calls { + let tool_call = match tc_enum { + ChatCompletionMessageToolCalls::Function( + tc, + ) => tc, + _ => continue, + }; + let function_name = + &tool_call.function.name; + let arguments: Value = + serde_json::from_str( + &tool_call.function.arguments, + )?; + println!( + " Tool call: {}({})", + function_name, arguments + ); + + let result = + execute_tool(function_name, &arguments); + messages.push( + ChatCompletionRequestToolMessage { + content: result.to_string().into(), + tool_call_id: tool_call.id.clone(), + } + .into(), + ); + } + + response = client + .complete_chat(&messages, Some(&tools)) + .await?; + } + + let answer = response.choices[0] + .message + .content + .as_deref() + .unwrap_or(""); + let assistant_msg: ChatCompletionRequestMessage = + serde_json::from_value(json!({ + "role": "assistant", + "content": answer, + }))?; + messages.push(assistant_msg); + println!("Assistant: {}\n", answer); + } + + // Clean up + model.unload().await?; + println!("Model unloaded. Goodbye!"); + // + + Ok(()) +} +// diff --git a/samples/rust/tutorial-voice-to-text/Cargo.toml b/samples/rust/tutorial-voice-to-text/Cargo.toml new file mode 100644 index 00000000..35ec4fc4 --- /dev/null +++ b/samples/rust/tutorial-voice-to-text/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "tutorial-voice-to-text" +version = "0.1.0" +edition = "2021" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust" } +tokio = { version = "1", features = ["full"] } +tokio-stream = "0.1" +anyhow = "1" diff --git a/samples/rust/tutorial-voice-to-text/src/main.rs b/samples/rust/tutorial-voice-to-text/src/main.rs new file mode 100644 index 00000000..2295c86a --- /dev/null +++ b/samples/rust/tutorial-voice-to-text/src/main.rs @@ -0,0 +1,110 @@ +// +// +use foundry_local_sdk::{ + ChatCompletionRequestMessage, + ChatCompletionRequestSystemMessage, + ChatCompletionRequestUserMessage, + FoundryLocalConfig, FoundryLocalManager, +}; +use std::io::{self, Write}; +// + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // + // Initialize the Foundry Local SDK + let manager = FoundryLocalManager::create( + FoundryLocalConfig::new("note-taker"), + )?; + // + + // + // Load the speech-to-text model + let speech_model = manager + .catalog() + .get_model("whisper-tiny") + .await?; + + if !speech_model.is_cached().await? 
{ + println!("Downloading speech model..."); + speech_model + .download(Some(|progress: &str| { + print!("\r {progress}"); + io::stdout().flush().ok(); + })) + .await?; + println!(); + } + + speech_model.load().await?; + println!("Speech model loaded."); + + // Transcribe the audio file + let audio_client = speech_model.create_audio_client(); + let transcription = audio_client + .transcribe("meeting-notes.wav") + .await?; + println!("\nTranscription:\n{}", transcription.text); + + // Unload the speech model to free memory + speech_model.unload().await?; + // + + // + // Load the chat model for summarization + let chat_model = manager + .catalog() + .get_model("qwen2.5-0.5b") + .await?; + + if !chat_model.is_cached().await? { + println!("Downloading chat model..."); + chat_model + .download(Some(|progress: &str| { + print!("\r {progress}"); + io::stdout().flush().ok(); + })) + .await?; + println!(); + } + + chat_model.load().await?; + println!("Chat model loaded."); + + // Summarize the transcription into organized notes + let client = chat_model + .create_chat_client() + .temperature(0.7) + .max_tokens(512); + + let messages: Vec = vec![ + ChatCompletionRequestSystemMessage::from( + "You are a note-taking assistant. Summarize \ + the following transcription into organized, \ + concise notes with bullet points.", + ) + .into(), + ChatCompletionRequestUserMessage::from( + transcription.text.as_str(), + ) + .into(), + ]; + + let response = client + .complete_chat(&messages, None) + .await?; + let summary = response.choices[0] + .message + .content + .as_deref() + .unwrap_or(""); + println!("\nSummary:\n{}", summary); + + // Clean up + chat_model.unload().await?; + println!("\nDone. Models unloaded."); + // + + Ok(()) +} +// diff --git a/sdk/cs/NuGet.config b/sdk/cs/NuGet.config index 294478a7..420497e9 100644 --- a/sdk/cs/NuGet.config +++ b/sdk/cs/NuGet.config @@ -2,7 +2,6 @@ - diff --git a/sdk/cs/README.md b/sdk/cs/README.md index f58e41e0..846c5bf7 100644 --- a/sdk/cs/README.md +++ b/sdk/cs/README.md @@ -48,7 +48,10 @@ dotnet build src/Microsoft.AI.Foundry.Local.csproj /p:UseWinML=true ### Triggering EP download -EP download can be time-consuming. Call `EnsureEpsDownloadedAsync` early (after initialization) to separate the download step from catalog access: +EP management is explicit via two methods: + +- **`DiscoverEps()`** — returns an array of `EpInfo` describing each available EP and whether it is already registered. +- **`DownloadAndRegisterEpsAsync(names?, progressCallback?, ct?)`** — downloads and registers the specified EPs (or all available EPs if no names are given). Returns an `EpDownloadResult`. Overloads are provided so you can pass just a callback without specifying names. 
```csharp
// Initialize the manager first (see Quick Start)
@@ -56,13 +59,46 @@ await FoundryLocalManager.CreateAsync(
 new Configuration { AppName = "my-app" },
 NullLogger.Instance);

-await FoundryLocalManager.Instance.EnsureEpsDownloadedAsync();
+var mgr = FoundryLocalManager.Instance;

-// Now catalog access won't trigger an EP download
-var catalog = await FoundryLocalManager.Instance.GetCatalogAsync();
+// Discover what EPs are available
+var eps = mgr.DiscoverEps();
+foreach (var ep in eps)
+{
+ Console.WriteLine($"{ep.Name} — registered: {ep.IsRegistered}");
+}
+
+// Download and register all EPs
+var result = await mgr.DownloadAndRegisterEpsAsync();
+Console.WriteLine($"Success: {result.Success}, Status: {result.Status}");
+
+// Or download only specific EPs
+var result2 = await mgr.DownloadAndRegisterEpsAsync(new[] { eps[0].Name });
+```
+
+#### Per-EP download progress
+
+Pass an optional `Action<string, double>` callback to receive `(epName, percent)` updates
+as each EP downloads (`percent` is 0–100):
+
+```csharp
+string currentEp = "";
+await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
+{
+ if (epName != currentEp)
+ {
+ if (currentEp != "")
+ {
+ Console.WriteLine();
+ }
+ currentEp = epName;
+ }
+ Console.Write($"\r {epName} {percent,6:F1}%");
+});
+Console.WriteLine();
```

-If you skip this step, EPs are downloaded automatically the first time you access the catalog. Once cached, subsequent calls are fast.
+Catalog access no longer blocks on EP downloads. Call `DownloadAndRegisterEpsAsync` explicitly when you need hardware-accelerated execution providers.

## Quick Start

@@ -142,11 +178,11 @@ var loaded = await catalog.GetLoadedModelsAsync();

### Model Lifecycle

-Each `Model` wraps one or more `ModelVariant` entries (different quantizations, hardware targets). The SDK auto-selects the best variant, or you can pick one:
+Each model may have multiple variants (different quantizations, hardware targets). The SDK auto-selects the best variant, or you can pick one. All models implement the `IModel` interface.
```csharp // Check and select variants -Console.WriteLine($"Selected: {model.SelectedVariant.Id}"); +Console.WriteLine($"Selected: {model.Id}"); foreach (var v in model.Variants) Console.WriteLine($" {v.Id} (cached: {await v.IsCachedAsync()})"); @@ -293,8 +329,8 @@ Key types: | [`FoundryLocalManager`](./docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md) | Singleton entry point — create, catalog, web service | | [`Configuration`](./docs/api/microsoft.ai.foundry.local.configuration.md) | Initialization settings | | [`ICatalog`](./docs/api/microsoft.ai.foundry.local.icatalog.md) | Model catalog interface | -| [`Model`](./docs/api/microsoft.ai.foundry.local.model.md) | Model with variant selection | -| [`ModelVariant`](./docs/api/microsoft.ai.foundry.local.modelvariant.md) | Specific model variant (hardware/quantization) | +| [`IModel`](./docs/api/microsoft.ai.foundry.local.imodel.md) | Model interface — identity, metadata, lifecycle, variant selection | +| [`Model`](./docs/api/microsoft.ai.foundry.local.model.md) | Model with variant selection (implements `IModel`) | | [`OpenAIChatClient`](./docs/api/microsoft.ai.foundry.local.openaichatclient.md) | Chat completions (sync + streaming) | | [`OpenAIAudioClient`](./docs/api/microsoft.ai.foundry.local.openaiaudioclient.md) | Audio transcription (sync + streaming) | | [`ModelInfo`](./docs/api/microsoft.ai.foundry.local.modelinfo.md) | Full model metadata record | diff --git a/sdk/cs/docs/api/index.md b/sdk/cs/docs/api/index.md index 1dcc4e4c..4d084f87 100644 --- a/sdk/cs/docs/api/index.md +++ b/sdk/cs/docs/api/index.md @@ -6,6 +6,10 @@ [DeviceType](./microsoft.ai.foundry.local.devicetype.md) +[EpDownloadResult](./microsoft.ai.foundry.local.epdownloadresult.md) + +[EpInfo](./microsoft.ai.foundry.local.epinfo.md) + [FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md) [FoundryLocalManager](./microsoft.ai.foundry.local.foundrylocalmanager.md) @@ -22,8 +26,6 @@ [ModelSettings](./microsoft.ai.foundry.local.modelsettings.md) -[ModelVariant](./microsoft.ai.foundry.local.modelvariant.md) - [OpenAIAudioClient](./microsoft.ai.foundry.local.openaiaudioclient.md) [OpenAIChatClient](./microsoft.ai.foundry.local.openaichatclient.md) @@ -39,3 +41,11 @@ [AsyncLock](./microsoft.ai.foundry.local.detail.asynclock.md) [CoreInteropRequest](./microsoft.ai.foundry.local.detail.coreinteroprequest.md) + +## Microsoft.AI.Foundry.Local.OpenAI + +[LiveAudioTranscriptionResponse](./microsoft.ai.foundry.local.openai.liveaudiotranscriptionresponse.md) + +[LiveAudioTranscriptionSession](./microsoft.ai.foundry.local.openai.liveaudiotranscriptionsession.md) + +[ResponseFormatExtended](./microsoft.ai.foundry.local.openai.responseformatextended.md) diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.epdownloadresult.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.epdownloadresult.md new file mode 100644 index 00000000..c9ebeb82 --- /dev/null +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.epdownloadresult.md @@ -0,0 +1,59 @@ +# EpDownloadResult + +Namespace: Microsoft.AI.Foundry.Local + +Result of an explicit EP download and registration operation. + +```csharp +public record EpDownloadResult +``` + +## Properties + +### **Success** + +True if all requested EPs were successfully downloaded and registered. + +```csharp +public bool Success { get; set; } +``` + +#### Property Value + +[Boolean](https://docs.microsoft.com/en-us/dotnet/api/system.boolean)
+ +### **Status** + +Human-readable status message. + +```csharp +public string Status { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **RegisteredEps** + +Names of EPs that were successfully registered. + +```csharp +public String[] RegisteredEps { get; set; } +``` + +#### Property Value + +[String[]](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **FailedEps** + +Names of EPs that failed to register. + +```csharp +public String[] FailedEps { get; set; } +``` + +#### Property Value + +[String[]](https://docs.microsoft.com/en-us/dotnet/api/system.string)
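+
+## Example
+
+A minimal sketch of inspecting the result (`mgr` is assumed to be the initialized `FoundryLocalManager` instance; see the README):
+
+```csharp
+var result = await mgr.DownloadAndRegisterEpsAsync();
+if (!result.Success)
+{
+    Console.WriteLine($"EP registration incomplete: {result.Status}");
+    foreach (var ep in result.FailedEps)
+    {
+        Console.WriteLine($"  failed to register: {ep}");
+    }
+}
+```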
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md new file mode 100644 index 00000000..d2df44d3 --- /dev/null +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.epinfo.md @@ -0,0 +1,35 @@ +# EpInfo + +Namespace: Microsoft.AI.Foundry.Local + +Describes a discoverable execution provider bootstrapper. + +```csharp +public record EpInfo +``` + +## Properties + +### **Name** + +The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). + +```csharp +public string Name { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **IsRegistered** + +True if this EP has already been successfully downloaded and registered. + +```csharp +public bool IsRegistered { get; set; } +``` + +#### Property Value + +[Boolean](https://docs.microsoft.com/en-us/dotnet/api/system.boolean)
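+
+## Example
+
+A short sketch of listing the discovered EPs (`mgr` is assumed to be the initialized `FoundryLocalManager` instance):
+
+```csharp
+foreach (var ep in mgr.DiscoverEps())
+{
+    Console.WriteLine($"{ep.Name} (registered: {ep.IsRegistered})");
+}
+```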
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md index 93f162b7..5f1ba50e 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.foundrylocalmanager.md @@ -96,9 +96,9 @@ The model catalog. **Remarks:** -The catalog is populated on first use. - If you are using a WinML build this will trigger a one-off execution provider download if not already done. - It is recommended to call [FoundryLocalManager.EnsureEpsDownloadedAsync(Nullable<CancellationToken>)](./microsoft.ai.foundry.local.foundrylocalmanager.md#ensureepsdownloadedasyncnullablecancellationtoken) first to separate out the two steps. +The catalog is populated on first use and returns models based on currently available execution providers. + To ensure all hardware-accelerated models are listed, call [FoundryLocalManager.DownloadAndRegisterEpsAsync(Nullable<CancellationToken>)](./microsoft.ai.foundry.local.foundrylocalmanager.md#downloadandregisterepsasyncnullablecancellationtoken) first to + register execution providers, then access the catalog. ### **StartWebServiceAsync(Nullable<CancellationToken>)** @@ -141,17 +141,26 @@ Optional cancellation token. [Task](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task)
Task stopping the web service. -### **EnsureEpsDownloadedAsync(Nullable<CancellationToken>)** +### **DiscoverEps()** -Ensure execution providers are downloaded and registered. - Only relevant when using WinML. - - Execution provider download can be time consuming due to the size of the packages. - Once downloaded, EPs are not re-downloaded unless a new version is available, so this method will be fast - on subsequent calls. +Discovers all available execution provider bootstrappers. + Returns metadata about each EP including whether it is already registered. + +```csharp +public EpInfo[] DiscoverEps() +``` + +#### Returns + +[EpInfo[]](./microsoft.ai.foundry.local.epinfo.md)
+Array of EP bootstrapper info describing available EPs.
+
+### **DownloadAndRegisterEpsAsync(Nullable<CancellationToken>)**
+
+Downloads and registers all available execution providers.

```csharp
-public Task EnsureEpsDownloadedAsync(Nullable<CancellationToken> ct)
+public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(Nullable<CancellationToken> ct)
```

#### Parameters

@@ -161,7 +170,104 @@ Optional cancellation token.

#### Returns

-[Task](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task)
+[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed.
+
+**Remarks:**
+
+Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads.
+ After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs.
+
+### **DownloadAndRegisterEpsAsync(IEnumerable<String>, Nullable<CancellationToken>)**
+
+Downloads and registers the specified execution providers.
+
+```csharp
+public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(IEnumerable<string> names, Nullable<CancellationToken> ct)
+```
+
+#### Parameters
+
+`names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed.
+
+**Remarks:**
+
+Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads.
+ After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs.
+
+### **DownloadAndRegisterEpsAsync(Action<String, Double>, Nullable<CancellationToken>)**
+
+Downloads and registers all available execution providers, reporting progress.
+
+```csharp
+public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(Action<string, double> progressCallback, Nullable<CancellationToken> ct)
+```
+
+#### Parameters
+
+`progressCallback` [Action<String, Double>](https://docs.microsoft.com/en-us/dotnet/api/system.action-2)
+Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed.
+
+**Remarks:**
+
+Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads.
+ After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs.
+
+### **DownloadAndRegisterEpsAsync(IEnumerable<String>, Action<String, Double>, Nullable<CancellationToken>)**
+
+Downloads and registers the specified execution providers, reporting progress.
+
+```csharp
+public Task<EpDownloadResult> DownloadAndRegisterEpsAsync(IEnumerable<string> names, Action<string, double> progressCallback, Nullable<CancellationToken> ct)
+```
+
+#### Parameters
+
+`names` [IEnumerable<String>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Subset of EP bootstrapper names to download (as returned by [FoundryLocalManager.DiscoverEps()](./microsoft.ai.foundry.local.foundrylocalmanager.md#discovereps)). + +`progressCallback` [Action<String, Double>](https://docs.microsoft.com/en-us/dotnet/api/system.action-2)
+Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<EpDownloadResult>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Result describing which EPs succeeded and which failed.
+
+**Remarks:**
+
+Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads.
+ After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs.
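+
+A short sketch of that flow (the EP name, `mgr`, and the catalog accessor are illustrative; see the README for a complete example):
+
+```csharp
+var result = await mgr.DownloadAndRegisterEpsAsync(new[] { "CUDAExecutionProvider" }, null);
+if (result.Success)
+{
+    // Re-fetch the catalog so models that need the new EPs are listed.
+    var catalog = await mgr.GetCatalogAsync();
+}
+```
+
+### **Dispose(Boolean)**
+
+```csharp
+protected void Dispose(bool disposing)
+```
+
+#### Parameters
+
+`disposing` [Boolean](https://docs.microsoft.com/en-us/dotnet/api/system.boolean)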
### **Dispose()**

diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md
index dc68c173..6a3858b2 100644
--- a/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md
+++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.icatalog.md
@@ -29,7 +29,7 @@ public abstract string Name { get; }

List the available models in the catalog.

```csharp
-Task<List<Model>> ListModelsAsync(Nullable<CancellationToken> ct)
+Task<List<IModel>> ListModelsAsync(Nullable<CancellationToken> ct)
```

#### Parameters

@@ -39,15 +39,15 @@ Optional CancellationToken.

#### Returns

-[Task<List<Model>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-List of Model instances. +[Task<List<IModel>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+List of IModel instances.

### **GetModelAsync(String, Nullable<CancellationToken>)**

Lookup a model by its alias.

```csharp
-Task<Model> GetModelAsync(string modelAlias, Nullable<CancellationToken> ct)
+Task<IModel> GetModelAsync(string modelAlias, Nullable<CancellationToken> ct)
```

#### Parameters

@@ -60,15 +60,17 @@ Optional CancellationToken.

#### Returns

-[Task<Model>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-The matching Model, or null if no model with the given alias exists. +[Task<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+The matching IModel, or null if no model with the given alias exists.

### **GetModelVariantAsync(String, Nullable<CancellationToken>)**

Lookup a model variant by its unique model id.
+ NOTE: This will return an IModel with a single variant. Use GetModelAsync to get an IModel with all available
+ variants.

```csharp
-Task<ModelVariant> GetModelVariantAsync(string modelId, Nullable<CancellationToken> ct)
+Task<IModel> GetModelVariantAsync(string modelId, Nullable<CancellationToken> ct)
```

#### Parameters

@@ -81,15 +83,15 @@ Optional CancellationToken.

#### Returns

-[Task<ModelVariant>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-The matching ModelVariant, or null if no variant with the given id exists. +[Task<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+The matching IModel, or null if no variant with the given id exists.

### **GetCachedModelsAsync(Nullable<CancellationToken>)**

Get a list of currently downloaded models from the model cache.

```csharp
-Task<List<ModelVariant>> GetCachedModelsAsync(Nullable<CancellationToken> ct)
+Task<List<IModel>> GetCachedModelsAsync(Nullable<CancellationToken> ct)
```

#### Parameters

@@ -99,15 +101,15 @@ Optional CancellationToken.

#### Returns

-[Task<List<ModelVariant>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-List of ModelVariant instances. +[Task<List<IModel>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+List of IModel instances.

### **GetLoadedModelsAsync(Nullable<CancellationToken>)**

Get a list of the currently loaded models.

```csharp
-Task<List<ModelVariant>> GetLoadedModelsAsync(Nullable<CancellationToken> ct)
+Task<List<IModel>> GetLoadedModelsAsync(Nullable<CancellationToken> ct)
```

#### Parameters

@@ -117,5 +119,27 @@ Optional CancellationToken.

#### Returns

-[Task<List<ModelVariant>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-List of ModelVariant instances. +[Task<List<IModel>>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+List of IModel instances.
+
+### **GetLatestVersionAsync(IModel, Nullable<CancellationToken>)**
+
+Get the latest version of a model.
+ This is used to check if a newer version of a model is available in the catalog for download.
+
+```csharp
+Task<IModel> GetLatestVersionAsync(IModel model, Nullable<CancellationToken> ct)
+```
+
+#### Parameters
+
+`model` [IModel](./microsoft.ai.foundry.local.imodel.md)
+The model to check for the latest version. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional CancellationToken. + +#### Returns + +[Task<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+The latest version of the model. Will match the input if it is the latest version.
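+
+A minimal usage sketch (the alias and catalog accessor are illustrative; adapt them to your app):
+
+```csharp
+var catalog = await FoundryLocalManager.Instance.GetCatalogAsync();
+var model = await catalog.GetModelAsync("qwen2.5-0.5b");
+var latest = await catalog.GetLatestVersionAsync(model);
+if (latest.Id != model.Id)
+{
+    Console.WriteLine($"Newer version available: {latest.Id}");
+}
+```
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md
index d5d2b437..861386a8 100644
--- a/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md
+++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.imodel.md
@@ -30,6 +30,28 @@ public abstract string Alias { get; }

[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)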
+### **Info** + +```csharp +public abstract ModelInfo Info { get; } +``` + +#### Property Value + +[ModelInfo](./microsoft.ai.foundry.local.modelinfo.md)
+
+### **Variants**
+
+Variants of the model that are available. Variants are optimized for different devices.
+
+```csharp
+public abstract IReadOnlyList<IModel> Variants { get; }
+```
+
+#### Property Value
+
+[IReadOnlyList<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ireadonlylist-1)
+ ## Methods ### **IsCachedAsync(Nullable<CancellationToken>)** @@ -185,3 +207,22 @@ Optional cancellation token. [Task<OpenAIAudioClient>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
OpenAI.AudioClient + +### **SelectVariant(IModel)** + +Select a model variant from [IModel.Variants](./microsoft.ai.foundry.local.imodel.md#variants) to use for [IModel](./microsoft.ai.foundry.local.imodel.md) operations. + An IModel from `Variants` can also be used directly. + +```csharp +void SelectVariant(IModel variant) +``` + +#### Parameters + +`variant` [IModel](./microsoft.ai.foundry.local.imodel.md)
+Model variant to select. Must be one of the variants in [IModel.Variants](./microsoft.ai.foundry.local.imodel.md#variants). + +#### Exceptions + +[FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md)
+If variant is not valid for this model.
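+
+A short sketch of picking a specific variant (the selection criterion is illustrative; requires `using System.Linq;`):
+
+```csharp
+var model = await catalog.GetModelAsync("qwen2.5-0.5b");
+var cached = model.Variants.FirstOrDefault(v => v.Info.Cached);
+if (cached != null)
+{
+    model.SelectVariant(cached);
+}
+```
diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md
index c63b78a4..23cd67a3 100644
--- a/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md
+++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.model.md
@@ -15,42 +15,42 @@ Attributes [NullableContextAttribute](https://docs.microsoft.com/en-us/dotnet/ap

### **Variants**

```csharp
-public List<ModelVariant> Variants { get; internal set; }
+public IReadOnlyList<IModel> Variants { get; }
```

#### Property Value

-[List<ModelVariant>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.list-1)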
+[IReadOnlyList<IModel>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ireadonlylist-1)
-### **SelectedVariant** +### **Alias** ```csharp -public ModelVariant SelectedVariant { get; internal set; } +public string Alias { get; set; } ``` #### Property Value -[ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
+[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
-### **Alias** +### **Id** ```csharp -public string Alias { get; set; } +public string Id { get; } ``` #### Property Value [String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
-### **Id** +### **Info** ```csharp -public string Id { get; } +public ModelInfo Info { get; } ``` #### Property Value -[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+[ModelInfo](./microsoft.ai.foundry.local.modelinfo.md)
## Methods @@ -86,17 +86,17 @@ public Task IsLoadedAsync(Nullable ct) [Task<Boolean>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
-### **SelectVariant(ModelVariant)** +### **SelectVariant(IModel)** Select a specific model variant from [Model.Variants](./microsoft.ai.foundry.local.model.md#variants) to use for [IModel](./microsoft.ai.foundry.local.imodel.md) operations. ```csharp -public void SelectVariant(ModelVariant variant) +public void SelectVariant(IModel variant) ``` #### Parameters -`variant` [ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
+`variant` [IModel](./microsoft.ai.foundry.local.imodel.md)
Model variant to select. Must be one of the variants in [Model.Variants](./microsoft.ai.foundry.local.model.md#variants). #### Exceptions @@ -104,29 +104,6 @@ Model variant to select. Must be one of the variants in [Model.Variants](./micro [FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md)
If variant is not valid for this model. -### **GetLatestVersion(ModelVariant)** - -Get the latest version of the specified model variant. - -```csharp -public ModelVariant GetLatestVersion(ModelVariant variant) -``` - -#### Parameters - -`variant` [ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
-Model variant. - -#### Returns - -[ModelVariant](./microsoft.ai.foundry.local.modelvariant.md)
-ModelVariant for latest version. Same as `variant` if that is the latest version. - -#### Exceptions - -[FoundryLocalException](./microsoft.ai.foundry.local.foundrylocalexception.md)
-If variant is not valid for this model. - ### **GetPathAsync(Nullable<CancellationToken>)** ```csharp diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md index 750253c1..1716e3b2 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.modelinfo.md @@ -222,6 +222,46 @@ public long CreatedAtUnix { get; set; } [Int64](https://docs.microsoft.com/en-us/dotnet/api/system.int64)
+
+### **ContextLength**
+
+```csharp
+public Nullable<long> ContextLength { get; set; }
+```
+
+#### Property Value
+
+[Nullable<Int64>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+ +### **InputModalities** + +```csharp +public string InputModalities { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **OutputModalities** + +```csharp +public string OutputModalities { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ +### **Capabilities** + +```csharp +public string Capabilities { get; set; } +``` + +#### Property Value + +[String](https://docs.microsoft.com/en-us/dotnet/api/system.string)
+ ## Constructors ### **ModelInfo()** diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md index bcaefc04..b1b60bd8 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaiaudioclient.md @@ -71,3 +71,17 @@ Cancellation token. [IAsyncEnumerable<AudioCreateTranscriptionResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.iasyncenumerable-1)
An asynchronous enumerable of transcription responses. + +### **CreateLiveTranscriptionSession()** + +Create a real-time streaming transcription session. + Audio data is pushed in as PCM chunks and transcription results are returned as an async stream. + +```csharp +public LiveAudioTranscriptionSession CreateLiveTranscriptionSession() +``` + +#### Returns + +[LiveAudioTranscriptionSession](./microsoft.ai.foundry.local.openai.liveaudiotranscriptionsession.md)
+A streaming session that must be disposed when done. diff --git a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md index 251e474c..43e00f6d 100644 --- a/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md +++ b/sdk/cs/docs/api/microsoft.ai.foundry.local.openaichatclient.md @@ -51,6 +51,32 @@ Optional cancellation token. [Task<ChatCompletionCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
Chat completion response.

+### **CompleteChatAsync(IEnumerable<ChatMessage>, IEnumerable<ToolDefinition>, Nullable<CancellationToken>)**
+
+Execute a chat completion request.
+
+ To continue a conversation, add the ChatMessage from the previous response and new prompt to the messages.
+
+```csharp
+public Task<ChatCompletionCreateResponse> CompleteChatAsync(IEnumerable<ChatMessage> messages, IEnumerable<ToolDefinition> tools, Nullable<CancellationToken> ct)
+```
+
+#### Parameters
+
+`messages` [IEnumerable<ChatMessage>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Chat messages. The system message is automatically added. + +`tools` [IEnumerable<ToolDefinition>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Optional tool definitions to include in the request. + +`ct` [Nullable<CancellationToken>](https://docs.microsoft.com/en-us/dotnet/api/system.nullable-1)
+Optional cancellation token. + +#### Returns + +[Task<ChatCompletionCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.task-1)
+Chat completion response.
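+
+A minimal sketch of the tools overload (assumes `chatClient`, `messages`, and `tools` are already constructed; the response property names are assumptions that mirror the OpenAI-style types, so verify against `ChatCompletionCreateResponse`):
+
+```csharp
+// Sketch only: property names below are assumptions, not confirmed API.
+var response = await chatClient.CompleteChatAsync(messages, tools, null);
+var assistantMessage = response.Choices[0].Message;
+messages.Add(assistantMessage); // carry the turn forward to continue the conversation
+```

### **CompleteChatStreamingAsync(IEnumerable<ChatMessage>, CancellationToken)**

Execute a chat completion request with streamed output.

@@ -67,7 +93,33 @@ public IAsyncEnumerable<ChatCompletionCreateResponse> CompleteChatStreamingAsync

Chat messages. The system message is automatically added.

`ct` [CancellationToken](https://docs.microsoft.com/en-us/dotnet/api/system.threading.cancellationtoken)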
-Optional cancellation token. +Cancellation token. + +#### Returns + +[IAsyncEnumerable<ChatCompletionCreateResponse>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.iasyncenumerable-1)
+Async enumerable of chat completion responses.
+
+### **CompleteChatStreamingAsync(IEnumerable<ChatMessage>, IEnumerable<ToolDefinition>, CancellationToken)**
+
+Execute a chat completion request with streamed output.
+
+ To continue a conversation, add the ChatMessage from the previous response and new prompt to the messages.
+
+```csharp
+public IAsyncEnumerable<ChatCompletionCreateResponse> CompleteChatStreamingAsync(IEnumerable<ChatMessage> messages, IEnumerable<ToolDefinition> tools, CancellationToken ct)
+```
+
+#### Parameters
+
+`messages` [IEnumerable<ChatMessage>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Chat messages. The system message is automatically added. + +`tools` [IEnumerable<ToolDefinition>](https://docs.microsoft.com/en-us/dotnet/api/system.collections.generic.ienumerable-1)
+Optional tool definitions to include in the request. + +`ct` [CancellationToken](https://docs.microsoft.com/en-us/dotnet/api/system.threading.cancellationtoken)
+Cancellation token. #### Returns diff --git a/sdk/cs/src/Catalog.cs b/sdk/cs/src/Catalog.cs index eb9ba0d7..f33dcaff 100644 --- a/sdk/cs/src/Catalog.cs +++ b/sdk/cs/src/Catalog.cs @@ -52,51 +52,59 @@ internal static async Task CreateAsync(IModelLoadManager modelManager, return catalog; } - public async Task> ListModelsAsync(CancellationToken? ct = null) + public async Task> ListModelsAsync(CancellationToken? ct = null) { return await Utils.CallWithExceptionHandling(() => ListModelsImplAsync(ct), "Error listing models.", _logger).ConfigureAwait(false); } - public async Task> GetCachedModelsAsync(CancellationToken? ct = null) + public async Task> GetCachedModelsAsync(CancellationToken? ct = null) { return await Utils.CallWithExceptionHandling(() => GetCachedModelsImplAsync(ct), "Error getting cached models.", _logger).ConfigureAwait(false); } - public async Task> GetLoadedModelsAsync(CancellationToken? ct = null) + public async Task> GetLoadedModelsAsync(CancellationToken? ct = null) { return await Utils.CallWithExceptionHandling(() => GetLoadedModelsImplAsync(ct), "Error getting loaded models.", _logger).ConfigureAwait(false); } - public async Task GetModelAsync(string modelAlias, CancellationToken? ct = null) + public async Task GetModelAsync(string modelAlias, CancellationToken? ct = null) { return await Utils.CallWithExceptionHandling(() => GetModelImplAsync(modelAlias, ct), $"Error getting model with alias '{modelAlias}'.", _logger) .ConfigureAwait(false); } - public async Task GetModelVariantAsync(string modelId, CancellationToken? ct = null) + public async Task GetModelVariantAsync(string modelId, CancellationToken? ct = null) { return await Utils.CallWithExceptionHandling(() => GetModelVariantImplAsync(modelId, ct), $"Error getting model variant with ID '{modelId}'.", _logger) .ConfigureAwait(false); } - private async Task> ListModelsImplAsync(CancellationToken? ct = null) + public async Task GetLatestVersionAsync(IModel modelOrModelVariant, CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling( + () => GetLatestVersionImplAsync(modelOrModelVariant, ct), + $"Error getting latest version for model with name '{modelOrModelVariant.Info.Name}'.", + _logger).ConfigureAwait(false); + } + + private async Task> ListModelsImplAsync(CancellationToken? ct = null) { await UpdateModels(ct).ConfigureAwait(false); using var disposable = await _lock.LockAsync().ConfigureAwait(false); - return _modelAliasToModel.Values.OrderBy(m => m.Alias).ToList(); + return _modelAliasToModel.Values.OrderBy(m => m.Alias).Cast().ToList(); } - private async Task> GetCachedModelsImplAsync(CancellationToken? ct = null) + private async Task> GetCachedModelsImplAsync(CancellationToken? ct = null) { var cachedModelIds = await Utils.GetCachedModelIdsAsync(_coreInterop, ct).ConfigureAwait(false); - List cachedModels = new(); + List cachedModels = []; foreach (var modelId in cachedModelIds) { if (_modelIdToModelVariant.TryGetValue(modelId, out ModelVariant? modelVariant)) @@ -108,10 +116,10 @@ private async Task> GetCachedModelsImplAsync(CancellationToke return cachedModels; } - private async Task> GetLoadedModelsImplAsync(CancellationToken? ct = null) + private async Task> GetLoadedModelsImplAsync(CancellationToken? 
ct = null) { var loadedModelIds = await _modelLoadManager.ListLoadedModelsAsync(ct).ConfigureAwait(false); - List loadedModels = new(); + List loadedModels = []; foreach (var modelId in loadedModelIds) { @@ -143,6 +151,45 @@ private async Task> GetLoadedModelsImplAsync(CancellationToke return modelVariant; } + private async Task GetLatestVersionImplAsync(IModel modelOrModelVariant, CancellationToken? ct) + { + Model? model; + + if (modelOrModelVariant is ModelVariant) + { + // For ModelVariant, resolve the owning Model via alias. + model = await GetModelImplAsync(modelOrModelVariant.Alias, ct); + } + else + { + // Try to use the concrete Model instance if this is our SDK type. + model = modelOrModelVariant as Model; + + // If this is a different IModel implementation (e.g., a test stub), + // fall back to resolving the Model via alias. + if (model == null) + { + model = await GetModelImplAsync(modelOrModelVariant.Alias, ct); + } + } + + if (model == null) + { + throw new FoundryLocalException($"Model with alias '{modelOrModelVariant.Alias}' not found in catalog.", + _logger); + } + + // variants are sorted by version, so the first one matching the name is the latest version for that variant. + var latest = model!.Variants.FirstOrDefault(v => v.Info.Name == modelOrModelVariant.Info.Name) ?? + // should not be possible given we internally manage all the state involved + throw new FoundryLocalException($"Internal error. Mismatch between model (alias:{model.Alias}) and " + + $"model variant (alias:{modelOrModelVariant.Alias}).", _logger); + + // if input was the latest return the input (could be model or model variant) + // otherwise return the latest model variant + return latest.Id == modelOrModelVariant.Id ? modelOrModelVariant : latest; + } + private async Task UpdateModels(CancellationToken? ct) { // TODO: make this configurable @@ -193,6 +240,11 @@ private async Task UpdateModels(CancellationToken? ct) _lastFetch = DateTime.Now; } + internal void InvalidateCache() + { + _lastFetch = DateTime.MinValue; + } + public void Dispose() { _lock.Dispose(); diff --git a/sdk/cs/src/Detail/CoreInterop.cs b/sdk/cs/src/Detail/CoreInterop.cs index 8411473b..d7867cad 100644 --- a/sdk/cs/src/Detail/CoreInterop.cs +++ b/sdk/cs/src/Detail/CoreInterop.cs @@ -124,6 +124,15 @@ internal CoreInterop(Configuration config, ILogger logger) _logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); var request = new CoreInteropRequest { Params = config.AsDictionary() }; + +#if IS_WINML + // WinML builds require bootstrapping the Windows App Runtime + if (!request.Params.ContainsKey("Bootstrap")) + { + request.Params["Bootstrap"] = "true"; + } +#endif + var response = ExecuteCommand("initialize", request); if (response.Error != null) diff --git a/sdk/cs/src/Detail/JsonSerializationContext.cs b/sdk/cs/src/Detail/JsonSerializationContext.cs index 894f9454..4621a43c 100644 --- a/sdk/cs/src/Detail/JsonSerializationContext.cs +++ b/sdk/cs/src/Detail/JsonSerializationContext.cs @@ -24,6 +24,8 @@ namespace Microsoft.AI.Foundry.Local.Detail; [JsonSerializable(typeof(AudioCreateTranscriptionRequest))] [JsonSerializable(typeof(AudioCreateTranscriptionResponse))] [JsonSerializable(typeof(string[]))] // list loaded or cached models +[JsonSerializable(typeof(EpInfo[]))] +[JsonSerializable(typeof(EpDownloadResult))] [JsonSerializable(typeof(JsonElement))] [JsonSerializable(typeof(ResponseFormatExtended))] [JsonSerializable(typeof(ToolChoice))] diff --git a/sdk/cs/src/Model.cs b/sdk/cs/src/Detail/Model.cs similarity index 74% rename from sdk/cs/src/Model.cs rename to sdk/cs/src/Detail/Model.cs index bbbbcb5b..c4d96057 100644 --- a/sdk/cs/src/Model.cs +++ b/sdk/cs/src/Detail/Model.cs @@ -12,11 +12,13 @@ public class Model : IModel { private readonly ILogger _logger; - public List Variants { get; internal set; } - public ModelVariant SelectedVariant { get; internal set; } = default!; + private readonly List _variants; + public IReadOnlyList Variants => _variants; + internal IModel SelectedVariant { get; set; } = default!; public string Alias { get; init; } public string Id => SelectedVariant.Id; + public ModelInfo Info => SelectedVariant.Info; /// /// Is the currently selected variant cached locally? @@ -33,7 +35,7 @@ internal Model(ModelVariant modelVariant, ILogger logger) _logger = logger; Alias = modelVariant.Alias; - Variants = new() { modelVariant }; + _variants = [modelVariant]; // variants are sorted by Core, so the first one added is the default SelectedVariant = modelVariant; @@ -48,7 +50,7 @@ internal void AddVariant(ModelVariant variant) _logger); } - Variants.Add(variant); + _variants.Add(variant); // prefer the highest priority locally cached variant if (variant.Info.Cached && !SelectedVariant.Info.Cached) @@ -62,31 +64,15 @@ internal void AddVariant(ModelVariant variant) /// /// Model variant to select. Must be one of the variants in . /// If variant is not valid for this model. - public void SelectVariant(ModelVariant variant) + public void SelectVariant(IModel variant) { _ = Variants.FirstOrDefault(v => v == variant) ?? - // user error so don't log - throw new FoundryLocalException($"Model {Alias} does not have a {variant.Id} variant."); + // user error so don't log. + throw new FoundryLocalException($"Input variant was not found in Variants."); SelectedVariant = variant; } - /// - /// Get the latest version of the specified model variant. - /// - /// Model variant. - /// ModelVariant for latest version. Same as `variant` if that is the latest version. - /// If variant is not valid for this model. - public ModelVariant GetLatestVersion(ModelVariant variant) - { - // variants are sorted by version, so the first one matching the name is the latest version for that variant. - var latest = Variants.FirstOrDefault(v => v.Info.Name == variant.Info.Name) ?? 
- // user error so don't log - throw new FoundryLocalException($"Model {Alias} does not have a {variant.Id} variant."); - - return latest; - } - public async Task GetPathAsync(CancellationToken? ct = null) { return await SelectedVariant.GetPathAsync(ct).ConfigureAwait(false); diff --git a/sdk/cs/src/ModelVariant.cs b/sdk/cs/src/Detail/ModelVariant.cs similarity index 95% rename from sdk/cs/src/ModelVariant.cs rename to sdk/cs/src/Detail/ModelVariant.cs index 6ca7cda7..9f2deaba 100644 --- a/sdk/cs/src/ModelVariant.cs +++ b/sdk/cs/src/Detail/ModelVariant.cs @@ -9,7 +9,7 @@ namespace Microsoft.AI.Foundry.Local; using Microsoft.AI.Foundry.Local.Detail; using Microsoft.Extensions.Logging; -public class ModelVariant : IModel +internal class ModelVariant : IModel { private readonly IModelLoadManager _modelLoadManager; private readonly ICoreInterop _coreInterop; @@ -22,6 +22,8 @@ public class ModelVariant : IModel public string Alias => Info.Alias; public int Version { get; init; } // parsed from Info.Version if possible, else 0 + public IReadOnlyList Variants => [this]; + internal ModelVariant(ModelInfo modelInfo, IModelLoadManager modelLoadManager, ICoreInterop coreInterop, ILogger logger) { @@ -190,4 +192,11 @@ private async Task GetAudioClientImplAsync(CancellationToken? return new OpenAIAudioClient(Id); } + + public void SelectVariant(IModel variant) + { + throw new FoundryLocalException( + $"SelectVariant is not supported on a ModelVariant. " + + $"Call Catalog.GetModelAsync(\"{Alias}\") to get an IModel with all variants available."); + } } diff --git a/sdk/cs/src/EpInfo.cs b/sdk/cs/src/EpInfo.cs new file mode 100644 index 00000000..d170ac0e --- /dev/null +++ b/sdk/cs/src/EpInfo.cs @@ -0,0 +1,45 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local; + +using System.Text.Json.Serialization; + +/// +/// Describes a discoverable execution provider bootstrapper. +/// +public record EpInfo +{ + /// The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). + [JsonPropertyName("Name")] + public required string Name { get; init; } + + /// True if this EP has already been successfully downloaded and registered. + [JsonPropertyName("IsRegistered")] + public required bool IsRegistered { get; init; } +} + +/// +/// Result of an explicit EP download and registration operation. +/// +public record EpDownloadResult +{ + /// True if all requested EPs were successfully downloaded and registered. + [JsonPropertyName("Success")] + public required bool Success { get; init; } + + /// Human-readable status message. + [JsonPropertyName("Status")] + public required string Status { get; init; } + + /// Names of EPs that were successfully registered. + [JsonPropertyName("RegisteredEps")] + public required string[] RegisteredEps { get; init; } + + /// Names of EPs that failed to register. + [JsonPropertyName("FailedEps")] + public required string[] FailedEps { get; init; } +} diff --git a/sdk/cs/src/FoundryLocalManager.cs b/sdk/cs/src/FoundryLocalManager.cs index 639be3a2..10b51285 100644 --- a/sdk/cs/src/FoundryLocalManager.cs +++ b/sdk/cs/src/FoundryLocalManager.cs @@ -97,9 +97,9 @@ public static async Task CreateAsync(Configuration configuration, ILogger logger /// Optional cancellation token. 
/// The model catalog. /// - /// The catalog is populated on first use. - /// If you are using a WinML build this will trigger a one-off execution provider download if not already done. - /// It is recommended to call first to separate out the two steps. + /// The catalog is populated on first use and returns models based on currently available execution providers. + /// To ensure all hardware-accelerated models are listed, call first to + /// register execution providers, then access the catalog. /// public async Task GetCatalogAsync(CancellationToken? ct = null) { @@ -135,19 +135,94 @@ await Utils.CallWithExceptionHandling(() => StopWebServiceImplAsync(ct), } /// - /// Ensure execution providers are downloaded and registered. - /// Only relevant when using WinML. - /// - /// Execution provider download can be time consuming due to the size of the packages. - /// Once downloaded, EPs are not re-downloaded unless a new version is available, so this method will be fast - /// on subsequent calls. + /// Discovers all available execution provider bootstrappers. + /// Returns metadata about each EP including whether it is already registered. + /// + /// Array of EP bootstrapper info describing available EPs. + public EpInfo[] DiscoverEps() + { + return Utils.CallWithExceptionHandling(DiscoverEpsImpl, + "Error discovering execution providers.", _logger); + } + + /// + /// Downloads and registers all available execution providers. + /// + /// Optional cancellation token. + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + /// + public async Task DownloadAndRegisterEpsAsync(CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(null, null, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); + } + + /// + /// Downloads and registers the specified execution providers. + /// + /// + /// Subset of EP bootstrapper names to download (as returned by ). + /// + /// Optional cancellation token. + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + /// + public async Task DownloadAndRegisterEpsAsync(IEnumerable names, + CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(names, null, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); + } + + /// + /// Downloads and registers all available execution providers, reporting progress. /// + /// + /// Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + /// /// Optional cancellation token. - public async Task EnsureEpsDownloadedAsync(CancellationToken? ct = null) + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. 
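For orientation, a minimal usage sketch of the EP APIs documented here, assuming an initialized `FoundryLocalManager` named `manager` (the variable name and console output are illustrative, and the LINQ calls require `System.Linq`):

```csharp
// Discover which execution providers are available but not yet registered.
var missing = manager.DiscoverEps()
    .Where(ep => !ep.IsRegistered)
    .Select(ep => ep.Name)
    .ToList();

// Download only the missing EPs, logging per-EP progress (0-100).
var result = await manager.DownloadAndRegisterEpsAsync(missing,
    (epName, percent) => Console.WriteLine($"{epName}: {percent:F0}%"));

// Re-fetch the catalog so models requiring the newly registered EPs are listed.
if (result.Success || result.RegisteredEps.Length > 0)
{
    var catalog = await manager.GetCatalogAsync();
}
```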
+ /// + public async Task DownloadAndRegisterEpsAsync(Action progressCallback, + CancellationToken? ct = null) { - await Utils.CallWithExceptionHandling(() => EnsureEpsDownloadedImplAsync(ct), - "Error ensuring execution providers downloaded.", _logger) - .ConfigureAwait(false); + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(null, progressCallback, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); + } + + /// + /// Downloads and registers the specified execution providers, reporting progress. + /// + /// + /// Subset of EP bootstrapper names to download (as returned by ). + /// + /// + /// Callback invoked as each EP downloads. Parameters are (epName, percentComplete) where percentComplete is 0-100. + /// + /// Optional cancellation token. + /// Result describing which EPs succeeded and which failed. + /// + /// Catalog and model requests use whatever EPs are currently registered and do not block on EP downloads. + /// After downloading new EPs, re-fetch the model catalog to include models requiring the newly registered EPs. + /// + public async Task DownloadAndRegisterEpsAsync(IEnumerable names, + Action progressCallback, + CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling(() => DownloadAndRegisterEpsImplAsync(names, progressCallback, ct), + "Error downloading execution providers.", _logger) + .ConfigureAwait(false); } private FoundryLocalManager(Configuration configuration, ILogger logger) @@ -197,6 +272,24 @@ private async Task InitializeAsync(CancellationToken? ct = null) return; } + private EpInfo[] DiscoverEpsImpl() + { + var result = _coreInterop!.ExecuteCommand("discover_eps"); + if (result.Error != null) + { + throw new FoundryLocalException($"Error discovering execution providers: {result.Error}", _logger); + } + + var data = result.Data; + if (string.IsNullOrWhiteSpace(data)) + { + return Array.Empty(); + } + + return JsonSerializer.Deserialize(data, JsonSerializationContext.Default.EpInfoArray) + ?? Array.Empty(); + } + private async Task GetCatalogImplAsync(CancellationToken? ct = null) { // create on first use @@ -259,17 +352,78 @@ private async Task StopWebServiceImplAsync(CancellationToken? ct = null) Urls = null; } - private async Task EnsureEpsDownloadedImplAsync(CancellationToken? ct = null) + private async Task DownloadAndRegisterEpsImplAsync(IEnumerable? names = null, + Action? progressCallback = null, + CancellationToken? ct = null) { - using var disposable = await asyncLock.LockAsync().ConfigureAwait(false); CoreInteropRequest? input = null; - var result = await _coreInterop!.ExecuteCommandAsync("ensure_eps_downloaded", input, ct); + if (names != null) + { + var namesList = string.Join(",", names); + if (!string.IsNullOrEmpty(namesList)) + { + input = new CoreInteropRequest + { + Params = new Dictionary { { "Names", namesList } } + }; + } + } + + ICoreInterop.Response result; + + if (progressCallback != null) + { + var callback = new ICoreInterop.CallbackFn(progressString => + { + var sepIndex = progressString.IndexOf('|'); + if (sepIndex >= 0) + { + var name = progressString[..sepIndex]; + if (double.TryParse(progressString[(sepIndex + 1)..], + System.Globalization.NumberStyles.Float, + System.Globalization.CultureInfo.InvariantCulture, + out var percent)) + { + progressCallback(string.IsNullOrEmpty(name) ? 
"" : name, percent); + } + } + }); + + result = await _coreInterop!.ExecuteCommandWithCallbackAsync("download_and_register_eps", input, + callback, ct).ConfigureAwait(false); + } + else + { + result = await _coreInterop!.ExecuteCommandAsync("download_and_register_eps", input, ct).ConfigureAwait(false); + } + if (result.Error != null) { - throw new FoundryLocalException($"Error ensuring execution providers downloaded: {result.Error}", _logger); + throw new FoundryLocalException($"Error downloading execution providers: {result.Error}", _logger); } + + EpDownloadResult epResult; + + if (!string.IsNullOrEmpty(result.Data)) + { + epResult = JsonSerializer.Deserialize(result.Data!, JsonSerializationContext.Default.EpDownloadResult) + ?? throw new FoundryLocalException("Failed to deserialize EP download result.", _logger); + } + else + { + epResult = new EpDownloadResult { Success = true, Status = "Completed", RegisteredEps = [], FailedEps = [] }; + } + + // Invalidate the catalog cache if any EP was newly registered so the next access + // re-fetches models with the updated set of available EPs. + if ((epResult.Success || epResult.RegisteredEps.Length > 0) && _catalog != null) + { + _catalog.InvalidateCache(); + } + + return epResult; } protected virtual void Dispose(bool disposing) diff --git a/sdk/cs/src/FoundryModelInfo.cs b/sdk/cs/src/FoundryModelInfo.cs index 1f795d22..2d1327cc 100644 --- a/sdk/cs/src/FoundryModelInfo.cs +++ b/sdk/cs/src/FoundryModelInfo.cs @@ -119,4 +119,16 @@ public record ModelInfo [JsonPropertyName("createdAt")] public long CreatedAtUnix { get; init; } + + [JsonPropertyName("contextLength")] + public long? ContextLength { get; init; } + + [JsonPropertyName("inputModalities")] + public string? InputModalities { get; init; } + + [JsonPropertyName("outputModalities")] + public string? OutputModalities { get; init; } + + [JsonPropertyName("capabilities")] + public string? Capabilities { get; init; } } diff --git a/sdk/cs/src/ICatalog.cs b/sdk/cs/src/ICatalog.cs index 35285736..4dca8e7d 100644 --- a/sdk/cs/src/ICatalog.cs +++ b/sdk/cs/src/ICatalog.cs @@ -18,36 +18,47 @@ public interface ICatalog /// List the available models in the catalog. /// /// Optional CancellationToken. - /// List of Model instances. - Task> ListModelsAsync(CancellationToken? ct = null); + /// List of IModel instances. + Task> ListModelsAsync(CancellationToken? ct = null); /// /// Lookup a model by its alias. /// /// Model alias. /// Optional CancellationToken. - /// The matching Model, or null if no model with the given alias exists. - Task GetModelAsync(string modelAlias, CancellationToken? ct = null); + /// The matching IModel, or null if no model with the given alias exists. + Task GetModelAsync(string modelAlias, CancellationToken? ct = null); /// /// Lookup a model variant by its unique model id. + /// NOTE: This will return an IModel with a single variant. Use GetModelAsync to get an IModel with all available + /// variants. /// /// Model id. /// Optional CancellationToken. - /// The matching ModelVariant, or null if no variant with the given id exists. - Task GetModelVariantAsync(string modelId, CancellationToken? ct = null); + /// The matching IModel, or null if no variant with the given id exists. + Task GetModelVariantAsync(string modelId, CancellationToken? ct = null); /// /// Get a list of currently downloaded models from the model cache. /// /// Optional CancellationToken. - /// List of ModelVariant instances. - Task> GetCachedModelsAsync(CancellationToken? 
ct = null); + /// List of IModel instances. + Task> GetCachedModelsAsync(CancellationToken? ct = null); /// /// Get a list of the currently loaded models. /// /// Optional CancellationToken. - /// List of ModelVariant instances. - Task> GetLoadedModelsAsync(CancellationToken? ct = null); + /// List of IModel instances. + Task> GetLoadedModelsAsync(CancellationToken? ct = null); + + /// + /// Get the latest version of a model. + /// This is used to check if a newer version of a model is available in the catalog for download. + /// + /// The model to check for the latest version. + /// Optional CancellationToken. + /// The latest version of the model. Will match the input if it is the latest version. + Task GetLatestVersionAsync(IModel model, CancellationToken? ct = null); } diff --git a/sdk/cs/src/IModel.cs b/sdk/cs/src/IModel.cs index c3acba61..a27f3a3d 100644 --- a/sdk/cs/src/IModel.cs +++ b/sdk/cs/src/IModel.cs @@ -16,6 +16,8 @@ public interface IModel Justification = "Alias is a suitable name in this context.")] string Alias { get; } + ModelInfo Info { get; } + Task IsCachedAsync(CancellationToken? ct = null); Task IsLoadedAsync(CancellationToken? ct = null); @@ -67,4 +69,17 @@ Task DownloadAsync(Action? downloadProgress = null, /// Optional cancellation token. /// OpenAI.AudioClient Task GetAudioClientAsync(CancellationToken? ct = null); + + /// + /// Variants of the model that are available. Variants of the model are optimized for different devices. + /// + IReadOnlyList Variants { get; } + + /// + /// Select a model variant from to use for operations. + /// An IModel from `Variants` can also be used directly. + /// + /// Model variant to select. Must be one of the variants in . + /// If variant is not valid for this model. + void SelectVariant(IModel variant); } diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index 905f9652..95398017 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -13,7 +13,7 @@ https://github.com/microsoft/Foundry-Local git - net8.0 + net9.0 win-x64;win-arm64;linux-x64;linux-arm64;osx-arm64 true @@ -87,7 +87,8 @@ Microsoft Foundry Local SDK for WinML Microsoft.AI.Foundry.Local.WinML Microsoft.AI.Foundry.Local.WinML - net8.0-windows10.0.26100.0 + $(DefineConstants);IS_WINML + net9.0-windows10.0.26100.0 win-x64;win-arm64 10.0.17763.0 @@ -99,8 +100,8 @@ $(FoundryLocalCoreVersion) - 0.9.0.8-rc3 - 0.9.0.8-rc3 + 1.0.0-rc1 + 1.0.0-rc1 True diff --git a/sdk/cs/test/FoundryLocal.Tests/AudioClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/AudioClientTests.cs index ec4ab4c9..5c4cc8d6 100644 --- a/sdk/cs/test/FoundryLocal.Tests/AudioClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/AudioClientTests.cs @@ -12,7 +12,7 @@ namespace Microsoft.AI.Foundry.Local.Tests; internal sealed class AudioClientTests { - private static Model? model; + private static IModel? model; [Before(Class)] public static async Task Setup() diff --git a/sdk/cs/test/FoundryLocal.Tests/CatalogTests.cs b/sdk/cs/test/FoundryLocal.Tests/CatalogTests.cs new file mode 100644 index 00000000..d270ac15 --- /dev/null +++ b/sdk/cs/test/FoundryLocal.Tests/CatalogTests.cs @@ -0,0 +1,121 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. 
+// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.Tests; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json; +using System.Threading.Tasks; + +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.Extensions.Logging.Abstractions; + +using Moq; + +internal sealed class CatalogTests +{ + [Test] + public async Task GetLatestVersion_Works() + { + // Create test data with 3 entries for a model with different versions + // Sorted by version (descending), so version 3 is first (latest) + var testModelInfos = new List + { + new() + { + Id = "test-model:3", + Name = "test-model", + Version = 3, + Alias = "test-alias", + DisplayName = "Test Model", + ProviderType = "test", + Uri = "test://model/3", + ModelType = "ONNX", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, + Cached = false + }, + new() + { + Id = "test-model:2", + Name = "test-model", + Version = 2, + Alias = "test-alias", + DisplayName = "Test Model", + ProviderType = "test", + Uri = "test://model/2", + ModelType = "ONNX", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, + Cached = false + }, + new() + { + Id = "test-model:1", + Name = "test-model", + Version = 1, + Alias = "test-alias", + DisplayName = "Test Model", + ProviderType = "test", + Uri = "test://model/1", + ModelType = "ONNX", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, + Cached = false + } + }; + + // Serialize the test data + var modelListJson = JsonSerializer.Serialize(testModelInfos, JsonSerializationContext.Default.ListModelInfo); + + // Create mock ICoreInterop + var mockCoreInterop = new Mock(); + + // Mock get_catalog_name + mockCoreInterop.Setup(x => x.ExecuteCommand("get_catalog_name", It.IsAny())) + .Returns(new ICoreInterop.Response { Data = "TestCatalog", Error = null }); + + // Mock get_model_list + mockCoreInterop.Setup(x => x.ExecuteCommandAsync("get_model_list", It.IsAny(), It.IsAny())) + .ReturnsAsync(new ICoreInterop.Response { Data = modelListJson, Error = null }); + + // Create mock IModelLoadManager + var mockLoadManager = new Mock(); + + // Create Catalog instance directly (internals are visible to test project) + var catalog = await Catalog.CreateAsync(mockLoadManager.Object, mockCoreInterop.Object, + NullLogger.Instance, null); + + // Get the model + var model = await catalog.GetModelAsync("test-alias"); + await Assert.That(model).IsNotNull(); + + // Verify we have 3 variants + await Assert.That(model!.Variants).HasCount().EqualTo(3); + + // Get the variants - they should be sorted by version (descending) + var variants = model.Variants.ToList(); + var latestVariant = variants[0]; // version 3 + var middleVariant = variants[1]; // version 2 + var oldestVariant = variants[2]; // version 1 + + await Assert.That(latestVariant.Id).IsEqualTo("test-model:3"); + await Assert.That(middleVariant.Id).IsEqualTo("test-model:2"); + await Assert.That(oldestVariant.Id).IsEqualTo("test-model:1"); + + // Test GetLatestVersionAsync with all 3 variants - should always return the first (version 3) + var result1 = await catalog.GetLatestVersionAsync(latestVariant); + await Assert.That(result1.Id).IsEqualTo("test-model:3"); + + var result2 = await catalog.GetLatestVersionAsync(middleVariant); + await Assert.That(result2.Id).IsEqualTo("test-model:3"); + + var result3 = 
await catalog.GetLatestVersionAsync(oldestVariant); + await Assert.That(result3.Id).IsEqualTo("test-model:3"); + + // Test with Model input - when latest is selected, should get Model not ModelVariant back + model.SelectVariant(latestVariant); + var result4 = await catalog.GetLatestVersionAsync(model); + await Assert.That(result4).IsEqualTo(model); + } +} diff --git a/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs b/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs index b7a91190..2624f98a 100644 --- a/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/ChatCompletionsTests.cs @@ -15,7 +15,7 @@ namespace Microsoft.AI.Foundry.Local.Tests; internal sealed class ChatCompletionsTests { - private static Model? model; + private static IModel? model; [Before(Class)] public static async Task Setup() @@ -24,11 +24,10 @@ public static async Task Setup() var catalog = await manager.GetCatalogAsync(); // Load the specific cached model variant directly - var modelVariant = await catalog.GetModelVariantAsync("qwen2.5-0.5b-instruct-generic-cpu:4").ConfigureAwait(false); - await Assert.That(modelVariant).IsNotNull(); + var model = await catalog.GetModelVariantAsync("qwen2.5-0.5b-instruct-generic-cpu:4").ConfigureAwait(false); + await Assert.That(model).IsNotNull(); - var model = new Model(modelVariant!, manager.Logger); - await model.LoadAsync().ConfigureAwait(false); + await model!.LoadAsync().ConfigureAwait(false); await Assert.That(await model.IsLoadedAsync()).IsTrue(); ChatCompletionsTests.model = model; diff --git a/sdk/cs/test/FoundryLocal.Tests/EndToEnd.cs b/sdk/cs/test/FoundryLocal.Tests/EndToEnd.cs index 80ab4c0a..56c70769 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EndToEnd.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EndToEnd.cs @@ -29,8 +29,9 @@ public async Task EndToEndTest_Succeeds() await Assert.That(modelVariant).IsNotNull(); await Assert.That(modelVariant!.Alias).IsEqualTo("qwen2.5-0.5b"); - // Create model from the specific variant - var model = new Model(modelVariant, manager.Logger); + // Get Model for variant and select the variant so `model` and `modelVariant` should be equivalent + var model = await catalog.GetModelAsync(modelVariant.Alias); + model!.SelectVariant(modelVariant); // uncomment this to remove the model first to test the download progress // only do this when manually testing as other tests expect the model to be cached diff --git a/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs b/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs index 5227e062..cd7e7793 100644 --- a/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs +++ b/sdk/cs/test/FoundryLocal.Tests/FoundryLocalManagerTest.cs @@ -26,7 +26,7 @@ public async Task Manager_GetCatalog_Succeeds() foreach (var model in models) { Console.WriteLine($"Model Alias: {model.Alias}, Variants: {model.Variants.Count}"); - Console.WriteLine($"Selected Variant Id: {model.SelectedVariant?.Id ?? "none"}"); + Console.WriteLine($"Selected Variant Id: {model.Id ?? 
"none"}"); // variants should be in sorted order diff --git a/sdk/cs/test/FoundryLocal.Tests/LOCAL_MODEL_TESTING.md b/sdk/cs/test/FoundryLocal.Tests/LOCAL_MODEL_TESTING.md index 1145cd9d..1b4a71e7 100644 --- a/sdk/cs/test/FoundryLocal.Tests/LOCAL_MODEL_TESTING.md +++ b/sdk/cs/test/FoundryLocal.Tests/LOCAL_MODEL_TESTING.md @@ -6,10 +6,14 @@ The test model cache directory name is configured in `sdk/cs/test/FoundryLocal.T ```json { - "TestModelCacheDirName": "/path/to/model/cache" + "TestModelCacheDirName": "test-data-shared" } ``` +If the value is a directory name it will be resolved as /../{TestModelCacheDirName}. +Otherwise the value will be resolved using Path.GetFullPath, which allows for absolute paths or +relative paths based on the current working directory. + ## Run the tests The tests will automatically find the models in the configured test model cache directory. @@ -17,21 +21,4 @@ The tests will automatically find the models in the configured test model cache ```bash cd /path/to/parent-dir/foundry-local-sdk/sdk/cs/test/FoundryLocal.Tests dotnet test Microsoft.AI.Foundry.Local.Tests.csproj --configuration Release# Running Local Model Tests - -## Configuration - -The test model cache directory name is configured in `sdk/cs/test/FoundryLocal.Tests/appsettings.Test.json`: - -```json -{ - "TestModelCacheDirName": "/path/to/model/cache" -} ``` - -## Run the tests - -The tests will automatically find the models in the configured test model cache directory. - -```bash -cd /path/to/parent-dir/foundry-local-sdk/sdk/cs/test/FoundryLocal.Tests -dotnet test Microsoft.AI.Foundry.Local.Tests.csproj --configuration Release \ No newline at end of file diff --git a/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj b/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj index b0bd3cd0..fe0dfcd2 100644 --- a/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj +++ b/sdk/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj @@ -22,7 +22,6 @@ net9.0-windows10.0.26100.0 10.0.17763.0 None - true diff --git a/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs b/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs deleted file mode 100644 index b5a49657..00000000 --- a/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs +++ /dev/null @@ -1,54 +0,0 @@ -// -------------------------------------------------------------------------------------------------------------------- -// -// Copyright (c) Microsoft. All rights reserved. 
-// -// -------------------------------------------------------------------------------------------------------------------- - -namespace Microsoft.AI.Foundry.Local.Tests; -using System.Collections.Generic; -using System.Threading.Tasks; - -using Microsoft.Extensions.Logging.Abstractions; - -using Moq; - -internal sealed class ModelTests -{ - [Test] - public async Task GetLastestVersion_Works() - { - var loadManager = new Mock(); - var coreInterop = new Mock(); - var logger = NullLogger.Instance; - - var createModelInfo = (string name, int version) => new ModelInfo - { - Id = $"{name}:{version}", - Alias = "model", - Name = name, - Version = version, - Uri = "local://model", - ProviderType = "local", - ModelType = "test" - }; - - var variants = new List - { - new(createModelInfo("model_a", 4), loadManager.Object, coreInterop.Object, logger), - new(createModelInfo("model_b", 3), loadManager.Object, coreInterop.Object, logger), - new(createModelInfo("model_b", 2), loadManager.Object, coreInterop.Object, logger), - }; - - var model = new Model(variants[0], NullLogger.Instance); - foreach (var variant in variants.Skip(1)) - { - model.AddVariant(variant); - } - - var latestA = model.GetLatestVersion(variants[0]); - await Assert.That(latestA).IsEqualTo(variants[0]); - - var latestB = model.GetLatestVersion(variants[2]); - await Assert.That(latestB).IsEqualTo(variants[1]); - } -} diff --git a/sdk/cs/test/FoundryLocal.Tests/TestAssemblySetupCleanup.cs b/sdk/cs/test/FoundryLocal.Tests/TestAssemblySetupCleanup.cs index ac536d12..2136a8eb 100644 --- a/sdk/cs/test/FoundryLocal.Tests/TestAssemblySetupCleanup.cs +++ b/sdk/cs/test/FoundryLocal.Tests/TestAssemblySetupCleanup.cs @@ -15,16 +15,20 @@ public static async Task Cleanup(AssemblyHookContext _) { try { - // ensure any loaded models are unloaded - var manager = FoundryLocalManager.Instance; // initialized by Utils - var catalog = await manager.GetCatalogAsync(); - var models = await catalog.GetLoadedModelsAsync().ConfigureAwait(false); - - foreach (var model in models) + // if running individual test/s they may not have used the Utils class which creates FoundryLocalManager + if (FoundryLocalManager.IsInitialized) { - await Assert.That(await model.IsLoadedAsync()).IsTrue(); - await model.UnloadAsync().ConfigureAwait(false); - await Assert.That(await model.IsLoadedAsync()).IsFalse(); + // ensure any loaded models are unloaded + var manager = FoundryLocalManager.Instance; // initialized by Utils + var catalog = await manager.GetCatalogAsync(); + var models = await catalog.GetLoadedModelsAsync().ConfigureAwait(false); + + foreach (var model in models) + { + await Assert.That(await model.IsLoadedAsync()).IsTrue(); + await model.UnloadAsync().ConfigureAwait(false); + await Assert.That(await model.IsLoadedAsync()).IsFalse(); + } } } catch (Exception ex) diff --git a/sdk/cs/test/FoundryLocal.Tests/Utils.cs b/sdk/cs/test/FoundryLocal.Tests/Utils.cs index 6313b0d5..9611d0d4 100644 --- a/sdk/cs/test/FoundryLocal.Tests/Utils.cs +++ b/sdk/cs/test/FoundryLocal.Tests/Utils.cs @@ -55,7 +55,7 @@ public static void AssemblyInit(AssemblyHookContext _) .AddJsonFile("appsettings.Test.json", optional: true, reloadOnChange: false) .Build(); - var testModelCacheDirName = "test-data-shared"; + var testModelCacheDirName = configuration["TestModelCacheDirName"] ?? 
"test-data-shared"; string testDataSharedPath; if (Path.IsPathRooted(testModelCacheDirName) || testModelCacheDirName.Contains(Path.DirectorySeparatorChar) || @@ -74,6 +74,8 @@ public static void AssemblyInit(AssemblyHookContext _) if (!Directory.Exists(testDataSharedPath)) { + // need to ensure there's a user visible error when running in VS. + logger.LogCritical($"Test model cache directory does not exist: {testDataSharedPath}"); throw new DirectoryNotFoundException($"Test model cache directory does not exist: {testDataSharedPath}"); } diff --git a/sdk/cs/test/FoundryLocal.Tests/appsettings.Test.json b/sdk/cs/test/FoundryLocal.Tests/appsettings.Test.json index 87410c33..d42d8789 100644 --- a/sdk/cs/test/FoundryLocal.Tests/appsettings.Test.json +++ b/sdk/cs/test/FoundryLocal.Tests/appsettings.Test.json @@ -1,3 +1,3 @@ { - "TestModelCacheDirName": "/path/to/test/model/cache" + "TestModelCacheDirName": "test-data-shared" } diff --git a/sdk/js/.npmrc b/sdk/js/.npmrc new file mode 100644 index 00000000..114ea2a4 --- /dev/null +++ b/sdk/js/.npmrc @@ -0,0 +1,2 @@ +registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/ +always-auth=true diff --git a/sdk/js/README.md b/sdk/js/README.md index 3308c9d8..c197e80e 100644 --- a/sdk/js/README.md +++ b/sdk/js/README.md @@ -34,6 +34,45 @@ When WinML is enabled: > **Note:** The `--winml` flag is only relevant on Windows. On macOS and Linux, the standard installation is used regardless of this flag. +### Explicit EP Management + +You can explicitly discover and download execution providers using the `discoverEps()` and `downloadAndRegisterEps()` methods: + +```typescript +// Discover available EPs and their status +const eps = manager.discoverEps(); +for (const ep of eps) { + console.log(`${ep.name} — registered: ${ep.isRegistered}`); +} + +// Download and register all available EPs +const result = await manager.downloadAndRegisterEps(); +console.log(`Success: ${result.success}, Status: ${result.status}`); + +// Download only specific EPs +const result2 = await manager.downloadAndRegisterEps([eps[0].name]); +``` + +#### Per-EP download progress + +Pass an optional `progressCallback` to receive `(epName, percent)` updates as each EP downloads (`percent` is 0–100): + +```typescript +let currentEp = ''; +await manager.downloadAndRegisterEps((epName, percent) => { + if (epName !== currentEp) { + if (currentEp !== '') { + process.stdout.write('\n'); + } + currentEp = epName; + } + process.stdout.write(`\r ${epName} ${percent.toFixed(1)}%`); +}); +process.stdout.write('\n'); +``` + +Catalog access does not block on EP downloads. Call `downloadAndRegisterEps()` when you need hardware-accelerated execution providers. + ## Quick Start ```typescript @@ -69,15 +108,14 @@ console.log(completion.choices[0]?.message?.content); // Example streaming completion console.log('\nTesting streaming completion...'); -await chatClient.completeStreamingChat( - [{ role: 'user', content: 'Write a short poem about programming.' }], - (chunk) => { - const content = chunk.choices?.[0]?.message?.content; - if (content) { - process.stdout.write(content); - } +for await (const chunk of chatClient.completeStreamingChat( + [{ role: 'user', content: 'Write a short poem about programming.' 
}] +)) { + const content = chunk.choices?.[0]?.message?.content; + if (content) { + process.stdout.write(content); } -); +} console.log('\n'); // Unload the model @@ -108,7 +146,7 @@ const loaded = await catalog.getLoadedModels(); ### Loading and Running Models -Each `Model` can have multiple variants (different quantizations or formats). The SDK automatically selects the best available variant, preferring cached versions. +Each model can have multiple variants (different quantizations or formats). The SDK automatically selects the best available variant, preferring cached versions. All models implement the `IModel` interface. ```typescript const model = await catalog.getModel('qwen2.5-0.5b'); @@ -157,15 +195,14 @@ console.log(response.choices[0].message.content); For real-time output, use streaming: ```typescript -await chatClient.completeStreamingChat( - [{ role: 'user', content: 'Write a short poem about programming.' }], - (chunk) => { - const content = chunk.choices?.[0]?.message?.content; - if (content) { - process.stdout.write(content); - } +for await (const chunk of chatClient.completeStreamingChat( + [{ role: 'user', content: 'Write a short poem about programming.' }] +)) { + const content = chunk.choices?.[0]?.message?.content; + if (content) { + process.stdout.write(content); } -); +} ``` ### Audio Transcription @@ -180,9 +217,9 @@ audioClient.settings.language = 'en'; const result = await audioClient.transcribe('/path/to/audio.wav'); // Streaming transcription -await audioClient.transcribeStreaming('/path/to/audio.wav', (chunk) => { +for await (const chunk of audioClient.transcribeStreaming('/path/to/audio.wav')) { console.log(chunk); -}); +} ``` ### Embedded Web Service @@ -220,8 +257,7 @@ Auto-generated class documentation lives in [`docs/classes/`](docs/classes/): - [FoundryLocalManager](docs/classes/FoundryLocalManager.md) — SDK entry point, web service management - [Catalog](docs/classes/Catalog.md) — Model discovery and browsing -- [Model](docs/classes/Model.md) — High-level model with variant selection -- [ModelVariant](docs/classes/ModelVariant.md) — Specific model variant: download, load, inference +- [IModel](docs/README.md#imodel) — Model interface: variant selection, download, load, inference - [ChatClient](docs/classes/ChatClient.md) — Chat completions (sync and streaming) - [AudioClient](docs/classes/AudioClient.md) — Audio transcription (sync and streaming) - [ModelLoadManager](docs/classes/ModelLoadManager.md) — Low-level model loading management diff --git a/sdk/js/docs/README.md b/sdk/js/docs/README.md index e79be84d..b0167b4d 100644 --- a/sdk/js/docs/README.md +++ b/sdk/js/docs/README.md @@ -1,4 +1,4 @@ -# @prathikrao/foundry-local-sdk +# foundry-local-sdk ## Enumerations @@ -23,7 +23,6 @@ - [FoundryLocalManager](classes/FoundryLocalManager.md) - [Model](classes/Model.md) - [ModelLoadManager](classes/ModelLoadManager.md) -- [ModelVariant](classes/ModelVariant.md) - [ResponsesClient](classes/ResponsesClient.md) - [ResponsesClientSettings](classes/ResponsesClientSettings.md) @@ -153,6 +152,70 @@ object: string; *** +### EpDownloadResult + +Result of an explicit EP download and registration operation. + +#### Properties + +##### failedEps + +```ts +failedEps: string[]; +``` + +Names of EPs that failed to register. + +##### registeredEps + +```ts +registeredEps: string[]; +``` + +Names of EPs that were successfully registered. + +##### status + +```ts +status: string; +``` + +Human-readable status message. 
+ +##### success + +```ts +success: boolean; +``` + +True if all requested EPs were successfully downloaded and registered. + +*** + +### EpInfo + +Describes a discoverable execution provider bootstrapper. + +#### Properties + +##### isRegistered + +```ts +isRegistered: boolean; +``` + +True if this EP has already been successfully downloaded and registered. + +##### name + +```ts +name: string; +``` + +The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). + +*** + ### FoundryLocalConfig Configuration options for the Foundry Local SDK. @@ -163,7 +226,7 @@ Use a plain object with these properties to configure the SDK. ##### additionalSettings? ```ts -optional additionalSettings: { +optional additionalSettings?: { [key: string]: string; }; ``` @@ -180,7 +243,7 @@ Optional. Internal use only. ##### appDataDir? ```ts -optional appDataDir: string; +optional appDataDir?: string; ``` The directory where application data should be stored. @@ -198,7 +261,7 @@ Used for identifying the application in logs and telemetry. ##### libraryPath? ```ts -optional libraryPath: string; +optional libraryPath?: string; ``` The path to the directory containing the native Foundry Local Core libraries. @@ -208,7 +271,7 @@ If not provided, the SDK attempts to discover them in standard locations. ##### logLevel? ```ts -optional logLevel: "trace" | "debug" | "info" | "warn" | "error" | "fatal"; +optional logLevel?: "trace" | "debug" | "info" | "warn" | "error" | "fatal"; ``` The logging level for the SDK. @@ -218,7 +281,7 @@ Defaults to 'warn'. ##### logsDir? ```ts -optional logsDir: string; +optional logsDir?: string; ``` The directory where log files are written. @@ -227,7 +290,7 @@ Optional. Defaults to `{appDataDir}/logs`. ##### modelCacheDir? ```ts -optional modelCacheDir: string; +optional modelCacheDir?: string; ``` The directory where models are downloaded and cached. @@ -236,7 +299,7 @@ Optional. Defaults to `{appDataDir}/cache/models`. ##### serviceEndpoint? ```ts -optional serviceEndpoint: string; +optional serviceEndpoint?: string; ``` The external URL if the web service is running in a separate process. @@ -245,7 +308,7 @@ Optional. This is used to connect to an existing service instance. ##### webServiceUrls? ```ts -optional webServiceUrls: string; +optional webServiceUrls?: string; ``` The URL(s) for the local web service to bind to. @@ -351,7 +414,7 @@ call_id: string; ##### id? ```ts -optional id: string; +optional id?: string; ``` ##### name @@ -363,7 +426,7 @@ name: string; ##### status? ```ts -optional status: ResponseItemStatus; +optional status?: ResponseItemStatus; ``` ##### type @@ -387,7 +450,7 @@ call_id: string; ##### id? ```ts -optional id: string; +optional id?: string; ``` ##### output @@ -399,7 +462,7 @@ output: string | ContentPart[]; ##### status? ```ts -optional status: ResponseItemStatus; +optional status?: ResponseItemStatus; ``` ##### type @@ -417,7 +480,7 @@ type: "function_call_output"; ##### description? ```ts -optional description: string; +optional description?: string; ``` ##### name @@ -429,13 +492,13 @@ name: string; ##### parameters? ```ts -optional parameters: Record; +optional parameters?: Record; ``` ##### strict? 
```ts -optional strict: boolean; +optional strict?: boolean; ``` ##### type @@ -462,6 +525,30 @@ get alias(): string; `string` +##### capabilities + +###### Get Signature + +```ts +get capabilities(): string | null; +``` + +###### Returns + +`string` \| `null` + +##### contextLength + +###### Get Signature + +```ts +get contextLength(): number | null; +``` + +###### Returns + +`number` \| `null` + ##### id ###### Get Signature @@ -474,6 +561,30 @@ get id(): string; `string` +##### info + +###### Get Signature + +```ts +get info(): ModelInfo; +``` + +###### Returns + +[`ModelInfo`](#modelinfo) + +##### inputModalities + +###### Get Signature + +```ts +get inputModalities(): string | null; +``` + +###### Returns + +`string` \| `null` + ##### isCached ###### Get Signature @@ -486,6 +597,18 @@ get isCached(): boolean; `boolean` +##### outputModalities + +###### Get Signature + +```ts +get outputModalities(): string | null; +``` + +###### Returns + +`string` \| `null` + ##### path ###### Get Signature @@ -498,6 +621,32 @@ get path(): string; `string` +##### supportsToolCalling + +###### Get Signature + +```ts +get supportsToolCalling(): boolean | null; +``` + +###### Returns + +`boolean` \| `null` + +##### variants + +###### Get Signature + +```ts +get variants(): IModel[]; +``` + +Variants of the model that are available. Variants of the model are optimized for different devices. + +###### Returns + +[`IModel`](#imodel)[] + #### Methods ##### createAudioClient() @@ -586,6 +735,29 @@ removeFromCache(): void; `void` +##### selectVariant() + +```ts +selectVariant(variant): void; +``` + +Select a model variant from variants to use for IModel operations. +An IModel from `variants` can also be used directly. + +###### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `variant` | [`IModel`](#imodel) | Model variant to select. Must be one of the variants in `variants`. | + +###### Returns + +`void` + +###### Throws + +Error if variant is not valid for this model. + ##### unload() ```ts @@ -671,7 +843,7 @@ type: "item_reference"; ##### bytes? ```ts -optional bytes: number[]; +optional bytes?: number[]; ``` ##### logprob @@ -701,7 +873,7 @@ content: string | ContentPart[]; ##### id? ```ts -optional id: string; +optional id?: string; ``` ##### role @@ -713,7 +885,7 @@ role: MessageRole; ##### status? ```ts -optional status: ResponseItemStatus; +optional status?: ResponseItemStatus; ``` ##### type @@ -740,6 +912,18 @@ alias: string; cached: boolean; ``` +##### capabilities? + +```ts +optional capabilities?: string | null; +``` + +##### contextLength? + +```ts +optional contextLength?: number | null; +``` + ##### createdAtUnix ```ts @@ -749,13 +933,13 @@ createdAtUnix: number; ##### displayName? ```ts -optional displayName: string | null; +optional displayName?: string | null; ``` ##### fileSizeMb? ```ts -optional fileSizeMb: number | null; +optional fileSizeMb?: number | null; ``` ##### id @@ -764,34 +948,40 @@ optional fileSizeMb: number | null; id: string; ``` +##### inputModalities? + +```ts +optional inputModalities?: string | null; +``` + ##### license? ```ts -optional license: string | null; +optional license?: string | null; ``` ##### licenseDescription? ```ts -optional licenseDescription: string | null; +optional licenseDescription?: string | null; ``` ##### maxOutputTokens? ```ts -optional maxOutputTokens: number | null; +optional maxOutputTokens?: number | null; ``` ##### minFLVersion? 
```ts -optional minFLVersion: string | null; +optional minFLVersion?: string | null; ``` ##### modelSettings? ```ts -optional modelSettings: ModelSettings | null; +optional modelSettings?: ModelSettings | null; ``` ##### modelType @@ -806,10 +996,16 @@ modelType: string; name: string; ``` +##### outputModalities? + +```ts +optional outputModalities?: string | null; +``` + ##### promptTemplate? ```ts -optional promptTemplate: PromptTemplate | null; +optional promptTemplate?: PromptTemplate | null; ``` ##### providerType @@ -821,25 +1017,25 @@ providerType: string; ##### publisher? ```ts -optional publisher: string | null; +optional publisher?: string | null; ``` ##### runtime? ```ts -optional runtime: Runtime | null; +optional runtime?: Runtime | null; ``` ##### supportsToolCalling? ```ts -optional supportsToolCalling: boolean | null; +optional supportsToolCalling?: boolean | null; ``` ##### task? ```ts -optional task: string | null; +optional task?: string | null; ``` ##### uri @@ -863,7 +1059,7 @@ version: number; ##### parameters? ```ts -optional parameters: Parameter[] | null; +optional parameters?: Parameter[] | null; ``` *** @@ -947,13 +1143,13 @@ type: "response.output_item.done"; ##### annotations? ```ts -optional annotations: Annotation[]; +optional annotations?: Annotation[]; ``` ##### logprobs? ```ts -optional logprobs: LogProb[]; +optional logprobs?: LogProb[]; ``` ##### text @@ -1067,7 +1263,7 @@ name: string; ##### value? ```ts -optional value: string | null; +optional value?: string | null; ``` *** @@ -1091,13 +1287,13 @@ prompt: string; ##### system? ```ts -optional system: string | null; +optional system?: string | null; ``` ##### user? ```ts -optional user: string | null; +optional user?: string | null; ``` *** @@ -1109,13 +1305,13 @@ optional user: string | null; ##### effort? ```ts -optional effort: string; +optional effort?: string; ``` ##### summary? ```ts -optional summary: string; +optional summary?: string; ``` *** @@ -1127,31 +1323,31 @@ optional summary: string; ##### content? ```ts -optional content: ContentPart[]; +optional content?: ContentPart[]; ``` ##### encrypted\_content? ```ts -optional encrypted_content: string; +optional encrypted_content?: string; ``` ##### id? ```ts -optional id: string; +optional id?: string; ``` ##### status? ```ts -optional status: ResponseItemStatus; +optional status?: ResponseItemStatus; ``` ##### summary? ```ts -optional summary: string; +optional summary?: string; ``` ##### type @@ -1259,121 +1455,121 @@ type: "response.refusal.done"; ##### frequency\_penalty? ```ts -optional frequency_penalty: number; +optional frequency_penalty?: number; ``` ##### input? ```ts -optional input: string | ResponseInputItem[]; +optional input?: string | ResponseInputItem[]; ``` ##### instructions? ```ts -optional instructions: string; +optional instructions?: string; ``` ##### max\_output\_tokens? ```ts -optional max_output_tokens: number; +optional max_output_tokens?: number; ``` ##### metadata? ```ts -optional metadata: Record; +optional metadata?: Record; ``` ##### model? ```ts -optional model: string; +optional model?: string; ``` ##### parallel\_tool\_calls? ```ts -optional parallel_tool_calls: boolean; +optional parallel_tool_calls?: boolean; ``` ##### presence\_penalty? ```ts -optional presence_penalty: number; +optional presence_penalty?: number; ``` ##### previous\_response\_id? ```ts -optional previous_response_id: string; +optional previous_response_id?: string; ``` ##### reasoning? 
```ts -optional reasoning: ReasoningConfig; +optional reasoning?: ReasoningConfig; ``` ##### seed? ```ts -optional seed: number; +optional seed?: number; ``` ##### store? ```ts -optional store: boolean; +optional store?: boolean; ``` ##### stream? ```ts -optional stream: boolean; +optional stream?: boolean; ``` ##### temperature? ```ts -optional temperature: number; +optional temperature?: number; ``` ##### text? ```ts -optional text: TextConfig; +optional text?: TextConfig; ``` ##### tool\_choice? ```ts -optional tool_choice: ResponseToolChoice; +optional tool_choice?: ResponseToolChoice; ``` ##### tools? ```ts -optional tools: FunctionToolDefinition[]; +optional tools?: FunctionToolDefinition[]; ``` ##### top\_p? ```ts -optional top_p: number; +optional top_p?: number; ``` ##### truncation? ```ts -optional truncation: TruncationStrategy; +optional truncation?: TruncationStrategy; ``` ##### user? ```ts -optional user: string; +optional user?: string; ``` *** @@ -1403,13 +1599,13 @@ message: string; ##### jsonSchema? ```ts -optional jsonSchema: string; +optional jsonSchema?: string; ``` ##### larkGrammar? ```ts -optional larkGrammar: string; +optional larkGrammar?: string; ``` ##### type @@ -1457,13 +1653,13 @@ type: ##### cancelled\_at? ```ts -optional cancelled_at: number | null; +optional cancelled_at?: number | null; ``` ##### completed\_at? ```ts -optional completed_at: number | null; +optional completed_at?: number | null; ``` ##### created\_at @@ -1475,13 +1671,13 @@ created_at: number; ##### error? ```ts -optional error: ResponseError | null; +optional error?: ResponseError | null; ``` ##### failed\_at? ```ts -optional failed_at: number | null; +optional failed_at?: number | null; ``` ##### frequency\_penalty @@ -1499,25 +1695,25 @@ id: string; ##### incomplete\_details? ```ts -optional incomplete_details: IncompleteDetails | null; +optional incomplete_details?: IncompleteDetails | null; ``` ##### instructions? ```ts -optional instructions: string | null; +optional instructions?: string | null; ``` ##### max\_output\_tokens? ```ts -optional max_output_tokens: number | null; +optional max_output_tokens?: number | null; ``` ##### metadata? ```ts -optional metadata: Record | null; +optional metadata?: Record | null; ``` ##### model @@ -1553,13 +1749,13 @@ presence_penalty: number; ##### previous\_response\_id? ```ts -optional previous_response_id: string | null; +optional previous_response_id?: string | null; ``` ##### reasoning? ```ts -optional reasoning: ReasoningConfig | null; +optional reasoning?: ReasoningConfig | null; ``` ##### status @@ -1613,13 +1809,13 @@ truncation: TruncationStrategy; ##### usage? ```ts -optional usage: ResponseUsage | null; +optional usage?: ResponseUsage | null; ``` ##### user? ```ts -optional user: string | null; +optional user?: string | null; ``` *** @@ -1655,7 +1851,7 @@ input_tokens: number; ##### input\_tokens\_details? ```ts -optional input_tokens_details: { +optional input_tokens_details?: { cached_tokens: number; }; ``` @@ -1675,7 +1871,7 @@ output_tokens: number; ##### output\_tokens\_details? ```ts -optional output_tokens_details: { +optional output_tokens_details?: { reasoning_tokens: number; }; ``` @@ -1719,19 +1915,19 @@ executionProvider: string; ##### code? ```ts -optional code: string; +optional code?: string; ``` ##### message? ```ts -optional message: string; +optional message?: string; ``` ##### param? 
```ts -optional param: string; +optional param?: string; ``` ##### sequence\_number @@ -1755,13 +1951,13 @@ type: "error"; ##### format? ```ts -optional format: TextFormat; +optional format?: TextFormat; ``` ##### verbosity? ```ts -optional verbosity: string; +optional verbosity?: string; ``` *** @@ -1773,25 +1969,25 @@ optional verbosity: string; ##### description? ```ts -optional description: string; +optional description?: string; ``` ##### name? ```ts -optional name: string; +optional name?: string; ``` ##### schema? ```ts -optional schema: unknown; +optional schema?: unknown; ``` ##### strict? ```ts -optional strict: boolean; +optional strict?: boolean; ``` ##### type @@ -1809,7 +2005,7 @@ type: string; ##### name? ```ts -optional name: string; +optional name?: string; ``` ##### type diff --git a/sdk/js/docs/classes/AudioClient.md b/sdk/js/docs/classes/AudioClient.md index 7fd13bd8..e661bad0 100644 --- a/sdk/js/docs/classes/AudioClient.md +++ b/sdk/js/docs/classes/AudioClient.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / AudioClient +[foundry-local-sdk](../README.md) / AudioClient # Class: AudioClient @@ -46,24 +46,31 @@ Error - If audioFilePath is invalid or transcription fails. ### transcribeStreaming() ```ts -transcribeStreaming(audioFilePath, callback): Promise; +transcribeStreaming(audioFilePath): AsyncIterable; ``` -Transcribes audio into the input language using streaming. +Transcribes audio into the input language using streaming, returning an async iterable of chunks. #### Parameters | Parameter | Type | Description | | ------ | ------ | ------ | | `audioFilePath` | `string` | Path to the audio file to transcribe. | -| `callback` | (`chunk`) => `void` | A callback function that receives each chunk of the streaming response. | #### Returns -`Promise`\<`void`\> +`AsyncIterable`\<`any`\> -A promise that resolves when the stream is complete. +An async iterable that yields parsed streaming transcription chunks. #### Throws -Error - If audioFilePath or callback are invalid, or streaming fails. +Error - If audioFilePath is invalid, or streaming fails. + +#### Example + +```typescript +for await (const chunk of audioClient.transcribeStreaming('recording.wav')) { + process.stdout.write(chunk.text); +} +``` diff --git a/sdk/js/docs/classes/AudioClientSettings.md b/sdk/js/docs/classes/AudioClientSettings.md index 619c526b..49e806dc 100644 --- a/sdk/js/docs/classes/AudioClientSettings.md +++ b/sdk/js/docs/classes/AudioClientSettings.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / AudioClientSettings +[foundry-local-sdk](../README.md) / AudioClientSettings # Class: AudioClientSettings @@ -19,7 +19,7 @@ new AudioClientSettings(): AudioClientSettings; ### language? ```ts -optional language: string; +optional language?: string; ``` *** @@ -27,5 +27,5 @@ optional language: string; ### temperature? ```ts -optional temperature: number; +optional temperature?: number; ``` diff --git a/sdk/js/docs/classes/Catalog.md b/sdk/js/docs/classes/Catalog.md index b77f254f..78ce821c 100644 --- a/sdk/js/docs/classes/Catalog.md +++ b/sdk/js/docs/classes/Catalog.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / Catalog +[foundry-local-sdk](../README.md) / Catalog # Class: Catalog @@ -47,7 +47,7 @@ The name of the catalog. ### getCachedModels() ```ts -getCachedModels(): Promise; +getCachedModels(): Promise; ``` Retrieves a list of all locally cached model variants. 
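A minimal usage sketch (assuming an already-constructed `catalog` instance; the variable names are illustrative):

```typescript
// List locally cached model variants and print their identifiers.
const cached = await catalog.getCachedModels();
for (const model of cached) {
  console.log(`${model.alias} (${model.id})`);
}
```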
@@ -55,16 +55,39 @@ This method is asynchronous as it may involve file I/O or querying the underlyin #### Returns -`Promise`\<[`ModelVariant`](ModelVariant.md)[]\> +`Promise`\<[`IModel`](../README.md#imodel)[]\> -A Promise that resolves to an array of cached ModelVariant objects. +A Promise that resolves to an array of cached IModel objects. + +*** + +### getLatestVersion() + +```ts +getLatestVersion(modelOrModelVariant): Promise<IModel>; +``` + +Get the latest version of a model. +This is used to check if a newer version of a model is available in the catalog for download. + +#### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `modelOrModelVariant` | [`IModel`](../README.md#imodel) | The model to check for the latest version. | + +#### Returns + +`Promise`\<[`IModel`](../README.md#imodel)\> + +The latest version of the model. Will match the input if it is the latest version. *** ### getLoadedModels() ```ts -getLoadedModels(): Promise<ModelVariant[]>; +getLoadedModels(): Promise<IModel[]>; ``` Retrieves a list of all currently loaded model variants. @@ -73,16 +96,16 @@ the underlying core or an external service, which can be an I/O bound operation. #### Returns -`Promise`\<[`ModelVariant`](ModelVariant.md)[]\> +`Promise`\<[`IModel`](../README.md#imodel)[]\> -A Promise that resolves to an array of loaded ModelVariant objects. +A Promise that resolves to an array of loaded IModel objects. *** ### getModel() ```ts -getModel(alias): Promise<Model>; +getModel(alias): Promise<IModel>; ``` Retrieves a model by its alias. @@ -96,9 +119,9 @@ This method is asynchronous as it may ensure the catalog is up-to-date by fetchi #### Returns -`Promise`\<[`Model`](Model.md)\> +`Promise`\<[`IModel`](../README.md#imodel)\> -A Promise that resolves to the Model object if found, otherwise throws an error. +A Promise that resolves to the IModel object if found, otherwise throws an error. #### Throws Error - If alias is null, undefined, or empty. *** ### getModels() ```ts -getModels(): Promise<Model[]>; +getModels(): Promise<IModel[]>; ``` Lists all available models in the catalog. @@ -117,19 +140,21 @@ This method is asynchronous as it may fetch the model list from a remote service #### Returns -`Promise`\<[`Model`](Model.md)[]\> +`Promise`\<[`IModel`](../README.md#imodel)[]\> -A Promise that resolves to an array of Model objects. +A Promise that resolves to an array of IModel objects. *** ### getModelVariant() ```ts -getModelVariant(modelId): Promise<ModelVariant>; +getModelVariant(modelId): Promise<IModel>; ``` Retrieves a specific model variant by its ID. +NOTE: This will return an IModel with a single variant. Use getModel to get an IModel with all available +variants. This method is asynchronous as it may ensure the catalog is up-to-date by fetching from a remote service. #### Parameters @@ -140,9 +165,9 @@ This method is asynchronous as it may ensure the catalog is up-to-date by fetchi #### Returns -`Promise`\<[`ModelVariant`](ModelVariant.md)\> +`Promise`\<[`IModel`](../README.md#imodel)\> -A Promise that resolves to the ModelVariant object if found, otherwise throws an error. +A Promise that resolves to the IModel object if found, otherwise throws an error. #### Throws
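The generated docs give no end-to-end example for the new `getLatestVersion` flow; the following is a minimal sketch of the intended usage (the `manager` instance and the `'qwen2.5-0.5b'` alias are assumptions for illustration, mirroring the updated chat-completion example):

```typescript
// Check whether a cached model has a newer version in the catalog,
// and download the newer variant if one exists.
const catalog = manager.catalog;

const model = await catalog.getModel('qwen2.5-0.5b'); // alias is illustrative
const latest = await catalog.getLatestVersion(model);

if (latest.id !== model.id) {
  // A newer variant exists; getLatestVersion returned it instead of the input.
  await latest.download((percent) => console.log(`Downloading update: ${percent}%`));
}
```

Per the contract above, `getLatestVersion` returns the input when it is already the latest version, so comparing `id` values is enough to detect an available update.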
diff --git a/sdk/js/docs/classes/ChatClient.md b/sdk/js/docs/classes/ChatClient.md index 91e877aa..26cc6f0c 100644 --- a/sdk/js/docs/classes/ChatClient.md +++ b/sdk/js/docs/classes/ChatClient.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / ChatClient +[foundry-local-sdk](../README.md) / ChatClient # Class: ChatClient @@ -75,53 +75,80 @@ Error - If messages or tools are invalid or completion fails. #### Call Signature ```ts -completeStreamingChat(messages, callback): Promise<void>; +completeStreamingChat(messages): AsyncIterable<any>; ``` -Performs a streaming chat completion. +Performs a streaming chat completion, returning an async iterable of chunks. ##### Parameters | Parameter | Type | Description | | ------ | ------ | ------ | | `messages` | `any`[] | An array of message objects. | -| `callback` | (`chunk`) => `void` | A callback function that receives each chunk of the streaming response. | ##### Returns -`Promise`\<`void`\> +`AsyncIterable`\<`any`\> -A promise that resolves when the stream is complete. +An async iterable that yields parsed streaming response chunks. ##### Throws -Error - If messages, tools, or callback are invalid, or streaming fails. +Error - If messages or tools are invalid, or streaming fails. + +##### Example + +```typescript +// Without tools: +for await (const chunk of chatClient.completeStreamingChat(messages)) { + const content = chunk.choices?.[0]?.delta?.content; + if (content) process.stdout.write(content); +} + +// With tools: +for await (const chunk of chatClient.completeStreamingChat(messages, tools)) { + const content = chunk.choices?.[0]?.delta?.content; + if (content) process.stdout.write(content); +} +``` #### Call Signature ```ts -completeStreamingChat( - messages, - tools, -callback): Promise<void>; +completeStreamingChat(messages, tools): AsyncIterable<any>; ``` -Performs a streaming chat completion. +Performs a streaming chat completion, returning an async iterable of chunks. ##### Parameters | Parameter | Type | Description | | ------ | ------ | ------ | | `messages` | `any`[] | An array of message objects. | -| `tools` | `any`[] | An array of tool objects. | -| `callback` | (`chunk`) => `void` | A callback function that receives each chunk of the streaming response. | +| `tools` | `any`[] | An optional array of tool objects. | ##### Returns -`Promise`\<`void`\> +`AsyncIterable`\<`any`\> -A promise that resolves when the stream is complete. +An async iterable that yields parsed streaming response chunks. ##### Throws -Error - If messages, tools, or callback are invalid, or streaming fails. +Error - If messages or tools are invalid, or streaming fails.
+ +##### Example + +```typescript +// Without tools: +for await (const chunk of chatClient.completeStreamingChat(messages)) { + const content = chunk.choices?.[0]?.delta?.content; + if (content) process.stdout.write(content); +} + +// With tools: +for await (const chunk of chatClient.completeStreamingChat(messages, tools)) { + const content = chunk.choices?.[0]?.delta?.content; + if (content) process.stdout.write(content); +} +``` diff --git a/sdk/js/docs/classes/ChatClientSettings.md b/sdk/js/docs/classes/ChatClientSettings.md index 7fed8a46..323bd3ca 100644 --- a/sdk/js/docs/classes/ChatClientSettings.md +++ b/sdk/js/docs/classes/ChatClientSettings.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / ChatClientSettings +[foundry-local-sdk](../README.md) / ChatClientSettings # Class: ChatClientSettings @@ -19,7 +19,7 @@ new ChatClientSettings(): ChatClientSettings; ### frequencyPenalty? ```ts -optional frequencyPenalty: number; +optional frequencyPenalty?: number; ``` *** @@ -27,7 +27,7 @@ optional frequencyPenalty: number; ### maxTokens? ```ts -optional maxTokens: number; +optional maxTokens?: number; ``` *** @@ -35,7 +35,7 @@ optional maxTokens: number; ### n? ```ts -optional n: number; +optional n?: number; ``` *** @@ -43,7 +43,7 @@ optional n: number; ### presencePenalty? ```ts -optional presencePenalty: number; +optional presencePenalty?: number; ``` *** @@ -51,7 +51,7 @@ optional presencePenalty: number; ### randomSeed? ```ts -optional randomSeed: number; +optional randomSeed?: number; ``` *** @@ -59,7 +59,7 @@ optional randomSeed: number; ### responseFormat? ```ts -optional responseFormat: ResponseFormat; +optional responseFormat?: ResponseFormat; ``` *** @@ -67,7 +67,7 @@ optional responseFormat: ResponseFormat; ### temperature? ```ts -optional temperature: number; +optional temperature?: number; ``` *** @@ -75,7 +75,7 @@ optional temperature: number; ### toolChoice? ```ts -optional toolChoice: ToolChoice; +optional toolChoice?: ToolChoice; ``` *** @@ -83,7 +83,7 @@ optional toolChoice: ToolChoice; ### topK? ```ts -optional topK: number; +optional topK?: number; ``` *** @@ -91,5 +91,5 @@ optional topK: number; ### topP? ```ts -optional topP: number; +optional topP?: number; ``` diff --git a/sdk/js/docs/classes/FoundryLocalManager.md b/sdk/js/docs/classes/FoundryLocalManager.md index fb9a4783..6ca963f7 100644 --- a/sdk/js/docs/classes/FoundryLocalManager.md +++ b/sdk/js/docs/classes/FoundryLocalManager.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / FoundryLocalManager +[foundry-local-sdk](../README.md) / FoundryLocalManager # Class: FoundryLocalManager @@ -87,6 +87,101 @@ Error - If the web service is not running. *** +### discoverEps() + +```ts +discoverEps(): EpInfo[]; +``` + +Discovers available execution providers (EPs) and their registration status. + +#### Returns + +[`EpInfo`](../README.md#epinfo)[] + +An array of EpInfo describing each available EP. + +*** + +### downloadAndRegisterEps() + +#### Call Signature + +```ts +downloadAndRegisterEps(): Promise<EpDownloadResult>; +``` + +Downloads and registers execution providers. + +##### Returns + +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> + +A promise that resolves with an EpDownloadResult describing the outcome. + +#### Call Signature + +```ts +downloadAndRegisterEps(names): Promise<EpDownloadResult>; +``` + +Downloads and registers execution providers. + +##### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `names` | `string`[] | Array of EP names to download. | + +##### Returns + +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> + +A promise that resolves with an EpDownloadResult describing the outcome. + +#### Call Signature + +```ts +downloadAndRegisterEps(progressCallback): Promise<EpDownloadResult>; +``` + +Downloads and registers execution providers, reporting progress. + +##### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `progressCallback` | (`epName`, `percent`) => `void` | Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. | + +##### Returns + +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> + +A promise that resolves with an EpDownloadResult describing the outcome. + +#### Call Signature + +```ts +downloadAndRegisterEps(names, progressCallback): Promise<EpDownloadResult>; +``` + +Downloads and registers execution providers, reporting progress. + +##### Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `names` | `string`[] | Array of EP names to download. | +| `progressCallback` | (`epName`, `percent`) => `void` | Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. | + +##### Returns + +`Promise`\<[`EpDownloadResult`](../README.md#epdownloadresult)\> + +A promise that resolves with an EpDownloadResult describing the outcome.
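Taken together, the overloads above support filtered downloads with progress reporting; a short sketch follows, assuming an initialized `FoundryLocalManager` named `manager` (as in the updated chat-completion example):

```typescript
// Register only the execution providers that are not yet registered,
// logging per-EP download progress.
const missing = manager.discoverEps()
  .filter((ep) => !ep.isRegistered)
  .map((ep) => ep.name);

if (missing.length > 0) {
  const result = await manager.downloadAndRegisterEps(missing, (epName, percent) => {
    console.log(`${epName}: ${percent}%`);
  });
  if (!result.success) {
    console.warn(`Failed to register: ${result.failedEps.join(', ')}`);
  }
}
```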
+ +*** + ### startWebService() ```ts diff --git a/sdk/js/docs/classes/Model.md b/sdk/js/docs/classes/Model.md index 48340dae..f678f873 100644 --- a/sdk/js/docs/classes/Model.md +++ b/sdk/js/docs/classes/Model.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / Model +[foundry-local-sdk](../README.md) / Model # Class: Model @@ -21,7 +21,7 @@ new Model(variant): Model; | Parameter | Type | | ------ | ------ | -| `variant` | [`ModelVariant`](ModelVariant.md) | +| `variant` | `ModelVariant` | #### Returns @@ -51,6 +51,42 @@ The model alias. *** +### capabilities + +#### Get Signature + +```ts +get capabilities(): string | null; +``` + +##### Returns + +`string` \| `null` + +#### Implementation of + +[`IModel`](../README.md#imodel).[`capabilities`](../README.md#capabilities) + +*** + +### contextLength + +#### Get Signature + +```ts +get contextLength(): number | null; +``` + +##### Returns + +`number` \| `null` + +#### Implementation of + +[`IModel`](../README.md#imodel).[`contextLength`](../README.md#contextlength) + +*** + ### id #### Get Signature @@ -73,6 +109,46 @@ The ID of the selected variant. *** +### info + +#### Get Signature + +```ts +get info(): ModelInfo; +``` + +Gets the ModelInfo of the currently selected variant. + +##### Returns + +[`ModelInfo`](../README.md#modelinfo) + +The ModelInfo object. + +#### Implementation of + +[`IModel`](../README.md#imodel).[`info`](../README.md#info) + +*** + +### inputModalities + +#### Get Signature + +```ts +get inputModalities(): string | null; +``` + +##### Returns + +`string` \| `null` + +#### Implementation of + +[`IModel`](../README.md#imodel).[`inputModalities`](../README.md#inputmodalities) + +*** + ### isCached #### Get Signature @@ -95,6 +171,24 @@ True if cached, false otherwise. *** +### outputModalities + +#### Get Signature + +```ts +get outputModalities(): string | null; +``` + +##### Returns + +`string` \| `null` + +#### Implementation of + +[`IModel`](../README.md#imodel).[`outputModalities`](../README.md#outputmodalities) + +*** + ### path #### Get Signature @@ -117,48 +211,45 @@ The local file path.
*** -### variants +### supportsToolCalling #### Get Signature ```ts -get variants(): ModelVariant[]; +get supportsToolCalling(): boolean | null; ``` -Gets all available variants for this model. - ##### Returns -[`ModelVariant`](ModelVariant.md)[] +`boolean` \| `null` + +#### Implementation of -An array of ModelVariant objects. +[`IModel`](../README.md#imodel).[`supportsToolCalling`](../README.md#supportstoolcalling) -## Methods +*** -### addVariant() +### variants + +#### Get Signature ```ts -addVariant(variant): void; +get variants(): IModel[]; ``` -Adds a new variant to this model. -Automatically selects the new variant if it is cached and the current one is not. - -#### Parameters +Gets all available variants for this model. -| Parameter | Type | Description | -| ------ | ------ | ------ | -| `variant` | [`ModelVariant`](ModelVariant.md) | The model variant to add. | +##### Returns -#### Returns +[`IModel`](../README.md#imodel)[] -`void` +An array of IModel objects. -#### Throws +#### Implementation of -Error - If the variant's alias does not match the model's alias. +[`IModel`](../README.md#imodel).[`variants`](../README.md#variants) -*** +## Methods ### createAudioClient() @@ -320,7 +411,7 @@ Selects a specific variant. | Parameter | Type | Description | | ------ | ------ | ------ | -| `variant` | [`ModelVariant`](ModelVariant.md) | The model variant to select. | +| `variant` | [`IModel`](../README.md#imodel) | The model variant to select. Must be one of the variants in `variants`. | #### Returns @@ -328,7 +419,11 @@ Selects a specific variant. #### Throws -Error - If the argument is not a ModelVariant object, or if the variant does not belong to this model. +Error - If the variant does not belong to this model. + +#### Implementation of + +[`IModel`](../README.md#imodel).[`selectVariant`](../README.md#selectvariant) *** diff --git a/sdk/js/docs/classes/ModelLoadManager.md b/sdk/js/docs/classes/ModelLoadManager.md index f445659b..564d561f 100644 --- a/sdk/js/docs/classes/ModelLoadManager.md +++ b/sdk/js/docs/classes/ModelLoadManager.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / ModelLoadManager +[foundry-local-sdk](../README.md) / ModelLoadManager # Class: ModelLoadManager diff --git a/sdk/js/docs/classes/ModelVariant.md b/sdk/js/docs/classes/ModelVariant.md deleted file mode 100644 index 837ead70..00000000 --- a/sdk/js/docs/classes/ModelVariant.md +++ /dev/null @@ -1,307 +0,0 @@ -[@prathikrao/foundry-local-sdk](../README.md) / ModelVariant - -# Class: ModelVariant - -Represents a specific variant of a model (e.g., a specific quantization or format). -Contains the low-level implementation for interacting with the model. - -## Implements - -- [`IModel`](../README.md#imodel) - -## Constructors - -### Constructor - -```ts -new ModelVariant( - modelInfo, - coreInterop, - modelLoadManager): ModelVariant; -``` - -#### Parameters - -| Parameter | Type | -| ------ | ------ | -| `modelInfo` | [`ModelInfo`](../README.md#modelinfo) | -| `coreInterop` | `CoreInterop` | -| `modelLoadManager` | [`ModelLoadManager`](ModelLoadManager.md) | - -#### Returns - -`ModelVariant` - -## Accessors - -### alias - -#### Get Signature - -```ts -get alias(): string; -``` - -Gets the alias of the model. - -##### Returns - -`string` - -The model alias. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`alias`](../README.md#alias) - -*** - -### id - -#### Get Signature - -```ts -get id(): string; -``` - -Gets the unique identifier of the model variant. 
- -##### Returns - -`string` - -The model ID. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`id`](../README.md#id-3) - -*** - -### isCached - -#### Get Signature - -```ts -get isCached(): boolean; -``` - -Checks if the model variant is cached locally. - -##### Returns - -`boolean` - -True if cached, false otherwise. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`isCached`](../README.md#iscached) - -*** - -### modelInfo - -#### Get Signature - -```ts -get modelInfo(): ModelInfo; -``` - -Gets the detailed information about the model variant. - -##### Returns - -[`ModelInfo`](../README.md#modelinfo) - -The ModelInfo object. - -*** - -### path - -#### Get Signature - -```ts -get path(): string; -``` - -Gets the local file path of the model variant. - -##### Returns - -`string` - -The local file path. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`path`](../README.md#path) - -## Methods - -### createAudioClient() - -```ts -createAudioClient(): AudioClient; -``` - -Creates an AudioClient for interacting with the model via audio operations. - -#### Returns - -[`AudioClient`](AudioClient.md) - -An AudioClient instance. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`createAudioClient`](../README.md#createaudioclient) - -*** - -### createChatClient() - -```ts -createChatClient(): ChatClient; -``` - -Creates a ChatClient for interacting with the model via chat completions. - -#### Returns - -[`ChatClient`](ChatClient.md) - -A ChatClient instance. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`createChatClient`](../README.md#createchatclient) - -*** - -### createResponsesClient() - -```ts -createResponsesClient(baseUrl): ResponsesClient; -``` - -Creates a ResponsesClient for interacting with the model via the Responses API. - -#### Parameters - -| Parameter | Type | Description | -| ------ | ------ | ------ | -| `baseUrl` | `string` | The base URL of the Foundry Local web service. | - -#### Returns - -[`ResponsesClient`](ResponsesClient.md) - -A ResponsesClient instance. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`createResponsesClient`](../README.md#createresponsesclient) - -*** - -### download() - -```ts -download(progressCallback?): Promise<void>; -``` - -Downloads the model variant. - -#### Parameters - -| Parameter | Type | Description | -| ------ | ------ | ------ | -| `progressCallback?` | (`progress`) => `void` | Optional callback to report download progress (0-100). | - -#### Returns - -`Promise`\<`void`\> - -#### Implementation of - -[`IModel`](../README.md#imodel).[`download`](../README.md#download) - -*** - -### isLoaded() - -```ts -isLoaded(): Promise<boolean>; -``` - -Checks if the model variant is loaded in memory. - -#### Returns - -`Promise`\<`boolean`\> - -True if loaded, false otherwise. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`isLoaded`](../README.md#isloaded) - -*** - -### load() - -```ts -load(): Promise<void>; -``` - -Loads the model variant into memory. - -#### Returns - -`Promise`\<`void`\> - -A promise that resolves when the model is loaded. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`load`](../README.md#load) - -*** - -### removeFromCache() - -```ts -removeFromCache(): void; -``` - -Removes the model variant from the local cache.
- -#### Returns - -`void` - -#### Implementation of - -[`IModel`](../README.md#imodel).[`removeFromCache`](../README.md#removefromcache) - -*** - -### unload() - -```ts -unload(): Promise<void>; -``` - -Unloads the model variant from memory. - -#### Returns - -`Promise`\<`void`\> - -A promise that resolves when the model is unloaded. - -#### Implementation of - -[`IModel`](../README.md#imodel).[`unload`](../README.md#unload) diff --git a/sdk/js/docs/classes/ResponsesClient.md b/sdk/js/docs/classes/ResponsesClient.md index 5ee70c81..0ccd9a60 100644 --- a/sdk/js/docs/classes/ResponsesClient.md +++ b/sdk/js/docs/classes/ResponsesClient.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / ResponsesClient +[foundry-local-sdk](../README.md) / ResponsesClient # Class: ResponsesClient diff --git a/sdk/js/docs/classes/ResponsesClientSettings.md b/sdk/js/docs/classes/ResponsesClientSettings.md index 08b9ea94..47dfc55e 100644 --- a/sdk/js/docs/classes/ResponsesClientSettings.md +++ b/sdk/js/docs/classes/ResponsesClientSettings.md @@ -1,4 +1,4 @@ -[@prathikrao/foundry-local-sdk](../README.md) / ResponsesClientSettings +[foundry-local-sdk](../README.md) / ResponsesClientSettings # Class: ResponsesClientSettings @@ -22,7 +22,7 @@ new ResponsesClientSettings(): ResponsesClientSettings; ### frequencyPenalty? ```ts -optional frequencyPenalty: number; +optional frequencyPenalty?: number; ``` *** @@ -30,7 +30,7 @@ optional frequencyPenalty: number; ### instructions? ```ts -optional instructions: string; +optional instructions?: string; ``` System-level instructions to guide the model. @@ -40,7 +40,7 @@ System-level instructions to guide the model. ### maxOutputTokens? ```ts -optional maxOutputTokens: number; +optional maxOutputTokens?: number; ``` *** @@ -48,7 +48,7 @@ optional maxOutputTokens: number; ### metadata? ```ts -optional metadata: Record; +optional metadata?: Record; ``` *** @@ -56,7 +56,7 @@ optional metadata: Record; ### parallelToolCalls? ```ts -optional parallelToolCalls: boolean; +optional parallelToolCalls?: boolean; ``` *** @@ -64,7 +64,7 @@ optional parallelToolCalls: boolean; ### presencePenalty? ```ts -optional presencePenalty: number; +optional presencePenalty?: number; ``` *** @@ -72,7 +72,7 @@ optional presencePenalty: number; ### reasoning? ```ts -optional reasoning: ReasoningConfig; +optional reasoning?: ReasoningConfig; ``` *** @@ -80,7 +80,7 @@ optional reasoning: ReasoningConfig; ### seed? ```ts -optional seed: number; +optional seed?: number; ``` *** @@ -88,7 +88,7 @@ optional seed: number; ### store? ```ts -optional store: boolean; +optional store?: boolean; ``` *** @@ -96,7 +96,7 @@ optional store: boolean; ### temperature? ```ts -optional temperature: number; +optional temperature?: number; ``` *** @@ -104,7 +104,7 @@ optional temperature: number; ### text? ```ts -optional text: TextConfig; +optional text?: TextConfig; ``` *** @@ -112,7 +112,7 @@ optional text: TextConfig; ### toolChoice? ```ts -optional toolChoice: ResponseToolChoice; +optional toolChoice?: ResponseToolChoice; ``` *** @@ -120,7 +120,7 @@ optional toolChoice: ResponseToolChoice; ### topP? ```ts -optional topP: number; +optional topP?: number; ``` *** @@ -128,5 +128,5 @@ optional topP: number; ### truncation?
```ts -optional truncation: TruncationStrategy; +optional truncation?: TruncationStrategy; ``` diff --git a/sdk/js/examples/audio-transcription.ts b/sdk/js/examples/audio-transcription.ts index 7fddf2d8..4e4fc2d4 100644 --- a/sdk/js/examples/audio-transcription.ts +++ b/sdk/js/examples/audio-transcription.ts @@ -72,9 +72,9 @@ async function main() { // Example: Streaming transcription console.log('\nTesting streaming transcription...'); - await audioClient.transcribeStreaming(audioFilePath, (chunk: any) => { + for await (const chunk of audioClient.transcribeStreaming(audioFilePath)) { process.stdout.write(chunk.text); - }); + } console.log('\n'); // Unload the model diff --git a/sdk/js/examples/chat-completion.ts b/sdk/js/examples/chat-completion.ts index 2c283e23..f18b989c 100644 --- a/sdk/js/examples/chat-completion.ts +++ b/sdk/js/examples/chat-completion.ts @@ -18,6 +18,17 @@ async function main() { }); console.log('✓ SDK initialized successfully'); + const availableEps = manager.discoverEps(); + console.log(`\nAvailable execution providers: ${availableEps.map((ep) => ep.name).join(', ')}`); + + console.log('\nDownloading and registering execution providers...'); + const downloadResult = await manager.downloadAndRegisterEps(); + if (downloadResult.success) { + console.log('✓ All execution providers registered successfully'); + } else { + console.log(`⚠️ Some execution providers failed to download and/or register: ${downloadResult.failedEps.join(', ')}`); + } + // Explore available models console.log('\nFetching available models...'); const catalog = manager.catalog; @@ -37,7 +48,7 @@ async function main() { console.log(` - ${cachedModel.alias}`); } - const modelAlias = 'MODEL_ALIAS'; // Replace with a valid model alias from the list above + const modelAlias = 'qwen2.5-0.5b'; // Load the model first console.log(`\nLoading model ${modelAlias}...`); @@ -70,15 +81,14 @@ async function main() { // Example streaming completion console.log('\nTesting streaming completion...'); - await chatClient.completeStreamingChat( - [{ role: 'user', content: 'Write a short poem about programming.' }], - (chunk) => { - const content = chunk.choices?.[0]?.message?.content; - if (content) { - process.stdout.write(content); - } + for await (const chunk of chatClient.completeStreamingChat( + [{ role: 'user', content: 'Write a short poem about programming.' 
}] + )) { + const content = chunk.choices?.[0]?.message?.content; + if (content) { + process.stdout.write(content); } - ); + } console.log('\n'); // Model management example diff --git a/sdk/js/examples/tool-calling.ts b/sdk/js/examples/tool-calling.ts index bb4ed541..c3640a8f 100644 --- a/sdk/js/examples/tool-calling.ts +++ b/sdk/js/examples/tool-calling.ts @@ -109,22 +109,18 @@ async function main() { let toolCallData: any = null; console.log('Chat completion response:'); - await chatClient.completeStreamingChat( - messages, - tools, - (chunk: any) => { - const content = chunk.choices?.[0]?.message?.content; - if (content) { - process.stdout.write(content); - } - - // Capture tool call data - const toolCalls = chunk.choices?.[0]?.message?.tool_calls; - if (toolCalls && toolCalls.length > 0) { - toolCallData = toolCalls[0]; - } + for await (const chunk of chatClient.completeStreamingChat(messages, tools)) { + const content = chunk.choices?.[0]?.message?.content; + if (content) { + process.stdout.write(content); + } + + // Capture tool call data + const toolCalls = chunk.choices?.[0]?.message?.tool_calls; + if (toolCalls && toolCalls.length > 0) { + toolCallData = toolCalls[0]; } - ); + } console.log('\n'); // Handle tool invocation @@ -159,16 +155,12 @@ async function main() { }; console.log('Chat completion response:'); - await chatClient.completeStreamingChat( - messages, - tools, - (chunk: any) => { - const content = chunk.choices?.[0]?.message?.content; - if (content) { - process.stdout.write(content); - } + for await (const chunk of chatClient.completeStreamingChat(messages, tools)) { + const content = chunk.choices?.[0]?.message?.content; + if (content) { + process.stdout.write(content); } - ); + } console.log('\n'); console.log('\n✓ Example completed successfully'); diff --git a/sdk/js/package.json b/sdk/js/package.json index bdfadf5e..abe390f2 100644 --- a/sdk/js/package.json +++ b/sdk/js/package.json @@ -1,19 +1,25 @@ { - "name": "@prathikrao/foundry-local-sdk", - "version": "0.0.3", + "name": "foundry-local-sdk", + "version": "1.0.0", "description": "Foundry Local JavaScript SDK", "main": "dist/index.js", "types": "dist/index.d.ts", "type": "module", "files": [ "dist", - "script" + "script/install-standard.cjs", + "script/install-winml.cjs", + "script/install-utils.cjs", + "script/pack.cjs", + "script/preinstall.cjs" ], "scripts": { "build": "tsc -p tsconfig.build.json", "docs": "typedoc", "example": "tsx examples/chat-completion.ts", - "install": "node script/install.cjs", + "install": "node script/install-standard.cjs", + "pack": "node script/pack.cjs", + "pack:winml": "node script/pack.cjs winml", "preinstall": "node script/preinstall.cjs", "test": "mocha --import=tsx test/**/*.test.ts" }, @@ -45,4 +51,4 @@ }, "author": "", "license": "ISC" -} +} \ No newline at end of file diff --git a/sdk/js/script/install-standard.cjs b/sdk/js/script/install-standard.cjs new file mode 100644 index 00000000..f56df943 --- /dev/null +++ b/sdk/js/script/install-standard.cjs @@ -0,0 +1,26 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Install script for foundry-local-sdk (standard variant). 
+ +'use strict'; + +const os = require('os'); +const { NUGET_FEED, ORT_NIGHTLY_FEED, runInstall } = require('./install-utils.cjs'); + +const useNightly = process.env.npm_config_nightly === 'true'; + +const ARTIFACTS = [ + { name: 'Microsoft.AI.Foundry.Local.Core', version: '1.0.0-rc1', feed: ORT_NIGHTLY_FEED, nightly: useNightly }, + { name: os.platform() === 'linux' ? 'Microsoft.ML.OnnxRuntime.Gpu.Linux' : 'Microsoft.ML.OnnxRuntime.Foundry', version: '1.24.3', feed: NUGET_FEED, nightly: false }, + { name: 'Microsoft.ML.OnnxRuntimeGenAI.Foundry', version: '0.12.2', feed: NUGET_FEED, nightly: false }, +]; + +(async () => { + try { + await runInstall(ARTIFACTS); + } catch (err) { + console.error('[foundry-local] Installation failed:', err instanceof Error ? err.message : err); + process.exit(1); + } +})(); diff --git a/sdk/js/script/install-utils.cjs b/sdk/js/script/install-utils.cjs new file mode 100644 index 00000000..f9a5186c --- /dev/null +++ b/sdk/js/script/install-utils.cjs @@ -0,0 +1,193 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Shared NuGet download and extraction utilities for install scripts. + +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const https = require('https'); +const AdmZip = require('adm-zip'); + +const PLATFORM_MAP = { + 'win32-x64': 'win-x64', + 'win32-arm64': 'win-arm64', + 'linux-x64': 'linux-x64', + 'darwin-arm64': 'osx-arm64', +}; +const platformKey = `${os.platform()}-${os.arch()}`; +const RID = PLATFORM_MAP[platformKey]; +const BIN_DIR = path.join(__dirname, '..', 'packages', '@foundry-local-core', platformKey); +const EXT = os.platform() === 'win32' ? '.dll' : os.platform() === 'darwin' ? '.dylib' : '.so'; + +const REQUIRED_FILES = [ + `Microsoft.AI.Foundry.Local.Core${EXT}`, + `${os.platform() === 'win32' ? '' : 'lib'}onnxruntime${EXT}`, + `${os.platform() === 'win32' ? 
'' : 'lib'}onnxruntime-genai${EXT}`, +]; + +const NUGET_FEED = 'https://api.nuget.org/v3/index.json'; +const ORT_NIGHTLY_FEED = 'https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json'; + +// --- Download helpers --- + +async function downloadWithRetryAndRedirects(url, destStream = null) { + const maxRedirects = 5; + let currentUrl = url; + let redirects = 0; + + while (redirects < maxRedirects) { + const response = await new Promise((resolve, reject) => { + https.get(currentUrl, (res) => resolve(res)) + .on('error', reject); + }); + + if (response.statusCode >= 300 && response.statusCode < 400 && response.headers.location) { + currentUrl = response.headers.location; + response.resume(); + redirects++; + console.log(` Following redirect to ${new URL(currentUrl).host}...`); + continue; + } + + if (response.statusCode !== 200) { + throw new Error(`Download failed with status ${response.statusCode}: ${currentUrl}`); + } + + if (destStream) { + response.pipe(destStream); + return new Promise((resolve, reject) => { + destStream.on('finish', resolve); + destStream.on('error', reject); + response.on('error', reject); + }); + } else { + let data = ''; + response.on('data', chunk => data += chunk); + return new Promise((resolve, reject) => { + response.on('end', () => resolve(data)); + response.on('error', reject); + }); + } + } + throw new Error('Too many redirects'); +} + +async function downloadJson(url) { + return JSON.parse(await downloadWithRetryAndRedirects(url)); +} + +async function downloadFile(url, dest) { + const file = fs.createWriteStream(dest); + try { + await downloadWithRetryAndRedirects(url, file); + file.close(); + } catch (e) { + file.close(); + if (fs.existsSync(dest)) fs.unlinkSync(dest); + throw e; + } +} + +const serviceIndexCache = new Map(); + +async function getBaseAddress(feedUrl) { + if (!serviceIndexCache.has(feedUrl)) { + serviceIndexCache.set(feedUrl, await downloadJson(feedUrl)); + } + const resources = serviceIndexCache.get(feedUrl).resources || []; + const res = resources.find(r => r['@type'] && r['@type'].startsWith('PackageBaseAddress/3.0.0')); + if (!res) throw new Error('Could not find PackageBaseAddress/3.0.0 in NuGet feed.'); + const baseAddress = res['@id']; + return baseAddress.endsWith('/') ? 
baseAddress : baseAddress + '/'; +} + +async function resolveLatestVersion(feedUrl, packageName) { + const baseAddress = await getBaseAddress(feedUrl); + const versionsUrl = `${baseAddress}${packageName.toLowerCase()}/index.json`; + const versionData = await downloadJson(versionsUrl); + const versions = versionData.versions || []; + if (versions.length === 0) throw new Error(`No versions found for ${packageName}`); + versions.sort((a, b) => b.localeCompare(a)); + console.log(`[foundry-local] Latest version of ${packageName}: ${versions[0]}`); + return versions[0]; +} + +async function installPackage(artifact, tempDir) { + const pkgName = artifact.name; + let pkgVer = artifact.version; + if (artifact.nightly) { + console.log(` Resolving latest version for ${pkgName}...`); + pkgVer = await resolveLatestVersion(artifact.feed, pkgName); + } + + const baseAddress = await getBaseAddress(artifact.feed); + const nameLower = pkgName.toLowerCase(); + const verLower = pkgVer.toLowerCase(); + const downloadUrl = `${baseAddress}${nameLower}/${verLower}/${nameLower}.${verLower}.nupkg`; + + const nupkgPath = path.join(tempDir, `${pkgName}.${pkgVer}.nupkg`); + console.log(` Downloading ${pkgName} ${pkgVer}...`); + await downloadFile(downloadUrl, nupkgPath); + + console.log(` Extracting...`); + const zip = new AdmZip(nupkgPath); + const targetPathPrefix = `runtimes/${RID}/native/`.toLowerCase(); + const entries = zip.getEntries().filter(e => { + const p = e.entryName.toLowerCase(); + return p.includes(targetPathPrefix) && p.endsWith(EXT); + }); + + if (entries.length > 0) { + entries.forEach(entry => { + zip.extractEntryTo(entry, BIN_DIR, false, true); + console.log(` Extracted ${entry.name}`); + }); + } else { + console.warn(` No files found for RID ${RID} in ${pkgName}.`); + } + + // Update platform package.json version for Core packages + if (pkgName.startsWith('Microsoft.AI.Foundry.Local.Core')) { + const pkgJsonPath = path.join(BIN_DIR, 'package.json'); + if (fs.existsSync(pkgJsonPath)) { + const pkgJson = JSON.parse(fs.readFileSync(pkgJsonPath, 'utf8')); + pkgJson.version = pkgVer; + fs.writeFileSync(pkgJsonPath, JSON.stringify(pkgJson, null, 2)); + } + } +} + +async function runInstall(artifacts) { + if (!RID) { + console.warn(`[foundry-local] Unsupported platform: ${platformKey}. Skipping.`); + return; + } + + if (fs.existsSync(BIN_DIR) && REQUIRED_FILES.every(f => fs.existsSync(path.join(BIN_DIR, f)))) { + if (process.env.npm_config_nightly === 'true') { + console.log(`[foundry-local] Nightly requested. Forcing reinstall...`); + fs.rmSync(BIN_DIR, { recursive: true, force: true }); + } else { + console.log(`[foundry-local] Native libraries already installed.`); + return; + } + } + + console.log(`[foundry-local] Installing native libraries for ${RID}...`); + fs.mkdirSync(BIN_DIR, { recursive: true }); + + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-install-')); + try { + for (const artifact of artifacts) { + await installPackage(artifact, tempDir); + } + console.log('[foundry-local] Installation complete.'); + } finally { + try { fs.rmSync(tempDir, { recursive: true, force: true }); } catch {} + } +} + +module.exports = { NUGET_FEED, ORT_NIGHTLY_FEED, runInstall }; diff --git a/sdk/js/script/install-winml.cjs b/sdk/js/script/install-winml.cjs new file mode 100644 index 00000000..aa5e3d22 --- /dev/null +++ b/sdk/js/script/install-winml.cjs @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// Install script for foundry-local-sdk-winml variant. + +'use strict'; + +const { NUGET_FEED, ORT_NIGHTLY_FEED, runInstall } = require('./install-utils.cjs'); + +const useNightly = process.env.npm_config_nightly === 'true'; + +const ARTIFACTS = [ + { name: 'Microsoft.AI.Foundry.Local.Core.WinML', version: '1.0.0-rc1', feed: ORT_NIGHTLY_FEED, nightly: useNightly }, + { name: 'Microsoft.ML.OnnxRuntime.Foundry', version: '1.23.2.3', feed: NUGET_FEED, nightly: false }, + { name: 'Microsoft.ML.OnnxRuntimeGenAI.WinML', version: '0.12.2', feed: NUGET_FEED, nightly: false }, +]; + +(async () => { + try { + await runInstall(ARTIFACTS); + } catch (err) { + console.error('Failed to install WinML artifacts:', err); + process.exit(1); + } +})(); diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs deleted file mode 100644 index 3db771b8..00000000 --- a/sdk/js/script/install.cjs +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -// Adapted from onnxruntime\js\node\script\install-utils.js -// The file in packages/ are the original source of truth that we are downloading and "installing" into our project's source tree. -// The file in node_modules/... is a symlink created by NPM to mark them as dependencies of the overall package. - -'use strict'; - -const fs = require('fs'); -const path = require('path'); -const os = require('os'); -const https = require('https'); -const AdmZip = require('adm-zip'); - -// Determine platform -const PLATFORM_MAP = { - 'win32-x64': 'win-x64', - 'win32-arm64': 'win-arm64', - 'linux-x64': 'linux-x64', - 'darwin-arm64': 'osx-arm64', -}; -const platformKey = `${os.platform()}-${os.arch()}`; -const RID = PLATFORM_MAP[platformKey]; - -if (!RID) { - console.warn(`[foundry-local] Unsupported platform: ${platformKey}. Skipping native library installation.`); - process.exit(0); -} - -// Write to the source 'packages' directory so binaries persist and link correctly via package.json -const BIN_DIR = path.join(__dirname, '..', 'packages', '@foundry-local-core', platformKey); -const REQUIRED_FILES = [ - 'Microsoft.AI.Foundry.Local.Core.dll', - 'onnxruntime.dll', - 'onnxruntime-genai.dll', -].map(f => f.replace('.dll', os.platform() === 'win32' ? '.dll' : os.platform() === 'darwin' ? '.dylib' : '.so')); - -// When you run npm install --winml, npm does not pass --winml as a command-line argument to your script. -// Instead, it sets an environment variable named npm_config_winml to 'true'. -const useWinML = process.env.npm_config_winml === 'true'; -const useNightly = process.env.npm_config_nightly === 'true'; - -console.log(`[foundry-local] WinML enabled: ${useWinML}`); -console.log(`[foundry-local] Nightly enabled: ${useNightly}`); - -const NUGET_FEED = 'https://api.nuget.org/v3/index.json'; -const ORT_FEED = 'https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT/nuget/v3/index.json'; -const ORT_NIGHTLY_FEED = 'https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json'; - -// If nightly is requested, pull Core/GenAI from the ORT-Nightly feed where nightly builds are published. -// Otherwise use the standard NuGet.org feed. -const CORE_FEED = useNightly ? 
ORT_NIGHTLY_FEED : NUGET_FEED; - -const FOUNDRY_LOCAL_CORE_ARTIFACT = { - name: 'Microsoft.AI.Foundry.Local.Core', - version: '0.9.0.8-rc3', - feed: ORT_NIGHTLY_FEED, - nightly: useNightly -} - -const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = { - name: 'Microsoft.AI.Foundry.Local.Core.WinML', - version: '0.9.0.8-rc3', - feed: ORT_NIGHTLY_FEED, - nightly: useNightly -} - -const ONNX_RUNTIME_FOUNDRY_ARTIFACT = { - name: 'Microsoft.ML.OnnxRuntime.Foundry', - version: '1.24.3', - feed: NUGET_FEED, - nightly: false -} - -const ONNX_RUNTIME_WINML_ARTIFACT = { - name: 'Microsoft.ML.OnnxRuntime.Foundry', - version: '1.23.2.3', - feed: NUGET_FEED, - nightly: false -} - -const ONNX_RUNTIME_LINUX_ARTIFACT = { - name: 'Microsoft.ML.OnnxRuntime.Gpu.Linux', - version: '1.24.3', - feed: NUGET_FEED, - nightly: false -} - -const ONNX_RUNTIME_GENAI_FOUNDRY_ARTIFACT = { - name: 'Microsoft.ML.OnnxRuntimeGenAI.Foundry', - version: '0.12.2', - feed: NUGET_FEED, - nightly: false -} - -const ONNX_RUNTIME_GENAI_WINML_ARTIFACT = { - name: 'Microsoft.ML.OnnxRuntimeGenAI.WinML', - version: '0.12.2', - feed: NUGET_FEED, - nightly: false -} - -const WINML_ARTIFACTS = [ - FOUNDRY_LOCAL_CORE_WINML_ARTIFACT, - ONNX_RUNTIME_WINML_ARTIFACT, - ONNX_RUNTIME_GENAI_WINML_ARTIFACT -]; - -const NON_WINML_ARTIFACTS = [ - FOUNDRY_LOCAL_CORE_ARTIFACT, - ONNX_RUNTIME_FOUNDRY_ARTIFACT, - ONNX_RUNTIME_GENAI_FOUNDRY_ARTIFACT -]; - -const LINUX_ARTIFACTS = [ - FOUNDRY_LOCAL_CORE_ARTIFACT, - ONNX_RUNTIME_LINUX_ARTIFACT, - ONNX_RUNTIME_GENAI_FOUNDRY_ARTIFACT -]; - -let ARTIFACTS = []; -if (useWinML) { - console.log(`[foundry-local] Using WinML artifacts...`); - ARTIFACTS = WINML_ARTIFACTS; -} else if (os.platform() === 'linux') { - console.log(`[foundry-local] Using Linux GPU artifacts...`); - ARTIFACTS = LINUX_ARTIFACTS; -} else { - console.log(`[foundry-local] Using standard artifacts...`); - ARTIFACTS = NON_WINML_ARTIFACTS; -} - -// Check if already installed -if (fs.existsSync(BIN_DIR) && REQUIRED_FILES.every(f => fs.existsSync(path.join(BIN_DIR, f)))) { - if (useNightly) { - console.log(`[foundry-local] Nightly requested. Forcing reinstall...`); - fs.rmSync(BIN_DIR, { recursive: true, force: true }); - } else { - console.log(`[foundry-local] Native libraries already installed.`); - process.exit(0); - } -} - -console.log(`[foundry-local] Installing native libraries for ${RID}...`); -fs.mkdirSync(BIN_DIR, { recursive: true }); - -async function downloadWithRetryAndRedirects(url, destStream = null) { - const maxRedirects = 5; - let currentUrl = url; - let redirects = 0; - - while (redirects < maxRedirects) { - const response = await new Promise((resolve, reject) => { - https.get(currentUrl, (res) => resolve(res)) - .on('error', reject); - }); - - // When you request a file from api.nuget.org, it rarely serves the file directly. - // Instead, it usually responds with a 302 Found or 307 Temporary Redirect pointing to a Content Delivery Network (CDN) - // or a specific Storage Account where the actual file lives. Node.js treats a redirect as a completed request so we - // need to explicitly handle it here. 
- if (response.statusCode >= 300 && response.statusCode < 400 && response.headers.location) { - currentUrl = response.headers.location; - response.resume(); // Consume/discard response data to free up socket - redirects++; - console.log(` Following redirect to ${new URL(currentUrl).host}...`); - continue; - } - - if (response.statusCode !== 200) { - throw new Error(`Download failed with status ${response.statusCode}: ${currentUrl}`); - } - - // destStream is null when the function is used to download JSON data (like NuGet feed index or package metadata) rather than a file - if (destStream) { - response.pipe(destStream); - return new Promise((resolve, reject) => { - destStream.on('finish', resolve); - destStream.on('error', reject); - response.on('error', reject); - }); - } else { - let data = ''; - response.on('data', chunk => data += chunk); - return new Promise((resolve, reject) => { - response.on('end', () => resolve(data)); - response.on('error', reject); - }); - } - } - throw new Error('Too many redirects'); -} - -async function downloadJson(url) { - const data = await downloadWithRetryAndRedirects(url); - return JSON.parse(data); -} - -async function downloadFile(url, dest) { - const file = fs.createWriteStream(dest); - try { - await downloadWithRetryAndRedirects(url, file); - file.close(); - } catch (e) { - file.close(); - if (fs.existsSync(dest)) fs.unlinkSync(dest); - throw e; - } -} - - -// Map to cache service index resources -const serviceIndexCache = new Map(); - -async function getBaseAddress(feedUrl) { - // 1. Get Service Index - if (!serviceIndexCache.has(feedUrl)) { - const index = await downloadJson(feedUrl); - serviceIndexCache.set(feedUrl, index); - } - - const serviceIndex = serviceIndexCache.get(feedUrl); - - // 2. Find PackageBaseAddress/3.0.0 - const resources = serviceIndex.resources || []; - const baseAddressRes = resources.find(r => r['@type'] && r['@type'].startsWith('PackageBaseAddress/3.0.0')); - - if (!baseAddressRes) { - throw new Error('Could not find PackageBaseAddress/3.0.0 in NuGet feed.'); - } - - const baseAddress = baseAddressRes['@id']; - // Ensure trailing slash - return baseAddress.endsWith('/') ? baseAddress : baseAddress + '/'; -} - -async function resolveLatestVersion(feedUrl, packageName) { - const baseAddress = await getBaseAddress(feedUrl); - const nameLower = packageName.toLowerCase(); - - // Fetch version list: {baseAddress}/{lower_id}/index.json - const versionsUrl = `${baseAddress}${nameLower}/index.json`; - try { - const versionData = await downloadJson(versionsUrl); - const versions = versionData.versions || []; - - if (versions.length === 0) { - throw new Error('No versions found'); - } - - // Sort descending to prioritize latest date-based versions (e.g. 0.9.0-dev.YYYYMMDD...) - versions.sort((a, b) => b.localeCompare(a)); - - const latestVersion = versions[0]; - console.log(`[foundry-local] Installing latest version of Foundry Local Core: ${latestVersion}`); - return latestVersion; - } catch (e) { - throw new Error(`Failed to fetch versions for ${packageName} from ${versionsUrl}: ${e.message}`); - } -} - -async function resolvePackageRawUrl(feedUrl, packageName, version) { - const properBase = await getBaseAddress(feedUrl); - - // 3. 
Construct .nupkg URL (lowercase is standard for V3) - const nameLower = packageName.toLowerCase(); - const verLower = version.toLowerCase(); - - return `${properBase}${nameLower}/${verLower}/${nameLower}.${verLower}.nupkg`; -} - -async function installPackage(artifact, tempDir) { - const pkgName = artifact.name; - const feedUrl = artifact.feed; - - // Resolve version if not specified - let pkgVer = artifact.version; - let isNightly = artifact.nightly; - if (isNightly) { - console.log(` Resolving latest version for ${pkgName}...`); - pkgVer = await resolveLatestVersion(feedUrl, pkgName); - } - - console.log(` Resolving ${pkgName} ${pkgVer}...`); - const downloadUrl = await resolvePackageRawUrl(feedUrl, pkgName, pkgVer); - - const nupkgPath = path.join(tempDir, `${pkgName}.${pkgVer}.nupkg`); - - console.log(` Downloading ${downloadUrl}...`); - await downloadFile(downloadUrl, nupkgPath); - - console.log(` Extracting...`); - const zip = new AdmZip(nupkgPath); - const zipEntries = zip.getEntries(); - - // Pattern: runtimes/{RID}/native/{file}.{ext} - const ext = os.platform() === 'win32' ? '.dll' : os.platform() === 'darwin' ? '.dylib' : '.so'; - const targetPathPrefix = `runtimes/${RID}/native/`.toLowerCase(); - - let found = false; - - console.log(` Scanning for all ${ext} files in ${targetPathPrefix}...`); - const entries = zipEntries.filter(e => { - const entryPathLower = e.entryName.toLowerCase(); - return entryPathLower.includes(targetPathPrefix) && entryPathLower.endsWith(ext); - }); - - if (entries.length > 0) { - entries.forEach(entry => { - console.log(` Found ${entry.entryName}`); - zip.extractEntryTo(entry, BIN_DIR, false, true); - console.log(` Extracted ${entry.name}`); - }); - found = true; - } else { - console.warn(` ⚠ No files found for RID ${RID} in package.`); - } - - // After extracting, update the packages/@foundry-local-core/RID/package.json version to match the downloaded artifact - if (found && pkgName.startsWith('Microsoft.AI.Foundry.Local.Core')) { - const pkgJsonPath = path.join(BIN_DIR, 'package.json'); - try { - if (fs.existsSync(pkgJsonPath)) { - const pkgJson = JSON.parse(fs.readFileSync(pkgJsonPath, 'utf8')); - pkgJson.version = pkgVer; - fs.writeFileSync(pkgJsonPath, JSON.stringify(pkgJson, null, 2)); - console.log(` Updated package.json version to ${pkgVer}`); - } - } catch (e) { - console.warn(` Failed to update package.json version: ${e.message}`); - } - } -} - -async function main() { - const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-install-')); - try { - for (const artifact of ARTIFACTS) { - await installPackage(artifact, tempDir); - } - console.log('[foundry-local] ✓ Installation complete.'); - } catch (e) { - console.error(`[foundry-local] Installation failed: ${e.message}`); - process.exit(1); - } finally { - try { - fs.rmSync(tempDir, { recursive: true, force: true }); - } catch {} - } -} - -main(); diff --git a/sdk/js/script/pack.cjs b/sdk/js/script/pack.cjs new file mode 100644 index 00000000..32057c7e --- /dev/null +++ b/sdk/js/script/pack.cjs @@ -0,0 +1,32 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// Usage: +// node script/pack.cjs -> foundry-local-sdk-<version>.tgz +// node script/pack.cjs winml -> foundry-local-sdk-winml-<version>.tgz + +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const { execSync } = require('child_process'); + +const pkgPath = path.join(__dirname, '..', 'package.json'); +const original = fs.readFileSync(pkgPath, 'utf8'); +const isWinML = process.argv[2] === 'winml'; + +try { + const pkg = JSON.parse(original); + if (isWinML) { + pkg.name = 'foundry-local-sdk-winml'; + pkg.scripts.install = 'node script/install-winml.cjs'; + pkg.files = ['dist', 'script/install-winml.cjs', 'script/install-utils.cjs', 'script/preinstall.cjs']; + } else { + pkg.files = ['dist', 'script/install-standard.cjs', 'script/install-utils.cjs', 'script/preinstall.cjs']; + } + fs.writeFileSync(pkgPath, JSON.stringify(pkg, null, 2)); + execSync('npm pack', { cwd: path.join(__dirname, '..'), stdio: 'inherit' }); +} finally { + // Always restore original package.json + fs.writeFileSync(pkgPath, original); +} diff --git a/sdk/js/src/catalog.ts b/sdk/js/src/catalog.ts index bf2ae5c9..d4331c38 100644 --- a/sdk/js/src/catalog.ts +++ b/sdk/js/src/catalog.ts @@ -1,8 +1,9 @@ import { CoreInterop } from './detail/coreInterop.js'; import { ModelLoadManager } from './detail/modelLoadManager.js'; -import { Model } from './model.js'; -import { ModelVariant } from './modelVariant.js'; +import { Model } from './detail/model.js'; +import { ModelVariant } from './detail/modelVariant.js'; import { ModelInfo } from './types.js'; +import { IModel } from './imodel.js'; /** * Represents a catalog of AI models available in the system. @@ -31,6 +32,11 @@ export class Catalog { return this._name; } + /** @internal */ + invalidateCache(): void { + this.lastFetch = 0; + } + private async updateModels(): Promise<void> { // TODO: make this configurable if ((Date.now() - this.lastFetch) < 6 * 60 * 60 * 1000) { // 6 hours @@ -71,9 +77,9 @@ export class Catalog { /** * Lists all available models in the catalog. * This method is asynchronous as it may fetch the model list from a remote service or perform file I/O. - * @returns A Promise that resolves to an array of Model objects. + * @returns A Promise that resolves to an array of IModel objects. */ - public async getModels(): Promise<Model[]> { + public async getModels(): Promise<IModel[]> { await this.updateModels(); return this._models; } @@ -82,10 +88,10 @@ * Retrieves a model by its alias. * This method is asynchronous as it may ensure the catalog is up-to-date by fetching from a remote service. * @param alias - The alias of the model to retrieve. - * @returns A Promise that resolves to the Model object if found, otherwise throws an error. + * @returns A Promise that resolves to the IModel object if found, otherwise throws an error. * @throws Error - If alias is null, undefined, or empty. */ - public async getModel(alias: string): Promise<Model> { + public async getModel(alias: string): Promise<IModel> { if (typeof alias !== 'string' || alias.trim() === '') { throw new Error('Model alias must be a non-empty string.'); } @@ -100,12 +106,14 @@ /** * Retrieves a specific model variant by its ID. + * NOTE: This will return an IModel with a single variant. Use getModel to get an IModel with all available + * variants. * This method is asynchronous as it may ensure the catalog is up-to-date by fetching from a remote service. * @param modelId - The unique identifier of the model variant.
- * @returns A Promise that resolves to the ModelVariant object if found, otherwise throws an error. + * @returns A Promise that resolves to the IModel object if found, otherwise throws an error. * @throws Error - If modelId is null, undefined, or empty. */ - public async getModelVariant(modelId: string): Promise<ModelVariant> { + public async getModelVariant(modelId: string): Promise<IModel> { if (typeof modelId !== 'string' || modelId.trim() === '') { throw new Error('Model ID must be a non-empty string.'); } @@ -121,9 +129,9 @@ /** * Retrieves a list of all locally cached model variants. * This method is asynchronous as it may involve file I/O or querying the underlying core. - * @returns A Promise that resolves to an array of cached ModelVariant objects. + * @returns A Promise that resolves to an array of cached IModel objects. */ - public async getCachedModels(): Promise<ModelVariant[]> { + public async getCachedModels(): Promise<IModel[]> { await this.updateModels(); const cachedModelListJson = this.coreInterop.executeCommand("get_cached_models"); let cachedModelIds: string[] = []; try { } catch (error) { throw new Error(`Failed to parse cached model list JSON: ${error}`); } - const cachedModels: Set<ModelVariant> = new Set(); + const cachedModels: Set<IModel> = new Set(); for (const modelId of cachedModelIds) { const variant = this.modelIdToModelVariant.get(modelId); @@ -147,9 +155,9 @@ * Retrieves a list of all currently loaded model variants. * This operation is asynchronous because checking the loaded status may involve querying * the underlying core or an external service, which can be an I/O bound operation. - * @returns A Promise that resolves to an array of loaded ModelVariant objects. + * @returns A Promise that resolves to an array of loaded IModel objects. */ - public async getLoadedModels(): Promise<ModelVariant[]> { + public async getLoadedModels(): Promise<IModel[]> { await this.updateModels(); let loadedModelIds: string[] = []; try { } catch (error) { throw new Error(`Failed to list loaded models: ${error}`); } - const loadedModels: ModelVariant[] = []; + const loadedModels: IModel[] = []; for (const modelId of loadedModelIds) { const variant = this.modelIdToModelVariant.get(modelId); @@ -167,4 +175,33 @@ } return loadedModels; } + + /** + * Get the latest version of a model. + * This is used to check if a newer version of a model is available in the catalog for download. + * @param modelOrModelVariant - The model to check for the latest version. + * @returns The latest version of the model. Will match the input if it is the latest version. + */ + public async getLatestVersion(modelOrModelVariant: IModel): Promise<IModel> { + await this.updateModels(); + + // Resolve to the parent Model by alias + const model = this.modelAliasToModel.get(modelOrModelVariant.alias); + if (!model) { + throw new Error(`Model with alias '${modelOrModelVariant.alias}' not found in catalog.`); + } + + // variants are sorted by version, so the first one matching the name is the latest version + const latest = model.variants.find(v => v.info.name === modelOrModelVariant.info.name); + if (!latest) { + throw new Error( + `Internal error. Mismatch between model (alias:${model.alias}) and ` + + `model variant (alias:${modelOrModelVariant.alias}).` + ); + } + + // if input was the latest return the input (could be model or model variant) + // otherwise return the latest model variant + return latest.id === modelOrModelVariant.id ?
modelOrModelVariant : latest; + } } \ No newline at end of file diff --git a/sdk/js/src/detail/coreInterop.ts b/sdk/js/src/detail/coreInterop.ts index 167784e7..b4cbf36c 100644 --- a/sdk/js/src/detail/coreInterop.ts +++ b/sdk/js/src/detail/coreInterop.ts @@ -129,7 +129,7 @@ export class CoreInterop { } } - public executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise<void> { + public executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise<string> { const cmdBuf = koffi.alloc('char', command.length + 1); koffi.encode(cmdBuf, 'char', command, command.length + 1); @@ -143,7 +143,7 @@ export class CoreInterop { callback(chunk); }, koffi.pointer(CallbackType)); - return new Promise<void>((resolve, reject) => { + return new Promise<string>((resolve, reject) => { const req = { Command: koffi.address(cmdBuf), CommandLength: command.length, @@ -167,7 +167,8 @@ const errorMsg = koffi.decode(res.Error, 'char', res.ErrorLength); reject(new Error(`Command '${command}' failed: ${errorMsg}`)); } else { - resolve(); + const responseData = res.Data ? koffi.decode(res.Data, 'char', res.DataLength) : ''; + resolve(responseData); } } finally { // Free the heap-allocated response strings using koffi.free() diff --git a/sdk/js/src/model.ts b/sdk/js/src/detail/model.ts similarity index 74% rename from sdk/js/src/model.ts rename to sdk/js/src/detail/model.ts index e2b37119..77af2cb6 100644 --- a/sdk/js/src/model.ts +++ b/sdk/js/src/detail/model.ts @@ -1,8 +1,9 @@ import { ModelVariant } from './modelVariant.js'; -import { ChatClient } from './openai/chatClient.js'; -import { AudioClient } from './openai/audioClient.js'; -import { ResponsesClient } from './openai/responsesClient.js'; -import { IModel } from './imodel.js'; +import { ChatClient } from '../openai/chatClient.js'; +import { AudioClient } from '../openai/audioClient.js'; +import { ResponsesClient } from '../openai/responsesClient.js'; +import { IModel } from '../imodel.js'; +import { ModelInfo } from '../types.js'; /** * Represents a high-level AI model that may have multiple variants (e.g., quantized versions, different formats). @@ -20,25 +21,14 @@ export class Model implements IModel { this.selectedVariant = variant; } - private validateVariantInput(variant: ModelVariant, caller: string): void { - if (variant === null || variant === undefined) { - throw new Error(`${caller}() requires a ModelVariant object but received ${variant}.`); - } - if (typeof variant !== 'object') { - throw new Error( - `${caller}() requires a ModelVariant object but received ${typeof variant}.` - ); - } - } - /** * Adds a new variant to this model. * Automatically selects the new variant if it is cached and the current one is not. * @param variant - The model variant to add. - * @throws Error - If the argument is not a ModelVariant object, or if the variant's alias does not match the model's alias. + * @throws Error - If the variant's alias does not match the model's alias. + * @internal */ public addVariant(variant: ModelVariant): void { - this.validateVariantInput(variant, 'addVariant'); if (!variant || variant.alias !== this._alias) { throw new Error(`Variant alias "${variant?.alias}" does not match model alias "${this._alias}".`); } @@ -52,14 +42,13 @@ export class Model implements IModel { /** * Selects a specific variant. - * @param variant - The model variant to select. - * @throws Error - If the argument is not a ModelVariant object, or if the variant does not belong to this model.
+ * @param variant - The model variant to select. Must be one of the variants in `variants`. + * @throws Error - If the variant does not belong to this model. */ - public selectVariant(variant: ModelVariant): void { - this.validateVariantInput(variant, 'selectVariant'); + public selectVariant(variant: IModel): void { const matchingVariant = this._variants.find(v => v.id === variant.id); if (!variant.id || !matchingVariant) { - throw new Error(`Model variant with ID ${variant.id} does not belong to model "${this._alias}".`); + throw new Error(`Input variant was not found in Variants.`); } this.selectedVariant = matchingVariant; } @@ -80,6 +69,14 @@ export class Model implements IModel { return this._alias; } + /** + * Gets the ModelInfo of the currently selected variant. + * @returns The ModelInfo object. + */ + public get info(): ModelInfo { + return this.selectedVariant.info; + } + /** * Checks if the currently selected variant is cached locally. * @returns True if cached, false otherwise. @@ -98,12 +95,32 @@ export class Model implements IModel { /** * Gets all available variants for this model. - * @returns An array of ModelVariant objects. + * @returns An array of IModel objects. */ - public get variants(): ModelVariant[] { + public get variants(): IModel[] { return this._variants; } + public get contextLength(): number | null { + return this.selectedVariant.contextLength; + } + + public get inputModalities(): string | null { + return this.selectedVariant.inputModalities; + } + + public get outputModalities(): string | null { + return this.selectedVariant.outputModalities; + } + + public get capabilities(): string | null { + return this.selectedVariant.capabilities; + } + + public get supportsToolCalling(): boolean | null { + return this.selectedVariant.supportsToolCalling; + } + /** * Downloads the currently selected variant. * @param progressCallback - Optional callback to report download progress. diff --git a/sdk/js/src/modelVariant.ts b/sdk/js/src/detail/modelVariant.ts similarity index 73% rename from sdk/js/src/modelVariant.ts rename to sdk/js/src/detail/modelVariant.ts index 4d3e2bee..5d50696b 100644 --- a/sdk/js/src/modelVariant.ts +++ b/sdk/js/src/detail/modelVariant.ts @@ -1,14 +1,15 @@ -import { CoreInterop } from './detail/coreInterop.js'; -import { ModelLoadManager } from './detail/modelLoadManager.js'; -import { ModelInfo } from './types.js'; -import { ChatClient } from './openai/chatClient.js'; -import { AudioClient } from './openai/audioClient.js'; -import { ResponsesClient } from './openai/responsesClient.js'; -import { IModel } from './imodel.js'; +import { CoreInterop } from './coreInterop.js'; +import { ModelLoadManager } from './modelLoadManager.js'; +import { ModelInfo } from '../types.js'; +import { ChatClient } from '../openai/chatClient.js'; +import { AudioClient } from '../openai/audioClient.js'; +import { ResponsesClient } from '../openai/responsesClient.js'; +import { IModel } from '../imodel.js'; /** * Represents a specific variant of a model (e.g., a specific quantization or format). * Contains the low-level implementation for interacting with the model. + * @internal */ export class ModelVariant implements IModel { private _modelInfo: ModelInfo; @@ -41,10 +42,49 @@ export class ModelVariant implements IModel { * Gets the detailed information about the model variant. * @returns The ModelInfo object. 
*/ - public get modelInfo(): ModelInfo { + public get info(): ModelInfo { return this._modelInfo; } + /** + * A ModelVariant is a single variant, so variants returns itself. + */ + public get variants(): IModel[] { + return [this]; + } + + /** + * SelectVariant is not supported on a ModelVariant. + * Call Catalog.getModel() to get an IModel with all variants available. + * @throws Error always. + */ + public selectVariant(_variant: IModel): void { + throw new Error( + `selectVariant is not supported on a ModelVariant. ` + + `Call Catalog.getModel("${this.alias}") to get an IModel with all variants available.` + ); + } + + public get contextLength(): number | null { + return this._modelInfo.contextLength ?? null; + } + + public get inputModalities(): string | null { + return this._modelInfo.inputModalities ?? null; + } + + public get outputModalities(): string | null { + return this._modelInfo.outputModalities ?? null; + } + + public get capabilities(): string | null { + return this._modelInfo.capabilities ?? null; + } + + public get supportsToolCalling(): boolean | null { + return this._modelInfo.supportsToolCalling ?? null; + } + /** * Checks if the model variant is cached locally. * @returns True if cached, false otherwise. diff --git a/sdk/js/src/foundryLocalManager.ts b/sdk/js/src/foundryLocalManager.ts index bc408f78..f22acdc0 100644 --- a/sdk/js/src/foundryLocalManager.ts +++ b/sdk/js/src/foundryLocalManager.ts @@ -3,6 +3,7 @@ import { CoreInterop } from './detail/coreInterop.js'; import { ModelLoadManager } from './detail/modelLoadManager.js'; import { Catalog } from './catalog.js'; import { ResponsesClient } from './openai/responsesClient.js'; +import { EpInfo, EpDownloadResult } from './types.js'; /** * The main entry point for the Foundry Local SDK. @@ -61,6 +62,7 @@ export class FoundryLocalManager { return this._urls; } + /** * Starts the local web service. * Use the `urls` property to retrieve the bound addresses after the service has started. @@ -94,6 +96,122 @@ export class FoundryLocalManager { return this._urls.length > 0; } + /** + * Discovers available execution providers (EPs) and their registration status. + * @returns An array of EpInfo describing each available EP. + */ + public discoverEps(): EpInfo[] { + const response = this.coreInterop.executeCommand("discover_eps"); + type RawEpInfo = { + Name: string; + IsRegistered: boolean; + }; + + try { + const raw = JSON.parse(response) as RawEpInfo[]; + return raw.map((ep) => ({ + name: ep.Name, + isRegistered: ep.IsRegistered + })); + } catch (error) { + throw new Error(`Failed to decode JSON response from discover_eps: ${error}. Response was: ${response}`); + } + } + + /** + * Downloads and registers execution providers. + * @returns A promise that resolves with an EpDownloadResult describing the outcome. + */ + public downloadAndRegisterEps(): Promise<EpDownloadResult>; + /** + * Downloads and registers execution providers. + * @param names - Array of EP names to download. + * @returns A promise that resolves with an EpDownloadResult describing the outcome. + */ + public downloadAndRegisterEps(names: string[]): Promise<EpDownloadResult>; + /** + * Downloads and registers execution providers, reporting progress. + * @param progressCallback - Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. + * @returns A promise that resolves with an EpDownloadResult describing the outcome.
+ */ + public downloadAndRegisterEps(progressCallback: (epName: string, percent: number) => void): Promise<EpDownloadResult>; + /** + * Downloads and registers execution providers, reporting progress. + * @param names - Array of EP names to download. + * @param progressCallback - Callback invoked with (epName, percent) as each EP downloads. Percent is 0-100. + * @returns A promise that resolves with an EpDownloadResult describing the outcome. + */ + public downloadAndRegisterEps(names: string[], progressCallback: (epName: string, percent: number) => void): Promise<EpDownloadResult>; + public async downloadAndRegisterEps( + namesOrCallback?: string[] | ((epName: string, percent: number) => void), + progressCallback?: (epName: string, percent: number) => void + ): Promise<EpDownloadResult> { + let names: string[] | undefined; + if (typeof namesOrCallback === 'function') { + progressCallback = namesOrCallback; + } else { + names = namesOrCallback; + } + + const params: { Params?: { Names: string } } = {}; + if (names && names.length > 0) { + params.Params = { Names: names.join(",") }; + } + + type RawEpDownloadResult = { + Success: boolean; + Status: string; + RegisteredEps: string[]; + FailedEps: string[]; + }; + + let response: string; + + if (progressCallback) { + response = await this.coreInterop.executeCommandStreaming( + "download_and_register_eps", + Object.keys(params).length > 0 ? params : undefined, + (chunk: string) => { + const sepIndex = chunk.indexOf('|'); + if (sepIndex >= 0) { + const epName = chunk.substring(0, sepIndex); + const percent = parseFloat(chunk.substring(sepIndex + 1)); + if (!isNaN(percent)) { + progressCallback(epName || '', percent); + } + } + } + ); + } else { + response = await this.coreInterop.executeCommandStreaming( + "download_and_register_eps", + Object.keys(params).length > 0 ? params : undefined, + () => {} // no-op callback + ); + } + + let epResult: EpDownloadResult; + try { + const raw = JSON.parse(response) as RawEpDownloadResult; + epResult = { + success: raw.Success, + status: raw.Status, + registeredEps: raw.RegisteredEps, + failedEps: raw.FailedEps + }; + } catch (error) { + throw new Error(`Failed to decode JSON response from download_and_register_eps: ${error}. Response was: ${response}`); + } + + // Invalidate the catalog cache if any EP was newly registered so the next access + // re-fetches models with the updated set of available EPs. + if (epResult.success || epResult.registeredEps.length > 0) { + this._catalog.invalidateCache(); + } + + return epResult; + } + /** * Creates a ResponsesClient for interacting with the Responses API. * The web service must be started first via `startWebService()`.
diff --git a/sdk/js/src/imodel.ts b/sdk/js/src/imodel.ts index be0913d6..9243a0a0 100644 --- a/sdk/js/src/imodel.ts +++ b/sdk/js/src/imodel.ts @@ -1,13 +1,21 @@ import { ChatClient } from './openai/chatClient.js'; import { AudioClient } from './openai/audioClient.js'; import { ResponsesClient } from './openai/responsesClient.js'; +import { ModelInfo } from './types.js'; export interface IModel { get id(): string; get alias(): string; + get info(): ModelInfo; get isCached(): boolean; isLoaded(): Promise<boolean>; + get contextLength(): number | null; + get inputModalities(): string | null; + get outputModalities(): string | null; + get capabilities(): string | null; + get supportsToolCalling(): boolean | null; + download(progressCallback?: (progress: number) => void): Promise<void>; get path(): string; load(): Promise<void>; @@ -23,4 +31,17 @@ * @param baseUrl - The base URL of the Foundry Local web service. */ createResponsesClient(baseUrl: string): ResponsesClient; + + /** + * Variants of the model that are available. Variants of the model are optimized for different devices. + */ + get variants(): IModel[]; + + /** + * Select a model variant from variants to use for IModel operations. + * An IModel from `variants` can also be used directly. + * @param variant - Model variant to select. Must be one of the variants in `variants`. + * @throws Error if variant is not valid for this model. + */ + selectVariant(variant: IModel): void; } diff --git a/sdk/js/src/index.ts b/sdk/js/src/index.ts index 7d7ee17a..87f108af 100644 --- a/sdk/js/src/index.ts +++ b/sdk/js/src/index.ts @@ -1,8 +1,10 @@ export { FoundryLocalManager } from './foundryLocalManager.js'; export type { FoundryLocalConfig } from './configuration.js'; export { Catalog } from './catalog.js'; -export { Model } from './model.js'; -export { ModelVariant } from './modelVariant.js'; +/** @internal */ +export { Model } from './detail/model.js'; +/** @internal */ +export { ModelVariant } from './detail/modelVariant.js'; export type { IModel } from './imodel.js'; export { ChatClient, ChatClientSettings } from './openai/chatClient.js'; export { AudioClient, AudioClientSettings } from './openai/audioClient.js'; diff --git a/sdk/js/src/openai/audioClient.ts b/sdk/js/src/openai/audioClient.ts index 59267015..7b174924 100644 --- a/sdk/js/src/openai/audioClient.ts +++ b/sdk/js/src/openai/audioClient.ts @@ -89,66 +89,153 @@ export class AudioClient { } /** - * Transcribes audio into the input language using streaming. + * Transcribes audio into the input language using streaming, returning an async iterable of chunks. * @param audioFilePath - Path to the audio file to transcribe. - * @param callback - A callback function that receives each chunk of the streaming response. - * @returns A promise that resolves when the stream is complete. - * @throws Error - If audioFilePath or callback are invalid, or streaming fails. + * @returns An async iterable that yields parsed streaming transcription chunks. + * @throws Error - If audioFilePath is invalid, or streaming fails.
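+ * @remarks Exiting the loop early (e.g. via break) stops chunk buffering, but the underlying native stream cannot be aborted and runs to completion in the background.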
+ * + * @example + * ```typescript + * for await (const chunk of audioClient.transcribeStreaming('recording.wav')) { + * process.stdout.write(chunk.text); + * } + * ``` */ - public async transcribeStreaming(audioFilePath: string, callback: (chunk: any) => void): Promise<void> { + public transcribeStreaming(audioFilePath: string): AsyncIterable<any> { this.validateAudioFilePath(audioFilePath); - if (!callback || typeof callback !== 'function') { - throw new Error('Callback must be a valid function.'); - } + const request = { Model: this.modelId, FileName: audioFilePath, ...this.settings._serialize() }; - - let error: Error | null = null; - try { - await this.coreInterop.executeCommandStreaming( - "audio_transcribe", - { Params: { OpenAICreateRequest: JSON.stringify(request) } }, - (chunkStr: string) => { - // Skip processing if we already encountered an error - if (error) { - return; - } - - if (chunkStr) { - let chunk: any; - try { - chunk = JSON.parse(chunkStr); - } catch (e) { - // Don't throw from callback - store first error and stop processing - error = new Error(`Failed to parse streaming chunk: ${e instanceof Error ? e.message : String(e)}`, { cause: e }); - return; + // Capture instance properties to local variables because `this` is not + // accessible inside the [Symbol.asyncIterator]() method below — it's a + // regular method on the returned object literal, not on the AudioClient. + const coreInterop = this.coreInterop; + const modelId = this.modelId; + + // Return an AsyncIterable object. The [Symbol.asyncIterator]() factory + // is called once when the consumer starts a `for await` loop, and it + // returns the AsyncIterator (with next() / return() methods). + return { + [Symbol.asyncIterator](): AsyncIterator<any> { + // Buffer for chunks received from the native callback. + // Uses a head index for O(1) dequeue instead of Array.shift() which is O(n). + // JavaScript's single-threaded event loop ensures no race conditions + // between the callback pushing chunks and next() consuming them. + const chunks: any[] = []; + let head = 0; + let done = false; + let cancelled = false; + let error: Error | null = null; + let resolve: (() => void) | null = null; + let nextInFlight = false; + + const streamingPromise = coreInterop.executeCommandStreaming( + "audio_transcribe", + { Params: { OpenAICreateRequest: JSON.stringify(request) } }, + (chunkStr: string) => { + if (cancelled || error) return; + if (chunkStr) { + try { + const chunk = JSON.parse(chunkStr); + chunks.push(chunk); + } catch (e) { + if (!error) { + error = new Error( + `Failed to parse streaming chunk: ${e instanceof Error ? e.message : String(e)}`, + { cause: e } + ); + } + } + } + // Wake up any waiting next() call + if (resolve) { + const r = resolve; + resolve = null; + r(); } + } + // When the native stream completes, mark done and wake up any + // pending next() call so it can see that iteration has ended. + ).then(() => { + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); // resolve the pending next() promise + } + }).catch((err) => { + if (!error) { + const underlyingError = err instanceof Error ? err : new Error(String(err)); + error = new Error( + `Streaming audio transcription failed for model '${modelId}': ${underlyingError.message}`, + { cause: underlyingError } + ); + } + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }); + // Return the AsyncIterator object consumed by `for await`.
+ // next() yields buffered chunks one at a time; return() is + // called automatically when the consumer breaks out early. + return { + async next(): Promise<IteratorResult<any>> { + if (nextInFlight) { + throw new Error('next() called concurrently on streaming iterator; await each call before invoking next().'); + } + nextInFlight = true; try { - callback(chunk); - } catch (e) { - // Don't throw from callback - store first error and stop processing - error = new Error(`User callback threw an error: ${e instanceof Error ? e.message : String(e)}`, { cause: e }); - return; + while (true) { + if (head < chunks.length) { + const value = chunks[head]; + chunks[head] = undefined; // allow GC + head++; + // Compact the array when all buffered chunks have been consumed + if (head === chunks.length) { + chunks.length = 0; + head = 0; + } + return { value, done: false }; + } + if (error) { + throw error; + } + if (done || cancelled) { + return { value: undefined, done: true }; + } + // Wait for the next chunk or completion + await new Promise<void>((r) => { resolve = r; }); + } + } finally { + nextInFlight = false; } + }, + async return(): Promise<IteratorResult<any>> { + // Mark cancelled so the callback stops buffering. + // Note: the underlying native stream cannot be cancelled + // (CoreInterop.executeCommandStreaming has no abort support), + // so the koffi callback may still fire but will no-op due + // to the cancelled guard above. + cancelled = true; + chunks.length = 0; + head = 0; + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + return { value: undefined, done: true }; } - } - ); - - // If we encountered an error during streaming, reject now - if (error) { - throw error; + }; } - } catch (err) { - const underlyingError = err instanceof Error ? err : new Error(String(err)); - throw new Error( - `Streaming audio transcription failed for model '${this.modelId}': ${underlyingError.message}`, - { cause: underlyingError } - ); - } + }; } } diff --git a/sdk/js/src/openai/chatClient.ts b/sdk/js/src/openai/chatClient.ts index 7aa77170..f844da41 100644 --- a/sdk/js/src/openai/chatClient.ts +++ b/sdk/js/src/openai/chatClient.ts @@ -211,26 +211,33 @@ export class ChatClient { } /** - * Performs a streaming chat completion. + * Performs a streaming chat completion, returning an async iterable of chunks. * @param messages - An array of message objects. - * @param tools - An array of tool objects. - * @param callback - A callback function that receives each chunk of the streaming response. - * @returns A promise that resolves when the stream is complete. - * @throws Error - If messages, tools, or callback are invalid, or streaming fails. + * @param tools - An optional array of tool objects. + * @returns An async iterable that yields parsed streaming response chunks. + * @throws Error - If messages or tools are invalid, or streaming fails.
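+ * @remarks Chunks must be consumed sequentially; the returned iterator throws if next() is called again before the previous call settles.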
+ * + * @example + * ```typescript + * // Without tools: + * for await (const chunk of chatClient.completeStreamingChat(messages)) { + * const content = chunk.choices?.[0]?.delta?.content; + * if (content) process.stdout.write(content); + * } + * + * // With tools: + * for await (const chunk of chatClient.completeStreamingChat(messages, tools)) { + * const content = chunk.choices?.[0]?.delta?.content; + * if (content) process.stdout.write(content); + * } + * ``` */ - public async completeStreamingChat(messages: any[], callback: (chunk: any) => void): Promise<void>; - public async completeStreamingChat(messages: any[], tools: any[], callback: (chunk: any) => void): Promise<void>; - public async completeStreamingChat(messages: any[], toolsOrCallback: any[] | ((chunk: any) => void), maybeCallback?: (chunk: any) => void): Promise<void> { - const tools = Array.isArray(toolsOrCallback) ? toolsOrCallback : undefined; - const callback = (Array.isArray(toolsOrCallback) ? maybeCallback : toolsOrCallback) as ((chunk: any) => void) | undefined; - + public completeStreamingChat(messages: any[]): AsyncIterable<any>; + public completeStreamingChat(messages: any[], tools: any[]): AsyncIterable<any>; + public completeStreamingChat(messages: any[], tools?: any[]): AsyncIterable<any> { this.validateMessages(messages); this.validateTools(tools); - if (!callback || typeof callback !== 'function') { - throw new Error('Callback must be a valid function.'); - } - const request = { model: this.modelId, messages, @@ -239,49 +246,132 @@ ...this.settings._serialize() }; - let error: Error | null = null; + // Capture instance properties to local variables because `this` is not + // accessible inside the [Symbol.asyncIterator]() method below — it's a + // regular method on the returned object literal, not on the ChatClient. + const coreInterop = this.coreInterop; + const modelId = this.modelId; - try { - await this.coreInterop.executeCommandStreaming( - 'chat_completions', - { Params: { OpenAICreateRequest: JSON.stringify(request) } }, - (chunkStr: string) => { - // Skip processing if we already encountered an error - if (error) return; + // Return an AsyncIterable object. The [Symbol.asyncIterator]() factory + // is called once when the consumer starts a `for await` loop, and it + // returns the AsyncIterator (with next() / return() methods). + return { + [Symbol.asyncIterator](): AsyncIterator<any> { + // Buffer for chunks received from the native callback. + // Uses a head index for O(1) dequeue instead of Array.shift() which is O(n). + // JavaScript's single-threaded event loop ensures no race conditions + // between the callback pushing chunks and next() consuming them. + const chunks: any[] = []; + let head = 0; + let done = false; + let cancelled = false; + let error: Error | null = null; + let resolve: (() => void) | null = null; + let nextInFlight = false; - if (chunkStr) { - let chunk: any; - try { - chunk = JSON.parse(chunkStr); - } catch (e) { - // Don't throw from callback - store first error and stop processing - error = new Error( - `Failed to parse streaming chunk: ${e instanceof Error ?
e.message : String(e)}`, - { cause: e } - ); - return; + const streamingPromise = coreInterop.executeCommandStreaming( + 'chat_completions', + { Params: { OpenAICreateRequest: JSON.stringify(request) } }, + (chunkStr: string) => { + if (cancelled || error) return; + if (chunkStr) { + try { + const chunk = JSON.parse(chunkStr); + chunks.push(chunk); + } catch (e) { + if (!error) { + error = new Error( + `Failed to parse streaming chunk: ${e instanceof Error ? e.message : String(e)}`, + { cause: e } + ); + } + } } + // Wake up any waiting next() call + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + } + // When the native stream completes, mark done and wake up any + // pending next() call so it can see that iteration has ended. + ).then(() => { + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); // resolve the pending next() promise + } + }).catch((err) => { + if (!error) { + const underlyingError = err instanceof Error ? err : new Error(String(err)); + error = new Error( + `Streaming chat completion failed for model '${modelId}': ${underlyingError.message}`, + { cause: underlyingError } + ); + } + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }); + // Return the AsyncIterator object consumed by `for await`. + // next() yields buffered chunks one at a time; return() is + // called automatically when the consumer breaks out early. + return { + async next(): Promise<IteratorResult<any>> { + if (nextInFlight) { + throw new Error('next() called concurrently on streaming iterator; await each call before invoking next().'); + } + nextInFlight = true; try { - callback(chunk); - } catch (e) { - // Don't throw from callback - store first error and stop processing - error = new Error( - `User callback threw an error: ${e instanceof Error ? e.message : String(e)}`, - { cause: e } - ); + while (true) { + if (head < chunks.length) { + const value = chunks[head]; + chunks[head] = undefined; // allow GC + head++; + // Compact the array when all buffered chunks have been consumed + if (head === chunks.length) { + chunks.length = 0; + head = 0; + } + return { value, done: false }; + } + if (error) { + throw error; + } + if (done || cancelled) { + return { value: undefined, done: true }; + } + // Wait for the next chunk or completion + await new Promise<void>((r) => { resolve = r; }); + } + } finally { + nextInFlight = false; + } + }, + async return(): Promise<IteratorResult<any>> { + // Mark cancelled so the callback stops buffering. + // Note: the underlying native stream cannot be cancelled + // (CoreInterop.executeCommandStreaming has no abort support), + // so the koffi callback may still fire but will no-op due + // to the cancelled guard above. + cancelled = true; + chunks.length = 0; + head = 0; + if (resolve) { + const r = resolve; + resolve = null; + r(); } + return { value: undefined, done: true }; } - } - ); - - // If we encountered an error during streaming, reject now - if (error) throw error; - } catch (err) { - const underlyingError = err instanceof Error ?
err : new Error(String(err)); - throw new Error(`Streaming chat completion failed for model '${this.modelId}': ${underlyingError.message}`, { - cause: underlyingError - }); - } + }; + } + }; } } diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts index 639676de..521ae34b 100644 --- a/sdk/js/src/types.ts +++ b/sdk/js/src/types.ts @@ -50,6 +50,10 @@ export interface ModelInfo { maxOutputTokens?: number | null; minFLVersion?: string | null; createdAtUnix: number; + contextLength?: number | null; + inputModalities?: string | null; + outputModalities?: string | null; + capabilities?: string | null; } export interface ResponseFormat { @@ -63,6 +67,30 @@ export interface ToolChoice { name?: string; } +// ============================================================================ +// Execution Provider Types +// ============================================================================ + +/** Describes a discoverable execution provider bootstrapper. */ +export interface EpInfo { + /** The identifier of the bootstrapper/execution provider (e.g. "CUDAExecutionProvider"). */ + name: string; + /** True if this EP has already been successfully downloaded and registered. */ + isRegistered: boolean; +} + +/** Result of an explicit EP download and registration operation. */ +export interface EpDownloadResult { + /** True if all requested EPs were successfully downloaded and registered. */ + success: boolean; + /** Human-readable status message. */ + status: string; + /** Names of EPs that were successfully registered. */ + registeredEps: string[]; + /** Names of EPs that failed to register. */ + failedEps: string[]; +} + // ============================================================================ // Responses API Types // Aligned with OpenAI Responses API / OpenResponses spec and diff --git a/sdk/js/test/catalog.test.ts b/sdk/js/test/catalog.test.ts index df47d4f6..8c320723 100644 --- a/sdk/js/test/catalog.test.ts +++ b/sdk/js/test/catalog.test.ts @@ -1,5 +1,7 @@ import { describe, it } from 'mocha'; import { expect } from 'chai'; +import { Catalog } from '../src/catalog.js'; +import { DeviceType, type ModelInfo } from '../src/types.js'; import { getTestManager, TEST_MODEL_ALIAS } from './testUtils.js'; describe('Catalog Tests', () => { @@ -106,4 +108,97 @@ describe('Catalog Tests', () => { expect((error as Error).message).to.include('Available variants:'); } }); + + it('should resolve latest version for model and variant inputs', async function() { + // Mirror the C# test by using synthetic model data sorted by version descending. 
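+ // The catalog keeps variants in this order, so variants[0] is expected to be version 3 (the latest).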
+ const testModelInfos: ModelInfo[] = [ + { + id: 'test-model:3', + name: 'test-model', + version: 3, + alias: 'test-alias', + displayName: 'Test Model', + providerType: 'test', + uri: 'test://model/3', + modelType: 'ONNX', + runtime: { deviceType: DeviceType.CPU, executionProvider: 'CPUExecutionProvider' }, + cached: false, + createdAtUnix: 1700000003 + }, + { + id: 'test-model:2', + name: 'test-model', + version: 2, + alias: 'test-alias', + displayName: 'Test Model', + providerType: 'test', + uri: 'test://model/2', + modelType: 'ONNX', + runtime: { deviceType: DeviceType.CPU, executionProvider: 'CPUExecutionProvider' }, + cached: false, + createdAtUnix: 1700000002 + }, + { + id: 'test-model:1', + name: 'test-model', + version: 1, + alias: 'test-alias', + displayName: 'Test Model', + providerType: 'test', + uri: 'test://model/1', + modelType: 'ONNX', + runtime: { deviceType: DeviceType.CPU, executionProvider: 'CPUExecutionProvider' }, + cached: false, + createdAtUnix: 1700000001 + } + ]; + + const mockCoreInterop = { + executeCommand(command: string): string { + if (command === 'get_catalog_name') { + return 'TestCatalog'; + } + if (command === 'get_model_list') { + return JSON.stringify(testModelInfos); + } + if (command === 'get_cached_models') { + return '[]'; + } + throw new Error(`Unexpected command: ${command}`); + } + } as any; + + const mockLoadManager = { + listLoaded: async () => [] + } as any; + + const catalog = new Catalog(mockCoreInterop, mockLoadManager); + + const model = await catalog.getModel('test-alias'); + expect(model).to.not.be.undefined; + + const variants = model.variants; + expect(variants).to.have.length(3); + + const latestVariant = variants[0]; + const middleVariant = variants[1]; + const oldestVariant = variants[2]; + + expect(latestVariant.id).to.equal('test-model:3'); + expect(middleVariant.id).to.equal('test-model:2'); + expect(oldestVariant.id).to.equal('test-model:1'); + + const result1 = await catalog.getLatestVersion(latestVariant); + expect(result1.id).to.equal('test-model:3'); + + const result2 = await catalog.getLatestVersion(middleVariant); + expect(result2.id).to.equal('test-model:3'); + + const result3 = await catalog.getLatestVersion(oldestVariant); + expect(result3.id).to.equal('test-model:3'); + + model.selectVariant(latestVariant); + const resultFromModel = await catalog.getLatestVersion(model); + expect(resultFromModel).to.equal(model); + }); }); diff --git a/sdk/js/test/foundryLocalManager.test.ts b/sdk/js/test/foundryLocalManager.test.ts index 5ab40043..48adcff4 100644 --- a/sdk/js/test/foundryLocalManager.test.ts +++ b/sdk/js/test/foundryLocalManager.test.ts @@ -16,4 +16,66 @@ describe('Foundry Local Manager Tests', () => { // We don't assert the exact name as it might change, but we ensure it exists expect(catalog.name).to.be.a('string'); }); + + it('downloadAndRegisterEps should call command without params when names are omitted', async function() { + const manager = getTestManager() as any; + const calls: unknown[][] = []; + const originalExecuteCommandStreaming = manager.coreInterop.executeCommandStreaming; + + manager.coreInterop.executeCommandStreaming = (...args: unknown[]) => { + calls.push(args); + return Promise.resolve(JSON.stringify({ + Success: true, + Status: 'All providers registered', + RegisteredEps: ['CUDAExecutionProvider'], + FailedEps: [] + })); + }; + + try { + const result = await manager.downloadAndRegisterEps(); + expect(calls.length).to.equal(1); + expect(calls[0][0]).to.equal('download_and_register_eps'); + 
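// When no names are given, the params argument is omitted entirely (undefined) rather than sent as an empty object. +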
expect(calls[0][1]).to.be.undefined; + expect(result).to.deep.equal({ + success: true, + status: 'All providers registered', + registeredEps: ['CUDAExecutionProvider'], + failedEps: [] + }); + } finally { + manager.coreInterop.executeCommandStreaming = originalExecuteCommandStreaming; + } + }); + + it('downloadAndRegisterEps should send Names param when subset is provided', async function() { + const manager = getTestManager() as any; + const calls: unknown[][] = []; + const originalExecuteCommandStreaming = manager.coreInterop.executeCommandStreaming; + + manager.coreInterop.executeCommandStreaming = (...args: unknown[]) => { + calls.push(args); + return Promise.resolve(JSON.stringify({ + Success: false, + Status: 'Some providers failed', + RegisteredEps: ['CUDAExecutionProvider'], + FailedEps: ['OpenVINOExecutionProvider'] + })); + }; + + try { + const result = await manager.downloadAndRegisterEps(['CUDAExecutionProvider', 'OpenVINOExecutionProvider']); + expect(calls.length).to.equal(1); + expect(calls[0][0]).to.equal('download_and_register_eps'); + expect(calls[0][1]).to.deep.equal({ Params: { Names: 'CUDAExecutionProvider,OpenVINOExecutionProvider' } }); + expect(result).to.deep.equal({ + success: false, + status: 'Some providers failed', + registeredEps: ['CUDAExecutionProvider'], + failedEps: ['OpenVINOExecutionProvider'] + }); + } finally { + manager.coreInterop.executeCommandStreaming = originalExecuteCommandStreaming; + } + }); }); diff --git a/sdk/js/test/model.test.ts b/sdk/js/test/model.test.ts index acc4d6e2..4048d9a1 100644 --- a/sdk/js/test/model.test.ts +++ b/sdk/js/test/model.test.ts @@ -39,7 +39,12 @@ describe('Model Tests', () => { expect(model).to.not.be.undefined; if (!model || !cachedVariant) return; - model.selectVariant(cachedVariant); + // Select the cached variant by finding it in the model's variants + const matchingVariant = model.variants.find(v => v.id === cachedVariant.id); + expect(matchingVariant).to.not.be.undefined; + if (matchingVariant) { + model.selectVariant(matchingVariant); + } // Ensure it's not loaded initially (or unload if it is) if (await model.isLoaded()) { diff --git a/sdk/js/test/openai/audioClient.test.ts b/sdk/js/test/openai/audioClient.test.ts index a57c02e5..10da05be 100644 --- a/sdk/js/test/openai/audioClient.test.ts +++ b/sdk/js/test/openai/audioClient.test.ts @@ -110,13 +110,13 @@ describe('Audio Client Tests', () => { audioClient.settings.temperature = 0.0; // for deterministic results let fullResponse = ''; - await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => { + for await (const chunk of audioClient.transcribeStreaming(AUDIO_FILE_PATH)) { expect(chunk).to.not.be.undefined; expect(chunk.text).to.not.be.undefined; expect(chunk.text).to.be.a('string'); expect(chunk.text.length).to.be.greaterThan(0); fullResponse += chunk.text; - }); + } console.log(`Full response: ${fullResponse}`); expect(fullResponse).to.equal(EXPECTED_TEXT); @@ -151,13 +151,13 @@ describe('Audio Client Tests', () => { audioClient.settings.temperature = 0.0; // for deterministic results let fullResponse = ''; - await audioClient.transcribeStreaming(AUDIO_FILE_PATH, (chunk) => { + for await (const chunk of audioClient.transcribeStreaming(AUDIO_FILE_PATH)) { expect(chunk).to.not.be.undefined; expect(chunk.text).to.not.be.undefined; expect(chunk.text).to.be.a('string'); expect(chunk.text.length).to.be.greaterThan(0); fullResponse += chunk.text; - }); + } console.log(`Full response: ${fullResponse}`); expect(fullResponse).to.equal(EXPECTED_TEXT); @@ 
-190,27 +190,12 @@ describe('Audio Client Tests', () => { const audioClient = model.createAudioClient(); try { - await audioClient.transcribeStreaming('', () => {}); + // transcribeStreaming validates synchronously before returning the AsyncIterable + audioClient.transcribeStreaming(''); expect.fail('Should have thrown an error for empty audio file path'); } catch (error) { expect(error).to.be.instanceOf(Error); expect((error as Error).message).to.include('Audio file path must be a non-empty string'); } }); - - it('should throw when transcribing streaming with invalid callback', async function() { - const manager = getTestManager(); - const catalog = manager.catalog; - const model = await catalog.getModel(WHISPER_MODEL_ALIAS); - const audioClient = model.createAudioClient(); - const invalidCallbacks: any[] = [null, undefined, 42, {}, 'not-a-function']; - for (const invalidCallback of invalidCallbacks) { - try { - await audioClient.transcribeStreaming(AUDIO_FILE_PATH, invalidCallback as any); - expect.fail('Should have thrown an error for invalid callback'); - } catch (error) { - expect(error).to.be.instanceOf(Error); - } - } - }); }); \ No newline at end of file diff --git a/sdk/js/test/openai/chatClient.test.ts b/sdk/js/test/openai/chatClient.test.ts index 5f612845..7be190ce 100644 --- a/sdk/js/test/openai/chatClient.test.ts +++ b/sdk/js/test/openai/chatClient.test.ts @@ -81,13 +81,13 @@ describe('Chat Client Tests', () => { let fullContent = ''; let chunkCount = 0; - await client.completeStreamingChat(messages, (chunk: any) => { + for await (const chunk of client.completeStreamingChat(messages)) { chunkCount++; const content = chunk.choices?.[0]?.delta?.content; if (content) { fullContent += content; } - }); + } expect(chunkCount).to.be.greaterThan(0); expect(fullContent).to.be.a('string'); @@ -102,13 +102,13 @@ describe('Chat Client Tests', () => { fullContent = ''; chunkCount = 0; - await client.completeStreamingChat(messages, (chunk: any) => { + for await (const chunk of client.completeStreamingChat(messages)) { chunkCount++; const content = chunk.choices?.[0]?.delta?.content; if (content) { fullContent += content; } - }); + } expect(chunkCount).to.be.greaterThan(0); expect(fullContent).to.be.a('string'); @@ -172,7 +172,8 @@ describe('Chat Client Tests', () => { const invalidMessages: any[] = [[], null, undefined]; for (const invalidMessage of invalidMessages) { try { - await client.completeStreamingChat(invalidMessage, () => {}); + // completeStreamingChat validates synchronously before returning the AsyncIterable + client.completeStreamingChat(invalidMessage); expect.fail(`Should have thrown an error for ${Array.isArray(invalidMessage) ? 
'empty' : invalidMessage} messages`); } catch (error) { expect(error).to.be.instanceOf(Error); @@ -181,23 +182,6 @@ describe('Chat Client Tests', () => { } }); - it('should throw when completing streaming chat with invalid callback', async function() { - const manager = getTestManager(); - const catalog = manager.catalog; - const model = await catalog.getModel(TEST_MODEL_ALIAS); - const client = model.createChatClient(); - const messages = [{ role: 'user', content: 'Hello' }]; - const invalidCallbacks: any[] = [null, undefined, {} as any, 'not a function' as any]; - for (const invalidCallback of invalidCallbacks) { - try { - await client.completeStreamingChat(messages as any, invalidCallback as any); - expect.fail('Should have thrown an error for invalid callback'); - } catch (error) { - expect(error).to.be.instanceOf(Error); - } - } - }); - it('should perform tool calling chat completion (non-streaming)', async function() { this.timeout(20000); const manager = getTestManager(); @@ -305,7 +289,7 @@ describe('Chat Client Tests', () => { let lastToolCallChunk: any = null; // Check that each response chunk contains the expected information - await client.completeStreamingChat(messages, tools, (chunk: any) => { + for await (const chunk of client.completeStreamingChat(messages, tools)) { const content = chunk.choices?.[0]?.message?.content ?? chunk.choices?.[0]?.delta?.content; if (content) { fullResponse += content; @@ -314,7 +298,7 @@ describe('Chat Client Tests', () => { if (toolCalls && toolCalls.length > 0) { lastToolCallChunk = chunk; } - }); + } expect(fullResponse).to.be.a('string').and.not.equal(''); expect(lastToolCallChunk).to.not.be.null; @@ -341,12 +325,12 @@ describe('Chat Client Tests', () => { // Run the next turn of the conversation fullResponse = ''; - await client.completeStreamingChat(messages, tools, (chunk: any) => { + for await (const chunk of client.completeStreamingChat(messages, tools)) { const content = chunk.choices?.[0]?.message?.content ?? 
chunk.choices?.[0]?.delta?.content; if (content) { fullResponse += content; } - }); + } // Check that the conversation continued expect(fullResponse).to.be.a('string').and.not.equal(''); diff --git a/sdk/js/test/openai/responsesClient.test.ts b/sdk/js/test/openai/responsesClient.test.ts index 925a2360..f0dbf4b0 100644 --- a/sdk/js/test/openai/responsesClient.test.ts +++ b/sdk/js/test/openai/responsesClient.test.ts @@ -10,7 +10,7 @@ import type { MessageItem, } from '../../src/types.js'; import { FoundryLocalManager } from '../../src/foundryLocalManager.js'; -import { Model } from '../../src/model.js'; +import type { IModel } from '../../src/imodel.js'; describe('ResponsesClient Tests', () => { @@ -371,7 +371,7 @@ describe('ResponsesClient Tests', () => { describe('Integration (requires model + web service)', function() { let manager: FoundryLocalManager; - let model: Model; + let model: IModel; let client: ResponsesClient; let skipped = false; diff --git a/sdk/python/.gitignore b/sdk/python/.gitignore new file mode 100644 index 00000000..543c109e --- /dev/null +++ b/sdk/python/.gitignore @@ -0,0 +1,20 @@ +# Native binaries downloaded from NuGet (per-platform) +packages/ + +# Build / egg info +*.egg-info/ +dist/ +build/ +*.whl +*.tar.gz +__pycache__/ + +# Logs +logs/ + +# IDE +.vscode/ +.idea/ + +# pytest +.pytest_cache/ diff --git a/sdk/python/LICENSE.txt b/sdk/python/LICENSE.txt new file mode 100644 index 00000000..48bc6bb4 --- /dev/null +++ b/sdk/python/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Microsoft Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/sdk/python/README.md b/sdk/python/README.md new file mode 100644 index 00000000..3ff677d2 --- /dev/null +++ b/sdk/python/README.md @@ -0,0 +1,310 @@ +# Foundry Local Python SDK + +The Foundry Local Python SDK provides a Python interface for interacting with local AI models via the Foundry Local Core native library. It allows you to discover, download, load, and run inference on models directly on your local machine — no cloud required. 
+ +## Features + +- **Model Discovery** – browse and search the model catalog +- **Model Management** – download, cache, load, and unload models +- **Chat Completions** – OpenAI-compatible chat API (non-streaming and streaming) +- **Tool Calling** – function-calling support with chat completions +- **Audio Transcription** – Whisper-based speech-to-text (non-streaming and streaming) +- **Built-in Web Service** – optional HTTP endpoint for multi-process scenarios +- **Native Performance** – ctypes FFI to AOT-compiled Foundry Local Core + +## Installation + +Two package variants are published — choose the one that matches your target hardware: + +| Variant | Package | Native backends | +|---|---|---| +| Standard (cross-platform) | `foundry-local-sdk` | CPU / WebGPU / CUDA | +| WinML (Windows only) | `foundry-local-sdk-winml` | Windows ML + all standard backends | + +```bash +# Standard (cross-platform — Linux, macOS, Windows) +pip install foundry-local-sdk + +# WinML (Windows only) +pip install foundry-local-sdk-winml +``` + +Each package installs the correct native binaries (`foundry-local-core`, `onnxruntime-core`, `onnxruntime-genai-core`) as wheel dependencies. They are mutually exclusive — install only one per environment. WinML is auto-detected at runtime: if the WinML package is installed, the SDK automatically enables the Windows App Runtime Bootstrap. + +### Building from source + +```bash +cd sdk/python + +# Standard wheel +python -m build --wheel + +# WinML wheel (uses the build_backend.py shim) +python -m build --wheel -C winml=true +``` + +For editable installs during development (native packages installed separately via `foundry-local-install`): + +```bash +pip install -e . +``` + +### Installing native binaries for development / CI + +When working from source the native packages are not pulled in automatically. Use the `foundry-local-install` CLI to install them: + +```bash +# Standard +foundry-local-install + +# WinML (Windows only) +foundry-local-install --winml +``` + +Add `--verbose` to print the resolved binary paths after installation: + +```bash +foundry-local-install --verbose +foundry-local-install --winml --verbose +``` + +> **Note:** The standard and WinML native packages use different PyPI package names (`foundry-local-core` vs `foundry-local-core-winml`) so they can coexist in the same pip index, but they should not be installed in the same Python environment simultaneously. + +## Explicit EP Management + +You can explicitly discover and download execution providers (EPs): + +```python +# Discover available EPs and registration status +eps = manager.discover_eps() +for ep in eps: + print(f"{ep.name} - registered: {ep.is_registered}") + +# Download and register all available EPs +result = manager.download_and_register_eps() +print(f"Success: {result.success}, Status: {result.status}") + +# Download only specific EPs +result2 = manager.download_and_register_eps([eps[0].name]) +``` + +### Per-EP download progress + +Pass a `progress_callback` to receive `(ep_name, percent)` updates as each EP downloads (`percent` is 0–100): + +```python +current_ep = "" + +def on_progress(ep_name: str, percent: float) -> None: + global current_ep + if ep_name != current_ep: + if current_ep: + print() + current_ep = ep_name + print(f"\r {ep_name} {percent:5.1f}%", end="", flush=True) + +manager.download_and_register_eps(progress_callback=on_progress) +print() +``` + +Catalog access does not block on EP downloads. 
Call `download_and_register_eps()` when you need hardware-accelerated execution providers. + +## Quick Start + +```python +from foundry_local_sdk import Configuration, FoundryLocalManager + +# 1. Initialize +config = Configuration(app_name="MyApp") +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance + +# 2. Discover models +catalog = manager.catalog +models = catalog.list_models() +for m in models: + print(f" {m.alias}") + +# 3. Load a model +model = catalog.get_model("phi-3.5-mini") +model.load() + +# 4. Chat +client = model.get_chat_client() +response = client.complete_chat([ + {"role": "user", "content": "Why is the sky blue?"} +]) +print(response.choices[0].message.content) + +# 5. Cleanup +model.unload() +``` + +## Usage + +### Initialization + +Create a `Configuration` and initialize the singleton `FoundryLocalManager`. + +```python +from foundry_local_sdk import Configuration, FoundryLocalManager +from foundry_local_sdk.configuration import LogLevel + +config = Configuration( + app_name="MyApp", + model_cache_dir="/path/to/cache", # optional + log_level=LogLevel.INFORMATION, # optional (default: Warning) + additional_settings={"Bootstrap": "false"}, # optional +) +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance +``` + +### Discovering Models + +```python +catalog = manager.catalog + +# List all models in the catalog +models = catalog.list_models() + +# Get a specific model by alias +model = catalog.get_model("qwen2.5-0.5b") + +# Get a specific variant by ID +variant = catalog.get_model_variant("qwen2.5-0.5b-instruct-generic-cpu:4") + +# List locally cached models +cached = catalog.get_cached_models() + +# List currently loaded models +loaded = catalog.get_loaded_models() +``` + +### Inspecting Model Metadata + +`IModel` exposes metadata properties from the catalog: + +```python +model = catalog.get_model("phi-3.5-mini") + +# Identity +print(model.id) # e.g. "phi-3.5-mini-instruct-generic-gpu:3" +print(model.alias) # e.g. "phi-3.5-mini" + +# Context and token limits +print(model.context_length) # e.g. 131072 (tokens), or None if unknown + +# Modalities and capabilities +print(model.input_modalities) # e.g. "text" or "text,image" +print(model.output_modalities) # e.g. "text" +print(model.capabilities) # e.g. "chat,completion" +print(model.supports_tool_calling) # True, False, or None + +# Cache / load state +print(model.is_cached) +print(model.is_loaded) +``` + +### Loading and Running a Model + +```python +model = catalog.get_model("qwen2.5-0.5b") + +# Select a specific variant (optional – defaults to highest-priority cached variant) +cached = catalog.get_cached_models() +variant = next(v for v in cached if v.alias == "qwen2.5-0.5b") +model.select_variant(variant) + +# Load into memory +model.load() + +# Non-streaming chat +client = model.get_chat_client() +client.settings.temperature = 0.0 +client.settings.max_tokens = 500 + +result = client.complete_chat([ + {"role": "user", "content": "What is 7 multiplied by 6?"} +]) +print(result.choices[0].message.content) # "42" + +# Streaming chat (complete_streaming_chat returns an iterable of chunks, +# as in examples/chat_completion.py) +for chunk in client.complete_streaming_chat([{"role": "user", "content": "Tell me a joke"}]): + delta = chunk.choices[0].delta + if delta and delta.content: + print(delta.content, end="", flush=True) + +# Unload when done +model.unload() +``` + +### Web Service (Optional) + +Start a built-in HTTP server for multi-process access.
+ +```python +manager.start_web_service() +print(f"Listening on: {manager.urls}") + +# ... use the service ... + +manager.stop_web_service() +``` + +## API Reference + +### Core Classes + +| Class | Description | +|---|---| +| `Configuration` | SDK configuration (app name, cache dir, log level, web service settings) | +| `FoundryLocalManager` | Singleton entry point – initialization, catalog access, web service | +| `EpInfo` | Discoverable execution provider info (`name`, `is_registered`) | +| `EpDownloadResult` | Result of EP download/registration (`success`, `status`, `registered_eps`, `failed_eps`) | +| `Catalog` | Model discovery – listing, lookup by alias/ID, cached/loaded queries | +| `IModel` | Abstract interface for models — identity, metadata, lifecycle, client creation, variant selection | + +### OpenAI Clients + +| Class | Description | +|---|---| +| `ChatClient` | Chat completions (non-streaming and streaming) with tool calling | +| `AudioClient` | Audio transcription (non-streaming and streaming) | + +### Internal / Detail + +| Class | Description | +|---|---| +| `Model` | Alias-level `IModel` implementation used by `Catalog.get_model()` (implementation detail) | +| `ModelVariant` | Specific model variant (implementation detail — implements `IModel`) | +| `CoreInterop` | ctypes FFI layer to the native Foundry Local Core library | +| `ModelLoadManager` | Load/unload via core interop or external web service | +| `ModelInfo` | Pydantic model for catalog entries | + +### CLI entry point + +| Function | CLI name | Description | +|---|---|---| +| `foundry_local_sdk.detail.utils.foundry_local_install` | `foundry-local-install` | Install and verify native binaries (`--winml` for WinML variant) | + +> **Migration note:** The function was previously named `verify_native_install`. The public CLI name (`foundry-local-install`) and its behaviour are unchanged; only the Python function name in `foundry_local_sdk.detail.utils` was updated to `foundry_local_install` for consistency. + +## Running Tests + +```bash +pip install -r requirements-dev.txt +python -m pytest test/ -v +``` + +See [test/README.md](test/README.md) for detailed test setup and structure. + +## Running Examples + +```bash +python examples/chat_completion.py +``` \ No newline at end of file diff --git a/sdk/python/build_backend.py b/sdk/python/build_backend.py new file mode 100644 index 00000000..1bdf6cbb --- /dev/null +++ b/sdk/python/build_backend.py @@ -0,0 +1,154 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""PEP 517 build backend shim for foundry-local-sdk. + +Delegates all hooks to ``setuptools.build_meta`` after optionally +patching ``pyproject.toml`` and ``requirements.txt`` in-place for the +WinML variant build. 
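+ +The patching is transactional: the original ``pyproject.toml`` and +``requirements.txt`` are restored in a ``finally`` block, so a failed +build leaves the working tree unchanged.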
+ +Usage +----- +Standard (default):: + + python -m build --wheel + +WinML variant:: + + python -m build --wheel -C winml=true + +Environment variable fallback (useful in CI pipelines):: + + FOUNDRY_VARIANT=winml python -m build --wheel + +CI usage (install a built wheel without pulling dependencies):: + + pip install --no-deps <built wheel> +""" + +from __future__ import annotations + +import contextlib +import os +import shutil +from collections.abc import Generator +from pathlib import Path + +import setuptools.build_meta as _sb + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +_PROJECT_ROOT = Path(__file__).parent +_PYPROJECT = _PROJECT_ROOT / "pyproject.toml" +_REQUIREMENTS = _PROJECT_ROOT / "requirements.txt" +_REQUIREMENTS_WINML = _PROJECT_ROOT / "requirements-winml.txt" + +# The exact string in pyproject.toml to patch for the WinML variant. +_STANDARD_NAME = 'name = "foundry-local-sdk"' +_WINML_NAME = 'name = "foundry-local-sdk-winml"' + + +# --------------------------------------------------------------------------- +# Variant detection +# --------------------------------------------------------------------------- + + +def _is_winml(config_settings: dict | None) -> bool: + """Return True when the WinML variant should be built. + + Checks ``config_settings["winml"]`` first (set via ``-C winml=true``), + then falls back to the ``FOUNDRY_VARIANT`` environment variable. + """ + if config_settings and str(config_settings.get("winml", "")).lower() == "true": + return True + return os.environ.get("FOUNDRY_VARIANT", "").lower() == "winml" + + +# --------------------------------------------------------------------------- +# In-place patching context manager +# --------------------------------------------------------------------------- + + +@contextlib.contextmanager +def _patch_for_winml() -> Generator[None, None, None]: + """Temporarily patch ``pyproject.toml`` and ``requirements.txt`` for WinML. + + Both files are restored to their original content in the ``finally`` + block, even if the build raises an exception. + """ + pyproject_original = _PYPROJECT.read_text(encoding="utf-8") + requirements_original = _REQUIREMENTS.read_text(encoding="utf-8") + try: + # Patch package name (simple string replacement — no TOML writer needed) + patched_pyproject = pyproject_original.replace(_STANDARD_NAME, _WINML_NAME, 1) + if patched_pyproject == pyproject_original: + raise RuntimeError( + f"Could not find {_STANDARD_NAME!r} in pyproject.toml — " + "WinML name patch failed."
+ ) + _PYPROJECT.write_text(patched_pyproject, encoding="utf-8") + + # Swap requirements.txt with the WinML variant + shutil.copy2(_REQUIREMENTS_WINML, _REQUIREMENTS) + + yield + finally: + _PYPROJECT.write_text(pyproject_original, encoding="utf-8") + _REQUIREMENTS.write_text(requirements_original, encoding="utf-8") + + +def _apply_patches(config_settings: dict | None): + """Return a context manager that applies the appropriate patches.""" + if _is_winml(config_settings): + return _patch_for_winml() + return contextlib.nullcontext() + + +# --------------------------------------------------------------------------- +# PEP 517 hook delegation +# --------------------------------------------------------------------------- + + +def get_requires_for_build_wheel(config_settings=None): + with _apply_patches(config_settings): + return _sb.get_requires_for_build_wheel(config_settings) + + +def prepare_metadata_for_build_wheel(metadata_directory, config_settings=None): + with _apply_patches(config_settings): + return _sb.prepare_metadata_for_build_wheel(metadata_directory, config_settings) + + +def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): + with _apply_patches(config_settings): + return _sb.build_wheel(wheel_directory, config_settings, metadata_directory) + + +def get_requires_for_build_editable(config_settings=None): + with _apply_patches(config_settings): + return _sb.get_requires_for_build_editable(config_settings) + + +def prepare_metadata_for_build_editable(metadata_directory, config_settings=None): + with _apply_patches(config_settings): + return _sb.prepare_metadata_for_build_editable(metadata_directory, config_settings) + + +def build_editable(wheel_directory, config_settings=None, metadata_directory=None): + with _apply_patches(config_settings): + return _sb.build_editable(wheel_directory, config_settings, metadata_directory) + + +def get_requires_for_build_sdist(config_settings=None): + with _apply_patches(config_settings): + return _sb.get_requires_for_build_sdist(config_settings) + + +def build_sdist(sdist_directory, config_settings=None): + if _is_winml(config_settings): + with _patch_for_winml(): + return _sb.build_sdist(sdist_directory, config_settings) + return _sb.build_sdist(sdist_directory, config_settings) diff --git a/sdk/python/examples/chat_completion.py b/sdk/python/examples/chat_completion.py new file mode 100644 index 00000000..c0c58048 --- /dev/null +++ b/sdk/python/examples/chat_completion.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +"""Example: Chat completion using Foundry Local Python SDK. + +Demonstrates basic chat completion with the Foundry Local runtime, +including model discovery, loading, and inference. +""" + +from foundry_local_sdk import Configuration, FoundryLocalManager + +def main(): + # 1. Initialize the SDK + config = Configuration(app_name="ChatCompletionExample") + print("Initializing Foundry Local Manager") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + + # Discover available EPs and register them explicitly when needed. 
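+ # discover_eps() only reports availability; nothing is downloaded until + # download_and_register_eps() is called.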
+ eps = manager.discover_eps() + print("Available execution providers:") + for ep in eps: + print(f" - {ep.name} (registered: {ep.is_registered})") + + ep_result = manager.download_and_register_eps() + print(f"EP registration success: {ep_result.success} ({ep_result.status})") + + # 2. Print available models in the catalog and cache + models = manager.catalog.list_models() + print("Available models in catalog:") + for m in models: + print(f" - {m.alias} ({m.id})") + + cached_models = manager.catalog.get_cached_models() + print("\nCached models:") + for m in cached_models: + print(f" - {m.alias} ({m.id})") + + CACHED_MODEL_ALIAS = "qwen2.5-0.5b" + + # 3. Find a model from the cache (+ download if not cached) + model = manager.catalog.get_model(CACHED_MODEL_ALIAS) + if model is None: + print(f"Model '{CACHED_MODEL_ALIAS}' not found in catalog.") + print("Available models:") + for m in manager.catalog.list_models(): + print(f" - {m.alias} ({m.id})") + return + + if not model.is_cached: + print(f"Downloading {model.alias}...") + model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r")) + print() + + # 4. Load the model + print(f"Loading {model.alias}...", end="") + model.load() + print("loaded!") + + try: + # 5. Create a chat client and send a message + client = model.get_chat_client() + + print("\n--- Non-streaming ---") + response = client.complete_chat( + messages=[{"role": "user", "content": "What is the capital of France? Reply briefly."}] + ) + print(f"Response: {response.choices[0].message.content}") + + # 6. Streaming + print("\n--- Streaming ---") + for chunk in client.complete_streaming_chat( + [{"role": "user", "content": "Tell me a short joke."}] + ): + if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="", flush=True) + print() # newline after streaming + + except Exception as e: + print(f"Error during inference: {e}") + + finally: + # 7. Cleanup + model.unload() + print("\nModel unloaded.") + + +if __name__ == "__main__": + main() diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml new file mode 100644 index 00000000..ef93b6f7 --- /dev/null +++ b/sdk/python/pyproject.toml @@ -0,0 +1,55 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "build_backend" +backend-path = ["."] + +[project] +name = "foundry-local-sdk" +dynamic = ["version", "dependencies"] +description = "Foundry Local Manager Python SDK: Control-plane SDK for Foundry Local." 
+readme = "README.md" +requires-python = ">=3.11" +license = "MIT" +license-files = ["LICENSE.txt"] +authors = [ + {name = "Microsoft Corporation", email = "foundrylocaldevs@microsoft.com"}, +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +[project.urls] +Homepage = "https://github.com/microsoft/Foundry-Local" + +[project.scripts] +foundry-local-install = "foundry_local_sdk.detail.utils:foundry_local_install" + +[tool.setuptools.package-dir] +foundry_local_sdk = "src" +"foundry_local_sdk.detail" = "src/detail" +"foundry_local_sdk.openai" = "src/openai" + +[tool.setuptools] +packages = ["foundry_local_sdk", "foundry_local_sdk.detail", "foundry_local_sdk.openai"] + +[tool.setuptools.dynamic] +version = {attr = "foundry_local_sdk.version.__version__"} +dependencies = {file = ["requirements.txt"]} + +[tool.pytest.ini_options] +testpaths = ["test"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +timeout = 60 diff --git a/sdk/python/requirements-dev.txt b/sdk/python/requirements-dev.txt new file mode 100644 index 00000000..aea40875 --- /dev/null +++ b/sdk/python/requirements-dev.txt @@ -0,0 +1,5 @@ +-r requirements.txt +build +coverage +pytest +pytest-timeout diff --git a/sdk/python/requirements-winml.txt b/sdk/python/requirements-winml.txt new file mode 100644 index 00000000..eb9d4aa1 --- /dev/null +++ b/sdk/python/requirements-winml.txt @@ -0,0 +1,7 @@ +pydantic>=2.0.0 +requests>=2.32.4 +openai>=2.24.0 +# WinML native binary packages from the ORT-Nightly PyPI feed. +foundry-local-core-winml==1.0.0-rc1 +onnxruntime-core==1.24.3 +onnxruntime-genai-core==0.12.1 \ No newline at end of file diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt new file mode 100644 index 00000000..1c0f62ac --- /dev/null +++ b/sdk/python/requirements.txt @@ -0,0 +1,7 @@ +pydantic>=2.0.0 +requests>=2.32.4 +openai>=2.24.0 +# Standard native binary packages from the ORT-Nightly PyPI feed. +foundry-local-core==1.0.0-rc1 +onnxruntime-core==1.24.3 +onnxruntime-genai-core==0.12.1 \ No newline at end of file diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py new file mode 100644 index 00000000..14534d19 --- /dev/null +++ b/sdk/python/src/__init__.py @@ -0,0 +1,23 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +import logging +import sys + +from .configuration import Configuration +from .foundry_local_manager import FoundryLocalManager +from .version import __version__ + +_logger = logging.getLogger(__name__) +_logger.setLevel(logging.WARNING) + +_sc = logging.StreamHandler(stream=sys.stdout) +_formatter = logging.Formatter( + "[foundry-local] | %(asctime)s | %(levelname)-8s | %(message)s", datefmt="%Y-%m-%d %H:%M:%S" +) +_sc.setFormatter(_formatter) +_logger.addHandler(_sc) +_logger.propagate = False + +__all__ = ["Configuration", "FoundryLocalManager", "__version__"] diff --git a/sdk/python/src/catalog.py b/sdk/python/src/catalog.py new file mode 100644 index 00000000..51f5bd8f --- /dev/null +++ b/sdk/python/src/catalog.py @@ -0,0 +1,179 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from __future__ import annotations + +import datetime +import logging +import threading +from typing import List, Optional +from pydantic import TypeAdapter + +from .imodel import IModel +from .detail.model import Model +from .detail.model_variant import ModelVariant + +from .detail.core_interop import CoreInterop, get_cached_model_ids +from .detail.model_data_types import ModelInfo +from .detail.model_load_manager import ModelLoadManager +from .exception import FoundryLocalException + +logger = logging.getLogger(__name__) + +class Catalog(): + """Model catalog for discovering and querying available models. + + Provides methods to list models, look up by alias or ID, and query + cached or loaded models. The model list is refreshed every 6 hours. + """ + + def __init__(self, model_load_manager: ModelLoadManager, core_interop: CoreInterop): + """Initialize the Catalog. + + Args: + model_load_manager: Manager for loading/unloading models. + core_interop: Native interop layer for Foundry Local Core. 
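+
+        Raises:
+            FoundryLocalException: If the catalog name cannot be retrieved
+                from Foundry Local Core.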
+ """ + self._core_interop = core_interop + self._model_load_manager = model_load_manager + self._lock = threading.Lock() + + self._models: List[ModelInfo] = [] + self._model_alias_to_model = {} + self._model_id_to_model_variant = {} + self._last_fetch = datetime.datetime.min + + response = core_interop.execute_command("get_catalog_name") + if response.error is not None: + raise FoundryLocalException(f"Failed to get catalog name: {response.error}") + + self.name = response.data + + def _update_models(self): + with self._lock: + # refresh every 6 hours + if (datetime.datetime.now() - self._last_fetch) < datetime.timedelta(hours=6): + return + + response = self._core_interop.execute_command("get_model_list") + if response.error is not None: + raise FoundryLocalException(f"Failed to get model list: {response.error}") + + model_list_json = response.data + + adapter = TypeAdapter(list[ModelInfo]) + models: List[ModelInfo] = adapter.validate_json(model_list_json) + + self._model_alias_to_model.clear() + self._model_id_to_model_variant.clear() + + for model_info in models: + variant = ModelVariant(model_info, self._model_load_manager, self._core_interop) + + value = self._model_alias_to_model.get(model_info.alias) + if value is None: + value = Model(variant, self._core_interop) + self._model_alias_to_model[model_info.alias] = value + else: + value._add_variant(variant) + + self._model_id_to_model_variant[variant.id] = variant + + self._models = models + self._last_fetch = datetime.datetime.now() + + def _invalidate_cache(self): + with self._lock: + self._last_fetch = datetime.datetime.min + + def list_models(self) -> List[IModel]: + """ + List the available models in the catalog. + :return: List of IModel instances. + """ + self._update_models() + return list(self._model_alias_to_model.values()) + + def get_model(self, model_alias: str) -> Optional[IModel]: + """ + Lookup a model by its alias. + :param model_alias: Model alias. + :return: IModel if found. + """ + self._update_models() + return self._model_alias_to_model.get(model_alias) + + def get_model_variant(self, model_id: str) -> Optional[IModel]: + """ + Lookup a model variant by its unique model id. + NOTE: This will return an IModel with a single variant. Use get_model to get an IModel with all available + variants. + :param model_id: Model id. + :return: IModel if found. + """ + self._update_models() + return self._model_id_to_model_variant.get(model_id) + + def get_latest_version(self, model_or_model_variant: IModel) -> IModel: + """ + Resolve the latest catalog version for the provided model or variant. + + :param model_or_model_variant: IModel to resolve. + :return: Latest catalog version for the same model name. + :raises FoundryLocalException: If the alias or name cannot be resolved. + """ + self._update_models() + + model = self._model_alias_to_model.get(model_or_model_variant.alias) + if model is None: + raise FoundryLocalException( + f"Model with alias '{model_or_model_variant.alias}' not found in catalog." + ) + + latest = next( + (variant for variant in model.variants if variant.info.name == model_or_model_variant.info.name), + None, + ) + if latest is None: + raise FoundryLocalException( + f"Internal error. Mismatch between model (alias:{model.alias}) and " + f"model variant (alias:{model_or_model_variant.alias})." + ) + + return model_or_model_variant if latest.id == model_or_model_variant.id else latest + + def get_cached_models(self) -> List[IModel]: + """ + Get a list of currently downloaded models from the model cache. 
+        :return: List of IModel instances.
+        """
+        self._update_models()
+
+        cached_model_ids = get_cached_model_ids(self._core_interop)
+
+        cached_models: List[IModel] = []
+        for model_id in cached_model_ids:
+            model_variant = self._model_id_to_model_variant.get(model_id)
+            if model_variant is not None:
+                cached_models.append(model_variant)
+
+        return cached_models
+
+    def get_loaded_models(self) -> List[IModel]:
+        """
+        Get a list of the currently loaded models.
+        :return: List of IModel instances.
+        """
+        self._update_models()
+
+        loaded_model_ids = self._model_load_manager.list_loaded()
+        loaded_models: List[IModel] = []
+
+        for model_id in loaded_model_ids:
+            model_variant = self._model_id_to_model_variant.get(model_id)
+            if model_variant is not None:
+                loaded_models.append(model_variant)
+
+        return loaded_models
\ No newline at end of file
diff --git a/sdk/python/src/configuration.py b/sdk/python/src/configuration.py
new file mode 100644
index 00000000..23967efb
--- /dev/null
+++ b/sdk/python/src/configuration.py
@@ -0,0 +1,163 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+import logging
+import re
+
+from typing import Optional, Dict
+from urllib.parse import urlparse
+
+from .exception import FoundryLocalException
+
+from .logging_helper import LogLevel
+
+logger = logging.getLogger(__name__)
+
+
+class Configuration:
+    """Configuration for Foundry Local SDK.
+
+    Configuration values:
+        app_name: Your application name. MUST be set to a valid name.
+        foundry_local_core_path: Path to the Foundry Local Core native library.
+        app_data_dir: Application data directory.
+            Default: {home}/.{appname}, where {home} is the user's home directory
+            and {appname} is the app_name value.
+        model_cache_dir: Model cache directory.
+            Default: {appdata}/cache/models, where {appdata} is the app_data_dir value.
+        logs_dir: Log directory.
+            Default: {appdata}/logs
+        log_level: Logging level.
+            Valid values are: Verbose, Debug, Information, Warning, Error, Fatal.
+            Default: LogLevel.WARNING
+        web: Optional configuration for the built-in web service.
+            NOTE: This is not included in all builds.
+        additional_settings: Additional settings that Foundry Local Core can consume.
+            Keys and values are strings.
+    """
+
+    class WebService:
+        """Configuration settings if the optional web service is used."""
+
+        def __init__(
+            self,
+            urls: Optional[str] = None,
+            external_url: Optional[str] = None
+        ):
+            """Initialize WebService configuration.
+
+            Args:
+                urls: URL(s) to bind to the web service when
+                    FoundryLocalManager.start_web_service() is called.
+                    After startup, FoundryLocalManager.urls will contain the actual
+                    URL(s) the service is listening on.
+                    Default: 127.0.0.1:0, which binds to a random ephemeral port.
+                    Multiple URLs can be specified as a semicolon-separated list.
+                external_url: If the web service is running in a separate process,
+                    it will be accessed using this URI.
+                    Both processes should be using the same version of the SDK.
+                    If a random port is assigned when the web service is created in
+                    the external process, the actual port must be provided here.
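+
+            Example (illustrative values)::
+
+                web = Configuration.WebService(urls="http://127.0.0.1:5000")
+                config = Configuration(app_name="MyApp", web=web)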
+ """ + self.urls = urls + self.external_url = external_url + + def __init__( + self, + app_name: str, + foundry_local_core_path: Optional[str] = None, + app_data_dir: Optional[str] = None, + model_cache_dir: Optional[str] = None, + logs_dir: Optional[str] = None, + log_level: Optional[LogLevel] = LogLevel.WARNING, + web: Optional['Configuration.WebService'] = None, + additional_settings: Optional[Dict[str, str]] = None + ): + """Initialize Configuration. + + Args: + app_name: Your application name. MUST be set to a valid name. + app_data_dir: Application data directory. Optional. + model_cache_dir: Model cache directory. Optional. + logs_dir: Log directory. Optional. + log_level: Logging level. Default: LogLevel.WARNING + web: Optional configuration for the built-in web service. + additional_settings: Additional settings dictionary. Optional. + """ + self.app_name = app_name + self.foundry_local_core_path = foundry_local_core_path + self.app_data_dir = app_data_dir + self.model_cache_dir = model_cache_dir + self.logs_dir = logs_dir + self.log_level = log_level + self.web = web + self.additional_settings = additional_settings + + # make sure app name only has safe characters as it's used as a directory name + self._safe_app_name_chars = re.compile(r'^[A-Za-z0-9._-]+$') + + def validate(self) -> None: + """Validate the configuration. + + Raises: + FoundryLocalException: If configuration is invalid. + """ + if not self.app_name: + raise FoundryLocalException( + "Configuration AppName must be set to a valid application name." + ) + + # Check for invalid filename characters + if not bool(self._safe_app_name_chars.match(self.app_name)): + raise FoundryLocalException("Configuration AppName value contains invalid characters.") + + if self.web is not None and self.web.external_url is not None: + parsed = urlparse(self.web.external_url) + if not parsed.port or parsed.port == 0: + raise FoundryLocalException("Configuration Web.ExternalUrl has invalid port.") + + def as_dictionary(self) -> Dict[str, str]: + """Convert configuration to a dictionary of string key-value pairs. + + Returns: + Dictionary containing configuration values as strings. + + Raises: + FoundryLocalException: If AppName is not set to a valid value. + """ + if not self.app_name: + raise FoundryLocalException( + "Configuration AppName must be set to a valid application name." + ) + + config_values = { + "AppName": self.app_name, + "LogLevel": str(self.log_level) + } + + if self.app_data_dir: + config_values["AppDataDir"] = self.app_data_dir + + if self.model_cache_dir: + config_values["ModelCacheDir"] = self.model_cache_dir + + if self.logs_dir: + config_values["LogsDir"] = self.logs_dir + + if self.foundry_local_core_path: + config_values["FoundryLocalCorePath"] = self.foundry_local_core_path + + if self.web is not None: + if self.web.urls is not None: + config_values["WebServiceUrls"] = self.web.urls + + # Emit any additional settings. + if self.additional_settings is not None: + for key, value in self.additional_settings.items(): + if not key: + continue # skip empty keys + config_values[key] = value if value is not None else "" + + return config_values diff --git a/sdk/python/src/detail/__init__.py b/sdk/python/src/detail/__init__.py new file mode 100644 index 00000000..d9a7cbc0 --- /dev/null +++ b/sdk/python/src/detail/__init__.py @@ -0,0 +1,25 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +"""This file is required for Python to treat this directory as a package, +enabling dotted imports such as ``foundry_local_sdk.detail.core_interop``. + +The re-exports below are optional convenience aliases so callers can write +``from foundry_local_sdk.detail import CoreInterop`` instead of importing +from the individual submodule directly. +""" + +from .core_interop import CoreInterop, InteropRequest, Response +from .model_data_types import ModelInfo, DeviceType, Runtime +from .model_load_manager import ModelLoadManager + +__all__ = [ + "CoreInterop", + "DeviceType", + "InteropRequest", + "ModelInfo", + "ModelLoadManager", + "Response", + "Runtime", +] diff --git a/sdk/python/src/detail/core_interop.py b/sdk/python/src/detail/core_interop.py new file mode 100644 index 00000000..4f4ddb67 --- /dev/null +++ b/sdk/python/src/detail/core_interop.py @@ -0,0 +1,309 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from __future__ import annotations + +import ctypes +import json +import logging +import os +import sys + +from dataclasses import dataclass +from pathlib import Path +from typing import Callable, Dict, Optional +from ..configuration import Configuration +from ..exception import FoundryLocalException +from .utils import get_native_binary_paths, NativeBinaryPaths, create_ort_symlinks, _get_ext + +logger = logging.getLogger(__name__) + +class InteropRequest: + """Request payload for a Foundry Local Core command. + + Args: + params: Dictionary of key-value string parameters. + """ + + def __init__(self, params: Dict[str, str] = None): + self.params = params or {} + + def to_json(self) -> str: + """Serialize the request to a JSON string.""" + return json.dumps({"Params": self.params}, ensure_ascii=False) # FLC expects UTF-8 encoded JSON (not ascii) + + +class RequestBuffer(ctypes.Structure): + """ctypes Structure matching the native ``RequestBuffer`` C struct.""" + + _fields_ = [ + ("Command", ctypes.c_void_p), + ("CommandLength", ctypes.c_int), + ("Data", ctypes.c_void_p), + ("DataLength", ctypes.c_int), + ] + + +class ResponseBuffer(ctypes.Structure): + """ctypes Structure matching the native ``ResponseBuffer`` C struct.""" + + _fields_ = [ + ("Data", ctypes.c_void_p), + ("DataLength", ctypes.c_int), + ("Error", ctypes.c_void_p), + ("ErrorLength", ctypes.c_int), + ] + + +@dataclass +class Response: + """Result from a Foundry Local Core command. + Either ``data`` or ``error`` will be set, never both. 
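+
+    Typical handling treats a non-``None`` ``error`` as failure::
+
+        response = core_interop.execute_command("get_model_list")
+        if response.error is not None:
+            raise FoundryLocalException(f"Command failed: {response.error}")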
+ """ + + data: Optional[str] = None + error: Optional[str] = None + + +class CallbackHelper: + """Internal helper class to convert the callback from ctypes to a str and call the python callback.""" + @staticmethod + def callback(data_ptr, length, self_ptr): + self = None + try: + self = ctypes.cast(self_ptr, ctypes.POINTER(ctypes.py_object)).contents.value + + # convert to a string and pass to the python callback + data_bytes = ctypes.string_at(data_ptr, length) + data_str = data_bytes.decode('utf-8') + self._py_callback(data_str) + except Exception as e: + if self is not None and self.exception is None: + self.exception = e # keep the first only as they are likely all the same + + def __init__(self, py_callback: Callable[[str], None]): + self._py_callback = py_callback + self.exception = None + + +class CoreInterop: + """ctypes FFI layer for the Foundry Local Core native library. + + Provides ``execute_command`` and ``execute_command_with_callback`` to + invoke native commands exposed by ``Microsoft.AI.Foundry.Local.Core``. + """ + + _initialized = False + _flcore_library = None + _genai_library = None + _ort_library = None + + instance = None + + # Callback function for native interop. + # This returns a string and its length, and an optional user provided object. + CALLBACK_TYPE = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p) + + @staticmethod + def _initialize_native_libraries() -> 'NativeBinaryPaths': + """Load the native Foundry Local Core library and its dependencies. + + Locates the binaries from the installed Python packages + ``foundry-local-core``, ``onnxruntime-core``, and + ``onnxruntime-genai-core`` using :func:`get_native_binary_paths`. + + Returns: + NativeBinaryPaths with resolved paths to all native binaries. + """ + paths = get_native_binary_paths() + if paths is None: + raise RuntimeError( + "Could not locate native libraries.\n" + " Standard variant : pip install foundry-local-sdk\n" + " WinML variant : pip install foundry-local-sdk-winml\n" + " Dev/CI install : foundry-local-install (or --winml)" + ) + + logger.info("Native libraries found — Core: %s ORT: %s GenAI: %s", + paths.core, paths.ort, paths.genai) + + # Create the onnxruntime.dll symlink on Linux/macOS if needed. + # create_ort_symlinks(paths) + os.environ["ORT_LIB_PATH"] = str(paths.ort) # For ORT-GENAI to find ORT dependency + + if sys.platform.startswith("win"): + # Register every binary directory so the .NET AOT Core library + # can resolve sibling DLLs via P/Invoke. + for native_dir in paths.all_dirs(): + os.add_dll_directory(str(native_dir)) + + # Explicitly pre-load ORT and GenAI so their symbols are globally + # available when Core does P/Invoke lookups at runtime. + # On Windows the PATH manipulation above is sufficient; on + # Linux/macOS we need RTLD_GLOBAL so that dlopen() within the + # Core native code can resolve ORT/GenAI symbols. + # ORT must be loaded before GenAI (GenAI depends on ORT). 
+ if sys.platform.startswith("win"): + CoreInterop._ort_library = ctypes.CDLL(str(paths.ort)) + CoreInterop._genai_library = ctypes.CDLL(str(paths.genai)) + else: + CoreInterop._ort_library = ctypes.CDLL(str(paths.ort), mode=os.RTLD_GLOBAL) + CoreInterop._genai_library = ctypes.CDLL(str(paths.genai), mode=os.RTLD_GLOBAL) + + CoreInterop._flcore_library = ctypes.CDLL(str(paths.core)) + + # Set the function signatures + lib = CoreInterop._flcore_library + lib.execute_command.argtypes = [ctypes.POINTER(RequestBuffer), + ctypes.POINTER(ResponseBuffer)] + lib.execute_command.restype = None + + lib.free_response.argtypes = [ctypes.POINTER(ResponseBuffer)] + lib.free_response.restype = None + + # Set the callback function signature and delegate info + lib.execute_command_with_callback.argtypes = [ctypes.POINTER(RequestBuffer), + ctypes.POINTER(ResponseBuffer), + ctypes.c_void_p, # callback_fn + ctypes.c_void_p] # user_data + lib.execute_command_with_callback.restype = None + + return paths + + @staticmethod + def _to_c_buffer(s: str): + # Helper: encodes strings into unmanaged memory + if s is None: + return ctypes.c_void_p(0), 0, None + + buf = s.encode("utf-8") + ptr = ctypes.create_string_buffer(buf) # keeps memory alive in Python + return ctypes.cast(ptr, ctypes.c_void_p), len(buf), ptr + + def __init__(self, config: Configuration): + if not CoreInterop._initialized: + paths = CoreInterop._initialize_native_libraries() + CoreInterop._initialized = True + + # Pass the full path to the Core DLL so the native layer can + # discover sibling DLLs via Path.GetDirectoryName(FoundryLocalCorePath). + flcore_lib_name = f"Microsoft.AI.Foundry.Local.Core{_get_ext()}" + config.foundry_local_core_path = str(paths.core_dir / flcore_lib_name) + + # Pass ORT and GenAI library paths so the C# native library resolver + # can search their directories (they may be in separate pip packages). + if config.additional_settings is None: + config.additional_settings = {} + config.additional_settings["OrtLibraryPath"] = str(paths.ort) + config.additional_settings["OrtGenAILibraryPath"] = str(paths.genai) + + # Auto-detect WinML Bootstrap: if the Bootstrap DLL is present + # in the native binaries directory and the user hasn't explicitly + # set the Bootstrap config, enable it automatically. + if sys.platform.startswith("win"): + bootstrap_dll = paths.core_dir / "Microsoft.WindowsAppRuntime.Bootstrap.dll" + if bootstrap_dll.exists(): + # Pre-load so the DLL is already in the process when + # C# P/Invoke resolves it during Bootstrap.Initialize(). 
+ ctypes.CDLL(str(bootstrap_dll)) + if config.additional_settings is None: + config.additional_settings = {} + if "Bootstrap" not in config.additional_settings: + logger.info("WinML Bootstrap DLL detected — enabling Bootstrap") + config.additional_settings["Bootstrap"] = "true" + + request = InteropRequest(params=config.as_dictionary()) + response = self.execute_command("initialize", request) + if response.error is not None: + raise FoundryLocalException(f"Failed to initialize Foundry.Local.Core: {response.error}") + + logger.info("Foundry.Local.Core initialized successfully: %s", response.data) + + def _execute_command(self, command: str, interop_request: InteropRequest = None, + callback: CoreInterop.CALLBACK_TYPE = None): + cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command) + data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None) + + req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len) + resp = ResponseBuffer() + lib = CoreInterop._flcore_library + + if (callback is not None): + # If a callback is provided, use the execute_command_with_callback method + # We need a helper to do the initial conversion from ctypes to Python and pass it through to the + # provided callback function + callback_helper = CallbackHelper(callback) + callback_py_obj = ctypes.py_object(callback_helper) + callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p) + callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback) + + lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr) + + if callback_helper.exception is not None: + raise callback_helper.exception + else: + lib.execute_command(ctypes.byref(req), ctypes.byref(resp)) + + req = None # Free Python reference to request + + response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None + error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None + + # C# owns the memory in the response so we need to free it explicitly + lib.free_response(resp) + + return Response(data=response_str, error=error_str) + + def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response: + """Execute a command synchronously. + + Args: + command_name: The native command name (e.g. ``"get_model_list"``). + command_input: Optional request parameters. + + Returns: + A ``Response`` with ``data`` on success or ``error`` on failure. + """ + logger.debug("Executing command: %s Input: %s", command_name, + command_input.params if command_input else None) + + response = self._execute_command(command_name, command_input) + return response + + def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest], + callback: Callable[[str], None]) -> Response: + """Execute a command with a streaming callback. + + The ``callback`` receives incremental string data from the native layer + (e.g. streaming chat tokens or download progress). + + Args: + command_name: The native command name. + command_input: Optional request parameters. + callback: Called with each incremental string response. + + Returns: + A ``Response`` with ``data`` on success or ``error`` on failure. 
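+
+        Example (illustrative; this is the pattern ``ModelVariant.download`` uses)::
+
+            interop.execute_command_with_callback(
+                "download_model",
+                InteropRequest({"Model": model_id}),
+                lambda pct_str: print(pct_str),
+            )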
+ """ + logger.debug("Executing command with callback: %s Input: %s", command_name, + command_input.params if command_input else None) + response = self._execute_command(command_name, command_input, callback) + return response + + +def get_cached_model_ids(core_interop: CoreInterop) -> list[str]: + """Get the list of models that have been downloaded and are cached.""" + + response = core_interop.execute_command("get_cached_models") + if response.error is not None: + raise FoundryLocalException(f"Failed to get cached models: {response.error}") + + try: + model_ids = json.loads(response.data) + except json.JSONDecodeError as e: + raise FoundryLocalException(f"Failed to decode JSON response: Response was: {response.data}") from e + + return model_ids + diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py new file mode 100644 index 00000000..189920b1 --- /dev/null +++ b/sdk/python/src/detail/model.py @@ -0,0 +1,143 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +from __future__ import annotations + +import logging +from typing import Callable, List, Optional + +from ..imodel import IModel +from ..openai.chat_client import ChatClient +from ..openai.audio_client import AudioClient +from .model_variant import ModelVariant +from ..exception import FoundryLocalException +from .core_interop import CoreInterop +from .model_data_types import ModelInfo + +logger = logging.getLogger(__name__) + + +class Model(IModel): + """A model identified by an alias that groups one or more variants. + + Operations are delegated to the currently selected variant. + """ + + def __init__(self, model_variant: ModelVariant, core_interop: CoreInterop): + self._alias = model_variant.alias + self._variants: List[ModelVariant] = [model_variant] + # Variants are sorted by Core, so the first one added is the default + self._selected_variant = model_variant + self._core_interop = core_interop + + def _add_variant(self, variant: ModelVariant) -> None: + if variant.alias != self._alias: + raise FoundryLocalException( + f"Variant alias {variant.alias} does not match model alias {self._alias}" + ) + + self._variants.append(variant) + + # Prefer the highest priority locally cached variant + if variant.info.cached and not self._selected_variant.info.cached: + self._selected_variant = variant + + def select_variant(self, variant: IModel) -> None: + """ + Select a specific model variant to use for IModel operations. + An IModel from ``variants`` can also be used directly. + + :param variant: IModel to select. Must be one of the variants in ``variants``. + :raises FoundryLocalException: If variant is not valid for this model + """ + matching = next((v for v in self._variants if v.id == variant.id), None) + if matching is None: + raise FoundryLocalException( + "Input variant was not found in Variants." 
+ ) + + self._selected_variant = matching + + @property + def variants(self) -> List[IModel]: + """List of all variants for this model.""" + return list(self._variants) # Return a copy to prevent external modification + + @property + def id(self) -> str: + """Model Id of the currently selected variant.""" + return self._selected_variant.id + + @property + def alias(self) -> str: + """Alias of this model.""" + return self._alias + + @property + def info(self) -> ModelInfo: + """ModelInfo of the currently selected variant.""" + return self._selected_variant.info + + @property + def context_length(self) -> Optional[int]: + """Maximum context length (in tokens) of the currently selected variant.""" + return self._selected_variant.context_length + + @property + def input_modalities(self) -> Optional[str]: + """Comma-separated input modalities of the currently selected variant.""" + return self._selected_variant.input_modalities + + @property + def output_modalities(self) -> Optional[str]: + """Comma-separated output modalities of the currently selected variant.""" + return self._selected_variant.output_modalities + + @property + def capabilities(self) -> Optional[str]: + """Comma-separated capability tags of the currently selected variant.""" + return self._selected_variant.capabilities + + @property + def supports_tool_calling(self) -> Optional[bool]: + """Whether the currently selected variant supports tool/function calling.""" + return self._selected_variant.supports_tool_calling + + @property + def is_cached(self) -> bool: + """Is the currently selected variant cached locally?""" + return self._selected_variant.is_cached + + @property + def is_loaded(self) -> bool: + """Is the currently selected variant loaded in memory?""" + return self._selected_variant.is_loaded + + def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None: + """Download the currently selected variant.""" + self._selected_variant.download(progress_callback) + + def get_path(self) -> str: + """Get the path to the currently selected variant.""" + return self._selected_variant.get_path() + + def load(self) -> None: + """Load the currently selected variant into memory.""" + self._selected_variant.load() + + def unload(self) -> None: + """Unload the currently selected variant from memory.""" + self._selected_variant.unload() + + def remove_from_cache(self) -> None: + """Remove the currently selected variant from the local cache.""" + self._selected_variant.remove_from_cache() + + def get_chat_client(self) -> ChatClient: + """Get a chat client for the currently selected variant.""" + return self._selected_variant.get_chat_client() + + def get_audio_client(self) -> AudioClient: + """Get an audio client for the currently selected variant.""" + return self._selected_variant.get_audio_client() diff --git a/sdk/python/src/detail/model_data_types.py b/sdk/python/src/detail/model_data_types.py new file mode 100644 index 00000000..46525dc7 --- /dev/null +++ b/sdk/python/src/detail/model_data_types.py @@ -0,0 +1,80 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# --------------------------------------------------------------------------
+
+from enum import StrEnum
+from typing import Optional, List
+
+from pydantic import BaseModel, Field
+
+# ---------- ENUMS ----------
+class DeviceType(StrEnum):
+    """Device types supported by model variants."""
+
+    CPU = "CPU"
+    GPU = "GPU"
+    NPU = "NPU"
+
+# ---------- DATA MODELS ----------
+
+class PromptTemplate(BaseModel):
+    """Prompt template strings for system, user, assistant, and raw prompt roles."""
+
+    system: Optional[str] = Field(default=None, alias="system")
+    user: Optional[str] = Field(default=None, alias="user")
+    assistant: Optional[str] = Field(default=None, alias="assistant")
+    prompt: Optional[str] = Field(default=None, alias="prompt")
+
+
+class Runtime(BaseModel):
+    """Runtime configuration specifying the device type and execution provider."""
+
+    device_type: DeviceType = Field(alias="deviceType")
+    execution_provider: str = Field(alias="executionProvider")
+
+
+class Parameter(BaseModel):
+    """A named parameter with an optional string value."""
+
+    name: str
+    value: Optional[str] = None
+
+
+class ModelSettings(BaseModel):
+    """Model-specific settings containing a list of parameters."""
+
+    parameters: Optional[List[Parameter]] = Field(default=None, alias="parameters")
+
+
+class ModelInfo(BaseModel):
+    """Catalog metadata for a single model variant.
+
+    Fields are populated from the JSON response of the ``get_model_list`` command.
+    """
+
+    id: str = Field(alias="id", description="Unique identifier of the model. Generally 'name:version'.")
+    name: str = Field(alias="name", description="Model variant name")
+    version: int = Field(alias="version")
+    alias: str = Field(..., description="Alias of the model")
+    display_name: Optional[str] = Field(default=None, alias="displayName")
+    provider_type: str = Field(alias="providerType")
+    uri: str = Field(alias="uri")
+    model_type: str = Field(alias="modelType")
+    prompt_template: Optional[PromptTemplate] = Field(default=None, alias="promptTemplate")
+    publisher: Optional[str] = Field(default=None, alias="publisher")
+    model_settings: Optional[ModelSettings] = Field(default=None, alias="modelSettings")
+    license: Optional[str] = Field(default=None, alias="license")
+    license_description: Optional[str] = Field(default=None, alias="licenseDescription")
+    cached: bool = Field(alias="cached")
+    task: Optional[str] = Field(default=None, alias="task")
+    runtime: Optional[Runtime] = Field(default=None, alias="runtime")
+    file_size_mb: Optional[int] = Field(default=None, alias="fileSizeMb")
+    supports_tool_calling: Optional[bool] = Field(default=None, alias="supportsToolCalling")
+    max_output_tokens: Optional[int] = Field(default=None, alias="maxOutputTokens")
+    min_fl_version: Optional[str] = Field(default=None, alias="minFLVersion")
+    created_at_unix: int = Field(alias="createdAt")
+    context_length: Optional[int] = Field(default=None, alias="contextLength")
+    input_modalities: Optional[str] = Field(default=None, alias="inputModalities")
+    output_modalities: Optional[str] = Field(default=None, alias="outputModalities")
+    capabilities: Optional[str] = Field(default=None, alias="capabilities")
diff --git a/sdk/python/src/detail/model_load_manager.py b/sdk/python/src/detail/model_load_manager.py
new file mode 100644
index 00000000..8ffd087a
--- /dev/null
+++ b/sdk/python/src/detail/model_load_manager.py
@@ -0,0 +1,166 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License. +# -------------------------------------------------------------------------- +from __future__ import annotations + +import json +import logging +import requests + +from typing import List +from urllib.parse import quote + +from ..exception import FoundryLocalException +from ..version import __version__ as sdk_version +from .core_interop import CoreInterop, InteropRequest + +logger = logging.getLogger(__name__) + + +class ModelLoadManager: + """Manages loading and unloading of models in Foundry Local. + + Can operate in two modes: direct interop with Foundry Local Core, or via + an external web service if the configuration provides a + ``WebServiceExternalUrl`` value. + """ + + _headers = {"user-agent": f"foundry-local-python-sdk/{sdk_version}"} + + def __init__(self, core_interop: CoreInterop, external_service_url: str = None): + self._core_interop = core_interop + self._external_service_url = external_service_url + + def load(self, model_id: str) -> None: + """ + Load a model by its ID. + + This method loads a model either via direct interop with Foundry Local Core + or, if an external service URL is configured, by calling the external web + service. + + :param model_id: The ID of the model to load. + :raises FoundryLocalException: If the model cannot be loaded successfully, + for example due to an error returned from Foundry Local Core or from + the external service, including underlying HTTP or network errors when + communicating with the external service. + """ + if self._external_service_url: + self._web_load_model(model_id) + return + + request = InteropRequest({"Model": model_id}) + response = self._core_interop.execute_command("load_model", request) + if response.error is not None: + raise FoundryLocalException(f"Failed to load model {model_id}: {response.error}") + + def unload(self, model_id: str) -> None: + """ + Unload a model by its ID. + :param model_id: The ID of the model to unload. + """ + if self._external_service_url: + self._web_unload_model(model_id) + return + + request = InteropRequest({"Model": model_id}) + response = self._core_interop.execute_command("unload_model", request) + if response.error is not None: + raise FoundryLocalException(f"Failed to unload model {model_id}: {response.error}") + + def list_loaded(self) -> list[str]: + """ + List loaded models. 
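+        Delegates to the external web service when one is configured;
+        otherwise queries Foundry Local Core directly.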
+ :return: List of loaded model IDs + """ + if self._external_service_url: + return self._web_list_loaded_models() + + response = self._core_interop.execute_command("list_loaded_models") + if response.error is not None: + raise FoundryLocalException(f"Failed to list loaded models: {response.error}") + + try: + model_ids = json.loads(response.data) + except json.JSONDecodeError as e: + raise FoundryLocalException(f"Failed to decode JSON response: Response was: {response.data}") from e + + return model_ids + + def _web_list_loaded_models(self) -> List[str]: + try: + response = requests.get(f"{self._external_service_url}/models/loaded", headers=self._headers, timeout=10) + + if not response.ok: + raise FoundryLocalException( + f"Error listing loaded models from {self._external_service_url}: {response.reason}" + ) + + content = response.text + logger.debug("Loaded models json from %s: %s", self._external_service_url, content) + + model_list = json.loads(content) + return model_list if model_list is not None else [] + except requests.RequestException as e: + raise FoundryLocalException( + f"HTTP request failed when listing loaded models from {self._external_service_url}" + ) from e + except json.JSONDecodeError as e: + raise FoundryLocalException(f"Failed to decode JSON response: Response was: {content}") from e + + def _web_load_model(self, model_id: str) -> None: + """ + Load a model via the external web service. + + :param model_id: The ID of the model to load + :raises FoundryLocalException: If the HTTP request fails or response is invalid + """ + try: + encoded_model_id = quote(model_id) + url = f"{self._external_service_url}/models/load/{encoded_model_id}" + + # Future: add query params like load timeout + # query_params = { + # # "timeout": "30" + # } + # response = requests.get(url, params=query_params) + + response = requests.get(url, headers=self._headers, timeout=10) + + if not response.ok: + raise FoundryLocalException( + f"Error loading model {model_id} from {self._external_service_url}: " + f"{response.reason}" + ) + + content = response.text + logger.info("Model %s loaded successfully from %s: %s", + model_id, self._external_service_url, content) + + except requests.RequestException as e: + raise FoundryLocalException( + f"HTTP request failed when loading model {model_id} from {self._external_service_url}: {e}" + ) from e + + def _web_unload_model(self, model_id: str) -> None: + try: + encoded_model_id = quote(model_id) + url = f"{self._external_service_url}/models/unload/{encoded_model_id}" + + response = requests.get(url, headers=self._headers, timeout=10) + + if not response.ok: + raise FoundryLocalException( + f"Error unloading model {model_id} from {self._external_service_url}: " + f"{response.reason}" + ) + + content = response.text + logger.info("Model %s unloaded successfully from %s: %s", + model_id, self._external_service_url, content) + + except requests.RequestException as e: + raise FoundryLocalException( + f"HTTP request failed when unloading model {model_id} from {self._external_service_url}: {e}" + ) from e diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py new file mode 100644 index 00000000..a5ac02d4 --- /dev/null +++ b/sdk/python/src/detail/model_variant.py @@ -0,0 +1,172 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +from __future__ import annotations + +import logging +from typing import Callable, List, Optional + +from ..imodel import IModel +from ..exception import FoundryLocalException + +from .core_interop import CoreInterop, InteropRequest +from .model_data_types import ModelInfo +from .core_interop import get_cached_model_ids +from .model_load_manager import ModelLoadManager +from ..openai.audio_client import AudioClient +from ..openai.chat_client import ChatClient + +logger = logging.getLogger(__name__) + + +class ModelVariant(IModel): + """A specific variant of a model (e.g. a particular device type, version, or quantization). + + Implements ``IModel`` and provides download, cache, load/unload, and + client-creation operations for a single model variant. + """ + + def __init__(self, model_info: ModelInfo, model_load_manager: ModelLoadManager, core_interop: CoreInterop): + """Initialize a ModelVariant. + + Args: + model_info: Catalog metadata for this variant. + model_load_manager: Manager for loading/unloading models. + core_interop: Native interop layer for Foundry Local Core. + """ + self._model_info = model_info + self._model_load_manager = model_load_manager + self._core_interop = core_interop + + self._id = model_info.id + self._alias = model_info.alias + + @property + def id(self) -> str: + """Unique model variant ID (e.g. ``name:version``).""" + return self._id + + @property + def alias(self) -> str: + """Model alias shared across variants.""" + return self._alias + + @property + def info(self) -> ModelInfo: + """Full catalog metadata for this variant.""" + return self._model_info + + @property + def context_length(self) -> Optional[int]: + """Maximum context length (in tokens) supported by this variant, or ``None`` if unknown.""" + return self._model_info.context_length + + @property + def variants(self) -> List[IModel]: + """A ModelVariant is a single variant, so variants returns itself.""" + return [self] + + def select_variant(self, variant: IModel) -> None: + """SelectVariant is not supported on a ModelVariant. + + Call ``Catalog.get_model()`` to get an IModel with all variants available. + + :raises FoundryLocalException: Always. + """ + raise FoundryLocalException( + f"select_variant is not supported on a ModelVariant. " + f'Call Catalog.get_model("{self._alias}") to get an IModel with all variants available.' + ) + + @property + def input_modalities(self) -> Optional[str]: + """Comma-separated input modalities (e.g. ``"text,image"``), or ``None`` if unknown.""" + return self._model_info.input_modalities + + @property + def output_modalities(self) -> Optional[str]: + """Comma-separated output modalities (e.g. ``"text"``), or ``None`` if unknown.""" + return self._model_info.output_modalities + + @property + def capabilities(self) -> Optional[str]: + """Comma-separated capability tags (e.g. 
``"chat,completion"``), or ``None`` if unknown.""" + return self._model_info.capabilities + + @property + def supports_tool_calling(self) -> Optional[bool]: + """Whether this variant supports tool/function calling, or ``None`` if unknown.""" + return self._model_info.supports_tool_calling + + @property + def is_cached(self) -> bool: + """``True`` if this variant is present in the local model cache.""" + cached_model_ids = get_cached_model_ids(self._core_interop) + return self.id in cached_model_ids + + @property + def is_loaded(self) -> bool: + """``True`` if this variant is currently loaded into memory.""" + loaded_model_ids = self._model_load_manager.list_loaded() + return self.id in loaded_model_ids + + def download(self, progress_callback: Callable[[float], None] = None): + """Download this variant to the local cache. + + Args: + progress_callback: Optional callback receiving download progress as a + percentage (0.0 to 100.0). + """ + request = InteropRequest(params={"Model": self.id}) + if progress_callback is None: + response = self._core_interop.execute_command("download_model", request) + else: + response = self._core_interop.execute_command_with_callback( + "download_model", request, + lambda pct_str: progress_callback(float(pct_str)) + ) + + logger.info("Download response: %s", response) + if response.error is not None: + raise FoundryLocalException(f"Failed to download model: {response.error}") + + def get_path(self) -> str: + """Get the local file-system path to this variant if cached. + + Returns: + Path to the model directory. + + Raises: + FoundryLocalException: If the model path cannot be retrieved. + """ + request = InteropRequest(params={"Model": self.id}) + response = self._core_interop.execute_command("get_model_path", request) + if response.error is not None: + raise FoundryLocalException(f"Failed to get model path: {response.error}") + + return response.data + + def load(self) -> None: + """Load this variant into memory for inference.""" + self._model_load_manager.load(self.id) + + def remove_from_cache(self) -> None: + """Remove this variant from the local model cache.""" + request = InteropRequest(params={"Model": self.id}) + response = self._core_interop.execute_command("remove_cached_model", request) + if response.error is not None: + raise FoundryLocalException(f"Failed to remove model from cache: {response.error}") + + + def unload(self) -> None: + """Unload this variant from memory.""" + self._model_load_manager.unload(self.id) + + def get_chat_client(self) -> ChatClient: + """Create an OpenAI-compatible ``ChatClient`` for this variant.""" + return ChatClient(self.id, self._core_interop) + + def get_audio_client(self) -> AudioClient: + """Create an OpenAI-compatible ``AudioClient`` for this variant.""" + return AudioClient(self.id, self._core_interop) \ No newline at end of file diff --git a/sdk/python/src/detail/utils.py b/sdk/python/src/detail/utils.py new file mode 100644 index 00000000..5a054610 --- /dev/null +++ b/sdk/python/src/detail/utils.py @@ -0,0 +1,294 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Utility functions for the Foundry Local SDK. + +Includes native library locator logic and helper functions used by +other SDK modules. 
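+
+Also defines the ``foundry-local-install`` console-script entry point
+(exposed via ``[project.scripts]`` in pyproject.toml).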
+""" + +from __future__ import annotations + +import argparse +import importlib.util +import json +import logging +import os +import sys + +from dataclasses import dataclass +from pathlib import Path + +from enum import StrEnum +from ..exception import FoundryLocalException + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Platform helpers +# --------------------------------------------------------------------------- + +# Maps Python sys.platform to native shared library extension +EXT_MAP: dict[str, str] = { + "win32": ".dll", + "linux": ".so", + "darwin": ".dylib", +} + + +def _get_ext() -> str: + """Get the native library file extension for the current platform.""" + for plat_prefix, ext in EXT_MAP.items(): + if sys.platform.startswith(plat_prefix): + return ext + raise RuntimeError(f"Unsupported platform: {sys.platform}") + + +# --------------------------------------------------------------------------- +# Package-based binary discovery +# --------------------------------------------------------------------------- + +# On Linux/macOS the ORT shared libraries carry the "lib" prefix while the +# Core library refers to them without it — a symlink "onnxruntime.dll" → +# "libonnxruntime.so/.dylib" is created to bridge the gap (see below). +_ORT_PREFIX = "" if sys.platform == "win32" else "lib" + + +def _native_binary_names() -> tuple[str, str, str]: + """Return the expected native binary filenames for the current platform.""" + ext = _get_ext() + return ( + f"Microsoft.AI.Foundry.Local.Core{ext}", + f"{_ORT_PREFIX}onnxruntime{ext}", + f"{_ORT_PREFIX}onnxruntime-genai{ext}", + ) + + +def _find_file_in_package(package_name: str, filename: str) -> Path | None: + """Locate a native binary *filename* inside an installed Python package. + + Searches the package root and common sub-directories (``capi/``, + ``native/``, ``lib/``). Falls back to a recursive ``rglob`` scan of + the entire package tree when none of the quick paths match. + + Args: + package_name: The PyPI package name (hyphens or underscores accepted; + e.g. ``"onnxruntime-genai-core"`` or ``"onnxruntime_genai_core"``). + filename: The filename to look for (e.g. ``"onnxruntime-genai.dll"``). + + Returns: + Absolute ``Path`` to the file, or ``None`` if not found. 
+ """ + import_name = package_name.replace("-", "_") + spec = importlib.util.find_spec(import_name) + if spec is None or spec.origin is None: + return None + + pkg_root = Path(spec.origin).parent + + # Quick checks for well-known sub-directories first + for candidate_dir in (pkg_root, pkg_root / "capi", pkg_root / "native", pkg_root / "lib", pkg_root / "bin"): + candidate = candidate_dir / filename + if candidate.exists(): + return candidate + + # Recursive fallback + for match in pkg_root.rglob(filename): + return match + + return None + + +@dataclass +class NativeBinaryPaths: + """Resolved paths to the three native binaries required by the SDK.""" + + core: Path + ort: Path + genai: Path + + @property + def core_dir(self) -> Path: + """Directory that contains the Core binary.""" + return self.core.parent + + @property + def ort_dir(self) -> Path: + """Directory that contains the OnnxRuntime binary.""" + return self.ort.parent + + @property + def genai_dir(self) -> Path: + """Directory that contains the OnnxRuntimeGenAI binary.""" + return self.genai.parent + + def all_dirs(self) -> list[Path]: + """Return a deduplicated list of directories that contain the binaries.""" + seen: list[Path] = [] + for d in (self.core_dir, self.ort_dir, self.genai_dir): + if d not in seen: + seen.append(d) + return seen + + +def get_native_binary_paths() -> NativeBinaryPaths | None: + """Locate native binaries from installed Python packages. + + Returns: + A :class:`NativeBinaryPaths` instance if all three binaries were + found, or ``None`` if any is missing. + """ + core_name, ort_name, genai_name = _native_binary_names() + + # Probe WinML packages first; fall back to standard if not installed. + core_path = _find_file_in_package("foundry-local-core-winml", core_name) or _find_file_in_package("foundry-local-core", core_name) + ort_path = _find_file_in_package("onnxruntime-core", ort_name) + genai_path = _find_file_in_package("onnxruntime-genai-core", genai_name) + + if core_path and ort_path and genai_path: + return NativeBinaryPaths(core=core_path, ort=ort_path, genai=genai_path) + + return None + +def create_ort_symlinks(paths: NativeBinaryPaths) -> None: + """Create compatibility symlinks for ORT in the Core library directory on Linux/macOS. + + Workaround for ORT issue https://github.com/microsoft/onnxruntime/issues/27263. + + On Linux/macOS the native packages ship ORT binaries with a ``lib`` prefix + (e.g. ``libonnxruntime.dylib``) in their own package directories, while the + .NET AOT Core library P/Invokes ``onnxruntime.dylib`` / ``onnxruntime-genai.dylib`` + and searches its *own* directory first (matching the JS SDK behaviour where all + binaries live in a single ``coreDir``). + + This function creates ``onnxruntime{ext}`` and ``onnxruntime-genai{ext}`` symlinks + in ``paths.core_dir`` pointing at the absolute paths of the respective binaries so + the Core DLL can resolve them via ``dlopen`` without needing ``DYLD_LIBRARY_PATH``. 
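+
+    NOTE: the call site in ``CoreInterop._initialize_native_libraries`` is
+    currently commented out, so the symlinks are only created when this
+    function is invoked explicitly.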
+ """ + if sys.platform == "win32": + return + + ext = ".dylib" if sys.platform == "darwin" else ".so" + + # Pairs of (actual binary path, link stem to create in core_dir) + links: list[tuple[Path, str]] = [ + (paths.ort, "onnxruntime"), + (paths.genai, "onnxruntime-genai"), + ] + + for src_path, link_stem in links: + link_path = paths.core_dir / f"{link_stem}{ext}" + if not link_path.exists(): + if src_path.exists(): + os.symlink(str(src_path), link_path) + logger.info("Created symlink: %s -> %s", link_path, src_path) + else: + logger.warning("Cannot create symlink %s: source %s not found", link_path, src_path) + + # Create a libonnxruntime symlink in genai_dir pointing to the real ORT + # binary so the dynamic linker can resolve GenAI's dependency. + if paths.genai_dir != paths.ort_dir: + ort_link_in_genai = paths.genai_dir / paths.ort.name + if not ort_link_in_genai.exists(): + if paths.ort.exists(): + os.symlink(str(paths.ort), ort_link_in_genai) + logger.info("Created symlink: %s -> %s", ort_link_in_genai, paths.ort) + else: + logger.warning("Cannot create symlink %s: source %s not found", + ort_link_in_genai, paths.ort) + + +# --------------------------------------------------------------------------- +# CLI entry point for verifying native binary installation +# --------------------------------------------------------------------------- + + +def foundry_local_install(args: list[str] | None = None) -> None: + """CLI entry point for installing and verifying native binaries. + + Usage:: + + foundry-local-install [--winml] [--verbose] + + Installs the platform-specific native libraries required by the SDK via + pip, then verifies they can be located. Use ``--winml`` to install the + WinML variants of the native packages (Windows only). + + Standard variant (default):: + + foundry-local-install + # installs: foundry-local-core, onnxruntime-core, onnxruntime-genai-core + + WinML variant:: + + foundry-local-install --winml + # installs: foundry-local-core-winml, onnxruntime-core, onnxruntime-genai-core + """ + import subprocess + + parser = argparse.ArgumentParser( + description=( + "Install and verify the platform-specific native libraries required by " + "the Foundry Local SDK via pip. Use --winml to install the WinML variants " + "(Windows only). Without --winml the standard cross-platform packages are installed." + ), + prog="foundry-local-install", + ) + parser.add_argument( + "--winml", + action="store_true", + help=( + "Install WinML native package (foundry-local-core-winml) " + "instead of the standard cross-platform package." 
+ ), + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Print the resolved path for each binary after installation.", + ) + parsed = parser.parse_args(args) + + if parsed.winml: + variant = "WinML" + packages = ["foundry-local-core-winml", "onnxruntime-core", "onnxruntime-genai-core"] + else: + variant = "standard" + packages = ["foundry-local-core", "onnxruntime-core", "onnxruntime-genai-core"] + + print(f"[foundry-local] Installing {variant} native packages: {', '.join(packages)}") + subprocess.check_call([sys.executable, "-m", "pip", "install", *packages]) + + paths = get_native_binary_paths() + if paths is None: + core_name, ort_name, genai_name = _native_binary_names() + missing: list[str] = [] + if parsed.winml: + if _find_file_in_package("foundry-local-core-winml", core_name) is None: + missing.append("foundry-local-core-winml") + else: + if _find_file_in_package("foundry-local-core", core_name) is None: + missing.append("foundry-local-core") + if _find_file_in_package("onnxruntime-core", ort_name) is None: + missing.append("onnxruntime-core") + if _find_file_in_package("onnxruntime-genai-core", genai_name) is None: + missing.append("onnxruntime-genai-core") + print( + "[foundry-local] ERROR: Could not locate native binaries after installation. " + f"Missing: {', '.join(missing)}", + file=sys.stderr, + ) + hint = "pip install foundry-local-sdk-winml" if parsed.winml else "pip install foundry-local-sdk" + print(f" Try: {hint}", file=sys.stderr) + sys.exit(1) + + print(f"[foundry-local] {variant.capitalize()} native libraries installed and verified.") + if parsed.verbose: + print(f" Core : {paths.core}") + print(f" ORT : {paths.ort}") + print(f" GenAI : {paths.genai}") + + + diff --git a/sdk/python/src/ep_types.py b/sdk/python/src/ep_types.py new file mode 100644 index 00000000..42d84acf --- /dev/null +++ b/sdk/python/src/ep_types.py @@ -0,0 +1,24 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +from typing import List + +from pydantic import BaseModel, Field + + +class EpInfo(BaseModel): + """Metadata describing a discoverable execution provider (EP).""" + + name: str = Field(alias="Name") + is_registered: bool = Field(alias="IsRegistered") + + +class EpDownloadResult(BaseModel): + """Result of an explicit EP download and registration operation.""" + + success: bool = Field(alias="Success") + status: str = Field(alias="Status") + registered_eps: List[str] = Field(alias="RegisteredEps") + failed_eps: List[str] = Field(alias="FailedEps") diff --git a/sdk/python/src/exception.py b/sdk/python/src/exception.py new file mode 100644 index 00000000..0cff6a90 --- /dev/null +++ b/sdk/python/src/exception.py @@ -0,0 +1,7 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# --------------------------------------------------------------------------
+
+class FoundryLocalException(Exception):
+    """Base exception for Foundry Local SDK errors."""
diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py
new file mode 100644
index 00000000..a649f8e5
--- /dev/null
+++ b/sdk/python/src/foundry_local_manager.py
@@ -0,0 +1,196 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import json
+import logging
+import threading
+
+from typing import Callable, List, Optional
+
+from pydantic import TypeAdapter
+
+from .catalog import Catalog
+from .configuration import Configuration
+from .ep_types import EpDownloadResult, EpInfo
+from .logging_helper import set_default_logger_severity
+from .detail.core_interop import CoreInterop, InteropRequest
+from .detail.model_load_manager import ModelLoadManager
+from .exception import FoundryLocalException
+
+logger = logging.getLogger(__name__)
+
+
+class FoundryLocalManager:
+    """Singleton manager for Foundry Local SDK operations.
+
+    Call ``FoundryLocalManager.initialize(config)`` once at startup, then access
+    the singleton via ``FoundryLocalManager.instance``.
+
+    Attributes:
+        instance: The singleton ``FoundryLocalManager`` instance (set after ``initialize``).
+        catalog: The model ``Catalog`` for discovering and managing models.
+        urls: Bound URL(s) after ``start_web_service()`` is called, or ``None``.
+    """
+
+    _lock = threading.Lock()
+    instance: Optional[FoundryLocalManager] = None
+
+    @staticmethod
+    def initialize(config: Configuration):
+        """Initialize the Foundry Local SDK with the given configuration.
+
+        This method must be called before using any other part of the SDK.
+
+        Args:
+            config: Configuration object for the SDK.
+        """
+        # Delegate singleton creation to the constructor, which enforces
+        # the singleton invariant under a lock and sets `instance`.
+        FoundryLocalManager(config)
+
+    def __init__(self, config: Configuration):
+        # Enforce singleton creation under a class-level lock and ensure
+        # that `FoundryLocalManager.instance` is set exactly once.
+        with FoundryLocalManager._lock:
+            if FoundryLocalManager.instance is not None:
+                raise FoundryLocalException(
+                    "FoundryLocalManager is a singleton and has already been initialized."
+                )
+            config.validate()
+            self.config = config
+            self._initialize()
+            FoundryLocalManager.instance = self
+
+        self.urls = None
+
+    def _initialize(self):
+        set_default_logger_severity(self.config.log_level)
+
+        external_service_url = self.config.web.external_url if self.config.web else None
+
+        self._core_interop = CoreInterop(self.config)
+        self._model_load_manager = ModelLoadManager(self._core_interop, external_service_url)
+        self.catalog = Catalog(self._model_load_manager, self._core_interop)
+
+    def discover_eps(self) -> list[EpInfo]:
+        """Discover available execution providers and their registration status.
+
+        Returns:
+            List of ``EpInfo`` entries for all discoverable EPs.
+
+        Raises:
+            FoundryLocalException: If EP discovery fails or response JSON is invalid.
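+
+        Example (illustrative)::
+
+            for ep in FoundryLocalManager.instance.discover_eps():
+                status = "registered" if ep.is_registered else "available"
+                print(f"{ep.name}: {status}")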
+ """ + response = self._core_interop.execute_command("discover_eps") + if response.error is not None: + raise FoundryLocalException(f"Error discovering execution providers: {response.error}") + + try: + adapter = TypeAdapter(List[EpInfo]) + return adapter.validate_json(response.data or "[]") + except Exception as e: + raise FoundryLocalException( + f"Failed to decode JSON response from discover_eps: {e}. Response was: {response.data}" + ) from e + + def download_and_register_eps( + self, + names: Optional[list[str]] = None, + progress_callback: Optional[Callable[[str, float], None]] = None, + ) -> EpDownloadResult: + """Download and register execution providers. + + Args: + names: Optional subset of EP names to download. If omitted or empty, + all discoverable EPs are downloaded. + progress_callback: Optional callback ``(ep_name: str, percent: float) -> None`` + invoked as each EP downloads. ``percent`` is 0-100. + + Returns: + ``EpDownloadResult`` describing operation status and per-EP outcomes. + + Raises: + FoundryLocalException: If the operation fails or response JSON is invalid. + """ + request = None + if names is not None and len(names) > 0: + request = InteropRequest(params={"Names": ",".join(names)}) + + if progress_callback is not None: + def _on_chunk(chunk: str) -> None: + sep = chunk.find("|") + if sep >= 0: + ep_name = chunk[:sep] or "" + try: + percent = float(chunk[sep + 1:]) + progress_callback(ep_name, percent) + except ValueError: + pass + + response = self._core_interop.execute_command_with_callback( + "download_and_register_eps", request, _on_chunk + ) + else: + response = self._core_interop.execute_command("download_and_register_eps", request) + + if response.error is not None: + raise FoundryLocalException(f"Error downloading execution providers: {response.error}") + + if response.data: + try: + adapter = TypeAdapter(EpDownloadResult) + ep_result = adapter.validate_json(response.data) + except Exception as e: + raise FoundryLocalException( + "Failed to decode JSON response from download_and_register_eps: " + f"{e}. Response was: {response.data}" + ) from e + else: + ep_result = EpDownloadResult( + Success=True, Status="Completed", RegisteredEps=[], FailedEps=[] + ) + + # Invalidate the catalog cache if any EP was newly registered so the next access + # re-fetches models with the updated set of available EPs. + if ep_result.success or len(ep_result.registered_eps) > 0: + self.catalog._invalidate_cache() + + return ep_result + + def start_web_service(self): + """Start the optional web service. + + If provided, the service will be bound to the value of Configuration.web.urls. + The default of http://127.0.0.1:0 will be used otherwise, which binds to a random ephemeral port. + + FoundryLocalManager.urls will be updated with the actual URL/s the service is listening on. 
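+
+        Example (illustrative; the bound port is chosen by the OS when no
+        explicit URL is configured)::
+
+            manager = FoundryLocalManager.instance
+            manager.start_web_service()
+            print(manager.urls)  # e.g. ['http://127.0.0.1:54321']
+            manager.stop_web_service()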
+ """ + with FoundryLocalManager._lock: + response = self._core_interop.execute_command("start_service") + + if response.error is not None: + raise FoundryLocalException(f"Error starting web service: {response.error}") + + bound_urls = json.loads(response.data) + if bound_urls is None or len(bound_urls) == 0: + raise FoundryLocalException("Failed to get bound URLs from web service start response.") + + self.urls = bound_urls + + def stop_web_service(self): + """Stop the optional web service.""" + + with FoundryLocalManager._lock: + if self.urls is None: + raise FoundryLocalException("Web service is not running.") + + response = self._core_interop.execute_command("stop_service") + + if response.error is not None: + raise FoundryLocalException(f"Error stopping web service: {response.error}") + + self.urls = None diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py new file mode 100644 index 00000000..8237aeb4 --- /dev/null +++ b/sdk/python/src/imodel.py @@ -0,0 +1,145 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Callable, List, Optional + +from .openai.chat_client import ChatClient +from .openai.audio_client import AudioClient +from .detail.model_data_types import ModelInfo + +class IModel(ABC): + """Abstract interface for a model that can be downloaded, loaded, and used for inference.""" + + @property + @abstractmethod + def id(self) -> str: + """Unique model id.""" + pass + + @property + @abstractmethod + def alias(self) -> str: + """Model alias.""" + pass + + @property + @abstractmethod + def info(self) -> ModelInfo: + """Full model metadata.""" + pass + + @property + @abstractmethod + def is_cached(self) -> bool: + """True if the model is present in the local cache.""" + pass + + @property + @abstractmethod + def is_loaded(self) -> bool: + """True if the model is loaded into memory.""" + pass + + @property + @abstractmethod + def context_length(self) -> Optional[int]: + """Maximum context length (in tokens) supported by the model, or ``None`` if unknown.""" + pass + + @property + @abstractmethod + def input_modalities(self) -> Optional[str]: + """Comma-separated input modalities (e.g. ``"text,image"``), or ``None`` if unknown.""" + pass + + @property + @abstractmethod + def output_modalities(self) -> Optional[str]: + """Comma-separated output modalities (e.g. ``"text"``), or ``None`` if unknown.""" + pass + + @property + @abstractmethod + def capabilities(self) -> Optional[str]: + """Comma-separated capability tags (e.g. ``"chat,completion"``), or ``None`` if unknown.""" + pass + + @property + @abstractmethod + def supports_tool_calling(self) -> Optional[bool]: + """Whether the model supports tool/function calling, or ``None`` if unknown.""" + pass + + @abstractmethod + def download(self, progress_callback: Callable[[float], None] = None) -> None: + """ + Download the model to local cache if not already present. + :param progress_callback: Optional callback function for download progress as a percentage (0.0 to 100.0). + """ + pass + + @abstractmethod + def get_path(self) -> str: + """ + Gets the model path if cached. + :return: Path of model directory. + """ + pass + + @abstractmethod + def load(self) -> None: + """ + Load the model into memory if not already loaded. 
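+
+        A typical lifecycle for an ``IModel`` implementation (illustrative)::
+
+            if not model.is_cached:
+                model.download(lambda pct: print(f"download: {pct:.0f}%"))
+            model.load()
+            client = model.get_chat_client()
+            # ... run completions ...
+            model.unload()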
+        """
+        pass
+
+    @abstractmethod
+    def remove_from_cache(self) -> None:
+        """
+        Remove the model from the local cache.
+        """
+        pass
+
+    @abstractmethod
+    def unload(self) -> None:
+        """
+        Unload the model if loaded.
+        """
+        pass
+
+    @abstractmethod
+    def get_chat_client(self) -> ChatClient:
+        """
+        Get an OpenAI API-based ChatClient.
+        :return: ChatClient instance.
+        """
+        pass
+
+    @abstractmethod
+    def get_audio_client(self) -> AudioClient:
+        """
+        Get an OpenAI API-based AudioClient.
+        :return: AudioClient instance.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def variants(self) -> List['IModel']:
+        """Available variants of this model, each optimized for a different device."""
+        pass
+
+    @abstractmethod
+    def select_variant(self, variant: 'IModel') -> None:
+        """
+        Select a model variant from ``variants`` to use for IModel operations.
+        An IModel from ``variants`` can also be used directly.
+
+        :param variant: Model variant to select. Must be one of the variants in ``variants``.
+        :raises FoundryLocalException: If variant is not valid for this model.
+        """
+        pass
diff --git a/sdk/python/src/logging_helper.py b/sdk/python/src/logging_helper.py
new file mode 100644
index 00000000..e476f62b
--- /dev/null
+++ b/sdk/python/src/logging_helper.py
@@ -0,0 +1,30 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+import logging
+
+from enum import StrEnum
+
+# Foundry Local Core log level names; LOG_LEVEL_MAP below translates them to Python logging levels
+class LogLevel(StrEnum):
+    VERBOSE = "Verbose"
+    DEBUG = "Debug"
+    INFORMATION = "Information"
+    WARNING = "Warning"
+    ERROR = "Error"
+    FATAL = "Fatal"
+
+LOG_LEVEL_MAP = {
+    LogLevel.VERBOSE: logging.DEBUG,  # Python logging has no Verbose/Trace level; DEBUG is the closest
+    LogLevel.DEBUG: logging.DEBUG,
+    LogLevel.INFORMATION: logging.INFO,
+    LogLevel.WARNING: logging.WARNING,
+    LogLevel.ERROR: logging.ERROR,
+    LogLevel.FATAL: logging.CRITICAL,
+}
+
+def set_default_logger_severity(config_level: LogLevel):
+    py_level = LOG_LEVEL_MAP.get(config_level, logging.INFO)
+    logger = logging.getLogger(__name__.split(".", maxsplit=1)[0])
+    logger.setLevel(py_level)
diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py
new file mode 100644
index 00000000..e445ba1d
--- /dev/null
+++ b/sdk/python/src/openai/__init__.py
@@ -0,0 +1,10 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""OpenAI-compatible clients for chat completions and audio transcription."""
+
+from .chat_client import ChatClient, ChatClientSettings
+from .audio_client import AudioClient
+
+__all__ = ["AudioClient", "ChatClient", "ChatClientSettings"]
diff --git a/sdk/python/src/openai/audio_client.py b/sdk/python/src/openai/audio_client.py
new file mode 100644
index 00000000..8d3ffa29
--- /dev/null
+++ b/sdk/python/src/openai/audio_client.py
@@ -0,0 +1,153 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# -------------------------------------------------------------------------- + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass +from typing import Callable, Optional + +from ..detail.core_interop import CoreInterop, InteropRequest +from ..exception import FoundryLocalException + +logger = logging.getLogger(__name__) + + +class AudioSettings: + """Settings supported by Foundry Local for audio transcription. + + Attributes: + language: Language of the audio (e.g. ``"en"``). + temperature: Sampling temperature (0.0 for deterministic results). + """ + + def __init__( + self, + language: Optional[str] = None, + temperature: Optional[float] = None, + ): + self.language = language + self.temperature = temperature + + +@dataclass +class AudioTranscriptionResponse: + """Response from an audio transcription request. + + Attributes: + text: The transcribed text. + """ + + text: str + + +class AudioClient: + """OpenAI-compatible audio transcription client backed by Foundry Local Core. + + Supports non-streaming and streaming transcription of audio files. + + Attributes: + model_id: The ID of the loaded Whisper model variant. + settings: Tunable ``AudioSettings`` (language, temperature). + """ + + def __init__(self, model_id: str, core_interop: CoreInterop): + self.model_id = model_id + self.settings = AudioSettings() + self._core_interop = core_interop + + @staticmethod + def _validate_audio_file_path(audio_file_path: str) -> None: + """Validate that the audio file path is a non-empty string.""" + if not isinstance(audio_file_path, str) or audio_file_path.strip() == "": + raise ValueError("Audio file path must be a non-empty string.") + + def _create_request_json(self, audio_file_path: str) -> str: + """Build the JSON payload for the ``audio_transcribe`` native command.""" + request: dict = { + "Model": self.model_id, + "FileName": audio_file_path, + } + + metadata: dict[str, str] = {} + + if self.settings.language is not None: + request["Language"] = self.settings.language + metadata["language"] = self.settings.language + + if self.settings.temperature is not None: + request["Temperature"] = self.settings.temperature + metadata["temperature"] = str(self.settings.temperature) + + if metadata: + request["metadata"] = metadata + + return json.dumps(request) + + def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse: + """Transcribe an audio file (non-streaming). + + Args: + audio_file_path: Path to the audio file to transcribe. + + Returns: + An ``AudioTranscriptionResponse`` containing the transcribed text. + + Raises: + ValueError: If *audio_file_path* is not a non-empty string. + FoundryLocalException: If the underlying native transcription command fails. + """ + self._validate_audio_file_path(audio_file_path) + + request_json = self._create_request_json(audio_file_path) + request = InteropRequest(params={"OpenAICreateRequest": request_json}) + + response = self._core_interop.execute_command("audio_transcribe", request) + if response.error is not None: + raise FoundryLocalException( + f"Audio transcription failed for model '{self.model_id}': {response.error}" + ) + + data = json.loads(response.data) + return AudioTranscriptionResponse(text=data.get("text", "")) + + def transcribe_streaming( + self, + audio_file_path: str, + callback: Callable[[AudioTranscriptionResponse], None], + ) -> None: + """Transcribe an audio file with streaming chunks. + + Each chunk is passed to *callback* as an ``AudioTranscriptionResponse``. 
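+
+        Example (illustrative)::
+
+            client.transcribe_streaming(
+                "recording.mp3",
+                lambda chunk: print(chunk.text, end="", flush=True),
+            )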
+
+        Args:
+            audio_file_path: Path to the audio file to transcribe.
+            callback: Called with each incremental transcription chunk.
+
+        Raises:
+            ValueError: If *audio_file_path* is not a non-empty string.
+            FoundryLocalException: If the underlying native transcription command fails.
+        """
+        self._validate_audio_file_path(audio_file_path)
+
+        if not callable(callback):
+            raise TypeError("Callback must be a valid function.")
+
+        request_json = self._create_request_json(audio_file_path)
+        request = InteropRequest(params={"OpenAICreateRequest": request_json})
+
+        def callback_handler(chunk_str: str):
+            chunk_data = json.loads(chunk_str)
+            chunk = AudioTranscriptionResponse(text=chunk_data.get("text", ""))
+            callback(chunk)
+
+        response = self._core_interop.execute_command_with_callback(
+            "audio_transcribe", request, callback_handler
+        )
+        if response.error is not None:
+            raise FoundryLocalException(
+                f"Streaming audio transcription failed for model '{self.model_id}': {response.error}"
+            )
\ No newline at end of file
diff --git a/sdk/python/src/openai/chat_client.py b/sdk/python/src/openai/chat_client.py
new file mode 100644
index 00000000..0b0d58bc
--- /dev/null
+++ b/sdk/python/src/openai/chat_client.py
@@ -0,0 +1,290 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+
+from __future__ import annotations
+
+import json
+import logging
+import queue
+import threading
+
+from ..detail.core_interop import CoreInterop, InteropRequest
+from ..exception import FoundryLocalException
+from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from openai.types.chat.completion_create_params import (
+    CompletionCreateParamsNonStreaming,
+    CompletionCreateParamsStreaming,
+)
+from openai.types.chat import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from typing import Any, Dict, Generator, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class ChatClientSettings:
+    """Settings for chat completion requests.
+
+    Attributes match the OpenAI chat completion API parameters.
+    Foundry-specific settings (``top_k``, ``random_seed``) are sent via metadata.
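+
+    Example (illustrative)::
+
+        settings = ChatClientSettings(temperature=0.0, max_tokens=256, top_k=40)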
+ """ + + def __init__( + self, + frequency_penalty: Optional[float] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + temperature: Optional[float] = None, + presence_penalty: Optional[float] = None, + random_seed: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + response_format: Optional[Dict[str, Any]] = None, + tool_choice: Optional[Dict[str, Any]] = None, + ): + self.frequency_penalty = frequency_penalty + self.max_tokens = max_tokens + self.n = n + self.temperature = temperature + self.presence_penalty = presence_penalty + self.random_seed = random_seed + self.top_k = top_k + self.top_p = top_p + self.response_format = response_format + self.tool_choice = tool_choice + + def _serialize(self) -> Dict[str, Any]: + """Serialize settings into an OpenAI-compatible request dict.""" + self._validate_response_format(self.response_format) + self._validate_tool_choice(self.tool_choice) + + result: Dict[str, Any] = { + k: v for k, v in { + "frequency_penalty": self.frequency_penalty, + "max_tokens": self.max_tokens, + "n": self.n, + "presence_penalty": self.presence_penalty, + "temperature": self.temperature, + "top_p": self.top_p, + "response_format": self.response_format, + "tool_choice": self.tool_choice, + }.items() if v is not None + } + + metadata: Dict[str, str] = {} + if self.top_k is not None: + metadata["top_k"] = str(self.top_k) + if self.random_seed is not None: + metadata["random_seed"] = str(self.random_seed) + + if metadata: + result["metadata"] = metadata + + return result + + def _validate_response_format(self, response_format: Optional[Dict[str, Any]]) -> None: + if response_format is None: + return + valid_types = ["text", "json_object", "json_schema", "lark_grammar"] + fmt_type = response_format.get("type") + if fmt_type not in valid_types: + raise ValueError(f"ResponseFormat type must be one of: {', '.join(valid_types)}") + grammar_types = ["json_schema", "lark_grammar"] + if fmt_type in grammar_types: + if fmt_type == "json_schema" and ( + not isinstance(response_format.get("json_schema"), str) + or not response_format["json_schema"].strip() + ): + raise ValueError('ResponseFormat with type "json_schema" must have a valid json_schema string.') + if fmt_type == "lark_grammar" and ( + not isinstance(response_format.get("lark_grammar"), str) + or not response_format["lark_grammar"].strip() + ): + raise ValueError('ResponseFormat with type "lark_grammar" must have a valid lark_grammar string.') + elif response_format.get("json_schema") or response_format.get("lark_grammar"): + raise ValueError( + f'ResponseFormat with type "{fmt_type}" should not have json_schema or lark_grammar properties.' + ) + + def _validate_tool_choice(self, tool_choice: Optional[Dict[str, Any]]) -> None: + if tool_choice is None: + return + valid_types = ["none", "auto", "required", "function"] + choice_type = tool_choice.get("type") + if choice_type not in valid_types: + raise ValueError(f"ToolChoice type must be one of: {', '.join(valid_types)}") + if choice_type == "function" and ( + not isinstance(tool_choice.get("name"), str) or not tool_choice.get("name", "").strip() + ): + raise ValueError('ToolChoice with type "function" must have a valid name string.') + elif choice_type != "function" and tool_choice.get("name"): + raise ValueError(f'ToolChoice with type "{choice_type}" should not have a name property.') + +class ChatClient: + """OpenAI-compatible chat completions client backed by Foundry Local Core. 
+
+    Supports non-streaming and streaming completions with optional tool calling.
+
+    Attributes:
+        model_id: The ID of the loaded model variant.
+        settings: Tunable ``ChatClientSettings`` (temperature, max tokens, etc.).
+    """
+
+    def __init__(self, model_id: str, core_interop: CoreInterop):
+        self.model_id = model_id
+        self.settings = ChatClientSettings()
+        self._core_interop = core_interop
+
+    def _validate_messages(self, messages: List[ChatCompletionMessageParam]) -> None:
+        """Validate the messages list before sending to the native layer."""
+        if not messages:
+            raise ValueError("messages must be a non-empty list.")
+        for i, msg in enumerate(messages):
+            if not isinstance(msg, dict):
+                raise ValueError(f"messages[{i}] must be a dict, got {type(msg).__name__}.")
+            if "role" not in msg:
+                raise ValueError(f"messages[{i}] is missing required key 'role'.")
+            if "content" not in msg:
+                raise ValueError(f"messages[{i}] is missing required key 'content'.")
+
+    def _validate_tools(self, tools: Optional[List[Dict[str, Any]]]) -> None:
+        """Validate the tools list before sending to the native layer."""
+        if not tools:
+            return
+        if not isinstance(tools, list):
+            raise ValueError("tools must be a list if provided.")
+        for i, tool in enumerate(tools):
+            if not isinstance(tool, dict) or not tool:
+                raise ValueError(
+                    f"tools[{i}] must be a non-null object with a valid 'type' and 'function' definition."
+                )
+            if not isinstance(tool.get("type"), str) or not tool["type"].strip():
+                raise ValueError(f"tools[{i}] must have a 'type' property that is a non-empty string.")
+            fn = tool.get("function")
+            if not isinstance(fn, dict):
+                raise ValueError(f"tools[{i}] must have a 'function' property that is a non-empty object.")
+            if not isinstance(fn.get("name"), str) or not fn["name"].strip():
+                raise ValueError(
+                    f"tools[{i}]'s function must have a 'name' property that is a non-empty string."
+                )
+
+    def _create_request(
+        self,
+        messages: List[ChatCompletionMessageParam],
+        streaming: bool,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> str:
+        request: Dict[str, Any] = {
+            "model": self.model_id,
+            "messages": messages,
+            **({"tools": tools} if tools else {}),
+            **({"stream": True} if streaming else {}),
+            **self.settings._serialize(),
+        }
+
+        if streaming:
+            chat_request = CompletionCreateParamsStreaming(request)
+        else:
+            chat_request = CompletionCreateParamsNonStreaming(request)
+
+        return json.dumps(chat_request)
+
+    def complete_chat(
+        self,
+        messages: List[ChatCompletionMessageParam],
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> ChatCompletion:
+        """Perform a non-streaming chat completion.
+
+        Args:
+            messages: Conversation history as a list of OpenAI message dicts.
+            tools: Optional list of tool definitions for function calling.
+
+        Returns:
+            A ``ChatCompletion`` response.
+
+        Raises:
+            ValueError: If messages is None, empty, or contains malformed entries.
+            FoundryLocalException: If the native command returns an error.
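+
+        Example (illustrative)::
+
+            result = client.complete_chat(
+                [{"role": "user", "content": "What is 7 multiplied by 6?"}]
+            )
+            print(result.choices[0].message.content)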
+ """ + self._validate_messages(messages) + self._validate_tools(tools) + chat_request_json = self._create_request(messages, streaming=False, tools=tools) + + # Send the request to the chat API + request = InteropRequest(params={"OpenAICreateRequest": chat_request_json}) + response = self._core_interop.execute_command("chat_completions", request) + if response.error is not None: + raise FoundryLocalException(f"Error during chat completion: {response.error}") + + completion = ChatCompletion.model_validate_json(response.data) + + return completion + + def _stream_chunks(self, chat_request_json: str) -> Generator[ChatCompletionChunk, None, None]: + """Background-thread generator that yields parsed chunks from the native streaming call.""" + _SENTINEL = object() + chunk_queue: queue.Queue = queue.Queue() + errors: List[Exception] = [] + + def _on_chunk(response_str: str) -> None: + raw = json.loads(response_str) + # Foundry Local returns tool call chunks with "message.tool_calls" instead + # of the standard streaming "delta.tool_calls". Normalize to delta format + # so ChatCompletionChunk parses correctly. + for choice in raw.get("choices", []): + if "message" in choice and "delta" not in choice: + msg = choice.pop("message") + # ChoiceDeltaToolCall requires "index"; add if missing + for i, tc in enumerate(msg.get("tool_calls", [])): + tc.setdefault("index", i) + choice["delta"] = msg + chunk_queue.put(ChatCompletionChunk.model_validate(raw)) + + def _run() -> None: + try: + resp = self._core_interop.execute_command_with_callback( + "chat_completions", + InteropRequest(params={"OpenAICreateRequest": chat_request_json}), + _on_chunk, + ) + if resp.error is not None: + errors.append(FoundryLocalException(f"Error during streaming chat completion: {resp.error}")) + except Exception as exc: + errors.append(exc) + finally: + chunk_queue.put(_SENTINEL) + + threading.Thread(target=_run, daemon=True).start() + while (item := chunk_queue.get()) is not _SENTINEL: + yield item + if errors: + raise errors[0] + + def complete_streaming_chat( + self, + messages: List[ChatCompletionMessageParam], + tools: Optional[List[Dict[str, Any]]] = None, + ) -> Generator[ChatCompletionChunk, None, None]: + """Perform a streaming chat completion, yielding chunks as they arrive. + + Consume with a standard ``for`` loop:: + + for chunk in client.complete_streaming_chat(messages): + if chunk.choices[0].delta.content: + print(chunk.choices[0].delta.content, end="", flush=True) + + Args: + messages: Conversation history as a list of OpenAI message dicts. + tools: Optional list of tool definitions for function calling. + + Returns: + A generator of ``ChatCompletionChunk`` objects. + + Raises: + ValueError: If messages or tools are malformed. + FoundryLocalException: If the native layer returns an error. + """ + self._validate_messages(messages) + self._validate_tools(tools) + chat_request_json = self._create_request(messages, streaming=True, tools=tools) + return self._stream_chunks(chat_request_json) diff --git a/sdk/python/src/version.py b/sdk/python/src/version.py new file mode 100644 index 00000000..ba1036bb --- /dev/null +++ b/sdk/python/src/version.py @@ -0,0 +1,6 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# --------------------------------------------------------------------------
+
+__version__ = "1.0.0.dev0"
diff --git a/sdk/python/test/README.md b/sdk/python/test/README.md
new file mode 100644
index 00000000..92f389a8
--- /dev/null
+++ b/sdk/python/test/README.md
@@ -0,0 +1,79 @@
+# Foundry Local Python SDK – Test Suite
+
+This test suite mirrors the structure of the JS (`sdk_v2/js/test/`) and C# (`sdk_v2/cs/test/`) SDK test suites.
+
+## Prerequisites
+
+1. **Python 3.11+** (the SDK uses `enum.StrEnum`, which requires 3.11; tested with 3.12/3.13)
+2. **SDK installed in editable mode** from the `sdk/python` directory:
+   ```bash
+   pip install -e .
+   ```
+3. **Test dependencies**:
+   ```bash
+   pip install -r requirements-dev.txt
+   ```
+4. **Test model data** – the `test-data-shared` folder must exist as a sibling of the git repo root
+   (e.g. `../test-data-shared` relative to the repo). It should contain cached models for
+   `qwen2.5-0.5b` and `whisper-tiny`.
+
+## Running the tests
+
+From the `sdk/python` directory:
+
+```bash
+# Run all tests
+python -m pytest test/
+
+# Run with verbose output
+python -m pytest test/ -v
+
+# Run a specific test file
+python -m pytest test/test_catalog.py
+
+# Run a specific test class or function
+python -m pytest test/test_catalog.py::TestCatalog::test_should_list_models
+
+# List all collected tests without running them
+python -m pytest test/ --collect-only
+```
+
+## Test structure
+
+```
+test/
+├── conftest.py                      # Shared fixtures & config (equivalent to testUtils.ts)
+├── test_foundry_local_manager.py    # FoundryLocalManager initialization (2 tests)
+├── test_catalog.py                  # Catalog listing, lookup, error cases (9 tests)
+├── test_model.py                    # Model caching & load/unload lifecycle (2 tests)
+├── detail/
+│   └── test_model_load_manager.py   # ModelLoadManager core interop & web service (5 tests)
+└── openai/
+    ├── test_chat_client.py          # Chat completions, streaming, error validation (7 tests)
+    └── test_audio_client.py         # Audio transcription (7 tests)
+```
+
+**Total: 32 tests**
+
+## Key conventions
+
+| Concept | Python (pytest) | JS (Mocha) | C# (TUnit) |
+|---|---|---|---|
+| Shared setup | `conftest.py` (auto-discovered) | `testUtils.ts` (explicit import) | `Utils.cs` (`[Before(Assembly)]`) |
+| Session fixture | `@pytest.fixture(scope="session")` | manual singleton | `[Before(Assembly)]` static |
+| Teardown | `yield` + cleanup in fixture | `after()` hook | `[After(Assembly)]` |
+| Skip in CI | `@skip_in_ci` marker | `IS_RUNNING_IN_CI` + `this.skip()` | `[SkipInCI]` attribute |
+| Expected failure | `@pytest.mark.xfail` | N/A | N/A |
+| Timeout | `@pytest.mark.timeout(30)` | `this.timeout(30000)` | `[Timeout(30000)]` |
+
+## CI environment detection
+
+Tests that require the web service are skipped when either `TF_BUILD=true` (Azure DevOps) or
+`GITHUB_ACTIONS=true` is set.
+
+## Test models
+
+| Alias | Use | Variant |
+|---|---|---|
+| `qwen2.5-0.5b` | Chat completions | `qwen2.5-0.5b-instruct-generic-cpu:4` |
+| `whisper-tiny` | Audio transcription | `openai-whisper-tiny-generic-cpu:2` |
diff --git a/sdk/python/test/__init__.py b/sdk/python/test/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py
new file mode 100644
index 00000000..b7e22c97
--- /dev/null
+++ b/sdk/python/test/conftest.py
@@ -0,0 +1,145 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# -------------------------------------------------------------------------- +"""Shared test configuration and fixtures for Foundry Local Python SDK tests. + +NOTE: "conftest.py" is a special filename that pytest uses to auto-discover +fixtures and shared utilities. All fixtures defined here are automatically +available to every test file without needing an explicit import. +This serves the same role as testUtils.ts in the JS SDK. +""" + +from __future__ import annotations + +import os +import logging + +import pytest + +from pathlib import Path + +from foundry_local_sdk.configuration import Configuration, LogLevel +from foundry_local_sdk.foundry_local_manager import FoundryLocalManager + +logger = logging.getLogger(__name__) + +TEST_MODEL_ALIAS = "qwen2.5-0.5b" +AUDIO_MODEL_ALIAS = "whisper-tiny" + +def get_git_repo_root() -> Path: + """Walk upward from __file__ until we find a .git directory.""" + current = Path(__file__).resolve().parent + while True: + if (current / ".git").exists(): + return current + parent = current.parent + if parent == current: + raise RuntimeError("Could not find git repo root") + current = parent + + +def get_test_data_shared_path() -> str: + """Return absolute path to the test-data-shared folder (sibling of the repo root).""" + repo_root = get_git_repo_root() + return str(repo_root.parent / "test-data-shared") + + +def is_running_in_ci() -> bool: + """Check TF_BUILD (Azure DevOps) and GITHUB_ACTIONS env vars.""" + azure_devops = os.environ.get("TF_BUILD", "false").lower() == "true" + github_actions = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true" + return azure_devops or github_actions + + +IS_RUNNING_IN_CI = is_running_in_ci() + +skip_in_ci = pytest.mark.skipif(IS_RUNNING_IN_CI, reason="Skipped in CI environments") + + +def get_test_config() -> Configuration: + """Build a Configuration suitable for integration tests.""" + repo_root = get_git_repo_root() + return Configuration( + app_name="FoundryLocalTest", + model_cache_dir=get_test_data_shared_path(), + log_level=LogLevel.WARNING, + logs_dir=str(repo_root / "sdk" / "python" / "logs"), + additional_settings={"Bootstrap": "false"}, + ) + + +def get_multiply_tool(): + """Tool definition for the multiply_numbers function-calling test.""" + return { + "type": "function", + "function": { + "name": "multiply_numbers", + "description": "A tool for multiplying two numbers.", + "parameters": { + "type": "object", + "properties": { + "first": { + "type": "integer", + "description": "The first number in the operation", + }, + "second": { + "type": "integer", + "description": "The second number in the operation", + }, + }, + "required": ["first", "second"], + }, + }, + } + + +# --------------------------------------------------------------------------- +# Session-scoped fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture(scope="session") +def manager(): + """Initialize FoundryLocalManager once for the entire test session.""" + # Reset singleton in case a previous run left state + FoundryLocalManager.instance = None + + config = get_test_config() + FoundryLocalManager.initialize(config) + mgr = FoundryLocalManager.instance + assert mgr is not None, "FoundryLocalManager.initialize did not set instance" + + yield mgr + + # Teardown: unload all loaded models + try: + catalog = mgr.catalog + loaded = catalog.get_loaded_models() + for model_variant in loaded: + try: + model_variant.unload() + except Exception as e: + logger.warning("Failed to unload model %s 
during teardown: %s", model_variant.id, e) + except Exception as e: + logger.warning("Failed to get loaded models during teardown: %s", e) + + # Reset the singleton so that other test sessions start clean + FoundryLocalManager.instance = None + + +@pytest.fixture(scope="session") +def catalog(manager): + """Return the Catalog from the session-scoped manager.""" + return manager.catalog + + +@pytest.fixture(scope="session") +def core_interop(manager): + """Return the CoreInterop from the session-scoped manager (internal, for component tests).""" + return manager._core_interop + + +@pytest.fixture(scope="session") +def model_load_manager(manager): + """Return the ModelLoadManager from the session-scoped manager (internal, for component tests).""" + return manager._model_load_manager diff --git a/sdk/python/test/detail/__init__.py b/sdk/python/test/detail/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sdk/python/test/detail/test_model_load_manager.py b/sdk/python/test/detail/test_model_load_manager.py new file mode 100644 index 00000000..a5a231e3 --- /dev/null +++ b/sdk/python/test/detail/test_model_load_manager.py @@ -0,0 +1,144 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Tests for ModelLoadManager – mirrors modelLoadManager.test.ts.""" + +from __future__ import annotations + +import pytest + +from foundry_local_sdk.detail.model_load_manager import ModelLoadManager +from ..conftest import TEST_MODEL_ALIAS, IS_RUNNING_IN_CI, skip_in_ci + + +class TestModelLoadManagerCoreInterop: + """ModelLoadManager tests using Core Interop (no external URL).""" + + def _get_model_id(self, catalog) -> str: + """Resolve the variant ID for the test model alias.""" + cached = catalog.get_cached_models() + variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached" + return variant.id + + def test_should_load_model(self, catalog, core_interop): + """Load model via core interop and verify it appears in loaded list.""" + model_id = self._get_model_id(catalog) + mlm = ModelLoadManager(core_interop) + + mlm.load(model_id) + loaded = mlm.list_loaded() + assert model_id in loaded + + # Cleanup + mlm.unload(model_id) + + def test_should_unload_model(self, catalog, core_interop): + """Load then unload model via core interop.""" + model_id = self._get_model_id(catalog) + mlm = ModelLoadManager(core_interop) + + mlm.load(model_id) + loaded = mlm.list_loaded() + assert model_id in loaded + + mlm.unload(model_id) + loaded = mlm.list_loaded() + assert model_id not in loaded + + def test_should_list_loaded_models(self, catalog, core_interop): + """list_loaded() should return an array containing the loaded model.""" + model_id = self._get_model_id(catalog) + mlm = ModelLoadManager(core_interop) + + mlm.load(model_id) + loaded = mlm.list_loaded() + + assert isinstance(loaded, list) + assert model_id in loaded + + # Cleanup + mlm.unload(model_id) + + +class TestModelLoadManagerExternalService: + """ModelLoadManager tests using external web service URL (skipped in CI).""" + + @skip_in_ci + def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop): + """Load/unload model through the web service endpoint.""" + cached = catalog.get_cached_models() + variant = next((m for m in cached if 
m.alias == TEST_MODEL_ALIAS), None) + assert variant is not None + model_id = variant.id + + # Start web service + try: + manager.start_web_service() + except Exception as e: + pytest.skip(f"Failed to start web service: {e}") + + urls = manager.urls + if not urls or len(urls) == 0: + pytest.skip("Web service started but no URLs returned") + + service_url = urls[0] + + try: + # Setup: load via core interop + setup_mlm = ModelLoadManager(core_interop) + setup_mlm.load(model_id) + loaded = setup_mlm.list_loaded() + assert model_id in loaded + + # Unload via external service + ext_mlm = ModelLoadManager(core_interop, service_url) + ext_mlm.unload(model_id) + + # Verify via core interop + loaded = setup_mlm.list_loaded() + assert model_id not in loaded + finally: + try: + manager.stop_web_service() + except Exception: + pass + + @skip_in_ci + def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop): + """list_loaded() through the web service endpoint should match core interop.""" + cached = catalog.get_cached_models() + variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + assert variant is not None + model_id = variant.id + + try: + manager.start_web_service() + except Exception as e: + pytest.skip(f"Failed to start web service: {e}") + + urls = manager.urls + if not urls or len(urls) == 0: + pytest.skip("Web service started but no URLs returned") + + service_url = urls[0] + + try: + # Setup: load via core + setup_mlm = ModelLoadManager(core_interop) + setup_mlm.load(model_id) + + # Verify via external service + ext_mlm = ModelLoadManager(core_interop, service_url) + loaded = ext_mlm.list_loaded() + assert isinstance(loaded, list) + assert model_id in loaded + + # Cleanup + setup_mlm.unload(model_id) + finally: + try: + manager.stop_web_service() + except Exception: + pass diff --git a/sdk/python/test/openai/__init__.py b/sdk/python/test/openai/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sdk/python/test/openai/test_audio_client.py b/sdk/python/test/openai/test_audio_client.py new file mode 100644 index 00000000..f430d8d5 --- /dev/null +++ b/sdk/python/test/openai/test_audio_client.py @@ -0,0 +1,156 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Tests for AudioClient – mirrors audioClient.test.ts.""" + +from __future__ import annotations + +import pytest + +from ..conftest import AUDIO_MODEL_ALIAS, get_git_repo_root + +# Recording.mp3 lives at sdk/testdata/Recording.mp3 relative to the repo root +AUDIO_FILE_PATH = str(get_git_repo_root() / "sdk" / "testdata" / "Recording.mp3") +EXPECTED_TEXT = ( + " And lots of times you need to give people more than one link at a time." + " You a band could give their fans a couple new videos from the live concert" + " behind the scenes photo gallery and album to purchase like these next few links." 
+) + + +def _get_loaded_audio_model(catalog): + """Helper: ensure the whisper model is selected, loaded, and return Model.""" + cached = catalog.get_cached_models() + assert len(cached) > 0 + + cached_variant = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None) + assert cached_variant is not None, f"{AUDIO_MODEL_ALIAS} should be cached" + + model = catalog.get_model(AUDIO_MODEL_ALIAS) + assert model is not None + + model.select_variant(cached_variant) + model.load() + return model + + +class TestAudioClient: + """Audio Client Tests.""" + + def test_should_transcribe_audio(self, catalog): + """Non-streaming transcription of Recording.mp3.""" + model = _get_loaded_audio_model(catalog) + try: + audio_client = model.get_audio_client() + assert audio_client is not None + + audio_client.settings.language = "en" + audio_client.settings.temperature = 0.0 + + response = audio_client.transcribe(AUDIO_FILE_PATH) + + assert response is not None + assert hasattr(response, "text") + assert isinstance(response.text, str) + assert len(response.text) > 0 + assert response.text == EXPECTED_TEXT + finally: + model.unload() + + def test_should_transcribe_audio_with_temperature(self, catalog): + """Non-streaming transcription with explicit temperature.""" + model = _get_loaded_audio_model(catalog) + try: + audio_client = model.get_audio_client() + assert audio_client is not None + + audio_client.settings.language = "en" + audio_client.settings.temperature = 0.0 + + response = audio_client.transcribe(AUDIO_FILE_PATH) + + assert response is not None + assert isinstance(response.text, str) + assert len(response.text) > 0 + assert response.text == EXPECTED_TEXT + finally: + model.unload() + + def test_should_transcribe_audio_streaming(self, catalog): + """Streaming transcription of Recording.mp3.""" + model = _get_loaded_audio_model(catalog) + try: + audio_client = model.get_audio_client() + assert audio_client is not None + + audio_client.settings.language = "en" + audio_client.settings.temperature = 0.0 + + chunks = [] + + def on_chunk(chunk): + assert chunk is not None + assert hasattr(chunk, "text") + assert isinstance(chunk.text, str) + assert len(chunk.text) > 0 + chunks.append(chunk.text) + + audio_client.transcribe_streaming(AUDIO_FILE_PATH, on_chunk) + + full_text = "".join(chunks) + assert full_text == EXPECTED_TEXT + finally: + model.unload() + + def test_should_transcribe_audio_streaming_with_temperature(self, catalog): + """Streaming transcription with explicit temperature.""" + model = _get_loaded_audio_model(catalog) + try: + audio_client = model.get_audio_client() + assert audio_client is not None + + audio_client.settings.language = "en" + audio_client.settings.temperature = 0.0 + + chunks = [] + + def on_chunk(chunk): + assert chunk is not None + assert isinstance(chunk.text, str) + chunks.append(chunk.text) + + audio_client.transcribe_streaming(AUDIO_FILE_PATH, on_chunk) + + full_text = "".join(chunks) + assert full_text == EXPECTED_TEXT + finally: + model.unload() + + def test_should_raise_for_empty_audio_file_path(self, catalog): + """transcribe('') should raise.""" + model = catalog.get_model(AUDIO_MODEL_ALIAS) + assert model is not None + audio_client = model.get_audio_client() + + with pytest.raises(ValueError, match="Audio file path must be a non-empty string"): + audio_client.transcribe("") + + def test_should_raise_for_streaming_empty_audio_file_path(self, catalog): + """transcribe_streaming('') should raise.""" + model = catalog.get_model(AUDIO_MODEL_ALIAS) + assert model 
is not None + audio_client = model.get_audio_client() + + with pytest.raises(ValueError, match="Audio file path must be a non-empty string"): + audio_client.transcribe_streaming("", lambda chunk: None) + + def test_should_raise_for_streaming_invalid_callback(self, catalog): + """transcribe_streaming with invalid callback should raise.""" + model = catalog.get_model(AUDIO_MODEL_ALIAS) + assert model is not None + audio_client = model.get_audio_client() + + for invalid_callback in [None, 42, {}, "not a function"]: + with pytest.raises(TypeError, match="Callback must be a valid function"): + audio_client.transcribe_streaming(AUDIO_FILE_PATH, invalid_callback) diff --git a/sdk/python/test/openai/test_chat_client.py b/sdk/python/test/openai/test_chat_client.py new file mode 100644 index 00000000..d96891b9 --- /dev/null +++ b/sdk/python/test/openai/test_chat_client.py @@ -0,0 +1,243 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Tests for ChatClient – mirrors chatClient.test.ts.""" + +from __future__ import annotations + +import json + +import pytest + +from ..conftest import TEST_MODEL_ALIAS, get_multiply_tool + + +def _get_loaded_chat_model(catalog): + """Helper: ensure the test model is selected, loaded, and return Model + ChatClient.""" + cached = catalog.get_cached_models() + assert len(cached) > 0 + + cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + assert cached_variant is not None, f"{TEST_MODEL_ALIAS} should be cached" + + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + + model.select_variant(cached_variant) + model.load() + return model + + +class TestChatClient: + """Chat Client Tests.""" + + def test_should_perform_chat_completion(self, catalog): + """Non-streaming chat: 7 * 6 should include '42' in the response.""" + model = _get_loaded_chat_model(catalog) + try: + client = model.get_chat_client() + client.settings.max_tokens = 500 + client.settings.temperature = 0.0 # deterministic + + result = client.complete_chat([ + {"role": "user", + "content": "You are a calculator. Be precise. What is the answer to 7 multiplied by 6?"} + ]) + + assert result is not None + assert result.choices is not None + assert len(result.choices) > 0 + assert result.choices[0].message is not None + content = result.choices[0].message.content + assert isinstance(content, str) + assert "42" in content + finally: + model.unload() + + def test_should_perform_streaming_chat_completion(self, catalog): + """Streaming chat: 7 * 6 = 42, then follow-up +25 = 67.""" + model = _get_loaded_chat_model(catalog) + try: + client = model.get_chat_client() + client.settings.max_tokens = 500 + client.settings.temperature = 0.0 + + messages = [ + {"role": "user", + "content": "You are a calculator. Be precise. What is the answer to 7 multiplied by 6?"} + ] + + # ---- First question ---- + chunks = list(client.complete_streaming_chat(messages)) + assert len(chunks) > 0 + first_response = "".join( + c.choices[0].delta.content + for c in chunks + if c.choices and c.choices[0].delta and c.choices[0].delta.content + ) + assert "42" in first_response + + # ---- Follow-up question ---- + messages.append({"role": "assistant", "content": first_response}) + messages.append({"role": "user", "content": "Add 25 to the previous answer. 
Think hard to be sure of the answer."}) + + chunks = list(client.complete_streaming_chat(messages)) + assert len(chunks) > 0 + second_response = "".join( + c.choices[0].delta.content + for c in chunks + if c.choices and c.choices[0].delta and c.choices[0].delta.content + ) + assert "67" in second_response + finally: + model.unload() + + def test_should_raise_for_empty_messages(self, catalog): + """complete_chat with empty list should raise.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + client = model.get_chat_client() + + with pytest.raises(ValueError): + client.complete_chat([]) + + def test_should_raise_for_none_messages(self, catalog): + """complete_chat with None should raise.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + client = model.get_chat_client() + + with pytest.raises(ValueError): + client.complete_chat(None) + + def test_should_raise_for_streaming_empty_messages(self, catalog): + """complete_streaming_chat with empty list should raise.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + client = model.get_chat_client() + + with pytest.raises(ValueError): + client.complete_streaming_chat([]) + + def test_should_raise_for_streaming_none_messages(self, catalog): + """complete_streaming_chat with None should raise.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + client = model.get_chat_client() + + with pytest.raises(ValueError): + client.complete_streaming_chat(None) + + def test_should_perform_tool_calling_chat_completion(self, catalog): + """Tool calling (non-streaming): model uses multiply_numbers tool to answer 7 * 6.""" + model = _get_loaded_chat_model(catalog) + try: + client = model.get_chat_client() + client.settings.max_tokens = 500 + client.settings.temperature = 0.0 + client.settings.tool_choice = {"type": "required"} + + messages = [ + {"role": "system", "content": "You are a helpful AI assistant. 
If necessary, you can use any provided tools to answer the question."}, + {"role": "user", "content": "What is the answer to 7 multiplied by 6?"}, + ] + tools = [get_multiply_tool()] + + # First turn: model should respond with a tool call + response = client.complete_chat(messages, tools) + + assert response is not None + assert response.choices is not None + assert len(response.choices) > 0 + assert response.choices[0].finish_reason == "tool_calls" + assert response.choices[0].message is not None + assert response.choices[0].message.tool_calls is not None + assert len(response.choices[0].message.tool_calls) > 0 + + tool_call = response.choices[0].message.tool_calls[0] + assert tool_call.type == "function" + assert tool_call.function.name == "multiply_numbers" + + args = json.loads(tool_call.function.arguments) + assert args["first"] == 7 + assert args["second"] == 6 + + # Second turn: provide tool result and ask model to continue + messages.append({"role": "tool", "content": "7 x 6 = 42."}) + messages.append({"role": "system", "content": "Respond only with the answer generated by the tool."}) + + client.settings.tool_choice = {"type": "auto"} + response = client.complete_chat(messages, tools) + + assert response.choices[0].message.content is not None + assert "42" in response.choices[0].message.content + finally: + model.unload() + + def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): + """Tool calling (streaming): model uses multiply_numbers tool, then continue conversation.""" + model = _get_loaded_chat_model(catalog) + try: + client = model.get_chat_client() + client.settings.max_tokens = 500 + client.settings.temperature = 0.0 + client.settings.tool_choice = {"type": "required"} + + messages = [ + {"role": "system", "content": "You are a helpful AI assistant. 
If necessary, you can use any provided tools to answer the question."}, + {"role": "user", "content": "What is the answer to 7 multiplied by 6?"}, + ] + tools = [get_multiply_tool()] + + # First turn: collect chunks and find the tool call + chunks = list(client.complete_streaming_chat(messages, tools)) + last_tool_call_chunk = next( + (c for c in reversed(chunks) + if c.choices and c.choices[0].delta and c.choices[0].delta.tool_calls), + None, + ) + assert last_tool_call_chunk is not None + + tool_call_choice = last_tool_call_chunk.choices[0] + assert tool_call_choice.finish_reason == "tool_calls" + + tool_call = tool_call_choice.delta.tool_calls[0] + assert tool_call.type == "function" + assert tool_call.function.name == "multiply_numbers" + + args = json.loads(tool_call.function.arguments) + assert args["first"] == 7 + assert args["second"] == 6 + + # Second turn: provide tool result and continue + messages.append({"role": "tool", "content": "7 x 6 = 42."}) + messages.append({"role": "system", "content": "Respond only with the answer generated by the tool."}) + + client.settings.tool_choice = {"type": "auto"} + + chunks = list(client.complete_streaming_chat(messages, tools)) + second_response = "".join( + c.choices[0].delta.content + for c in chunks + if c.choices and c.choices[0].delta and c.choices[0].delta.content + ) + assert "42" in second_response + finally: + model.unload() + + def test_should_return_generator(self, catalog): + """complete_streaming_chat returns a generator that yields chunks.""" + model = _get_loaded_chat_model(catalog) + try: + client = model.get_chat_client() + client.settings.max_tokens = 50 + client.settings.temperature = 0.0 + + result = client.complete_streaming_chat([{"role": "user", "content": "Say hi."}]) + + assert result is not None + chunks = list(result) + assert len(chunks) > 0 + finally: + model.unload() \ No newline at end of file diff --git a/sdk/python/test/test_catalog.py b/sdk/python/test/test_catalog.py new file mode 100644 index 00000000..2e5968cc --- /dev/null +++ b/sdk/python/test/test_catalog.py @@ -0,0 +1,167 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +"""Tests for Catalog – mirrors catalog.test.ts.""" + +from __future__ import annotations + +import json + +from foundry_local_sdk.catalog import Catalog +from foundry_local_sdk.detail.core_interop import Response + +from .conftest import TEST_MODEL_ALIAS + + +class TestCatalog: + """Catalog Tests.""" + + def test_should_initialize_with_catalog_name(self, catalog): + """Catalog should expose a non-empty name string.""" + assert isinstance(catalog.name, str) + assert len(catalog.name) > 0 + + def test_should_list_models(self, catalog): + """list_models() should return a non-empty list containing the test model.""" + models = catalog.list_models() + assert isinstance(models, list) + assert len(models) > 0 + + # Verify test model is present + aliases = {m.alias for m in models} + assert TEST_MODEL_ALIAS in aliases + + def test_should_get_model_by_alias(self, catalog): + """get_model() should return a Model whose alias matches.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + assert model.alias == TEST_MODEL_ALIAS + + def test_should_return_none_for_empty_alias(self, catalog): + """get_model('') should return None (unknown alias).""" + result = catalog.get_model("") + assert result is None + + def test_should_return_none_for_unknown_alias(self, catalog): + """get_model() with a random alias should return None.""" + result = catalog.get_model("definitely-not-a-real-model-alias-12345") + assert result is None + + def test_should_get_cached_models(self, catalog): + """get_cached_models() should return a list with at least the test model.""" + cached = catalog.get_cached_models() + assert isinstance(cached, list) + assert len(cached) > 0 + + # At least the test model should be cached + aliases = {m.alias for m in cached} + assert TEST_MODEL_ALIAS in aliases + + def test_should_get_model_variant_by_id(self, catalog): + """get_model_variant() with a valid ID should return the variant.""" + cached = catalog.get_cached_models() + assert len(cached) > 0 + variant = cached[0] + + result = catalog.get_model_variant(variant.id) + assert result is not None + assert result.id == variant.id + + def test_should_return_none_for_empty_variant_id(self, catalog): + """get_model_variant('') should return None.""" + result = catalog.get_model_variant("") + assert result is None + + def test_should_return_none_for_unknown_variant_id(self, catalog): + """get_model_variant() with a random ID should return None.""" + result = catalog.get_model_variant("definitely-not-a-real-model-id-12345") + assert result is None + + def test_should_resolve_latest_version_for_model_and_variant_inputs(self): + """get_latest_version() should resolve latest variant and preserve Model input when already latest.""" + + test_model_infos = [ + { + "id": "test-model:3", + "name": "test-model", + "version": 3, + "alias": "test-alias", + "displayName": "Test Model", + "providerType": "test", + "uri": "test://model/3", + "modelType": "ONNX", + "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, + "cached": False, + "createdAt": 1700000003, + }, + { + "id": "test-model:2", + "name": "test-model", + "version": 2, + "alias": "test-alias", + "displayName": "Test Model", + "providerType": "test", + "uri": "test://model/2", + "modelType": "ONNX", + "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, + "cached": False, + "createdAt": 1700000002, + }, + { + "id": "test-model:1", + "name": 
"test-model", + "version": 1, + "alias": "test-alias", + "displayName": "Test Model", + "providerType": "test", + "uri": "test://model/1", + "modelType": "ONNX", + "runtime": {"deviceType": "CPU", "executionProvider": "CPUExecutionProvider"}, + "cached": False, + "createdAt": 1700000001, + }, + ] + + class _MockCoreInterop: + def execute_command(self, command_name, command_input=None): + if command_name == "get_catalog_name": + return Response(data="TestCatalog", error=None) + if command_name == "get_model_list": + return Response(data=json.dumps(test_model_infos), error=None) + if command_name == "get_cached_models": + return Response(data="[]", error=None) + return Response(data=None, error=f"Unexpected command: {command_name}") + + class _MockModelLoadManager: + def list_loaded(self): + return [] + + catalog = Catalog(_MockModelLoadManager(), _MockCoreInterop()) + + model = catalog.get_model("test-alias") + assert model is not None + + variants = model.variants + assert len(variants) == 3 + + latest_variant = variants[0] + middle_variant = variants[1] + oldest_variant = variants[2] + + assert latest_variant.id == "test-model:3" + assert middle_variant.id == "test-model:2" + assert oldest_variant.id == "test-model:1" + + result1 = catalog.get_latest_version(latest_variant) + assert result1.id == "test-model:3" + + result2 = catalog.get_latest_version(middle_variant) + assert result2.id == "test-model:3" + + result3 = catalog.get_latest_version(oldest_variant) + assert result3.id == "test-model:3" + + model.select_variant(latest_variant) + result4 = catalog.get_latest_version(model) + assert result4 is model diff --git a/sdk/python/test/test_foundry_local_manager.py b/sdk/python/test/test_foundry_local_manager.py new file mode 100644 index 00000000..31528891 --- /dev/null +++ b/sdk/python/test/test_foundry_local_manager.py @@ -0,0 +1,83 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +"""Tests for FoundryLocalManager – mirrors foundryLocalManager.test.ts.""" + +from __future__ import annotations + + +class _Response: + def __init__(self, data=None, error=None): + self.data = data + self.error = error + + +class _FakeCoreInterop: + def __init__(self, responses): + self._responses = responses + self.calls = [] + + def execute_command(self, command_name, command_input=None): + self.calls.append((command_name, command_input)) + return self._responses[command_name] + + +class TestFoundryLocalManager: + """Foundry Local Manager Tests.""" + + def test_should_initialize_successfully(self, manager): + """Manager singleton should be non-None after initialize().""" + assert manager is not None + + def test_should_return_catalog(self, manager): + """Manager should expose a Catalog with a non-empty name.""" + catalog = manager.catalog + assert catalog is not None + assert isinstance(catalog.name, str) + assert len(catalog.name) > 0 + + def test_discover_eps_returns_ep_info(self, manager): + original_core = manager._core_interop + manager._core_interop = _FakeCoreInterop( + { + "discover_eps": _Response( + data='[{"Name":"CUDAExecutionProvider","IsRegistered":true}]', + error=None, + ) + } + ) + + try: + eps = manager.discover_eps() + finally: + manager._core_interop = original_core + + assert isinstance(eps, list) + assert len(eps) == 1 + assert eps[0].name == "CUDAExecutionProvider" + assert eps[0].is_registered is True + + def test_download_and_register_eps_returns_result(self, manager): + original_core = manager._core_interop + manager._core_interop = _FakeCoreInterop( + { + "download_and_register_eps": _Response( + data=( + '{"Success":true,"Status":"ok",' + '"RegisteredEps":["CUDAExecutionProvider"],"FailedEps":[]}' + ), + error=None, + ) + } + ) + + try: + result = manager.download_and_register_eps(["CUDAExecutionProvider"]) + finally: + manager._core_interop = original_core + + assert result.success is True + assert result.status == "ok" + assert result.registered_eps == ["CUDAExecutionProvider"] + assert result.failed_eps == [] diff --git a/sdk/python/test/test_model.py b/sdk/python/test/test_model.py new file mode 100644 index 00000000..e2ea1509 --- /dev/null +++ b/sdk/python/test/test_model.py @@ -0,0 +1,88 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+# -------------------------------------------------------------------------- +"""Tests for Model – mirrors model.test.ts.""" + +from __future__ import annotations + +from .conftest import TEST_MODEL_ALIAS, AUDIO_MODEL_ALIAS + + +class TestModel: + """Model Tests.""" + + def test_should_verify_cached_models(self, catalog): + """Cached models from test-data-shared should include qwen and whisper.""" + cached = catalog.get_cached_models() + assert isinstance(cached, list) + assert len(cached) > 0 + + # Check qwen model is cached + qwen = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + assert qwen is not None, f"{TEST_MODEL_ALIAS} should be cached" + assert qwen.is_cached is True + + # Check whisper model is cached + whisper = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None) + assert whisper is not None, f"{AUDIO_MODEL_ALIAS} should be cached" + assert whisper.is_cached is True + + def test_should_load_and_unload_model(self, catalog): + """Load/unload cycle should toggle is_loaded on the selected variant.""" + cached = catalog.get_cached_models() + assert len(cached) > 0 + + cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + assert cached_variant is not None + + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + + model.select_variant(cached_variant) + + # Ensure it's not loaded initially (or unload if it is) + if model.is_loaded: + model.unload() + assert model.is_loaded is False + + try: + model.load() + assert model.is_loaded is True + + model.unload() + assert model.is_loaded is False + finally: + # Safety cleanup + if model.is_loaded: + model.unload() + + def test_should_expose_context_length(self, catalog): + """Model should expose context_length from ModelInfo metadata.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + # context_length should be None or a positive integer + ctx = model.context_length + assert ctx is None or (isinstance(ctx, int) and ctx > 0) + + def test_should_expose_modalities(self, catalog): + """Model should expose input_modalities and output_modalities.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + # Modalities should be None or non-empty strings + for val in (model.input_modalities, model.output_modalities): + assert val is None or (isinstance(val, str) and len(val) > 0) + + def test_should_expose_capabilities(self, catalog): + """Model should expose capabilities metadata.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + caps = model.capabilities + assert caps is None or (isinstance(caps, str) and len(caps) > 0) + + def test_should_expose_supports_tool_calling(self, catalog): + """Model should expose supports_tool_calling metadata.""" + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + stc = model.supports_tool_calling + assert stc is None or isinstance(stc, bool) diff --git a/sdk/rust/.cargo/config.toml b/sdk/rust/.cargo/config.toml new file mode 100644 index 00000000..84c57445 --- /dev/null +++ b/sdk/rust/.cargo/config.toml @@ -0,0 +1,7 @@ +[registries] + +[source.crates-io] +replace-with = "ORT-Nightly" + +[source.ORT-Nightly] +registry = "sparse+https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/Cargo/index/" diff --git a/sdk/rust/README.md b/sdk/rust/README.md index d76a7589..d3983430 100644 --- a/sdk/rust/README.md +++ b/sdk/rust/README.md @@ -60,6 +60,54 @@ foundry-local-sdk = { version = "0.1", features = ["winml"] } > **Note:** The `winml` 
feature is only relevant on Windows. On macOS and Linux, the standard build is used regardless. No code changes are needed — your application code stays the same.
+### Explicit EP Management
+
+You can explicitly discover and download execution providers:
+
+```rust
+use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
+
+let manager = FoundryLocalManager::create(FoundryLocalConfig::new("my_app"))?;
+
+// Discover available EPs and their status
+let eps = manager.discover_eps()?;
+for ep in &eps {
+    println!("{} — registered: {}", ep.name, ep.is_registered);
+}
+
+// Download and register all available EPs
+let result = manager.download_and_register_eps(None).await?;
+println!("Success: {}, Status: {}", result.success, result.status);
+
+// Download only specific EPs
+let result = manager.download_and_register_eps(Some(&[eps[0].name.as_str()])).await?;
+```
+
+#### Per-EP download progress
+
+Use `download_and_register_eps_with_progress` to receive typed `(ep_name, percent)` updates
+as each EP downloads (`percent` is 0.0–100.0):
+
+```rust
+use std::sync::{Arc, Mutex};
+
+let current_ep = Arc::new(Mutex::new(String::new()));
+let ep = Arc::clone(&current_ep);
+manager.download_and_register_eps_with_progress(None, move |ep_name: &str, percent: f64| {
+    let mut current = ep.lock().unwrap();
+    if ep_name != current.as_str() {
+        if !current.is_empty() {
+            println!();
+        }
+        *current = ep_name.to_string();
+    }
+    print!("\r  {} {:5.1}%", ep_name, percent);
+}).await?;
+println!();
+```
+
+Catalog access does not block on EP downloads. Call `download_and_register_eps` when you need hardware-accelerated execution providers.
+
 ## Quick Start
 
 ```rust
@@ -127,15 +175,15 @@ let loaded = catalog.get_loaded_models().await?;
 
 ### Model Lifecycle
 
-Each `Model` wraps one or more `ModelVariant` entries (different quantizations, hardware targets). The SDK auto-selects the best available variant, preferring cached versions.
+Each model may have multiple variants (different quantizations, hardware targets). The SDK auto-selects the best available variant, preferring cached versions. Whether it comes from `get_model` or `get_model_variant`, you always hold the same public `Model` type; operations are forwarded to the currently selected variant.
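+
+The SDK also exposes `Catalog::get_latest_version` to resolve the newest catalog
+entry for a model or variant. A minimal sketch (the variant id below is
+hypothetical; the rest is the API shown in this README):
+
+```rust
+let model = catalog.get_model("phi-3.5-mini").await?;
+
+// Pin a specific variant by its unique id (hypothetical id, for illustration).
+model.select_variant("phi-3.5-mini-cuda-gpu:1")?;
+
+// Resolve the newest catalog version of whatever is currently selected.
+let latest = catalog.get_latest_version(&model).await?;
+println!("latest: {}", latest.id());
+```
+
+To inspect the variants behind a model: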
 ```rust
 let model = catalog.get_model("phi-3.5-mini").await?;
 
 // Inspect available variants
-println!("Selected: {}", model.selected_variant().id());
+println!("Selected: {}", model.id());
 for v in model.variants() {
-    println!("  {} (cached: {})", v.id(), v.info().cached);
+    println!("  {} (info.cached: {})", v.id(), v.info().cached);
 }
 ```
@@ -143,10 +191,10 @@ Download, load, and unload:
 
 ```rust
 // Download with progress reporting
-model.download(Some(|progress: &str| {
+model.download(Some(Box::new(|progress: &str| {
     print!("\r{progress}");
     std::io::Write::flush(&mut std::io::stdout()).ok();
-})).await?;
+}))).await?;
 
 // Load into memory
 model.load().await?;
diff --git a/sdk/rust/build.rs b/sdk/rust/build.rs
index 0f9726d5..660985c8 100644
--- a/sdk/rust/build.rs
+++ b/sdk/rust/build.rs
@@ -7,9 +7,9 @@
 const NUGET_FEED: &str = "https://api.nuget.org/v3/index.json";
 const ORT_NIGHTLY_FEED: &str = "https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json";
 
-const CORE_VERSION: &str = "0.9.0.8-rc3";
+const CORE_VERSION: &str = "1.0.0-rc1";
 const ORT_VERSION: &str = "1.24.3";
-const GENAI_VERSION: &str = "0.12.2";
+const GENAI_VERSION: &str = "0.13.0-dev-20260319-1131106-439ca0d5";
 const WINML_ORT_VERSION: &str = "1.23.2.3";
 
@@ -42,29 +42,18 @@ fn native_lib_extension() -> &'static str {
 fn get_packages(rid: &str) -> Vec<NuGetPackage> {
     let winml = env::var("CARGO_FEATURE_WINML").is_ok();
-    let nightly = env::var("CARGO_FEATURE_NIGHTLY").is_ok();
     let is_linux = rid.starts_with("linux");
 
-    let core_version = if nightly {
-        resolve_latest_version("Microsoft.AI.Foundry.Local.Core", ORT_NIGHTLY_FEED)
-            .unwrap_or_else(|| CORE_VERSION.to_string())
-    } else {
-        CORE_VERSION.to_string()
-    };
+    // Use pinned versions directly — dynamic resolution via resolve_latest_version
+    // is unreliable (feed returns versions in unexpected order, and some old versions
+    // require authentication).
 
     let mut packages = Vec::new();
 
     if winml {
-        let winml_core_version = if nightly {
-            resolve_latest_version("Microsoft.AI.Foundry.Local.Core.WinML", ORT_NIGHTLY_FEED)
-                .unwrap_or_else(|| CORE_VERSION.to_string())
-        } else {
-            CORE_VERSION.to_string()
-        };
-
         packages.push(NuGetPackage {
             name: "Microsoft.AI.Foundry.Local.Core.WinML",
-            version: winml_core_version,
+            version: CORE_VERSION.to_string(),
             feed_url: ORT_NIGHTLY_FEED,
         });
         packages.push(NuGetPackage {
@@ -75,12 +64,12 @@ fn get_packages(rid: &str) -> Vec<NuGetPackage> {
         packages.push(NuGetPackage {
             name: "Microsoft.ML.OnnxRuntimeGenAI.WinML",
             version: GENAI_VERSION.to_string(),
-            feed_url: NUGET_FEED,
+            feed_url: ORT_NIGHTLY_FEED,
         });
     } else {
         packages.push(NuGetPackage {
             name: "Microsoft.AI.Foundry.Local.Core",
-            version: core_version,
+            version: CORE_VERSION.to_string(),
             feed_url: ORT_NIGHTLY_FEED,
         });
 
@@ -101,7 +90,7 @@ fn get_packages(rid: &str) -> Vec<NuGetPackage> {
         packages.push(NuGetPackage {
             name: "Microsoft.ML.OnnxRuntimeGenAI.Foundry",
             version: GENAI_VERSION.to_string(),
-            feed_url: NUGET_FEED,
+            feed_url: ORT_NIGHTLY_FEED,
        });
     }
 
@@ -143,24 +132,6 @@ fn resolve_base_address(feed_url: &str) -> Result<String, String> {
     ))
 }
 
-/// Resolve the latest version of a package from a NuGet feed.
-fn resolve_latest_version(package_name: &str, feed_url: &str) -> Option<String> {
-    let base_address = resolve_base_address(feed_url).ok()?;
-    let lower_name = package_name.to_lowercase();
-    let index_url = format!("{base_address}{lower_name}/index.json");
-
-    let body: String = ureq::get(&index_url)
-        .call()
-        .ok()?
-        .body_mut()
-        .read_to_string()
-        .ok()?;
-
-    let index: serde_json::Value = serde_json::from_str(&body).ok()?;
-    let versions = index["versions"].as_array()?;
-    versions.last()?.as_str().map(|s| s.to_string())
-}
-
 /// Download a .nupkg and extract native libraries for the given RID into `out_dir`.
 fn download_and_extract(pkg: &NuGetPackage, rid: &str, out_dir: &Path) -> Result<(), String> {
     let base_address = resolve_base_address(pkg.feed_url)?;
diff --git a/sdk/rust/docs/api.md b/sdk/rust/docs/api.md
index bdc86974..278402fb 100644
--- a/sdk/rust/docs/api.md
+++ b/sdk/rust/docs/api.md
@@ -149,7 +149,7 @@ pub struct Model { /* private fields */ }
 |--------|-----------|-------------|
 | `alias` | `fn alias(&self) -> &str` | Alias shared by all variants. |
 | `id` | `fn id(&self) -> &str` | Unique identifier of the selected variant. |
-| `variants` | `fn variants(&self) -> &[ModelVariant]` | All variants in this model. |
+| `variants` | `fn variants(&self) -> Vec<Arc<Model>>` | All variants in this model. |
 | `selected_variant` | `fn selected_variant(&self) -> &ModelVariant` | Currently selected variant. |
 | `select_variant` | `fn select_variant(&self, id: &str) -> Result<(), FoundryLocalError>` | Select a variant by id. |
 | `is_cached` | `async fn is_cached(&self) -> Result<bool, FoundryLocalError>` | Whether the selected variant is cached on disk. |
diff --git a/sdk/rust/examples/tool_calling.rs b/sdk/rust/examples/tool_calling.rs
index 192b9ff0..fecf6bc5 100644
--- a/sdk/rust/examples/tool_calling.rs
+++ b/sdk/rust/examples/tool_calling.rs
@@ -61,7 +61,7 @@ async fn main() -> Result<()> {
     let models = manager.catalog().get_models().await?;
     let model = models
         .iter()
-        .find(|m| m.selected_variant().info().supports_tool_calling == Some(true))
+        .find(|m| m.info().supports_tool_calling == Some(true))
         .or_else(|| models.first())
         .expect("No models available");
 
diff --git a/sdk/rust/src/catalog.rs b/sdk/rust/src/catalog.rs
index 78485bff..26a737e9 100644
--- a/sdk/rust/src/catalog.rs
+++ b/sdk/rust/src/catalog.rs
@@ -6,10 +6,10 @@ use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
 
 use crate::detail::core_interop::CoreInterop;
+use crate::detail::model::Model;
+use crate::detail::model_variant::ModelVariant;
 use crate::detail::ModelLoadManager;
 use crate::error::{FoundryLocalError, Result};
-use crate::model::Model;
-use crate::model_variant::ModelVariant;
 use crate::types::ModelInfo;
 
 /// How long the catalog cache remains valid before a refresh.
@@ -39,7 +39,7 @@ impl CacheInvalidator {
 /// All mutable catalog data behind a single lock to prevent split-brain reads.
 struct CatalogState {
     models_by_alias: HashMap<String, Arc<Model>>,
-    variants_by_id: HashMap<String, Arc<ModelVariant>>,
+    variants_by_id: HashMap<String, Arc<Model>>,
     last_refresh: Option<Instant>,
 }
 
@@ -87,6 +87,11 @@ impl Catalog {
         &self.name
     }
 
+    /// Invalidate the catalog cache so the next access re-fetches models.
+    pub(crate) fn invalidate_cache(&self) {
+        self.invalidator.invalidate();
+    }
+
     /// Refresh the catalog from the native core if the cache has expired or
     /// has been explicitly invalidated (e.g. after a download or removal).
     pub async fn update_models(&self) -> Result<()> {
@@ -135,7 +140,7 @@
         self.update_models().await?;
         let s = self.lock_state()?;
         s.models_by_alias.get(alias).cloned().ok_or_else(|| {
-            let available: Vec<&String> = s.models_by_alias.keys().collect();
+            let available: Vec<&str> = s.models_by_alias.keys().map(|k| k.as_str()).collect();
             FoundryLocalError::ModelOperation {
                 reason: format!("Unknown model alias '{alias}'. Available: {available:?}"),
             }
         })
     }
 
     /// Look up a specific model variant by its unique id.
-    pub async fn get_model_variant(&self, id: &str) -> Result<Arc<ModelVariant>> {
+    ///
+    /// NOTE: This will return a `Model` representing a single variant. Use
+    /// [`get_model`](Catalog::get_model) to obtain a `Model` with all
+    /// available variants.
+    pub async fn get_model_variant(&self, id: &str) -> Result<Arc<Model>> {
         if id.trim().is_empty() {
             return Err(FoundryLocalError::Validation {
                 reason: "Variant id must be a non-empty string".into(),
@@ -152,7 +161,7 @@
         self.update_models().await?;
         let s = self.lock_state()?;
         s.variants_by_id.get(id).cloned().ok_or_else(|| {
-            let available: Vec<&String> = s.variants_by_id.keys().collect();
+            let available: Vec<&str> = s.variants_by_id.keys().map(|k| k.as_str()).collect();
             FoundryLocalError::ModelOperation {
                 reason: format!("Unknown variant id '{id}'. Available: {available:?}"),
             }
@@ -160,7 +169,7 @@
     }
 
     /// Return only the model variants that are currently cached on disk.
-    pub async fn get_cached_models(&self) -> Result<Vec<Arc<ModelVariant>>> {
+    pub async fn get_cached_models(&self) -> Result<Vec<Arc<Model>>> {
         self.update_models().await?;
         let raw = self
             .core
@@ -178,7 +187,7 @@
     }
 
     /// Return model variants that are currently loaded into memory.
-    pub async fn get_loaded_models(&self) -> Result<Vec<Arc<ModelVariant>>> {
+    pub async fn get_loaded_models(&self) -> Result<Vec<Arc<Model>>> {
         self.update_models().await?;
         let loaded_ids = self.model_load_manager.list_loaded().await?;
         let s = self.lock_state()?;
@@ -188,6 +197,36 @@
             .collect())
     }
 
+    /// Resolve the latest catalog version for the provided model or variant.
+    pub async fn get_latest_version(&self, model_or_model_variant: &Model) -> Result<Arc<Model>> {
+        self.update_models().await?;
+        let s = self.lock_state()?;
+
+        let model = s
+            .models_by_alias
+            .get(model_or_model_variant.alias())
+            .ok_or_else(|| FoundryLocalError::ModelOperation {
+                reason: format!(
+                    "Model with alias '{}' not found in catalog.",
+                    model_or_model_variant.alias()
+                ),
+            })?;
+
+        let latest = model
+            .variants()
+            .into_iter()
+            .find(|variant| variant.info().name == model_or_model_variant.info().name)
+            .ok_or_else(|| FoundryLocalError::Internal {
+                reason: format!(
+                    "Mismatch between model (alias:{}) and model variant (alias:{}).",
+                    model.alias(),
+                    model_or_model_variant.alias()
+                ),
+            })?;
+
+        Ok(latest)
+    }
+
     async fn force_refresh(&self) -> Result<()> {
         let raw = self
             .core
@@ -211,7 +250,7 @@
         };
 
         let mut alias_map_build: HashMap<String, Model> = HashMap::new();
-        let mut id_map: HashMap<String, Arc<ModelVariant>> = HashMap::new();
+        let mut id_map: HashMap<String, Arc<Model>> = HashMap::new();
 
         for info in infos {
             let id = info.id.clone();
@@ -222,12 +261,11 @@
                 Arc::clone(&self.model_load_manager),
                 self.invalidator.clone(),
             );
-            let variant_arc = Arc::new(variant.clone());
-            id_map.insert(id, variant_arc);
+            id_map.insert(id, Arc::new(Model::from_variant(variant.clone())));
 
             alias_map_build
-                .entry(alias.clone())
-                .or_insert_with(|| Model::new(alias, Arc::clone(&self.core)))
+                .entry(alias)
+                .or_insert_with_key(|a| Model::from_group(a.clone(), Arc::clone(&self.core)))
                 .add_variant(variant);
         }
 
diff --git a/sdk/rust/src/configuration.rs b/sdk/rust/src/configuration.rs
index d23d5986..c1ec2964 100644
--- a/sdk/rust/src/configuration.rs
+++ b/sdk/rust/src/configuration.rs
@@ -183,31 +183,24 @@ impl Configuration {
         let mut params = HashMap::new();
         params.insert("AppName".into(), app_name);
 
-        if let Some(v) = config.app_data_dir {
-
params.insert("AppDataDir".into(), v); - } - if let Some(v) = config.model_cache_dir { - params.insert("ModelCacheDir".into(), v); - } - if let Some(v) = config.logs_dir { - params.insert("LogsDir".into(), v); - } - if let Some(level) = config.log_level { - params.insert("LogLevel".into(), level.as_core_str().into()); - } - if let Some(v) = config.web_service_urls { - params.insert("WebServiceUrls".into(), v); - } - if let Some(v) = config.service_endpoint { - params.insert("WebServiceExternalUrl".into(), v); - } - if let Some(v) = config.library_path { - params.insert("FoundryLocalCorePath".into(), v); + let optional_fields = [ + ("AppDataDir", config.app_data_dir), + ("ModelCacheDir", config.model_cache_dir), + ("LogsDir", config.logs_dir), + ("LogLevel", config.log_level.map(|l| l.as_core_str().into())), + ("WebServiceUrls", config.web_service_urls), + ("WebServiceExternalUrl", config.service_endpoint), + ("FoundryLocalCorePath", config.library_path), + ]; + + for (key, value) in optional_fields { + if let Some(v) = value { + params.insert(key.into(), v); + } } + if let Some(extra) = config.additional_settings { - for (k, v) in extra { - params.insert(k, v); - } + params.extend(extra); } Ok((Self { params }, config.logger)) diff --git a/sdk/rust/src/detail/core_interop.rs b/sdk/rust/src/detail/core_interop.rs index e69a6e98..75146164 100644 --- a/sdk/rust/src/detail/core_interop.rs +++ b/sdk/rust/src/detail/core_interop.rs @@ -137,25 +137,42 @@ impl<'a> StreamingCallbackState<'a> { /// Append raw bytes, decode as much valid UTF-8 as possible, and forward /// complete text to the callback. Any trailing incomplete multi-byte - /// sequence is kept in the buffer for the next call. + /// sequence is kept in the buffer for the next call. Invalid byte + /// sequences are skipped to prevent the buffer from growing unboundedly. fn push(&mut self, bytes: &[u8]) { self.buf.extend_from_slice(bytes); - let valid_up_to = match std::str::from_utf8(&self.buf) { - Ok(s) => { - (self.callback)(s); - s.len() - } - Err(e) => { - let n = e.valid_up_to(); - if n > 0 { - // SAFETY: `valid_up_to` guarantees this prefix is valid UTF-8. - let valid = unsafe { std::str::from_utf8_unchecked(&self.buf[..n]) }; - (self.callback)(valid); + loop { + match std::str::from_utf8(&self.buf) { + Ok(s) => { + if !s.is_empty() { + (self.callback)(s); + } + self.buf.clear(); + break; + } + Err(e) => { + let n = e.valid_up_to(); + if n > 0 { + // SAFETY: `valid_up_to` guarantees this prefix is valid UTF-8. + let valid = unsafe { std::str::from_utf8_unchecked(&self.buf[..n]) }; + (self.callback)(valid); + } + match e.error_len() { + Some(err_len) => { + // Definite invalid sequence — skip past it and + // continue decoding the remainder. + self.buf.drain(..n + err_len); + } + None => { + // Incomplete multi-byte sequence at the end — + // keep it for the next push. 
+                            self.buf.drain(..n);
+                            break;
+                        }
+                    }
+                }
-                n
             }
-        };
-        self.buf.drain(..valid_up_to);
+        }
     }
 
     /// Flush any remaining bytes as lossy UTF-8 (called once after the native
diff --git a/sdk/rust/src/detail/mod.rs b/sdk/rust/src/detail/mod.rs
index c7f2fd32..b153ed5b 100644
--- a/sdk/rust/src/detail/mod.rs
+++ b/sdk/rust/src/detail/mod.rs
@@ -1,4 +1,6 @@
 pub(crate) mod core_interop;
+pub(crate) mod model;
 mod model_load_manager;
+pub(crate) mod model_variant;
 
 pub use self::model_load_manager::ModelLoadManager;
diff --git a/sdk/rust/src/detail/model.rs b/sdk/rust/src/detail/model.rs
new file mode 100644
index 00000000..196ebe35
--- /dev/null
+++ b/sdk/rust/src/detail/model.rs
@@ -0,0 +1,300 @@
+//! Public model type backed by an internal enum.
+//!
+//! Users interact solely with [`Model`]. The internal representation
+//! distinguishes between a single variant and a group of variants sharing
+//! the same alias, but callers never need to know which kind they hold.
+
+use std::fmt;
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
+use std::sync::Arc;
+
+use super::core_interop::CoreInterop;
+use super::model_variant::ModelVariant;
+use crate::error::{FoundryLocalError, Result};
+use crate::openai::AudioClient;
+use crate::openai::ChatClient;
+use crate::types::ModelInfo;
+
+/// The public model type.
+///
+/// A `Model` may represent either a group of variants (as returned by
+/// [`Catalog::get_model`](crate::Catalog::get_model)) or a single variant (as
+/// returned by [`Catalog::get_model_variant`](crate::Catalog::get_model_variant)
+/// or [`Model::variants`]).
+///
+/// When a `Model` groups multiple variants, operations are forwarded to
+/// the currently selected variant. Use [`variants`](Model::variants) to
+/// inspect the available variants and [`select_variant`](Model::select_variant)
+/// to change the selection.
+pub struct Model {
+    inner: ModelKind,
+}
+
+#[allow(clippy::large_enum_variant)]
+enum ModelKind {
+    /// A single model variant (from `get_model_variant` or `variants()`).
+    ModelVariant(ModelVariant),
+    /// A group of variants sharing the same alias (from `get_model`).
+    Model {
+        alias: String,
+        core: Arc<CoreInterop>,
+        variants: Vec<ModelVariant>,
+        selected: AtomicUsize,
+    },
+}
+
+impl Clone for Model {
+    fn clone(&self) -> Self {
+        Self {
+            inner: match &self.inner {
+                ModelKind::ModelVariant(v) => ModelKind::ModelVariant(v.clone()),
+                ModelKind::Model {
+                    alias,
+                    core,
+                    variants,
+                    selected,
+                } => ModelKind::Model {
+                    alias: alias.clone(),
+                    core: Arc::clone(core),
+                    variants: variants.clone(),
+                    selected: AtomicUsize::new(selected.load(Relaxed)),
+                },
+            },
+        }
+    }
+}
+
+impl fmt::Debug for Model {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.inner {
+            ModelKind::ModelVariant(v) => f
+                .debug_struct("Model::ModelVariant")
+                .field("id", &v.id())
+                .field("alias", &v.alias())
+                .finish(),
+            ModelKind::Model {
+                alias,
+                variants,
+                selected,
+                ..
+            } => f
+                .debug_struct("Model::Model")
+                .field("alias", alias)
+                .field("id", &variants[selected.load(Relaxed)].id())
+                .field("variants_count", &variants.len())
+                .field("selected_index", &selected.load(Relaxed))
+                .finish(),
+        }
+    }
+}
+
+// ── Construction (crate-internal) ────────────────────────────────────────────
+
+impl Model {
+    /// Create a `Model` wrapping a single variant.
+    pub(crate) fn from_variant(variant: ModelVariant) -> Self {
+        Self {
+            inner: ModelKind::ModelVariant(variant),
+        }
+    }
+
+    /// Create a `Model` grouping multiple variants under one alias.
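+    ///
+    /// The catalog builds one of these per alias while refreshing and then
+    /// calls [`add_variant`](Model::add_variant) once per catalog entry.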
+    pub(crate) fn from_group(alias: String, core: Arc<CoreInterop>) -> Self {
+        Self {
+            inner: ModelKind::Model {
+                alias,
+                core,
+                variants: Vec::new(),
+                selected: AtomicUsize::new(0),
+            },
+        }
+    }
+
+    /// Add a variant to a group. Panics if called on a `ModelVariant` kind.
+    ///
+    /// If the new variant is cached and the current selection is not, the new
+    /// variant becomes the selected one.
+    pub(crate) fn add_variant(&mut self, variant: ModelVariant) {
+        match &mut self.inner {
+            ModelKind::Model {
+                variants, selected, ..
+            } => {
+                variants.push(variant);
+                let new_idx = variants.len() - 1;
+                let current = selected.load(Relaxed);
+                if variants[new_idx].info_ref().cached && !variants[current].info_ref().cached {
+                    selected.store(new_idx, Relaxed);
+                }
+            }
+            ModelKind::ModelVariant(_) => {
+                panic!("add_variant called on a single-variant Model");
+            }
+        }
+    }
+}
+
+// ── Private helpers ──────────────────────────────────────────────────────────
+
+impl Model {
+    fn selected_variant(&self) -> &ModelVariant {
+        match &self.inner {
+            ModelKind::ModelVariant(v) => v,
+            ModelKind::Model {
+                variants, selected, ..
+            } => &variants[selected.load(Relaxed)],
+        }
+    }
+}
+
+// ── Public API ───────────────────────────────────────────────────────────────
+
+impl Model {
+    /// Unique identifier of the (selected) variant.
+    pub fn id(&self) -> &str {
+        self.selected_variant().id()
+    }
+
+    /// Alias shared by all variants of this model.
+    pub fn alias(&self) -> &str {
+        match &self.inner {
+            ModelKind::ModelVariant(v) => v.alias(),
+            ModelKind::Model { alias, .. } => alias,
+        }
+    }
+
+    /// Full catalog metadata for the (selected) variant.
+    pub fn info(&self) -> &ModelInfo {
+        self.selected_variant().info()
+    }
+
+    /// Maximum context length (in tokens), or `None` if unknown.
+    pub fn context_length(&self) -> Option<u64> {
+        self.selected_variant().info().context_length
+    }
+
+    /// Comma-separated input modalities (e.g. `"text,image"`), or `None`.
+    pub fn input_modalities(&self) -> Option<&str> {
+        self.selected_variant().info().input_modalities.as_deref()
+    }
+
+    /// Comma-separated output modalities (e.g. `"text"`), or `None`.
+    pub fn output_modalities(&self) -> Option<&str> {
+        self.selected_variant().info().output_modalities.as_deref()
+    }
+
+    /// Capability tags (e.g. `"reasoning"`), or `None`.
+    pub fn capabilities(&self) -> Option<&str> {
+        self.selected_variant().info().capabilities.as_deref()
+    }
+
+    /// Whether the model supports tool/function calling, or `None`.
+    pub fn supports_tool_calling(&self) -> Option<bool> {
+        self.selected_variant().info().supports_tool_calling
+    }
+
+    /// Whether the (selected) variant is cached on disk.
+    pub async fn is_cached(&self) -> Result<bool> {
+        self.selected_variant().is_cached().await
+    }
+
+    /// Whether the (selected) variant is loaded into memory.
+    pub async fn is_loaded(&self) -> Result<bool> {
+        self.selected_variant().is_loaded().await
+    }
+
+    /// Download the (selected) variant. If `progress` is provided it
+    /// receives human-readable progress strings as they arrive.
+    pub async fn download<F>(&self, progress: Option<F>) -> Result<()>
+    where
+        F: FnMut(&str) + Send + 'static,
+    {
+        self.selected_variant().download(progress).await
+    }
+
+    /// Return the local file-system path of the (selected) variant.
+    pub async fn path(&self) -> Result<PathBuf> {
+        self.selected_variant().path().await
+    }
+
+    /// Load the (selected) variant into memory.
+    pub async fn load(&self) -> Result<()> {
+        self.selected_variant().load().await
+    }
+
+    /// Unload the (selected) variant from memory.
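+    ///
+    /// On success this returns the raw string response from the native core
+    /// (or from the external web service, when one is configured).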
+    pub async fn unload(&self) -> Result<String> {
+        self.selected_variant().unload().await
+    }
+
+    /// Remove the (selected) variant from the local cache.
+    pub async fn remove_from_cache(&self) -> Result<String> {
+        self.selected_variant().remove_from_cache().await
+    }
+
+    /// Create a [`ChatClient`] bound to the (selected) variant.
+    pub fn create_chat_client(&self) -> ChatClient {
+        self.selected_variant().create_chat_client()
+    }
+
+    /// Create an [`AudioClient`] bound to the (selected) variant.
+    pub fn create_audio_client(&self) -> AudioClient {
+        self.selected_variant().create_audio_client()
+    }
+
+    /// Available variants of this model.
+    ///
+    /// For a single-variant model (e.g. from
+    /// [`Catalog::get_model_variant`](crate::Catalog::get_model_variant)),
+    /// this returns a single-element list containing itself.
+    pub fn variants(&self) -> Vec<Arc<Model>> {
+        match &self.inner {
+            ModelKind::ModelVariant(v) => {
+                vec![Arc::new(Model::from_variant(v.clone()))]
+            }
+            ModelKind::Model { variants, .. } => variants
+                .iter()
+                .map(|v| Arc::new(Model::from_variant(v.clone())))
+                .collect(),
+        }
+    }
+
+    /// Select a variant by its unique id.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if no variant with the given id exists.
+    /// For single-variant models this always returns an error — use
+    /// [`Catalog::get_model`](crate::Catalog::get_model) to obtain a model
+    /// with all variants available.
+    pub fn select_variant(&self, id: &str) -> Result<()> {
+        match &self.inner {
+            ModelKind::ModelVariant(v) => Err(FoundryLocalError::ModelOperation {
+                reason: format!(
+                    "select_variant is not supported on a single variant. \
+                     Call Catalog::get_model(\"{}\") to get a model with all variants available.",
+                    v.alias()
+                ),
+            }),
+            ModelKind::Model {
+                variants,
+                selected,
+                alias,
+                ..
+            } => match variants.iter().position(|v| v.id() == id) {
+                Some(pos) => {
+                    selected.store(pos, Relaxed);
+                    Ok(())
+                }
+                None => {
+                    let available: Vec<&str> = variants.iter().map(|v| v.id()).collect();
+                    Err(FoundryLocalError::ModelOperation {
+                        reason: format!(
+                            "Variant '{id}' not found for model '{alias}'. Available: {available:?}",
+                        ),
+                    })
+                }
+            },
+        }
+    }
+}
diff --git a/sdk/rust/src/detail/model_load_manager.rs b/sdk/rust/src/detail/model_load_manager.rs
index 41507cbd..57eb3cfb 100644
--- a/sdk/rust/src/detail/model_load_manager.rs
+++ b/sdk/rust/src/detail/model_load_manager.rs
@@ -34,12 +34,12 @@ impl ModelLoadManager {
             let encoded_id = urlencoding::encode(model_id);
             self.http_get(&format!("{base_url}/models/load/{encoded_id}"))
                 .await?;
-            return Ok(());
+        } else {
+            let params = json!({ "Params": { "Model": model_id } });
+            self.core
+                .execute_command_async("load_model".into(), Some(params))
+                .await?;
         }
-        let params = json!({ "Params": { "Model": model_id } });
-        self.core
-            .execute_command_async("load_model".into(), Some(params))
-            .await?;
         Ok(())
     }
 
@@ -47,14 +47,14 @@
     pub async fn unload(&self, model_id: &str) -> Result<String> {
         if let Some(base_url) = &self.external_service_url {
             let encoded_id = urlencoding::encode(model_id);
-            return self
-                .http_get(&format!("{base_url}/models/unload/{encoded_id}"))
-                .await;
+            self.http_get(&format!("{base_url}/models/unload/{encoded_id}"))
+                .await
+        } else {
+            let params = json!({ "Params": { "Model": model_id } });
+            self.core
+                .execute_command_async("unload_model".into(), Some(params))
+                .await
         }
-        let params = json!({ "Params": { "Model": model_id } });
-        self.core
-            .execute_command_async("unload_model".into(), Some(params))
-            .await
     }
 
     /// Return the list of currently loaded model identifiers.
@@ -67,11 +67,11 @@
                 .await?
         };
 
-        if raw.trim().is_empty() {
-            return Ok(Vec::new());
-        }
-
-        let ids: Vec<String> = serde_json::from_str(&raw)?;
+        let ids: Vec<String> = if raw.trim().is_empty() {
+            Vec::new()
+        } else {
+            serde_json::from_str(&raw)?
+        };
         Ok(ids)
     }
 
diff --git a/sdk/rust/src/model_variant.rs b/sdk/rust/src/detail/model_variant.rs
similarity index 60%
rename from sdk/rust/src/model_variant.rs
rename to sdk/rust/src/detail/model_variant.rs
index c4be6822..636c5d5b 100644
--- a/sdk/rust/src/model_variant.rs
+++ b/sdk/rust/src/detail/model_variant.rs
@@ -1,4 +1,7 @@
 //! A single model variant backed by [`ModelInfo`].
+//!
+//! This type is an implementation detail. Public APIs return
+//! [`Arc<Model>`](crate::Model) instead.
 
 use std::fmt;
 use std::path::PathBuf;
@@ -6,9 +9,9 @@ use std::sync::Arc;
 
 use serde_json::json;
 
+use super::core_interop::CoreInterop;
+use super::ModelLoadManager;
 use crate::catalog::CacheInvalidator;
-use crate::detail::core_interop::CoreInterop;
-use crate::detail::ModelLoadManager;
 use crate::error::Result;
 use crate::openai::AudioClient;
 use crate::openai::ChatClient;
@@ -16,8 +19,10 @@ use crate::types::ModelInfo;
 
 /// Represents one specific variant of a model (a particular id within an alias
 /// group).
+///
+/// This is an implementation detail — callers should use [`Model`](crate::Model).
 #[derive(Clone)]
-pub struct ModelVariant {
+pub(crate) struct ModelVariant {
     info: ModelInfo,
     core: Arc<CoreInterop>,
     model_load_manager: Arc<ModelLoadManager>,
@@ -27,8 +32,8 @@
 impl fmt::Debug for ModelVariant {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("ModelVariant")
-            .field("id", &self.id())
-            .field("alias", &self.alias())
+            .field("id", &self.info.id)
+            .field("alias", &self.info.alias)
             .finish()
     }
 }
@@ -48,28 +53,23 @@ impl ModelVariant {
         }
     }
 
-    /// The full [`ModelInfo`] metadata for this variant.
-    pub fn info(&self) -> &ModelInfo {
-        &self.info
-    }
-
-    /// Unique identifier.
-    pub fn id(&self) -> &str {
+    pub(crate) fn id(&self) -> &str {
         &self.info.id
     }
 
-    /// Alias shared with sibling variants.
-    pub fn alias(&self) -> &str {
+    pub(crate) fn alias(&self) -> &str {
         &self.info.alias
     }
 
-    /// Check whether the variant is cached locally by querying the native
-    /// core.
-    ///
-    /// Each call performs a full IPC round-trip. When checking many variants,
-    /// prefer [`Catalog::get_cached_models`] which fetches the full list in a
-    /// single call.
-    pub async fn is_cached(&self) -> Result<bool> {
+    pub(crate) fn info(&self) -> &ModelInfo {
+        &self.info
+    }
+
+    pub(crate) fn info_ref(&self) -> &ModelInfo {
+        &self.info
+    }
+
+    pub(crate) async fn is_cached(&self) -> Result<bool> {
         let raw = self
             .core
             .execute_command_async("get_cached_models".into(), None)
@@ -81,15 +81,12 @@
         Ok(cached_ids.iter().any(|id| id == &self.info.id))
     }
 
-    /// Check whether the variant is currently loaded into memory.
-    pub async fn is_loaded(&self) -> Result<bool> {
+    pub(crate) async fn is_loaded(&self) -> Result<bool> {
         let loaded = self.model_load_manager.list_loaded().await?;
         Ok(loaded.iter().any(|id| id == &self.info.id))
    }
 
-    /// Download the model variant. If `progress` is provided, it receives
-    /// human-readable progress strings as the download proceeds.
-    pub async fn download<F>(&self, progress: Option<F>) -> Result<()>
+    pub(crate) async fn download<F>(&self, progress: Option<F>) -> Result<()>
     where
         F: FnMut(&str) + Send + 'static,
     {
@@ -110,8 +107,7 @@
         Ok(())
     }
 
-    /// Return the local file-system path where this variant is stored.
-    pub async fn path(&self) -> Result<PathBuf> {
+    pub(crate) async fn path(&self) -> Result<PathBuf> {
         let params = json!({ "Params": { "Model": self.info.id } });
         let path_str = self
             .core
@@ -120,18 +116,15 @@
         Ok(PathBuf::from(path_str))
     }
 
-    /// Load the variant into memory.
-    pub async fn load(&self) -> Result<()> {
+    pub(crate) async fn load(&self) -> Result<()> {
         self.model_load_manager.load(&self.info.id).await
     }
 
-    /// Unload the variant from memory.
-    pub async fn unload(&self) -> Result<String> {
+    pub(crate) async fn unload(&self) -> Result<String> {
         self.model_load_manager.unload(&self.info.id).await
     }
 
-    /// Remove the variant from the local cache.
-    pub async fn remove_from_cache(&self) -> Result<String> {
+    pub(crate) async fn remove_from_cache(&self) -> Result<String> {
         let params = json!({ "Params": { "Model": self.info.id } });
         let result = self
             .core
@@ -141,13 +134,11 @@
         Ok(result)
     }
 
-    /// Create a [`ChatClient`] bound to this variant.
-    pub fn create_chat_client(&self) -> ChatClient {
-        ChatClient::new(self.info.id.clone(), Arc::clone(&self.core))
+    pub(crate) fn create_chat_client(&self) -> ChatClient {
+        ChatClient::new(&self.info.id, Arc::clone(&self.core))
     }
 
-    /// Create an [`AudioClient`] bound to this variant.
-    pub fn create_audio_client(&self) -> AudioClient {
-        AudioClient::new(self.info.id.clone(), Arc::clone(&self.core))
+    pub(crate) fn create_audio_client(&self) -> AudioClient {
+        AudioClient::new(&self.info.id, Arc::clone(&self.core))
     }
 }
diff --git a/sdk/rust/src/foundry_local_manager.rs b/sdk/rust/src/foundry_local_manager.rs
index f80a7176..0c22ef15 100644
--- a/sdk/rust/src/foundry_local_manager.rs
+++ b/sdk/rust/src/foundry_local_manager.rs
@@ -13,6 +13,7 @@
 use crate::configuration::{Configuration, FoundryLocalConfig, Logger};
 use crate::detail::core_interop::CoreInterop;
 use crate::detail::ModelLoadManager;
 use crate::error::{FoundryLocalError, Result};
+use crate::types::{EpDownloadResult, EpInfo};
 
 /// Global singleton holder — only stores a successfully initialised manager.
 static INSTANCE: OnceLock<FoundryLocalManager> = OnceLock::new();
@@ -133,4 +134,93 @@ impl FoundryLocalManager {
             .clear();
         Ok(())
     }
+
+    /// Discover available execution providers and their registration status.
+    pub fn discover_eps(&self) -> Result<Vec<EpInfo>> {
+        let raw = self.core.execute_command("discover_eps", None)?;
+        let eps: Vec<EpInfo> = serde_json::from_str(&raw)?;
+        Ok(eps)
+    }
+
+    /// Download and register execution providers.
+    ///
+    /// If `names` is `None` or empty, all available EPs are downloaded.
+    /// Otherwise only the named EPs are downloaded and registered.
+    pub async fn download_and_register_eps(
+        &self,
+        names: Option<&[&str]>,
+    ) -> Result<EpDownloadResult> {
+        self.download_and_register_eps_impl(names, None::<fn(&str, f64)>)
+            .await
+    }
+
+    /// Download and register execution providers, reporting per-EP progress.
+    ///
+    /// If `names` is `None` or empty, all available EPs are downloaded.
+    /// Otherwise only the named EPs are downloaded and registered.
+    ///
+    /// `progress_callback` receives `(ep_name, percent)` where `percent`
+    /// ranges from 0.0 to 100.0 as each EP downloads.
+    pub async fn download_and_register_eps_with_progress<F>(
+        &self,
+        names: Option<&[&str]>,
+        progress_callback: F,
+    ) -> Result<EpDownloadResult>
+    where
+        F: FnMut(&str, f64) + Send + 'static,
+    {
+        self.download_and_register_eps_impl(names, Some(progress_callback))
+            .await
+    }
+
+    async fn download_and_register_eps_impl<F>(
+        &self,
+        names: Option<&[&str]>,
+        progress_callback: Option<F>,
+    ) -> Result<EpDownloadResult>
+    where
+        F: FnMut(&str, f64) + Send + 'static,
+    {
+        let params = match names {
+            Some(n) if !n.is_empty() => Some(json!({ "Params": { "Names": n.join(",") } })),
+            _ => None,
+        };
+
+        let raw = match progress_callback {
+            Some(cb) => {
+                let mut callback = cb;
+                // Progress chunks arrive from the core as "<ep_name>|<percent>";
+                // parse them and forward typed (name, percent) updates.
+                let wrapper = move |chunk: &str| {
+                    if let Some(sep) = chunk.find('|') {
+                        let name = &chunk[..sep];
+                        if let Ok(percent) = chunk[sep + 1..].parse::<f64>() {
+                            callback(name, percent);
+                        }
+                    }
+                };
+
+                self.core
+                    .execute_command_streaming_async(
+                        "download_and_register_eps".into(),
+                        params,
+                        wrapper,
+                    )
+                    .await?
+            }
+            None => {
+                self.core
+                    .execute_command_async("download_and_register_eps".into(), params)
+                    .await?
+            }
+        };
+
+        let result: EpDownloadResult = serde_json::from_str(&raw)?;
+
+        // Invalidate the catalog cache if the call succeeded or any EP was
+        // registered, so the next access re-fetches models with the updated
+        // set of available EPs.
+        if result.success || !result.registered_eps.is_empty() {
+            self.catalog.invalidate_cache();
+        }
+
+        Ok(result)
+    }
 }
diff --git a/sdk/rust/src/lib.rs b/sdk/rust/src/lib.rs
index c6d6e6c4..872a875c 100644
--- a/sdk/rust/src/lib.rs
+++ b/sdk/rust/src/lib.rs
@@ -6,8 +6,6 @@
 mod catalog;
 mod configuration;
 mod error;
 mod foundry_local_manager;
-mod model;
-mod model_variant;
 mod types;
 
 pub(crate) mod detail;
@@ -15,13 +13,12 @@
 pub mod openai;
 
 pub use self::catalog::Catalog;
 pub use self::configuration::{FoundryLocalConfig, LogLevel, Logger};
+pub use self::detail::model::Model;
 pub use self::error::FoundryLocalError;
 pub use self::foundry_local_manager::FoundryLocalManager;
-pub use self::model::Model;
-pub use self::model_variant::ModelVariant;
 pub use self::types::{
-    ChatResponseFormat, ChatToolChoice, DeviceType, ModelInfo, ModelSettings, Parameter,
-    PromptTemplate, Runtime,
+    ChatResponseFormat, ChatToolChoice, DeviceType, EpDownloadResult, EpInfo, ModelInfo,
+    ModelSettings, Parameter, PromptTemplate, Runtime,
 };
 
 // Re-export OpenAI request types so callers can construct typed messages.
diff --git a/sdk/rust/src/model.rs b/sdk/rust/src/model.rs
deleted file mode 100644
index 4a197e3f..00000000
--- a/sdk/rust/src/model.rs
+++ /dev/null
@@ -1,154 +0,0 @@
-//! High-level model abstraction that wraps one or more [`ModelVariant`]s
-//! sharing the same alias.
-
-use std::fmt;
-use std::path::PathBuf;
-use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
-use std::sync::Arc;
-
-use crate::detail::core_interop::CoreInterop;
-use crate::error::{FoundryLocalError, Result};
-use crate::model_variant::ModelVariant;
-use crate::openai::AudioClient;
-use crate::openai::ChatClient;
-
-/// A model groups one or more [`ModelVariant`]s that share the same alias.
-///
-/// By default the variant that is already cached locally is selected. You
-/// can override the selection with [`Model::select_variant`].
-pub struct Model {
-    alias: String,
-    core: Arc<CoreInterop>,
-    variants: Vec<ModelVariant>,
-    selected_index: AtomicUsize,
-}
-
-impl Clone for Model {
-    fn clone(&self) -> Self {
-        Self {
-            alias: self.alias.clone(),
-            core: Arc::clone(&self.core),
-            variants: self.variants.clone(),
-            selected_index: AtomicUsize::new(self.selected_index.load(Relaxed)),
-        }
-    }
-}
-
-impl fmt::Debug for Model {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Model")
-            .field("alias", &self.alias())
-            .field("id", &self.id())
-            .field("variants_count", &self.variants.len())
-            .field("selected_index", &self.selected_index.load(Relaxed))
-            .finish()
-    }
-}
-
-impl Model {
-    pub(crate) fn new(alias: String, core: Arc<CoreInterop>) -> Self {
-        Self {
-            alias,
-            core,
-            variants: Vec::new(),
-            selected_index: AtomicUsize::new(0),
-        }
-    }
-
-    /// Add a variant. If the new variant is cached and the current selection
-    /// is not, the new variant becomes the selected one.
-    pub(crate) fn add_variant(&mut self, variant: ModelVariant) {
-        self.variants.push(variant);
-        let new_idx = self.variants.len() - 1;
-        let current = self.selected_index.load(Relaxed);
-
-        // Prefer a cached variant over a non-cached one.
-        if self.variants[new_idx].info().cached && !self.variants[current].info().cached {
-            self.selected_index.store(new_idx, Relaxed);
-        }
-    }
-
-    /// Select a variant by its unique id.
-    pub fn select_variant(&self, id: &str) -> Result<()> {
-        if let Some(pos) = self.variants.iter().position(|v| v.id() == id) {
-            self.selected_index.store(pos, Relaxed);
-            return Ok(());
-        }
-        let available: Vec<String> = self.variants.iter().map(|v| v.id().to_string()).collect();
-        Err(FoundryLocalError::ModelOperation {
-            reason: format!(
-                "Variant '{id}' not found for model '{}'. Available: {available:?}",
-                self.alias
-            ),
-        })
-    }
-
-    /// Returns a reference to the currently selected variant.
-    pub fn selected_variant(&self) -> &ModelVariant {
-        &self.variants[self.selected_index.load(Relaxed)]
-    }
-
-    /// Returns all variants that belong to this model.
-    pub fn variants(&self) -> &[ModelVariant] {
-        &self.variants
-    }
-
-    /// Alias shared by all variants in this model.
-    pub fn alias(&self) -> &str {
-        &self.alias
-    }
-
-    /// Unique identifier of the selected variant.
-    pub fn id(&self) -> &str {
-        self.selected_variant().id()
-    }
-
-    /// Whether the selected variant is cached on disk.
-    pub async fn is_cached(&self) -> Result<bool> {
-        self.selected_variant().is_cached().await
-    }
-
-    /// Whether the selected variant is loaded into memory.
-    pub async fn is_loaded(&self) -> Result<bool> {
-        self.selected_variant().is_loaded().await
-    }
-
-    /// Download the selected variant. If `progress` is provided, it receives
-    /// human-readable progress strings as they arrive from the native core.
-    pub async fn download<F>(&self, progress: Option<F>) -> Result<()>
-    where
-        F: FnMut(&str) + Send + 'static,
-    {
-        self.selected_variant().download(progress).await
-    }
-
-    /// Return the local file-system path of the selected variant.
-    pub async fn path(&self) -> Result<PathBuf> {
-        self.selected_variant().path().await
-    }
-
-    /// Load the selected variant into memory.
-    pub async fn load(&self) -> Result<()> {
-        self.selected_variant().load().await
-    }
-
-    /// Unload the selected variant from memory.
-    pub async fn unload(&self) -> Result<String> {
-        self.selected_variant().unload().await
-    }
-
-    /// Remove the selected variant from the local cache.
-    pub async fn remove_from_cache(&self) -> Result<String> {
-        self.selected_variant().remove_from_cache().await
-    }
-
-    /// Create a [`ChatClient`] bound to the selected variant.
-    pub fn create_chat_client(&self) -> ChatClient {
-        ChatClient::new(self.id().to_string(), Arc::clone(&self.core))
-    }
-
-    /// Create an [`AudioClient`] bound to the selected variant.
-    pub fn create_audio_client(&self) -> AudioClient {
-        AudioClient::new(self.id().to_string(), Arc::clone(&self.core))
-    }
-}
diff --git a/sdk/rust/src/openai/audio_client.rs b/sdk/rust/src/openai/audio_client.rs
index da0f9f5b..0319da38 100644
--- a/sdk/rust/src/openai/audio_client.rs
+++ b/sdk/rust/src/openai/audio_client.rs
@@ -116,9 +116,9 @@ pub struct AudioClient {
 }
 
 impl AudioClient {
-    pub(crate) fn new(model_id: String, core: Arc<CoreInterop>) -> Self {
+    pub(crate) fn new(model_id: &str, core: Arc<CoreInterop>) -> Self {
         Self {
-            model_id,
+            model_id: model_id.to_owned(),
             core,
             settings: AudioClientSettings::default(),
         }
diff --git a/sdk/rust/src/openai/chat_client.rs b/sdk/rust/src/openai/chat_client.rs
index 62d0be5b..6597de82 100644
--- a/sdk/rust/src/openai/chat_client.rs
+++ b/sdk/rust/src/openai/chat_client.rs
@@ -132,9 +132,9 @@ pub struct ChatClient {
 }
 
 impl ChatClient {
-    pub(crate) fn new(model_id: String, core: Arc<CoreInterop>) -> Self {
+    pub(crate) fn new(model_id: &str, core: Arc<CoreInterop>) -> Self {
         Self {
-            model_id,
+            model_id: model_id.to_owned(),
             core,
             settings: ChatClientSettings::default(),
         }
diff --git a/sdk/rust/src/types.rs b/sdk/rust/src/types.rs
index d1d1f002..28b37ed2 100644
--- a/sdk/rust/src/types.rs
+++ b/sdk/rust/src/types.rs
@@ -87,6 +87,14 @@ pub struct ModelInfo {
     pub min_fl_version: Option<String>,
     #[serde(default)]
     pub created_at_unix: u64,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub context_length: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_modalities: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_modalities: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub capabilities: Option<String>,
 }
 
 /// Desired response format for chat completions.
@@ -117,3 +125,27 @@ pub enum ChatToolChoice {
     /// Model must call the named function.
     Function(String),
 }
+
+/// Information about an available execution provider bootstrapper.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub struct EpInfo {
+    /// The name of the execution provider.
+    pub name: String,
+    /// Whether this EP is currently registered and ready for use.
+    pub is_registered: bool,
+}
+
+/// Result of a download-and-register execution-provider operation.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "PascalCase")]
+pub struct EpDownloadResult {
+    /// Whether all requested EPs were successfully registered.
+    pub success: bool,
+    /// Human-readable status message.
+    pub status: String,
+    /// Names of EPs that were successfully registered.
+    pub registered_eps: Vec<String>,
+    /// Names of EPs that failed to register.
+    pub failed_eps: Vec<String>,
+}
diff --git a/sdk/rust/tests/integration/model_test.rs b/sdk/rust/tests/integration/model_test.rs
index d2b68b77..4e3b371b 100644
--- a/sdk/rust/tests/integration/model_test.rs
+++ b/sdk/rust/tests/integration/model_test.rs
@@ -111,11 +111,12 @@ async fn should_have_selected_variant_matching_id() {
         .await
         .expect("get_model failed");
 
-    let selected = model.selected_variant();
+    // The model's id() should return the selected variant's id;
+    // info() delegates to the selected variant, so id() and info().id must agree.
     assert_eq!(
-        selected.id(),
         model.id(),
-        "selected_variant().id() should match model.id()"
+        model.info().id,
+        "model.id() should match model.info().id (the selected variant's metadata)"
     );
 }
 
@@ -177,7 +178,7 @@ async fn should_select_variant_by_id() {
     );
 
     // Restore the original variant so other tests sharing this
-    // Arc<Model> via the catalog are not affected.
+    // model via the catalog are not affected.
     model
         .select_variant(&original_id)
         .expect("restoring original variant should succeed");
diff --git a/sdk_legacy/cs/NuGet.config b/sdk_legacy/cs/NuGet.config
new file mode 100644
index 00000000..420497e9
--- /dev/null
+++ b/sdk_legacy/cs/NuGet.config
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8"?>
+<configuration>
+  <packageSources>
+    <clear />
+    <add key="ORT-Nightly" value="https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json" />
+  </packageSources>
+</configuration>
diff --git a/sdk_legacy/js/.npmrc b/sdk_legacy/js/.npmrc
new file mode 100644
index 00000000..114ea2a4
--- /dev/null
+++ b/sdk_legacy/js/.npmrc
@@ -0,0 +1,2 @@
+registry=https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/npm/registry/
+always-auth=true
diff --git a/sdk_legacy/rust/.cargo/config.toml b/sdk_legacy/rust/.cargo/config.toml
new file mode 100644
index 00000000..84c57445
--- /dev/null
+++ b/sdk_legacy/rust/.cargo/config.toml
@@ -0,0 +1,7 @@
+[registries]
+
+[source.crates-io]
+replace-with = "ORT-Nightly"
+
+[source.ORT-Nightly]
+registry = "sparse+https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/Cargo/index/"
diff --git a/www/.npmrc b/www/.npmrc
deleted file mode 100644
index b6f27f13..00000000
--- a/www/.npmrc
+++ /dev/null
@@ -1 +0,0 @@
-engine-strict=true
diff --git a/www/package.json b/www/package.json
index 8a311947..5454236d 100644
--- a/www/package.json
+++ b/www/package.json
@@ -12,7 +12,7 @@
   },
   "license": "MIT",
   "engines": {
-    "node": ">=22.0.0",
+    "node": ">=22.0.0 <23.0.0",
     "npm": ">=9.0.0"
   },
   "scripts": {
diff --git a/www/src/lib/components/home/footer.svelte b/www/src/lib/components/home/footer.svelte
index 44bc8df2..03a4df2d 100644
--- a/www/src/lib/components/home/footer.svelte
+++ b/www/src/lib/components/home/footer.svelte
@@ -111,7 +111,7 @@
       © {new Date().getFullYear()} Microsoft Corporation. All rights reserved.