4 changes: 2 additions & 2 deletions Directory.Packages.props
@@ -24,7 +24,7 @@
<PackageVersion Include="Microsoft.Extensions.Hosting.Abstractions" Version="8.0.1" />
<PackageVersion Include="Microsoft.Extensions.Http" Version="9.0.3" />
<PackageVersion Include="Microsoft.Extensions.Logging" Version="8.0.0" />
<PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.5.2" />
<PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI" Version="0.6.0" />
<PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.3" />
<PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" Version="0.5.2" />
<PackageVersion Include="Microsoft.ML.OnnxRuntimeGenAI.DirectML" Version="0.5.2" />
@@ -34,7 +34,7 @@
<PackageVersion Include="OllamaSharp" Version="5.1.7" />
<PackageVersion Include="PdfPig" Version="0.1.10" />
<PackageVersion Include="Polly.Core" Version="8.5.2" />
<PackageVersion Include="RabbitMQ.Client" Version="7.0.0" />
<PackageVersion Include="RabbitMQ.Client" Version="7.1.2" />
<PackageVersion Include="ReadLine" Version="2.0.1" />
<PackageVersion Include="Swashbuckle.AspNetCore" Version="8.0.0" />
<PackageVersion Include="System.Linq.Async" Version="6.0.1" />
6 changes: 6 additions & 0 deletions KernelMemory.sln
@@ -224,6 +224,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "InteractiveSetup", "tools\I
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "testapps", "testapps", "{AEF463F6-F813-498C-830A-3B4CED6DC4A7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "213-onnx", "examples\213-onnx\213-onnx.csproj", "{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -530,6 +532,9 @@ Global
{D6BC74A5-41C7-4A60-9C2E-F246DC40145A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D6BC74A5-41C7-4A60-9C2E-F246DC40145A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D6BC74A5-41C7-4A60-9C2E-F246DC40145A}.Release|Any CPU.Build.0 = Release|Any CPU
{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E7ECB0D7-A4AA-4529-B191-3FDFE8674784}.Release|Any CPU.ActiveCfg = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -630,6 +635,7 @@ Global
{82670921-FDCD-4672-84BD-4353F5AC24A0} = {AEF463F6-F813-498C-830A-3B4CED6DC4A7}
{CCA96699-483E-4B2A-95DF-25F0C98E3BB6} = {AEF463F6-F813-498C-830A-3B4CED6DC4A7}
{AEF463F6-F813-498C-830A-3B4CED6DC4A7} = {5E7DD43D-B5E7-4827-B57D-447E5B428589}
{E7ECB0D7-A4AA-4529-B191-3FDFE8674784} = {0A43C65C-6007-4BB4-B3FE-8D439FC91841}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC136C62-115C-41D1-B414-F9473EFF6EA8}
2 changes: 1 addition & 1 deletion examples/212-dotnet-ollama/Program.cs
@@ -48,7 +48,7 @@ public static async Task Main()
// Import some text
await memory.ImportTextAsync("Today is October 32nd, 2476");

// Generate an answer - This uses OpenAI for embeddings and finding relevant data, and LM Studio to generate an answer
// Generate an answer
var answer = await memory.AskAsync("What's the current date (don't check for validity)?");
Console.WriteLine("-------------------");
Console.WriteLine(answer.Question);
13 changes: 13 additions & 0 deletions examples/213-onnx/213-onnx.csproj
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<RollForward>LatestMajor</RollForward>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\extensions\KM\KernelMemory\KernelMemory.csproj" />
</ItemGroup>

</Project>
53 changes: 53 additions & 0 deletions examples/213-onnx/Program.cs
@@ -0,0 +1,53 @@
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.KernelMemory;

/* This example shows how to use KM with ONNX models
*
* 1. Download the phi-4 ONNX model from https://huggingface.co/microsoft/phi-4-onnx
*
* 2. Edit appsettings.json (or appsettings.Development.json) and set the model path.
*
* 3. Run the code
*/
public static class Program
{
public static async Task Main()
{
var onnxCfg = new OnnxConfig();
var azureOpenAIEmbeddingConfig = new AzureOpenAIConfig();

new ConfigurationBuilder()
.AddJsonFile("appsettings.json")
.AddJsonFile("appsettings.development.json", optional: true)
.AddJsonFile("appsettings.Development.json", optional: true)
.Build()
.BindSection("KernelMemory:Services:Onnx", onnxCfg)
.BindSection("KernelMemory:Services:AzureOpenAIEmbedding", azureOpenAIEmbeddingConfig);

var memory = new KernelMemoryBuilder()
.WithOnnxTextGeneration(onnxCfg)
.WithAzureOpenAITextEmbeddingGeneration(azureOpenAIEmbeddingConfig)
.Configure(builder => builder.Services.AddLogging(l =>
{
l.SetMinimumLevel(LogLevel.Warning);
l.AddSimpleConsole(c => c.SingleLine = true);
}))
.Build();

// Import some text
await memory.ImportTextAsync("Yesterday was October 21st, 2476");
await memory.ImportTextAsync("Tomorrow will be October 23rd, 2476");

// Generate an answer
var answer = await memory.AskAsync("What's the current date?");
Console.WriteLine(answer.Result);

/*

-- Output using phi-4-onnx:

Based on the provided information, if yesterday was October 21st, 2476, then today is October 22nd, 2476.
*/
}
}
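The example binds its settings through Kernel Memory's BindSection helper, shown above. For reference, a minimal sketch of the equivalent plain .NET binding, assuming the Microsoft.Extensions.Configuration.Binder package is available; this is an illustration, not part of the PR:

using Microsoft.Extensions.Configuration;
using Microsoft.KernelMemory;

// Minimal sketch: bind the same configuration section without KM's BindSection helper.
var config = new ConfigurationBuilder()
    .AddJsonFile("appsettings.json")
    .Build();

// Get<T>() comes from Microsoft.Extensions.Configuration.Binder (assumed referenced).
var onnxCfg = config.GetSection("KernelMemory:Services:Onnx").Get<OnnxConfig>()
              ?? new OnnxConfig();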
11 changes: 11 additions & 0 deletions examples/213-onnx/Properties/launchSettings.json
@@ -0,0 +1,11 @@
{
"profiles": {
"console": {
"commandName": "Project",
"launchBrowser": false,
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
}
}
}
46 changes: 46 additions & 0 deletions examples/213-onnx/appsettings.json
@@ -0,0 +1,46 @@
{
"KernelMemory": {
"Services": {
"Onnx": {
// Source: https://huggingface.co/microsoft/phi-4-onnx/tree/main
"TextModelDir": "/tmp/onnx/phi-4-onnx",
"MaxTokens": 16384
},
"AzureOpenAIEmbedding": {
// "ApiKey" or "AzureIdentity"
// AzureIdentity: use automatic Entra (AAD) authentication mechanism.
// You can test locally using the AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET env vars.
"Auth": "AzureIdentity",
// Optional when Auth == AzureIdentity. Leave it null to use the default audience,
// or set it to change the client audience.
"AzureIdentityAudience": null,
"Endpoint": "https://<...>.openai.azure.com/",
"APIKey": "",
// Your Azure Deployment name
"Deployment": "",
// The max number of tokens supported by the deployed model
// See https://learn.microsoft.com/azure/ai-services/openai/concepts/models
"MaxTokenTotal": 8191,
// Which tokenizer to use to correctly measure the size of chunks.
// Supported values: "p50k", "cl100k", "o200k". Leave it empty if unsure.
// - Use p50k for the old text-davinci-003 models
// - Use cl100k for the old gpt-3.5 and gpt-4 family, and for text embedding models
// - Use o200k for the most recent gpt-4o family
"Tokenizer": "cl100k",
// The number of dimensions output embeddings should have.
// Only supported in "text-embedding-3" and later models developed with
// MRL, see https://arxiv.org/abs/2205.13147
"EmbeddingDimensions": null,
// How many embeddings to calculate in parallel. The max value depends on
// the model and deployment in use.
// See https://learn.microsoft.com/azure/ai-services/openai/reference#embeddings
"MaxEmbeddingBatchSize": 1,
// How many times to retry in case of throttling.
"MaxRetries": 10,
// Thumbprints of certificates that should be trusted for HTTPS requests when SSL policy errors are detected.
// This should only be used for local development when using a proxy to call the OpenAI endpoints.
"TrustedCertificateThumbprints": []
}
}
}
}
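The Tokenizer setting above only affects how Kernel Memory measures chunk sizes before embedding. A minimal sketch of that measurement, assuming the Microsoft.ML.Tokenizers package and its TiktokenTokenizer API; the snippet is illustrative and not part of this PR:

using Microsoft.ML.Tokenizers;

// Minimal sketch: count tokens with cl100k_base, the encoding selected by "Tokenizer": "cl100k".
// TiktokenTokenizer.CreateForEncoding is assumed available in Microsoft.ML.Tokenizers.
var tokenizer = TiktokenTokenizer.CreateForEncoding("cl100k_base");
int tokens = tokenizer.CountTokens("Yesterday was October 21st, 2476");
Console.WriteLine($"{tokens} tokens"); // chunks must stay under MaxTokenTotal (8191)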
1 change: 1 addition & 0 deletions examples/README.md
@@ -21,6 +21,7 @@ Some examples about how to use Kernel Memory.
8. Local models and external connectors
* [Using custom LLMs](104-dotnet-custom-LLM)
* [Using local LLMs with Ollama](212-dotnet-ollama)
* [Using local LLMs with ONNX models](213-onnx)
* [Using local LLMs with llama.cpp via LlamaSharp](105-dotnet-serverless-llamasharp)
* [Using local models with LM Studio](208-dotnet-lmstudio)
* [Using Semantic Kernel LLM connectors](107-dotnet-SemanticKernel-TextCompletion)
1 change: 1 addition & 0 deletions extensions/Chunkers/Chunkers.UnitTests/doc2.md
@@ -450,6 +450,7 @@ Examples and Tools
8. Local models and external connectors
* [Using custom LLMs](examples/104-dotnet-custom-LLM)
* [Using local LLMs with Ollama](examples/212-dotnet-ollama)
* [Using local LLMs with ONNX models](examples/213-onnx)
* [Using local LLMs with llama.cpp via LlamaSharp](examples/105-dotnet-serverless-llamasharp)
* [Using local models with LM Studio](examples/208-dotnet-lmstudio)
* [Using Semantic Kernel LLM connectors](examples/107-dotnet-SemanticKernel-TextCompletion)
88 changes: 42 additions & 46 deletions extensions/ONNX/Onnx/OnnxTextGenerator.cs
@@ -33,7 +33,7 @@ public sealed class OnnxTextGenerator : ITextGenerator, IDisposable
/// Tokenizer used with the Onnx Generator and Model classes to produce tokens.
/// This has the potential to contain a null value, depending on the contents of the Model Directory.
/// </summary>
private readonly Tokenizer? _tokenizer = default;
private readonly Tokenizer _tokenizer;

/// <summary>
/// Tokenizer used for GetTokens() and CountTokens()
@@ -84,15 +84,55 @@ public OnnxTextGenerator(
this._log.LogDebug("Onnx model loaded");
}

/// <inheritdoc/>
public int CountTokens(string text)
{
// TODO: Implement with _tokenizer and remove _textTokenizer
return this._textTokenizer.CountTokens(text);
}

/// <inheritdoc/>
public IReadOnlyList<string> GetTokens(string text)
{
// TODO: Implement with _tokenizer and remove _textTokenizer
return this._textTokenizer.GetTokens(text);
}

/// <inheritdoc/>
public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(
string prompt,
TextGenerationOptions? options = null,
[EnumeratorCancellation] CancellationToken cancellationToken = default)
{
var tokens = this._tokenizer?.Encode(prompt);
// TODO: the prompt format should be configurable
using var sequences = this._tokenizer.Encode($"<|user|>{prompt}<|end|><|assistant|>");

using var generatorParams = new GeneratorParams(this._model);
this.SetGeneratorParams(generatorParams, options);

using var tokenizerStream = this._tokenizer.CreateStream();
using var generator = new Generator(this._model, generatorParams);
generator.AppendTokenSequences(sequences);

while (!generator.IsDone())
{
generator.GenerateNextToken();
var x = tokenizerStream.Decode(generator.GetSequence(0)[^1]);
yield return new GeneratedTextContent(x);
}

await Task.CompletedTask.ConfigureAwait(false);
}

/// <inheritdoc/>
public void Dispose()
{
this._model.Dispose();
this._tokenizer.Dispose();
}

private void SetGeneratorParams(GeneratorParams generatorParams, TextGenerationOptions? options)
{
generatorParams.SetSearchOption("max_length", this.MaxTokenTotal);
generatorParams.SetSearchOption("min_length", this._config.MinLength);
generatorParams.SetSearchOption("num_return_sequences", this._config.ResultsPerPrompt);
@@ -145,49 +185,5 @@ public async IAsyncEnumerable<GeneratedTextContent> GenerateTextAsync(

break;
}

generatorParams.SetInputSequences(tokens);

using (var generator = new Generator(this._model, generatorParams))
{
List<int> outputTokens = [];

while (!generator.IsDone() && cancellationToken.IsCancellationRequested == false)
{
generator.ComputeLogits();
generator.GenerateNextToken();

outputTokens.AddRange(generator.GetSequence(0));

if (outputTokens.Count > 0 && this._tokenizer != null)
{
var newToken = outputTokens[^1];
yield return this._tokenizer.Decode([newToken]);
}
}
}

await Task.CompletedTask.ConfigureAwait(false);
}

/// <inheritdoc/>
public int CountTokens(string text)
{
// TODO: Implement with _tokenizer and remove _textTokenizer
return this._textTokenizer.CountTokens(text);
}

/// <inheritdoc/>
public IReadOnlyList<string> GetTokens(string text)
{
// TODO: Implement with _tokenizer and remove _textTokenizer
return this._textTokenizer.GetTokens(text);
}

/// <inheritdoc/>
public void Dispose()
{
this._model?.Dispose();
this._tokenizer?.Dispose();
}
}
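For reference, the 0.6.0 generation loop introduced above, condensed into a standalone sketch: AppendTokenSequences replaces the removed SetInputSequences call, and the separate ComputeLogits step is gone. The model path and prompt template are placeholders taken from this example's configuration; this is an illustration, not the PR's code verbatim:

using Microsoft.ML.OnnxRuntimeGenAI;

// Minimal sketch of the OnnxRuntimeGenAI 0.6.0 streaming loop.
using var model = new Model("/tmp/onnx/phi-4-onnx"); // placeholder path
using var tokenizer = new Tokenizer(model);
using var sequences = tokenizer.Encode("<|user|>What's the current date?<|end|><|assistant|>");

using var generatorParams = new GeneratorParams(model);
generatorParams.SetSearchOption("max_length", 16384);

using var generator = new Generator(model, generatorParams);
generator.AppendTokenSequences(sequences);

using var stream = tokenizer.CreateStream();
while (!generator.IsDone())
{
    generator.GenerateNextToken();
    // Decode only the newest token, enabling incremental streaming output.
    Console.Write(stream.Decode(generator.GetSequence(0)[^1]));
}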