Description
I am creating a local LLM app, which works OK, but on response it stops at 170 words which are = 256 tokens.
The models I have tried are so far: Meta-Llama-3.1-8B-Instruct-Q8_0.gguf and mistral-7b-instruct-v0.2.Q8_0.gguf.
I have tried everything I am able to, and have used gemini and chatgpt to show solutions, but nothing seems to work.
The response stops, but the agent is not: Agent is thinking...
I am not using Python to work with the model but instead, I am using using:
LLama;using LLama.Common;using LLamaSharp.SemanticKernel;using LLamaSharp.SemanticKernel.ChatCompletion;using Microsoft.Extensions.Configuration;using Microsoft.Extensions.DependencyInjection;using Microsoft.Extensions.Logging;using Microsoft.Identity.Client;using Microsoft.SemanticKernel;using Microsoft.SemanticKernel.ChatCompletion;Any help is highly appreciated.
Question
What is the correct way to configure this for the model to generate longer responses?
Is this a limitation of LLamaSharp?
Would using python work better? If it does, How to use python instead of LLamaSharp?
This is my appsettings.json:
"AI": {"Local": {"Url": "http://localhost:11434/v1","ModelName": "llama3","ModelPath": "D:/AI/Models/mistral-7b-instruct-v0.2.Q8_0.gguf","ContextSize": 8192, // <-- This MUST match the model's training context"GpuLayerCount": 50, // Set to 0 if you are CPU-only"DefaultMaxTokens": 8096,"max_new_tokens": 600,"max_position_embeddings": 131072,"context_length' : 2048": null,"KernelStrategy": "Bare","Temperature": 0.2 // set temperature to 0.2 },"Azure": {"DeploymentName": "your-gpt4-deployment-name-in-azure-studio","Endpoint": "https://your-azure-resource-name.openai.azure.com/","ApiKey": "your-azure-openai-api-key" },"OpenAI": {"ModelId": "gpt-4-turbo","ApiKey": "sk-your-openai-api-key","OrgId": "org-your-openai-organization-id" },"AutoTrainingEnabled": true},This is my kernel code:
namespace Infrastructure.AI.Orchestration;public class KernelFactory : IKernelFactory{ private readonly IServiceProvider _sp; private readonly IConfiguration _config; private readonly ILogger<KernelFactory> _logger; public KernelFactory(IServiceProvider sp, IConfiguration config, ILogger<KernelFactory> logger) { _sp = sp; _config = config; _logger = logger; } public Kernel CreateKernel(AgentType agentRole, ModelCapability capability, bool bare = false) { var builder = Kernel.CreateBuilder(); // This call now automatically applies the InferenceParams fix via the extension method. builder.ConfigureLlm(_config, _logger); return builder.Build(); }}And these are my ConfigureLlm settings:
using LLama;using LLama.Common;using LLamaSharp.SemanticKernel;using LLamaSharp.SemanticKernel.ChatCompletion;namespace Infrastructure.AI.Models;public static class LlmProviderExtensions{ public static IKernelBuilder ConfigureLlm(this IKernelBuilder builder, IConfiguration config, ILogger logger) { var provider = config["AI:Provider"] ?? "Local"; logger.LogInformation("Configuring LLM provider based on appsettings: {Provider}", provider); switch (provider) { case "Local": var localConfig = config.GetSection("AI:Local"); var modelPath = ResolveModelPath(localConfig["ModelPath"]); if (!string.IsNullOrEmpty(modelPath) && File.Exists(modelPath)) { logger.LogInformation(">> LOADING LOCAL GGUF MODEL: {Path}", modelPath); try { var modelParams = new ModelParams(modelPath) { ContextSize = localConfig.GetValue<uint>("ContextSize", 4096), GpuLayerCount = localConfig.GetValue<int>("GpuLayerCount", 0), }; var weights = LLamaWeights.LoadFromFile(modelParams); var context = weights.CreateContext(modelParams); var inferenceParams = new InferenceParams() { MaxTokens = 4096 }; logger.LogInformation("Applying session-level MaxTokens override of {MaxTokens} to local model executor.", inferenceParams.MaxTokens); var executor = new InteractiveExecutor(context); var chatService = new LLamaSharpChatCompletion(executor); builder.Services.AddSingleton<IChatCompletionService>(chatService); logger.LogInformation(">> GGUF MODEL LOADED SUCCESSFULLY."); } catch (Exception ex) { logger.LogCritical(ex, "FATAL ERROR INITIALIZING LLAMASHARP. The application cannot start without a valid local model configuration."); throw; } } else { logger.LogCritical("Local model file not found at path specified in AI:Local:ModelPath. Application cannot start."); throw new FileNotFoundException("Local GGUF model file not found.", localConfig["ModelPath"]); } break; case "Azure": var azureConfig = config.GetSection("AI:Azure"); var azureDeploymentName = azureConfig["DeploymentName"]; var azureEndpoint = azureConfig["Endpoint"]; var azureApiKey = azureConfig["ApiKey"]; if (string.IsNullOrEmpty(azureDeploymentName) || string.IsNullOrEmpty(azureEndpoint) || string.IsNullOrEmpty(azureApiKey)) { throw new InvalidOperationException("Azure OpenAI configuration is incomplete. 'DeploymentName', 'Endpoint', and 'ApiKey' are required in the 'AI:Azure' section of appsettings.json."); } logger.LogInformation("Configuring LLM with Azure OpenAI. Deployment: {Deployment}", azureDeploymentName); builder.AddAzureOpenAIChatCompletion(azureDeploymentName, azureEndpoint, azureApiKey); break; case "OpenAI": default: var openAIConfig = config.GetSection("AI:OpenAI"); var openAIModelId = openAIConfig["ModelId"]; var openAIApiKey = openAIConfig["ApiKey"]; var openAIOrgId = openAIConfig["OrgId"]; // Optional if (string.IsNullOrEmpty(openAIModelId) || string.IsNullOrEmpty(openAIApiKey)) { throw new InvalidOperationException("OpenAI configuration is incomplete. 'ModelId' and 'ApiKey' are required in the 'AI:OpenAI' section of appsettings.json."); } logger.LogInformation("Configuring LLM with OpenAI. Model: {Model}", openAIModelId); builder.AddOpenAIChatCompletion(openAIModelId, openAIApiKey, openAIOrgId); break; } return builder; } private static string? ResolveModelPath(string? path) { if (string.IsNullOrEmpty(path)) return null; if (File.Exists(path)) return path; var relative = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, path); if (File.Exists(relative)) return relative; var projectRoot = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "../../../../../")); var rootPath = Path.Combine(projectRoot, path); if (File.Exists(rootPath)) return rootPath; return null; }}