innFactory · pull · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@librechat/agents",
-  "version": "3.1.65",
+  "version": "3.1.67",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",

diff --git a/src/agents/AgentContext.ts b/src/agents/AgentContext.ts
@@ -664,6 +664,17 @@ export class AgentContext {
     this.indexTokenCountMap = { ...baseTokenMap };
   }
 
+  /** Tool definitions counted for tokens: all except `defer_loading: true` entries whose name is not in `discoveredToolNames`. */
+  private getActiveToolDefinitions(): t.LCTool[] {
+    if (!this.toolDefinitions) {
+      return [];
+    }
+    return this.toolDefinitions.filter(
+      (def) =>
+        def.defer_loading !== true || this.discoveredToolNames.has(def.name)
+    );
+  }
+
   /**
    * Calculate tool tokens and add to instruction tokens
    * Note: System message tokens are calculated during systemRunnable creation
@@ -697,21 +708,19 @@ export class AgentContext {
       }
     }
 
-    if (this.toolDefinitions && this.toolDefinitions.length > 0) {
-      for (const def of this.toolDefinitions) {
-        if (countedToolNames.has(def.name)) {
-          continue;
-        }
-        const schema = {
-          type: 'function',
-          function: {
-            name: def.name,
-            description: def.description ?? '',
-            parameters: def.parameters ?? {},
-          },
-        };
-        toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
+    for (const def of this.getActiveToolDefinitions()) {
+      if (countedToolNames.has(def.name)) {
+        continue;
       }
+      const schema = {
+        type: 'function',
+        function: {
+          name: def.name,
+          description: def.description ?? '',
+          parameters: def.parameters ?? {},
+        },
+      };
+      toolTokens += tokenCounter(new SystemMessage(JSON.stringify(schema)));
     }
 
     const isAnthropic =
@@ -860,11 +869,15 @@ export class AgentContext {
   /**
    * Returns a structured breakdown of how the context token budget is consumed.
    * Useful for diagnostics when context overflow or pruning issues occur.
+   *
+   * Note: `toolCount` reflects discoveries immediately, while
+   * `toolSchemaTokens` is a snapshot from `calculateInstructionTokens` that is
+   * not recomputed by a mid-run `markToolsAsDiscovered`, so they can diverge.
    */
   getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
     const maxContextTokens = this.maxContextTokens ?? 0;
     const toolCount =
-      (this.tools?.length ?? 0) + (this.toolDefinitions?.length ?? 0);
+      (this.tools?.length ?? 0) + this.getActiveToolDefinitions().length;
     const messageCount = messages?.length ?? 0;
 
     let messageTokens = 0;

diff --git a/src/agents/__tests__/AgentContext.test.ts b/src/agents/__tests__/AgentContext.test.ts
@@ -375,6 +375,116 @@ describe('AgentContext', () => {
 
       expect(ctx.instructionTokens).toBeGreaterThan(initialTokens);
     });
+
+    it('excludes deferred-undiscovered toolDefinitions from toolSchemaTokens', async () => {
+      const activeDef: t.LCTool = {
+        name: 'active_tool',
+        description: 'Always loaded',
+        parameters: { type: 'object', properties: {} },
+      };
+      const deferredDef: t.LCTool = {
+        name: 'deferred_tool',
+        description: 'Loaded via tool search',
+        parameters: { type: 'object', properties: {} },
+        defer_loading: true,
+      };
+
+      const ctxBase = createBasicContext({
+        agentConfig: { toolDefinitions: [activeDef] },
+        tokenCounter: mockTokenCounter,
+      });
+      const ctxWithDeferred = createBasicContext({
+        agentConfig: { toolDefinitions: [activeDef, deferredDef] },
+        tokenCounter: mockTokenCounter,
+      });
+
+      await ctxBase.tokenCalculationPromise;
+      await ctxWithDeferred.tokenCalculationPromise;
+
+      expect(ctxWithDeferred.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
+    });
+
+    it('includes deferred toolDefinitions once discovered via discoveredTools input', async () => {
+      const toolDefinitions: t.LCTool[] = [
+        {
+          name: 'deferred_tool',
+          description: 'Loaded via tool search',
+          parameters: { type: 'object', properties: {} },
+          defer_loading: true,
+        },
+      ];
+
+      const ctxUndiscovered = createBasicContext({
+        agentConfig: { toolDefinitions },
+        tokenCounter: mockTokenCounter,
+      });
+      const ctxDiscovered = createBasicContext({
+        agentConfig: { toolDefinitions, discoveredTools: ['deferred_tool'] },
+        tokenCounter: mockTokenCounter,
+      });
+
+      await ctxUndiscovered.tokenCalculationPromise;
+      await ctxDiscovered.tokenCalculationPromise;
+
+      expect(ctxUndiscovered.toolSchemaTokens).toBe(0);
+      expect(ctxDiscovered.toolSchemaTokens).toBeGreaterThan(0);
+    });
+
+    it('getTokenBudgetBreakdown toolCount excludes deferred-undiscovered toolDefinitions', () => {
+      const toolDefinitions: t.LCTool[] = [
+        {
+          name: 'active',
+          parameters: { type: 'object', properties: {} },
+        },
+        {
+          name: 'deferred',
+          defer_loading: true,
+          parameters: { type: 'object', properties: {} },
+        },
+      ];
+
+      const ctx = createBasicContext({ agentConfig: { toolDefinitions } });
+
+      expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
+    });
+
+    it('getTokenBudgetBreakdown toolCount reflects newly discovered deferred tools', () => {
+      const toolDefinitions: t.LCTool[] = [
+        {
+          name: 'deferred',
+          defer_loading: true,
+          parameters: { type: 'object', properties: {} },
+        },
+      ];
+
+      const ctx = createBasicContext({ agentConfig: { toolDefinitions } });
+
+      expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(0);
+      ctx.markToolsAsDiscovered(['deferred']);
+      expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
+    });
+
+    it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
+      const toolDefinitions: t.LCTool[] = [
+        {
+          name: 'deferred',
+          description: 'Loaded via tool search',
+          parameters: { type: 'object', properties: {} },
+          defer_loading: true,
+        },
+      ];
+
+      const ctx = createBasicContext({
+        agentConfig: { toolDefinitions },
+        tokenCounter: mockTokenCounter,
+      });
+
+      await ctx.tokenCalculationPromise;
+      expect(ctx.toolSchemaTokens).toBe(0);
+
+      ctx.markToolsAsDiscovered(['deferred']);
+      expect(ctx.toolSchemaTokens).toBe(0);
+    });
   });
 
   describe('reset()', () => {

diff --git a/src/types/llm.ts b/src/types/llm.ts
@@ -45,7 +45,20 @@ export type AzureClientOptions = Partial<OpenAIChatInput> &
   } & BaseChatModelParams & {
     configuration?: OAIClientOptions;
   };
-export type ThinkingConfig = AnthropicInput['thinking'];
+/**
+ * Controls whether Claude's reasoning content is returned in adaptive
+ * thinking responses. Added for Claude Opus 4.7, which omits thinking by
+ * default unless the caller opts in with `'summarized'`.
+ * @see https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7#thinking-content-omitted-by-default
+ */
+export type ThinkingDisplay = 'summarized' | 'omitted';
+export type ThinkingConfigAdaptive = {
+  type: 'adaptive';
+  display?: ThinkingDisplay;
+};
+export type ThinkingConfig =
+  | NonNullable<AnthropicInput['thinking']>
+  | ThinkingConfigAdaptive;
 export type ChatOpenAIToolType =
   | BindToolsInput
   | OpenAIClient.ChatCompletionTool;
@@ -60,7 +73,8 @@ export type GoogleThinkingConfig = {
   thinkingLevel?: string;
 };
 export type OpenAIClientOptions = ChatOpenAIFields;
-export type AnthropicClientOptions = AnthropicInput & {
+export type AnthropicClientOptions = Omit<AnthropicInput, 'thinking'> & {
+  thinking?: ThinkingConfig;
   promptCache?: boolean;
 };
 export type MistralAIClientOptions = ChatMistralAIInput;