VYPR
High severity7.5OSV Advisory· Published Jul 8, 2025· Updated Apr 15, 2026

CVE-2025-53372

CVE-2025-53372

Description

node-code-sandbox-mcp is a Node.js–based Model Context Protocol server that spins up disposable Docker containers to execute arbitrary JavaScript. Prior to 1.3.0, a command injection vulnerability exists in the node-code-sandbox-mcp MCP Server. The vulnerability is caused by the unsanitized use of input parameters within a call to child_process.execSync, enabling an attacker to inject arbitrary system commands. Successful exploitation can lead to remote code execution under the server process's privileges on the host machine, bypassing the sandbox protection of running code inside docker. This vulnerability is fixed in 1.3.0.

Affected packages

Versions sourced from the GitHub Security Advisory.

PackageAffected versionsPatched versions
node-code-sandbox-mcpnpm
< 1.3.01.3.0

Affected products

1

Patches

4
e461a74ecb18

Merge pull request #93 from alfonsograziano/fix-cve-input-sanitization

21 files changed · +538 175
  • eslint.config.js+0 4 modified
    @@ -51,10 +51,6 @@ export default [
             ecmaVersion: 12,
             sourceType: 'module',
           },
    -      env: {
    -        node: true,
    -        es2021: true,
    -      },
         },
         plugins: {
           '@typescript-eslint': tsPlugin,
    
  • examples/playwright.js+1 1 modified
    @@ -30,7 +30,7 @@ async function main() {
         name: 'run_js_ephemeral',
         arguments: {
           // Use the ofcicial MS playwright image
    -      image: 'mcr.microsoft.com/playwright:v1.52.0-noble',
    +      image: 'mcr.microsoft.com/playwright:v1.53.2-noble',
           code: `
             import { chromium } from 'playwright';
       
    
  • .github/workflows/test.yaml+1 1 modified
    @@ -23,7 +23,7 @@ jobs:
           - name: Pull required Docker images
             run: |
               docker pull --platform=linux/amd64 node:lts-slim
    -          docker pull --platform=linux/amd64 mcr.microsoft.com/playwright:v1.52.0-noble
    +          docker pull --platform=linux/amd64 mcr.microsoft.com/playwright:v1.53.2-noble
               docker pull --platform=linux/amd64 alfonsograziano/node-code-sandbox-mcp:latest
     
           - name: Run tests
    
  • README.md+5 4 modified
    @@ -33,7 +33,7 @@ To use this MCP server, Docker must be installed and running on your machine.
     Example recommended images:
     
     - node:lts-slim
    -- mcr.microsoft.com/playwright:v1.52.0-noble
    +- mcr.microsoft.com/playwright:v1.53.2-noble
     - alfonsograziano/node-chartjs-canvas:latest
     
     ## Getting started
    @@ -130,11 +130,12 @@ docker run --rm -it \
     This bind-mounts your host folder into the container at the **same absolute path** and makes `FILES_DIR` available inside the MCP server.
     
     #### Ephemeral usage – **no persistent storage**
    +
     ```bash
     docker run --rm -it \
       -v /var/run/docker.sock:/var/run/docker.sock \
    -  alfonsograziano/node-code-sandbox-mcp stdio 
    -  ```
    +  alfonsograziano/node-code-sandbox-mcp stdio
    +```
     
     ### Usage with VS Code
     
    @@ -156,7 +157,7 @@ Install js-sandbox-mcp (NPX) Install js-sandbox-mcp (Docker)
                     "-v", "/var/run/docker.sock:/var/run/docker.sock",
                     "-v", "$HOME/Desktop/sandbox-output:/root", // optional
                     "-e", "FILES_DIR=$HOME/Desktop/sandbox-output",  // optional
    -                "-e", "SANDBOX_MEMORY_LIMIT=512m", 
    +                "-e", "SANDBOX_MEMORY_LIMIT=512m",
                     "-e", "SANDBOX_CPU_LIMIT=1",
                     "mcp/node-code-sandbox"
                   ]
    
  • src/dockerUtils.ts+16 7 modified
    @@ -1,10 +1,11 @@
    -import { exec, execSync } from 'child_process';
    +import { execFile, execFileSync } from 'child_process';
     import util from 'util';
     import { logger } from './logger.ts';
     import { getConfig } from './config.ts';
     import { textContent } from './types.ts';
    +import { sanitizeContainerId, sanitizeShellCommand } from './utils.ts';
     
    -const execPromise = util.promisify(exec);
    +const execFilePromise = util.promisify(execFile);
     
     /**
      * Attempts to forcefully stop and remove a Docker container by its ID.
    @@ -17,10 +18,13 @@ export async function forceStopContainer(containerId: string): Promise<void> {
         `Attempting to stop and remove container via dockerUtils: ${containerId}`
       );
       try {
    +    // Sanitize containerId
    +    const safeId = sanitizeContainerId(containerId);
    +    if (!safeId) throw new Error('Invalid containerId');
         // Force stop the container (ignores errors if already stopped)
    -    await execPromise(`docker stop ${containerId} || true`);
    +    await execFilePromise('docker', ['stop', safeId]);
         // Force remove the container (ignores errors if already removed)
    -    await execPromise(`docker rm -f ${containerId} || true`);
    +    await execFilePromise('docker', ['rm', '-f', safeId]);
         logger.info(
           `Successfully issued stop/remove commands for container: ${containerId}`
         );
    @@ -54,10 +58,15 @@ export function safeExecNodeInContainer({
       command?: string;
     }): NodeExecResult {
       const runStart = Date.now();
    -
    +  // Sanitize command
    +  const safeCmd = sanitizeShellCommand(command);
    +  if (!safeCmd) {
    +    return { output: null, error: new Error('Invalid command'), duration: 0 };
    +  }
       try {
    -    const output = execSync(
    -      `docker exec ${containerId} /bin/sh -c  ${JSON.stringify(command)}`,
    +    const output = execFileSync(
    +      'docker',
    +      ['exec', containerId, '/bin/sh', '-c', safeCmd],
           { encoding: 'utf8', timeout: timeoutMs }
         );
         return { output, error: null, duration: Date.now() - runStart };
    
  • src/tools/exec.ts+22 8 modified
    @@ -1,7 +1,12 @@
     import { z } from 'zod';
    -import { execSync } from 'node:child_process';
    +import { execFileSync } from 'node:child_process';
     import { type McpResponse, textContent } from '../types.ts';
    -import { DOCKER_NOT_RUNNING_ERROR, isDockerRunning } from '../utils.ts';
    +import {
    +  DOCKER_NOT_RUNNING_ERROR,
    +  isDockerRunning,
    +  sanitizeContainerId,
    +  sanitizeShellCommand,
    +} from '../utils.ts';
     
     export const argSchema = {
       container_id: z.string(),
    @@ -15,6 +20,13 @@ export default async function execInSandbox({
       container_id: string;
       commands: string[];
     }): Promise<McpResponse> {
    +  const validId = sanitizeContainerId(container_id);
    +  if (!validId) {
    +    return {
    +      content: [textContent('Invalid container ID')],
    +    };
    +  }
    +
       if (!isDockerRunning()) {
         return {
           content: [textContent(DOCKER_NOT_RUNNING_ERROR)],
    @@ -23,13 +35,15 @@ export default async function execInSandbox({
     
       const output: string[] = [];
       for (const cmd of commands) {
    +    const sanitizedCmd = sanitizeShellCommand(cmd);
    +    if (!sanitizedCmd)
    +      throw new Error(
    +        'Cannot run command as it contains dangerous metacharacters'
    +      );
         output.push(
    -      execSync(
    -        `docker exec ${container_id} /bin/sh -c ${JSON.stringify(cmd)}`,
    -        {
    -          encoding: 'utf8',
    -        }
    -      )
    +      execFileSync('docker', ['exec', validId, '/bin/sh', '-c', sanitizedCmd], {
    +        encoding: 'utf8',
    +      })
         );
       }
       return { content: [textContent(output.join('\n'))] };
    
  • src/tools/initialize.ts+23 10 modified
    @@ -1,5 +1,5 @@
     import { z } from 'zod';
    -import { execSync } from 'node:child_process';
    +import { execFileSync } from 'node:child_process';
     import { randomUUID } from 'node:crypto';
     import { type McpResponse, textContent } from '../types.ts';
     import {
    @@ -11,6 +11,7 @@ import {
     import { getMountFlag } from '../runUtils.ts';
     import { activeSandboxContainers } from '../containerUtils.ts';
     import { logger } from '../logger.ts';
    +import stopSandbox from './stop.ts';
     
     // Instead of importing serverRunId directly, we'll have a variable that gets set
     let serverRunId = 'unknown';
    @@ -52,17 +53,29 @@ export default async function initializeSandbox({
         `mcp-server-run-id=${serverRunId}`,
         `mcp-creation-timestamp=${creationTimestamp}`,
       ];
    -  const labelArgs = labels.map((label) => `--label "${label}"`).join(' ');
       const { memFlag, cpuFlag } = computeResourceLimits(image);
       const mountFlag = getMountFlag();
     
       try {
    -    execSync(
    -      `docker run -d ${portOption} ${memFlag} ${cpuFlag} ` +
    -      `--workdir /workspace ${mountFlag} ` +
    -      `${labelArgs} ` + // Add labels here
    -      `--name ${containerId} ${image} tail -f /dev/null`
    -    );
    +    const args = [
    +      'run',
    +      '-d',
    +      ...portOption.split(' '),
    +      ...memFlag.split(' '),
    +      ...cpuFlag.split(' '),
    +      '--workdir',
    +      '/workspace',
    +      ...mountFlag.split(' '),
    +      ...labels.flatMap((label) => ['--label', label]),
    +      '--name',
    +      containerId,
    +      image,
    +      'tail',
    +      '-f',
    +      '/dev/null',
    +    ].filter(Boolean);
    +
    +    execFileSync('docker', args, { stdio: 'ignore' });
     
         // Register the container only after successful creation
         activeSandboxContainers.set(containerId, creationTimestamp);
    @@ -73,9 +86,9 @@ export default async function initializeSandbox({
         };
       } catch (error) {
         logger.error(`Failed to initialize container ${containerId}`, error);
    -    // Ensure partial cleanup if execSync fails after container might be created but before registration
    +    // Ensure partial cleanup if execFileSync fails after container might be created but before registration
         try {
    -      execSync(`docker rm -f ${containerId}`);
    +      stopSandbox({ container_id: containerId });
         } catch (cleanupError: unknown) {
           // Ignore cleanup errors - log it just in case
           logger.warning(
    
  • src/tools/runJsEphemeral.ts+30 13 modified
    @@ -1,5 +1,5 @@
     import { z } from 'zod';
    -import { execSync } from 'child_process';
    +import { execFileSync } from 'child_process';
     import tmp from 'tmp';
     import { randomUUID } from 'crypto';
     import { type McpResponse, textContent } from '../types.ts';
    @@ -35,7 +35,7 @@ export const argSchema = {
         .default(DEFAULT_NODE_IMAGE)
         .describe(
           'Docker image to use for ephemeral execution. e.g. ' +
    -      generateSuggestedImages()
    +        generateSuggestedImages()
         ),
       // We use an array of { name, version } items instead of a record
       // because the OpenAI function-calling schema doesn’t reliably support arbitrary
    @@ -47,8 +47,8 @@ export const argSchema = {
         .default([])
         .describe(
           'A list of npm dependencies to install before running the code. ' +
    -      'Each item must have a `name` (package) and `version` (range). ' +
    -      'If none, returns an empty array.'
    +        'Each item must have a `name` (package) and `version` (range). ' +
    +        'If none, returns an empty array.'
         ),
       code: z
         .string()
    @@ -79,15 +79,31 @@ export default async function runJsEphemeral({
     
       try {
         // Start an ephemeral container
    -    execSync(
    -      `docker run -d --network host ${memFlag} ${cpuFlag} ` +
    -      `--workdir /workspace ${mountFlag} ` +
    -      `--name ${containerId} ${image} tail -f /dev/null`
    -    );
    +    execFileSync('docker', [
    +      'run',
    +      '-d',
    +      '--network',
    +      'host',
    +      ...memFlag.split(' ').filter(Boolean),
    +      ...cpuFlag.split(' ').filter(Boolean),
    +      '--workdir',
    +      '/workspace',
    +      ...mountFlag.split(' ').filter(Boolean),
    +      '--name',
    +      containerId,
    +      image,
    +      'tail',
    +      '-f',
    +      '/dev/null',
    +    ]);
     
         // Prepare workspace locally
         const localWorkspace = await prepareWorkspace({ code, dependenciesRecord });
    -    execSync(`docker cp ${localWorkspace.name}/. ${containerId}:/workspace`);
    +    execFileSync('docker', [
    +      'cp',
    +      `${localWorkspace.name}/.`,
    +      `${containerId}:/workspace`,
    +    ]);
     
         // Generate snapshot of the workspace
         const snapshotStartTime = Date.now();
    @@ -99,8 +115,9 @@ export default async function runJsEphemeral({
     
         if (dependencies.length > 0) {
           const installStart = Date.now();
    -      const installOutput = execSync(
    -        `docker exec ${containerId} /bin/sh -c ${JSON.stringify(installCmd)}`,
    +      const installOutput = execFileSync(
    +        'docker',
    +        ['exec', containerId, '/bin/sh', '-c', installCmd],
             { encoding: 'utf8' }
           );
           telemetry.installTimeMs = Date.now() - installStart;
    @@ -134,7 +151,7 @@ export default async function runJsEphemeral({
           ],
         };
       } finally {
    -    execSync(`docker rm -f ${containerId}`);
    +    execFileSync('docker', ['rm', '-f', containerId]);
         tmpDir.removeCallback();
       }
     }
    
  • src/tools/runJs.ts+28 11 modified
    @@ -1,11 +1,12 @@
     import { z } from 'zod';
    -import { execSync } from 'node:child_process';
    +import { execFileSync } from 'node:child_process';
     import { type McpResponse, textContent } from '../types.ts';
     import { prepareWorkspace } from '../runUtils.ts';
     import {
       DOCKER_NOT_RUNNING_ERROR,
       isDockerRunning,
       waitForPortHttp,
    +  sanitizeContainerId,
     } from '../utils.ts';
     import {
       changesToMcpContent,
    @@ -60,6 +61,11 @@ export default async function runJs({
       dependencies?: DependenciesArray;
       listenOnPort?: number;
     }): Promise<McpResponse> {
    +  const validId = sanitizeContainerId(container_id);
    +  if (!validId) {
    +    return { content: [textContent('Invalid container ID')] };
    +  }
    +
       if (!isDockerRunning()) {
         return { content: [textContent(DOCKER_NOT_RUNNING_ERROR)] };
       }
    @@ -71,7 +77,11 @@ export default async function runJs({
     
       // Create workspace in container
       const localWorkspace = await prepareWorkspace({ code, dependenciesRecord });
    -  execSync(`docker cp ${localWorkspace.name}/. ${container_id}:/workspace`);
    +  execFileSync('docker', [
    +    'cp',
    +    `${localWorkspace.name}/.`,
    +    `${validId}:/workspace`,
    +  ]);
     
       let rawOutput: string = '';
     
    @@ -82,10 +92,15 @@ export default async function runJs({
       if (listenOnPort) {
         if (dependencies.length > 0) {
           const installStart = Date.now();
    -      const installOutput = execSync(
    -        `docker exec ${container_id} /bin/sh -c ${JSON.stringify(
    -          `npm install --omit=dev --prefer-offline --no-audit --loglevel=error`
    -        )}`,
    +      const installOutput = execFileSync(
    +        'docker',
    +        [
    +          'exec',
    +          validId,
    +          '/bin/sh',
    +          '-c',
    +          'npm install --omit=dev --prefer-offline --no-audit --loglevel=error',
    +        ],
             { encoding: 'utf8' }
           );
           telemetry.installTimeMs = Date.now() - installStart;
    @@ -96,7 +111,7 @@ export default async function runJs({
         }
     
         const { error, duration } = safeExecNodeInContainer({
    -      containerId: container_id,
    +      containerId: validId,
           command: `nohup node index.js > output.log 2>&1 &`,
         });
         telemetry.runTimeMs = duration;
    @@ -107,9 +122,11 @@ export default async function runJs({
       } else {
         if (dependencies.length > 0) {
           const installStart = Date.now();
    -      const fullCmd = `npm install --omit=dev --prefer-offline --no-audit --loglevel=error`;
    -      const installOutput = execSync(
    -        `docker exec ${container_id} /bin/sh -c ${JSON.stringify(fullCmd)}`,
    +      const fullCmd =
    +        'npm install --omit=dev --prefer-offline --no-audit --loglevel=error';
    +      const installOutput = execFileSync(
    +        'docker',
    +        ['exec', validId, '/bin/sh', '-c', fullCmd],
             { encoding: 'utf8' }
           );
           telemetry.installTimeMs = Date.now() - installStart;
    @@ -120,7 +137,7 @@ export default async function runJs({
         }
     
         const { output, error, duration } = safeExecNodeInContainer({
    -      containerId: container_id,
    +      containerId: validId,
         });
     
         if (output) rawOutput = output;
    
  • src/tools/stop.ts+20 10 modified
    @@ -1,10 +1,16 @@
     import { z } from 'zod';
    -import { execSync } from 'node:child_process';
    +import { execFileSync } from 'node:child_process';
     import { type McpResponse, textContent } from '../types.ts';
    -import { DOCKER_NOT_RUNNING_ERROR, isDockerRunning } from '../utils.ts';
    +import {
    +  DOCKER_NOT_RUNNING_ERROR,
    +  isDockerRunning,
    +  sanitizeContainerId,
    +} from '../utils.ts';
     import { activeSandboxContainers } from '../containerUtils.ts';
     
    -export const argSchema = { container_id: z.string() };
    +export const argSchema = {
    +  container_id: z.string().regex(/^[a-zA-Z0-9_.-]+$/, 'Invalid container ID'),
    +};
     
     export default async function stopSandbox({
       container_id,
    @@ -17,13 +23,17 @@ export default async function stopSandbox({
         };
       }
     
    +  const validId = sanitizeContainerId(container_id);
    +  if (!validId) {
    +    return {
    +      content: [textContent('Invalid container ID')],
    +    };
    +  }
    +
       try {
    -    // Directly use execSync for removing the container as expected by the test
    -    execSync(`docker rm -f ${container_id}`);
    -    activeSandboxContainers.delete(container_id);
    -    // console.log(
    -    //   `[stopSandbox] Removed container ${container_id} from registry.`
    -    // );
    +    // Use execFileSync with validated container_id
    +    execFileSync('docker', ['rm', '-f', validId]);
    +    activeSandboxContainers.delete(validId);
     
         return {
           content: [textContent(`Container ${container_id} removed.`)],
    @@ -36,7 +46,7 @@ export default async function stopSandbox({
         );
     
         // Still remove from our registry even if Docker command failed
    -    activeSandboxContainers.delete(container_id);
    +    activeSandboxContainers.delete(validId);
     
         return {
           content: [
    
  • src/utils.ts+46 3 modified
    @@ -1,5 +1,5 @@
     import { existsSync, readFileSync } from 'fs';
    -import { execSync } from 'node:child_process';
    +import { execFileSync } from 'node:child_process';
     import { getConfig } from './config.ts';
     
     export function isRunningInDocker() {
    @@ -55,7 +55,7 @@ export const suggestedImages = {
         description: 'Node.js LTS version, slim variant.',
         reason: 'Lightweight and fast for JavaScript execution tasks.',
       },
    -  'mcr.microsoft.com/playwright:v1.52.0-noble': {
    +  'mcr.microsoft.com/playwright:v1.53.2-noble': {
         description: 'Playwright image for browser automation.',
         reason: 'Preconfigured for running Playwright scripts.',
       },
    @@ -103,7 +103,7 @@ export async function waitForPortHttp(
     
     export function isDockerRunning() {
       try {
    -    execSync('docker info');
    +    execFileSync('docker', ['info']);
         return true;
         // eslint-disable-next-line @typescript-eslint/no-unused-vars
       } catch (e) {
    @@ -141,3 +141,46 @@ export function computeResourceLimits(image: string) {
         cpuFlag: cpus ? `--cpus ${cpus}` : '',
       };
     }
    +
    +/**
    + * Sanitizes and validates a Docker container ID or name.
    + * Docker container names/IDs must match [a-zA-Z0-9][a-zA-Z0-9_.-]*
    + * @param id The container ID or name to validate
    + * @returns The sanitized ID if valid, otherwise null
    + */
    +export function sanitizeContainerId(id: string): string | null {
    +  // Docker container names/IDs: https://docs.docker.com/engine/reference/commandline/run/#container-name
    +  // Allow alphanumerics, underscores, periods, dashes. Must start with alphanumeric.
    +  if (typeof id !== 'string') return null;
    +  if (/^[a-zA-Z0-9][a-zA-Z0-9_.-]*$/.test(id)) return id;
    +  return null;
    +}
    +
    +/**
    + * Sanitizes and validates a Docker image name (optionally with tag).
    + * @param image The image name to validate
    + * @returns The sanitized image name if valid, otherwise null
    + */
    +export function sanitizeImageName(image: string): string | null {
    +  // Docker image names: [registry/][user/]repo[:tag]
    +  // Allow alphanumerics, underscores, periods, dashes, slashes, colons
    +  if (typeof image !== 'string') return null;
    +  if (/^[a-zA-Z0-9_.:/-]+$/.test(image)) return image;
    +  return null;
    +}
    +
    +/**
    + * Sanitizes a shell command to be run inside a container. This is a basic check;
    + * for more advanced needs, consider whitelisting allowed commands.
    + * @param cmd The command string
    + * @returns The sanitized command if valid, otherwise null
    + */
    +export function sanitizeShellCommand(cmd: string): string | null {
    +  // For now, just check it's a non-empty string and doesn't contain dangerous metacharacters
    +  if (typeof cmd !== 'string' || !cmd.trim()) return null;
    +  // Disallow command substitution (backticks and $()) which are most dangerous
    +  if (/[`]|\$\([^)]+\)/.test(cmd)) return null;
    +  // Allow >, <, &, | for redirection and backgrounding, as needed for listenOnPort
    +  // Still block backticks and $() for command substitution
    +  return cmd;
    +}
    
  • test/execInSandbox.test.ts+63 1 modified
    @@ -1,4 +1,12 @@
    -import { describe, it, expect, beforeAll, afterAll } from 'vitest';
    +import {
    +  describe,
    +  it,
    +  expect,
    +  beforeAll,
    +  afterAll,
    +  beforeEach,
    +  afterEach,
    +} from 'vitest';
     import initializeSandbox from '../src/tools/initialize.ts';
     import execInSandbox from '../src/tools/exec.ts';
     import stopSandbox from '../src/tools/stop.ts';
    @@ -76,3 +84,57 @@ describe('execInSandbox', () => {
         }
       });
     });
    +
    +describe('Command injection prevention', () => {
    +  beforeEach(() => {
    +    vi.doMock('node:child_process', () => ({
    +      execFileSync: vi.fn(() => Buffer.from('')),
    +      execFile: vi.fn(() => Buffer.from('')),
    +    }));
    +  });
    +
    +  afterEach(() => {
    +    vi.resetModules();
    +    vi.resetAllMocks();
    +    vi.restoreAllMocks();
    +  });
    +
    +  const dangerousIds = [
    +    '$(touch /tmp/pwned)',
    +    '`touch /tmp/pwned`',
    +    'bad;id',
    +    'js-sbx-123 && rm -rf /',
    +    'js-sbx-123 | echo hacked',
    +    'js-sbx-123 > /tmp/pwned',
    +    'js-sbx-123 $(id)',
    +    'js-sbx-123; echo pwned',
    +    'js-sbx-123`echo pwned`',
    +    'js-sbx-123/../../etc/passwd',
    +    'js-sbx-123\nrm -rf /',
    +    '',
    +    ' ',
    +    'js-sbx-123$',
    +    'js-sbx-123#',
    +  ];
    +
    +  dangerousIds.forEach((payload) => {
    +    it(`should reject dangerous container_id: "${payload}"`, async () => {
    +      const { default: execInSandbox } = await import('../src/tools/exec.ts');
    +      const childProcess = await import('node:child_process');
    +      const result = await execInSandbox({
    +        container_id: payload,
    +        commands: ['echo test'],
    +      });
    +      expect(result).toEqual({
    +        content: [
    +          {
    +            type: 'text',
    +            text: 'Invalid container ID',
    +          },
    +        ],
    +      });
    +      const execFileSyncCall = vi.mocked(childProcess.execFileSync).mock.calls;
    +      expect(execFileSyncCall.length).toBe(0);
    +    });
    +  });
    +});
    
  • test/initializeSandbox.test.ts+13 7 modified
    @@ -16,7 +16,7 @@ describe('initializeSandbox', () => {
       beforeEach(() => {
         vi.resetAllMocks();
         vi.spyOn(crypto, 'randomUUID').mockReturnValue(fakeUUID);
    -    vi.spyOn(childProcess, 'execSync').mockImplementation(() =>
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() =>
           Buffer.from('')
         );
         vi.spyOn(types, 'textContent').mockImplementation((name) => ({
    @@ -40,10 +40,14 @@ describe('initializeSandbox', () => {
     
       it('should use the default image when none is provided', async () => {
         const result = await initializeSandbox({});
    -    expect(childProcess.execSync).toHaveBeenCalledWith(
    -      expect.stringContaining(
    -        `--name ${fakeContainerName} ${utils.DEFAULT_NODE_IMAGE}`
    -      )
    +    expect(childProcess.execFileSync).toHaveBeenCalledWith(
    +      'docker',
    +      expect.arrayContaining([
    +        '--name',
    +        fakeContainerName,
    +        utils.DEFAULT_NODE_IMAGE,
    +      ]),
    +      expect.any(Object)
         );
         expect(result).toEqual({
           content: [{ type: 'text', text: fakeContainerName }],
    @@ -53,8 +57,10 @@ describe('initializeSandbox', () => {
       it('should use the provided image', async () => {
         const customImage = 'node:20-alpine';
         const result = await initializeSandbox({ image: customImage });
    -    expect(childProcess.execSync).toHaveBeenCalledWith(
    -      expect.stringContaining(`--name ${fakeContainerName} ${customImage}`)
    +    expect(childProcess.execFileSync).toHaveBeenCalledWith(
    +      'docker',
    +      expect.arrayContaining(['--name', fakeContainerName, customImage]),
    +      expect.any(Object)
         );
         if (result.content[0].type === 'text') {
           expect(result.content[0].text).toBe(fakeContainerName);
    
  • test/initialize.test.ts+45 39 modified
    @@ -1,17 +1,15 @@
     import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
    -import { setServerRunId } from '../src/tools/initialize.ts';
     import * as childProcess from 'node:child_process';
     import * as utils from '../src/utils.ts';
     
    -vi.mock('node:child_process');
    +vi.mock('node:child_process', () => ({
    +  execFileSync: vi.fn(() => Buffer.from('')),
    +}));
     vi.mock('../src/utils');
     vi.mocked(utils).computeResourceLimits = vi
       .fn()
       .mockReturnValue({ memFlag: '', cpuFlag: '' });
    -vi.mock('../src/runUtils', () => ({
    -    getFilesDir: vi.fn().mockReturnValue(undefined),
    -    getMountFlag: vi.fn().mockReturnValue(''),
    -  }));
    +
     vi.mock('../src/containerUtils', () => ({
       activeSandboxContainers: new Map(),
     }));
    @@ -24,9 +22,6 @@ describe('initialize module', () => {
           memFlag: '',
           cpuFlag: '',
         });
    -    vi.spyOn(childProcess, 'execSync').mockImplementation(() =>
    -      Buffer.from('')
    -    );
       });
     
       afterEach(() => {
    @@ -35,10 +30,14 @@ describe('initialize module', () => {
     
       describe('setServerRunId', () => {
         it('should set the server run ID correctly', async () => {
    -      // Import the module that uses the serverRunId
    -      const { default: initializeSandbox } = await import(
    -        '../src/tools/initialize.ts'
    -      );
    +      vi.doMock('../src/runUtils', () => ({
    +        getFilesDir: vi.fn().mockReturnValue(''),
    +        getMountFlag: vi.fn().mockReturnValue(''),
    +      }));
    +      vi.resetModules();
    +      const mod = await import('../src/tools/initialize.ts');
    +      const initializeSandbox = mod.default;
    +      const setServerRunId = mod.setServerRunId;
     
           // Set a test server run ID
           const testId = 'test-server-run-id';
    @@ -47,16 +46,21 @@ describe('initialize module', () => {
           // Call initialize function to create a container
           await initializeSandbox({});
     
    -      // Verify that execSync was called with the correct label containing our test ID
    -      expect(childProcess.execSync).toHaveBeenCalled();
    -      const execSyncCall = vi.mocked(childProcess.execSync).mock
    -        .calls[0][0] as string;
    -
    -      expect(execSyncCall).toContain(`--label "mcp-server-run-id=${testId}"`);
    +      // Verify that execFileSync was called with the correct label containing our test ID
    +      expect(childProcess.execFileSync).toHaveBeenCalled();
    +      const execFileSyncCall = vi.mocked(childProcess.execFileSync).mock
    +        .calls[0][1] as string[];
    +      // Join the args array to a string for easier matching
    +      expect(execFileSyncCall.join(' ')).toContain(
    +        `--label mcp-server-run-id=${testId}`
    +      );
         });
     
         it('should use unknown as the default server run ID if not set', async () => {
    -      // Force re-import of the module to reset the serverRunId
    +      vi.doMock('../src/runUtils', () => ({
    +        getFilesDir: vi.fn().mockReturnValue(''),
    +        getMountFlag: vi.fn().mockReturnValue(''),
    +      }));
           vi.resetModules();
           const { default: initializeSandbox } = await import(
             '../src/tools/initialize.ts'
    @@ -65,47 +69,49 @@ describe('initialize module', () => {
           // Call initialize without setting the server run ID
           await initializeSandbox({});
     
    -      // Verify that execSync was called with the default "unknown" ID
    -      expect(childProcess.execSync).toHaveBeenCalled();
    -      const execSyncCall = vi.mocked(childProcess.execSync).mock
    -        .calls[0][0] as string;
    -
    -      expect(execSyncCall).toContain('--label "mcp-server-run-id=unknown"');
    +      // Verify that execFileSync was called with the default "unknown" ID
    +      expect(childProcess.execFileSync).toHaveBeenCalled();
    +      const execFileSyncCall = vi.mocked(childProcess.execFileSync).mock
    +        .calls[0][1] as string[];
    +      expect(execFileSyncCall.join(' ')).toContain(
    +        '--label mcp-server-run-id=unknown'
    +      );
         });
       });
    -  
    +
       describe('volume mount behaviour', () => {
         it('does NOT include a -v flag when FILES_DIR is unset', async () => {
    -
    +      vi.doMock('../src/runUtils', () => ({
    +        getFilesDir: vi.fn().mockReturnValue(''),
    +        getMountFlag: vi.fn().mockReturnValue(''),
    +      }));
    +      vi.resetModules();
           const { default: initializeSandbox } = await import(
             '../src/tools/initialize.ts'
           );
     
           await initializeSandbox({});
     
    -      const cmd = vi.mocked(childProcess.execSync).mock.calls[0][0] as string;
    -      expect(cmd).not.toContain('-v ');
    +      const args = vi.mocked(childProcess.execFileSync).mock
    +        .calls[0][1] as string[];
    +      expect(args.join(' ')).not.toContain('-v ');
         });
     
         it('includes the -v flag when getMountFlag returns one', async () => {
    -
           vi.doMock('../src/runUtils', () => ({
             getFilesDir: vi.fn().mockReturnValue('/host/dir'),
    -        getMountFlag: vi
    -          .fn()
    -          .mockReturnValue('-v /host/dir:/workspace/files'),
    +        getMountFlag: vi.fn().mockReturnValue('-v /host/dir:/workspace/files'),
           }));
    -      vi.resetModules(); 
    -
    +      vi.resetModules();
           const { default: initializeSandbox } = await import(
             '../src/tools/initialize.ts'
           );
     
           await initializeSandbox({});
     
    -      const cmd = vi.mocked(childProcess.execSync).mock.calls[0][0] as string;
    -      expect(cmd).toContain('-v /host/dir:/workspace/files');
    +      const args = vi.mocked(childProcess.execFileSync).mock
    +        .calls[0][1] as string[];
    +      expect(args.join(' ')).toContain('-v /host/dir:/workspace/files');
         });
       });
     });
    -
    
  • test/runJs-cache.test.ts+2 5 modified
    @@ -3,6 +3,7 @@ import * as tmp from 'tmp';
     import { execSync } from 'node:child_process';
     import runJs from '../src/tools/runJs.ts';
     import { DEFAULT_NODE_IMAGE } from '../src/utils.ts';
    +import { forceStopContainer } from '../src/dockerUtils.ts';
     
     function startSandboxContainer(): string {
       return execSync(
    @@ -12,10 +13,6 @@ function startSandboxContainer(): string {
     }
     let tmpDir: tmp.DirResult;
     
    -function stopSandboxContainer(containerId: string) {
    -  execSync(`docker rm -f ${containerId}`);
    -}
    -
     describe('runJs npm install benchmarking', () => {
       beforeEach(() => {
         tmpDir = tmp.dirSync({ unsafeCleanup: true });
    @@ -80,7 +77,7 @@ describe('runJs npm install benchmarking', () => {
             throw error; // Re-throw the error to fail the test
           }
         } finally {
    -      stopSandboxContainer(containerId);
    +      forceStopContainer(containerId);
         }
       }, 20_000);
     });
    
  • test/runJsEphemeral.test.ts+1 1 modified
    @@ -369,7 +369,7 @@ describe('runJsEphemeral', () => {
                 version: '^1.52.0',
               },
             ],
    -        image: 'mcr.microsoft.com/playwright:v1.52.0-noble',
    +        image: 'mcr.microsoft.com/playwright:v1.53.2-noble',
           });
     
           expect(result).toBeDefined();
    
  • test/runJs.test.ts+62 0 modified
    @@ -5,6 +5,14 @@ import runJs, { argSchema } from '../src/tools/runJs.ts';
     import initializeSandbox from '../src/tools/initialize.ts';
     import stopSandbox from '../src/tools/stop.ts';
     import type { McpContentText } from '../src/types.ts';
    +import { vi } from 'vitest';
    +// import * as childProcess from 'node:child_process'; // REMOVE THIS LINE
    +
    +// REMOVE THIS GLOBAL MOCK:
    +// vi.mock('node:child_process', () => ({
    +//   execFileSync: vi.fn(() => Buffer.from('')),
    +//   execFile: vi.fn(() => Buffer.from('')),
    +// }));
     
     describe('argSchema', () => {
       it('should accept code and container_id and set defaults', () => {
    @@ -217,3 +225,57 @@ describe('runJs basic execution', () => {
         expect(telemetryText).toBeDefined();
       });
     }, 10_000);
    +
    +describe('Command injection prevention', () => {
    +  beforeEach(() => {
    +    vi.doMock('node:child_process', () => ({
    +      execFileSync: vi.fn(() => Buffer.from('')),
    +      execFile: vi.fn(() => Buffer.from('')),
    +    }));
    +  });
    +
    +  afterEach(() => {
    +    vi.resetModules();
    +    vi.resetAllMocks();
    +    vi.restoreAllMocks();
    +  });
    +
    +  const dangerousIds = [
    +    '$(touch /tmp/pwned)',
    +    '`touch /tmp/pwned`',
    +    'bad;id',
    +    'js-sbx-123 && rm -rf /',
    +    'js-sbx-123 | echo hacked',
    +    'js-sbx-123 > /tmp/pwned',
    +    'js-sbx-123 $(id)',
    +    'js-sbx-123; echo pwned',
    +    'js-sbx-123`echo pwned`',
    +    'js-sbx-123/../../etc/passwd',
    +    'js-sbx-123\nrm -rf /',
    +    '',
    +    ' ',
    +    'js-sbx-123$',
    +    'js-sbx-123#',
    +  ];
    +
    +  dangerousIds.forEach((payload) => {
    +    it(`should reject dangerous container_id: "${payload}"`, async () => {
    +      const { default: runJs } = await import('../src/tools/runJs.ts');
    +      const childProcess = await import('node:child_process');
    +      const result = await runJs({
    +        container_id: payload,
    +        code: 'console.log("test")',
    +      });
    +      expect(result).toEqual({
    +        content: [
    +          {
    +            type: 'text',
    +            text: 'Invalid container ID',
    +          },
    +        ],
    +      });
    +      const execFileSyncCall = vi.mocked(childProcess.execFileSync).mock.calls;
    +      expect(execFileSyncCall.length).toBe(0);
    +    });
    +  });
    +});
    
  • test/stopSandbox.test.ts+81 41 modified
    @@ -1,79 +1,71 @@
    -import { describe, it, expect, vi, beforeEach } from 'vitest';
    +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
     import stopSandbox from '../src/tools/stop.ts';
     import * as childProcess from 'node:child_process';
    -import * as types from '../src/types.ts';
     import * as utils from '../src/utils.ts';
     
     vi.mock('node:child_process');
    -vi.mock('../src/types');
    -vi.mock('../src/utils');
    +vi.mock('../src/types', () => ({
    +  textContent: (text: string) => ({ type: 'text', text }),
    +}));
    +vi.mock('../src/utils.ts', async () => {
    +  const actual =
    +    await vi.importActual<typeof import('../src/utils.ts')>('../src/utils.ts');
    +  return {
    +    ...actual,
    +    DOCKER_NOT_RUNNING_ERROR: actual.DOCKER_NOT_RUNNING_ERROR,
    +    isDockerRunning: vi.fn(() => true),
    +    sanitizeContainerId: actual.sanitizeContainerId,
    +  };
    +});
     
     describe('stopSandbox', () => {
    -  const fakeContainerId = 'js-sbx-abc123';
    +  const fakeContainerId = 'js-sbx-test123'; // valid container ID
     
       beforeEach(() => {
         vi.resetAllMocks();
    -    vi.spyOn(childProcess, 'execSync').mockImplementation(() =>
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() =>
           Buffer.from('')
         );
    -    vi.spyOn(types, 'textContent').mockImplementation((msg) => ({
    -      type: 'text',
    -      text: msg,
    -    }));
    -    vi.spyOn(utils, 'isDockerRunning').mockReturnValue(true);
    +  });
    +  afterEach(() => {
    +    vi.restoreAllMocks();
       });
     
       it('should remove the container with the given ID', async () => {
         const result = await stopSandbox({ container_id: fakeContainerId });
    -
    -    expect(childProcess.execSync).toHaveBeenCalledWith(
    -      `docker rm -f ${fakeContainerId}`
    -    );
    -
    +    expect(childProcess.execFileSync).toHaveBeenCalledWith('docker', [
    +      'rm',
    +      '-f',
    +      fakeContainerId,
    +    ]);
         expect(result).toEqual({
           content: [
    -        {
    -          type: 'text',
    -          text: `Container ${fakeContainerId} removed.`,
    -        },
    +        { type: 'text', text: `Container ${fakeContainerId} removed.` },
           ],
         });
       });
     
       it('should return an error message when Docker is not running', async () => {
         vi.mocked(utils.isDockerRunning).mockReturnValue(false);
    -
         const result = await stopSandbox({ container_id: fakeContainerId });
    -
    -    // Docker command should not be called
    -    expect(childProcess.execSync).not.toHaveBeenCalled();
    -
    +    expect(childProcess.execFileSync).not.toHaveBeenCalled();
         expect(result).toEqual({
    -      content: [
    -        {
    -          type: 'text',
    -          text: utils.DOCKER_NOT_RUNNING_ERROR,
    -        },
    -      ],
    +      content: [{ type: 'text', text: utils.DOCKER_NOT_RUNNING_ERROR }],
         });
       });
     
       it('should handle errors when removing the container', async () => {
         vi.spyOn(console, 'error').mockImplementation(() => {});
    -
         const errorMessage = 'Container not found';
    -    vi.mocked(childProcess.execSync).mockImplementation(() => {
    +    vi.mocked(childProcess.execFileSync).mockImplementation(() => {
           throw new Error(errorMessage);
         });
    -
    -    // Even with errors, the function should complete and return a response
         const result = await stopSandbox({ container_id: fakeContainerId });
    -
    -    expect(childProcess.execSync).toHaveBeenCalledWith(
    -      `docker rm -f ${fakeContainerId}`
    -    );
    -
    -    // Function should return an error message in the response
    +    expect(childProcess.execFileSync).toHaveBeenCalledWith('docker', [
    +      'rm',
    +      '-f',
    +      fakeContainerId,
    +    ]);
         expect(result).toEqual({
           content: [
             {
    @@ -83,4 +75,52 @@ describe('stopSandbox', () => {
           ],
         });
       });
    +
    +  it('should reject invalid container_id', async () => {
    +    const result = await stopSandbox({ container_id: 'bad;id$(rm -rf /)' });
    +    expect(result).toEqual({
    +      content: [
    +        {
    +          type: 'text',
    +          text: 'Invalid container ID',
    +        },
    +      ],
    +    });
    +    expect(childProcess.execFileSync).not.toHaveBeenCalled();
    +  });
    +});
    +
    +describe('Command injection prevention', () => {
    +  const dangerousIds = [
    +    '$(touch /tmp/pwned)',
    +    '`touch /tmp/pwned`',
    +    'bad;id',
    +    'js-sbx-123 && rm -rf /',
    +    'js-sbx-123 | echo hacked',
    +    'js-sbx-123 > /tmp/pwned',
    +    'js-sbx-123 $(id)',
    +    'js-sbx-123; echo pwned',
    +    'js-sbx-123`echo pwned`',
    +    'js-sbx-123/../../etc/passwd',
    +    'js-sbx-123\nrm -rf /',
    +    '',
    +    ' ',
    +    'js-sbx-123$',
    +    'js-sbx-123#',
    +  ];
    +
    +  dangerousIds.forEach((payload) => {
    +    it(`should reject dangerous container_id: "${payload}"`, async () => {
    +      const result = await stopSandbox({ container_id: payload });
    +      expect(result).toEqual({
    +        content: [
    +          {
    +            type: 'text',
    +            text: 'Invalid container ID',
    +          },
    +        ],
    +      });
    +      expect(childProcess.execFileSync).not.toHaveBeenCalled();
    +    });
    +  });
     });
    
  • test/utils.test.ts+67 4 modified
    @@ -5,6 +5,7 @@ import {
       isDockerRunning,
       preprocessDependencies,
     } from '../src/utils.ts';
    +import { containerExists, isContainerRunning } from './utils.ts';
     import * as childProcess from 'node:child_process';
     
     vi.mock('fs');
    @@ -95,21 +96,25 @@ describe('utils', () => {
     
       describe('isDockerRunning', () => {
         it('should return true when docker info command succeeds', () => {
    -      vi.spyOn(childProcess, 'execSync').mockImplementation(() =>
    +      vi.spyOn(childProcess, 'execFileSync').mockImplementation(() =>
             Buffer.from('')
           );
     
           expect(isDockerRunning()).toBe(true);
    -      expect(childProcess.execSync).toHaveBeenCalledWith('docker info');
    +      expect(childProcess.execFileSync).toHaveBeenCalledWith('docker', [
    +        'info',
    +      ]);
         });
     
         it('should return false when docker info command fails', () => {
    -      vi.spyOn(childProcess, 'execSync').mockImplementation(() => {
    +      vi.spyOn(childProcess, 'execFileSync').mockImplementation(() => {
             throw new Error('docker daemon not running');
           });
     
           expect(isDockerRunning()).toBe(false);
    -      expect(childProcess.execSync).toHaveBeenCalledWith('docker info');
    +      expect(childProcess.execFileSync).toHaveBeenCalledWith('docker', [
    +        'info',
    +      ]);
         });
       });
     
    @@ -157,3 +162,61 @@ describe('utils', () => {
         });
       });
     });
    +
    +describe('containerExists', () => {
    +  it('should return true for a valid container ID', () => {
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() =>
    +      Buffer.from('')
    +    );
    +    expect(containerExists('js-sbx-valid')).toBe(true);
    +    expect(childProcess.execFileSync).toHaveBeenCalledWith('docker', [
    +      'inspect',
    +      'js-sbx-valid',
    +    ]);
    +  });
    +
    +  it('should return false for a non-existent container ID', () => {
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() => {
    +      throw new Error('No such container');
    +    });
    +    expect(containerExists('not-a-real-container')).toBe(false);
    +  });
    +
    +  it('should return false for a malicious container ID', () => {
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() => {
    +      throw new Error('Invalid container ID');
    +    });
    +    expect(containerExists('bad;id$(rm -rf /)')).toBe(false);
    +  });
    +});
    +
    +describe('isContainerRunning', () => {
    +  it('should return true if docker inspect returns "true"', () => {
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() => 'true');
    +    expect(isContainerRunning('js-sbx-valid')).toBe(true);
    +    expect(childProcess.execFileSync).toHaveBeenCalledWith(
    +      'docker',
    +      ['inspect', '-f', '{{.State.Running}}', 'js-sbx-valid'],
    +      { encoding: 'utf8' }
    +    );
    +  });
    +
    +  it('should return false if docker inspect returns "false"', () => {
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() => 'false');
    +    expect(isContainerRunning('js-sbx-valid')).toBe(false);
    +  });
    +
    +  it('should return false for a non-existent container ID', () => {
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() => {
    +      throw new Error('No such container');
    +    });
    +    expect(isContainerRunning('not-a-real-container')).toBe(false);
    +  });
    +
    +  it('should return false for a malicious container ID', () => {
    +    vi.spyOn(childProcess, 'execFileSync').mockImplementation(() => {
    +      throw new Error('Invalid container ID');
    +    });
    +    expect(isContainerRunning('bad;id$(rm -rf /)')).toBe(false);
    +  });
    +});
    
  • test/utils.ts+5 5 modified
    @@ -1,14 +1,14 @@
    -import { execSync } from 'node:child_process';
    +import { execFileSync } from 'node:child_process';
     import { describe, it } from 'vitest';
     import path from 'path';
     /**
      * Utility to check if a Docker container is running
      */
     export function isContainerRunning(containerId: string): boolean {
       try {
    -    const output = execSync(
    -      // use double‑quotes so Windows cmd strips them correctly
    -      `docker inspect -f "{{.State.Running}}" ${containerId}`,
    +    const output = execFileSync(
    +      'docker',
    +      ['inspect', '-f', '{{.State.Running}}', containerId],
           { encoding: 'utf8' }
         ).trim();
         return output === 'true';
    @@ -22,7 +22,7 @@ export function isContainerRunning(containerId: string): boolean {
      */
     export function containerExists(containerId: string): boolean {
       try {
    -    execSync(`docker inspect ${containerId}`);
    +    execFileSync('docker', ['inspect', containerId]);
         return true;
       } catch {
         return false;
    
  • vitest.config.ts+7 0 added
    @@ -0,0 +1,7 @@
    +import { defineConfig } from 'vitest/config';
    +
    +export default defineConfig({
    +  test: {
    +    include: ['**/*.{test,spec}.{js,ts}'],
    +  },
    +});
    
a5e05fab06b2

fix: improved minor sanitizations

2 files changed · +9 4
  • src/dockerUtils.ts+2 2 modified
    @@ -3,7 +3,7 @@ import util from 'util';
     import { logger } from './logger.ts';
     import { getConfig } from './config.ts';
     import { textContent } from './types.ts';
    -import { sanitizeShellCommand } from './utils.ts';
    +import { sanitizeContainerId, sanitizeShellCommand } from './utils.ts';
     
     const execFilePromise = util.promisify(execFile);
     
    @@ -19,7 +19,7 @@ export async function forceStopContainer(containerId: string): Promise<void> {
       );
       try {
         // Sanitize containerId
    -    const safeId = sanitizeShellCommand(containerId);
    +    const safeId = sanitizeContainerId(containerId);
         if (!safeId) throw new Error('Invalid containerId');
         // Force stop the container (ignores errors if already stopped)
         await execFilePromise('docker', ['stop', safeId]);
    
  • src/tools/exec.ts+7 2 modified
    @@ -5,6 +5,7 @@ import {
       DOCKER_NOT_RUNNING_ERROR,
       isDockerRunning,
       sanitizeContainerId,
    +  sanitizeShellCommand,
     } from '../utils.ts';
     
     export const argSchema = {
    @@ -19,7 +20,6 @@ export default async function execInSandbox({
       container_id: string;
       commands: string[];
     }): Promise<McpResponse> {
    -  console.log('execInSandbox', container_id);
       const validId = sanitizeContainerId(container_id);
       if (!validId) {
         return {
    @@ -35,8 +35,13 @@ export default async function execInSandbox({
     
       const output: string[] = [];
       for (const cmd of commands) {
    +    const sanitizedCmd = sanitizeShellCommand(cmd);
    +    if (!sanitizedCmd)
    +      throw new Error(
    +        'Cannot run command as it contains dangerous metacharacters'
    +      );
         output.push(
    -      execFileSync('docker', ['exec', validId, '/bin/sh', '-c', cmd], {
    +      execFileSync('docker', ['exec', validId, '/bin/sh', '-c', sanitizedCmd], {
             encoding: 'utf8',
           })
         );
    
af860e2258a8

fix: command injection prevention tests

5 files changed · +185 8
  • src/tools/exec.ts+14 2 modified
    @@ -1,7 +1,11 @@
     import { z } from 'zod';
     import { execFileSync } from 'node:child_process';
     import { type McpResponse, textContent } from '../types.ts';
    -import { DOCKER_NOT_RUNNING_ERROR, isDockerRunning } from '../utils.ts';
    +import {
    +  DOCKER_NOT_RUNNING_ERROR,
    +  isDockerRunning,
    +  sanitizeContainerId,
    +} from '../utils.ts';
     
     export const argSchema = {
       container_id: z.string(),
    @@ -15,6 +19,14 @@ export default async function execInSandbox({
       container_id: string;
       commands: string[];
     }): Promise<McpResponse> {
    +  console.log('execInSandbox', container_id);
    +  const validId = sanitizeContainerId(container_id);
    +  if (!validId) {
    +    return {
    +      content: [textContent('Invalid container ID')],
    +    };
    +  }
    +
       if (!isDockerRunning()) {
         return {
           content: [textContent(DOCKER_NOT_RUNNING_ERROR)],
    @@ -24,7 +36,7 @@ export default async function execInSandbox({
       const output: string[] = [];
       for (const cmd of commands) {
         output.push(
    -      execFileSync('docker', ['exec', container_id, '/bin/sh', '-c', cmd], {
    +      execFileSync('docker', ['exec', validId, '/bin/sh', '-c', cmd], {
             encoding: 'utf8',
           })
         );
    
  • src/tools/runJs.ts+11 5 modified
    @@ -6,6 +6,7 @@ import {
       DOCKER_NOT_RUNNING_ERROR,
       isDockerRunning,
       waitForPortHttp,
    +  sanitizeContainerId,
     } from '../utils.ts';
     import {
       changesToMcpContent,
    @@ -60,6 +61,11 @@ export default async function runJs({
       dependencies?: DependenciesArray;
       listenOnPort?: number;
     }): Promise<McpResponse> {
    +  const validId = sanitizeContainerId(container_id);
    +  if (!validId) {
    +    return { content: [textContent('Invalid container ID')] };
    +  }
    +
       if (!isDockerRunning()) {
         return { content: [textContent(DOCKER_NOT_RUNNING_ERROR)] };
       }
    @@ -74,7 +80,7 @@ export default async function runJs({
       execFileSync('docker', [
         'cp',
         `${localWorkspace.name}/.`,
    -    `${container_id}:/workspace`,
    +    `${validId}:/workspace`,
       ]);
     
       let rawOutput: string = '';
    @@ -90,7 +96,7 @@ export default async function runJs({
             'docker',
             [
               'exec',
    -          container_id,
    +          validId,
               '/bin/sh',
               '-c',
               'npm install --omit=dev --prefer-offline --no-audit --loglevel=error',
    @@ -105,7 +111,7 @@ export default async function runJs({
         }
     
         const { error, duration } = safeExecNodeInContainer({
    -      containerId: container_id,
    +      containerId: validId,
           command: `nohup node index.js > output.log 2>&1 &`,
         });
         telemetry.runTimeMs = duration;
    @@ -120,7 +126,7 @@ export default async function runJs({
             'npm install --omit=dev --prefer-offline --no-audit --loglevel=error';
           const installOutput = execFileSync(
             'docker',
    -        ['exec', container_id, '/bin/sh', '-c', fullCmd],
    +        ['exec', validId, '/bin/sh', '-c', fullCmd],
             { encoding: 'utf8' }
           );
           telemetry.installTimeMs = Date.now() - installStart;
    @@ -131,7 +137,7 @@ export default async function runJs({
         }
     
         const { output, error, duration } = safeExecNodeInContainer({
    -      containerId: container_id,
    +      containerId: validId,
         });
     
         if (output) rawOutput = output;
    
  • test/execInSandbox.test.ts+63 1 modified
    @@ -1,4 +1,12 @@
    -import { describe, it, expect, beforeAll, afterAll } from 'vitest';
    +import {
    +  describe,
    +  it,
    +  expect,
    +  beforeAll,
    +  afterAll,
    +  beforeEach,
    +  afterEach,
    +} from 'vitest';
     import initializeSandbox from '../src/tools/initialize.ts';
     import execInSandbox from '../src/tools/exec.ts';
     import stopSandbox from '../src/tools/stop.ts';
    @@ -76,3 +84,57 @@ describe('execInSandbox', () => {
         }
       });
     });
    +
    +describe('Command injection prevention', () => {
    +  beforeEach(() => {
    +    vi.doMock('node:child_process', () => ({
    +      execFileSync: vi.fn(() => Buffer.from('')),
    +      execFile: vi.fn(() => Buffer.from('')),
    +    }));
    +  });
    +
    +  afterEach(() => {
    +    vi.resetModules();
    +    vi.resetAllMocks();
    +    vi.restoreAllMocks();
    +  });
    +
    +  const dangerousIds = [
    +    '$(touch /tmp/pwned)',
    +    '`touch /tmp/pwned`',
    +    'bad;id',
    +    'js-sbx-123 && rm -rf /',
    +    'js-sbx-123 | echo hacked',
    +    'js-sbx-123 > /tmp/pwned',
    +    'js-sbx-123 $(id)',
    +    'js-sbx-123; echo pwned',
    +    'js-sbx-123`echo pwned`',
    +    'js-sbx-123/../../etc/passwd',
    +    'js-sbx-123\nrm -rf /',
    +    '',
    +    ' ',
    +    'js-sbx-123$',
    +    'js-sbx-123#',
    +  ];
    +
    +  dangerousIds.forEach((payload) => {
    +    it(`should reject dangerous container_id: "${payload}"`, async () => {
    +      const { default: execInSandbox } = await import('../src/tools/exec.ts');
    +      const childProcess = await import('node:child_process');
    +      const result = await execInSandbox({
    +        container_id: payload,
    +        commands: ['echo test'],
    +      });
    +      expect(result).toEqual({
    +        content: [
    +          {
    +            type: 'text',
    +            text: 'Invalid container ID',
    +          },
    +        ],
    +      });
    +      const execFileSyncCall = vi.mocked(childProcess.execFileSync).mock.calls;
    +      expect(execFileSyncCall.length).toBe(0);
    +    });
    +  });
    +});
    
  • test/runJs.test.ts+62 0 modified
    @@ -5,6 +5,14 @@ import runJs, { argSchema } from '../src/tools/runJs.ts';
     import initializeSandbox from '../src/tools/initialize.ts';
     import stopSandbox from '../src/tools/stop.ts';
     import type { McpContentText } from '../src/types.ts';
    +import { vi } from 'vitest';
    +// import * as childProcess from 'node:child_process'; // REMOVE THIS LINE
    +
    +// REMOVE THIS GLOBAL MOCK:
    +// vi.mock('node:child_process', () => ({
    +//   execFileSync: vi.fn(() => Buffer.from('')),
    +//   execFile: vi.fn(() => Buffer.from('')),
    +// }));
     
     describe('argSchema', () => {
       it('should accept code and container_id and set defaults', () => {
    @@ -217,3 +225,57 @@ describe('runJs basic execution', () => {
         expect(telemetryText).toBeDefined();
       });
     }, 10_000);
    +
    +describe('Command injection prevention', () => {
    +  beforeEach(() => {
    +    vi.doMock('node:child_process', () => ({
    +      execFileSync: vi.fn(() => Buffer.from('')),
    +      execFile: vi.fn(() => Buffer.from('')),
    +    }));
    +  });
    +
    +  afterEach(() => {
    +    vi.resetModules();
    +    vi.resetAllMocks();
    +    vi.restoreAllMocks();
    +  });
    +
    +  const dangerousIds = [
    +    '$(touch /tmp/pwned)',
    +    '`touch /tmp/pwned`',
    +    'bad;id',
    +    'js-sbx-123 && rm -rf /',
    +    'js-sbx-123 | echo hacked',
    +    'js-sbx-123 > /tmp/pwned',
    +    'js-sbx-123 $(id)',
    +    'js-sbx-123; echo pwned',
    +    'js-sbx-123`echo pwned`',
    +    'js-sbx-123/../../etc/passwd',
    +    'js-sbx-123\nrm -rf /',
    +    '',
    +    ' ',
    +    'js-sbx-123$',
    +    'js-sbx-123#',
    +  ];
    +
    +  dangerousIds.forEach((payload) => {
    +    it(`should reject dangerous container_id: "${payload}"`, async () => {
    +      const { default: runJs } = await import('../src/tools/runJs.ts');
    +      const childProcess = await import('node:child_process');
    +      const result = await runJs({
    +        container_id: payload,
    +        code: 'console.log("test")',
    +      });
    +      expect(result).toEqual({
    +        content: [
    +          {
    +            type: 'text',
    +            text: 'Invalid container ID',
    +          },
    +        ],
    +      });
    +      const execFileSyncCall = vi.mocked(childProcess.execFileSync).mock.calls;
    +      expect(execFileSyncCall.length).toBe(0);
    +    });
    +  });
    +});
    
  • test/stopSandbox.test.ts+35 0 modified
    @@ -89,3 +89,38 @@ describe('stopSandbox', () => {
         expect(childProcess.execFileSync).not.toHaveBeenCalled();
       });
     });
    +
    +describe('Command injection prevention', () => {
    +  const dangerousIds = [
    +    '$(touch /tmp/pwned)',
    +    '`touch /tmp/pwned`',
    +    'bad;id',
    +    'js-sbx-123 && rm -rf /',
    +    'js-sbx-123 | echo hacked',
    +    'js-sbx-123 > /tmp/pwned',
    +    'js-sbx-123 $(id)',
    +    'js-sbx-123; echo pwned',
    +    'js-sbx-123`echo pwned`',
    +    'js-sbx-123/../../etc/passwd',
    +    'js-sbx-123\nrm -rf /',
    +    '',
    +    ' ',
    +    'js-sbx-123$',
    +    'js-sbx-123#',
    +  ];
    +
    +  dangerousIds.forEach((payload) => {
    +    it(`should reject dangerous container_id: "${payload}"`, async () => {
    +      const result = await stopSandbox({ container_id: payload });
    +      expect(result).toEqual({
    +        content: [
    +          {
    +            type: 'text',
    +            text: 'Invalid container ID',
    +          },
    +        ],
    +      });
    +      expect(childProcess.execFileSync).not.toHaveBeenCalled();
    +    });
    +  });
    +});
    

Vulnerability mechanics

Generated by null/stub on May 9, 2026. Inputs: CWE entries + fix-commit diffs from this CVE's patches. Citations validated against bundle.

References

6

News mentions

0

No linked articles in our index yet.