Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
- Added ask sidebar to homepage. [#721](https://github.com/sourcebot-dev/sourcebot/pull/721)
- Added endpoint for searching commit history for a git repository. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625)

## [4.10.17] - 2026-01-23

Expand Down
4 changes: 2 additions & 2 deletions packages/backend/src/repoIndexManager.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import * as Sentry from '@sentry/node';
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
import { createLogger, Logger } from "@sourcebot/shared";
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema } from '@sourcebot/shared';
import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema, getRepoPath } from '@sourcebot/shared';
import { existsSync } from 'fs';
import { readdir, rm } from 'fs/promises';
import { Job, Queue, ReservedJob, Worker } from "groupmq";
Expand All @@ -12,7 +12,7 @@ import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName,
import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { getAuthCredentialsForRepo, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js';
import { indexGitRepository } from './zoekt.js';

const LOG_TAG = 'repo-index-manager';
Expand Down
19 changes: 0 additions & 19 deletions packages/backend/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,25 +53,6 @@ export const arraysEqualShallow = <T>(a?: readonly T[], b?: readonly T[]) => {
return true;
}

// @note: this function is duplicated in `packages/web/src/features/fileTree/actions.ts`.
// @todo: we should move this to a shared package.
/**
 * Resolves the on-disk location of a repository.
 *
 * @param repo - The repository record; its `cloneUrl` must be a parseable URL.
 * @returns The filesystem path for the repo, plus a flag telling callers
 * whether the location must be treated as read-only.
 */
export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => {
    const { protocol, pathname } = new URL(repo.cloneUrl);

    // A generic git host addressed through a `file:` URL is a local repository:
    // use it in place and flag it read-only, because write access to the local
    // filesystem is not guaranteed.
    const isLocalRepo = repo.external_codeHostType === 'genericGitHost' && protocol === 'file:';

    return isLocalRepo
        ? { path: pathname, isReadOnly: true }
        // Everything else lives in the repos cache directory, keyed by repo id.
        : { path: path.join(REPOS_CACHE_DIR, repo.id.toString()), isReadOnly: false };
}

/**
 * Builds the shard prefix for a repository: the organization id and the
 * repository id joined by an underscore (e.g. `3_17`).
 */
export const getShardPrefix = (orgId: number, repoId: number) => [orgId, repoId].join('_');
Expand Down
4 changes: 2 additions & 2 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { Repo } from "@sourcebot/db";
import { createLogger, env } from "@sourcebot/shared";
import { createLogger, env, getRepoPath } from "@sourcebot/shared";
import { exec } from "child_process";
import { INDEX_CACHE_DIR } from "./constants.js";
import { Settings } from "./types.js";
import { getRepoPath, getShardPrefix } from "./utils.js";
import { getShardPrefix } from "./utils.js";

const logger = createLogger('zoekt');

Expand Down
4 changes: 4 additions & 0 deletions packages/mcp/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Added `search_commits` tool to search a repo's commit history. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625)
- Added `gitRevision` parameter to the `search_code` tool to allow for searching on different branches. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625)

## [1.0.12] - 2026-01-13

### Fixed
Expand Down
18 changes: 18 additions & 0 deletions packages/mcp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,24 @@ Fetches the source code for a given file.
| `repoId` | yes | The Sourcebot repository ID. |
</details>

### search_commits

Searches for commits in a specific repository based on actual commit time.

<details>
<summary>Parameters</summary>

| Name | Required | Description |
|:-----------|:---------|:-----------------------------------------------------------------------------------------------|
| `repoId` | yes | Repository identifier: either numeric database ID (e.g., 123) or full repository name (e.g., "github.com/owner/repo") as returned by `list_repos`. |
| `query` | no | Search query to filter commits by message (case-insensitive). |
| `since` | no | Show commits after this date (by commit time). Supports ISO 8601 or relative formats. |
| `until` | no | Show commits before this date (by commit time). Supports ISO 8601 or relative formats. |
| `author` | no | Filter by author name or email (supports partial matches). |
| `maxCount` | no | Maximum number of commits to return (default: 50). |

</details>


## Supported Code Hosts
Sourcebot supports the following code hosts:
Expand Down
22 changes: 20 additions & 2 deletions packages/mcp/src/client.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { env } from './env.js';
import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js';
import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js';
import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema, searchCommitsResponseSchema } from './schemas.js';
import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError, SearchCommitsRequest, SearchCommitsResponse } from './types.js';
import { isServiceError } from './utils.js';

export const search = async (request: SearchRequest): Promise<SearchResponse | ServiceError> => {
Expand Down Expand Up @@ -52,3 +52,21 @@ export const getFileSource = async (request: FileSourceRequest): Promise<FileSou

return fileSourceResponseSchema.parse(result);
}

/**
 * Sends a commit search request to the Sourcebot host's `/api/commits` endpoint.
 *
 * @param request - The commit search parameters, sent as the JSON request body.
 * @returns The service error as-is when the response body is one; otherwise the
 * body parsed with `searchCommitsResponseSchema` (which throws on a mismatch).
 */
export const searchCommits = async (request: SearchCommitsRequest): Promise<SearchCommitsResponse | ServiceError> => {
    const headers: Record<string, string> = {
        'Content-Type': 'application/json',
        'X-Org-Domain': '~',
    };
    // The API key header is only attached when a key is configured.
    if (env.SOURCEBOT_API_KEY) {
        headers['X-Sourcebot-Api-Key'] = env.SOURCEBOT_API_KEY;
    }

    const response = await fetch(`${env.SOURCEBOT_HOST}/api/commits`, {
        method: 'POST',
        headers,
        body: JSON.stringify(request),
    });
    const payload = await response.json();

    return isServiceError(payload)
        ? payload
        : searchCommitsResponseSchema.parse(payload);
}
49 changes: 46 additions & 3 deletions packages/mcp/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import escapeStringRegexp from 'escape-string-regexp';
import { z } from 'zod';
import { listRepos, search, getFileSource } from './client.js';
import { getFileSource, listRepos, search, searchCommits } from './client.js';
import { env, numberSchema } from './env.js';
import { listReposRequestSchema } from './schemas.js';
import { TextContent } from './types.js';
Expand Down Expand Up @@ -49,6 +49,10 @@ server.tool(
.boolean()
.describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`)
.optional(),
gitRevision: z
.string()
.describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). If not provided, defaults to the default branch (usually 'main' or 'master').`)
.optional(),
maxTokens: numberSchema
.describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`)
.transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? env.DEFAULT_MINIMUM_TOKENS : val))
Expand All @@ -61,6 +65,7 @@ server.tool(
maxTokens = env.DEFAULT_MINIMUM_TOKENS,
includeCodeSnippets = false,
caseSensitive = false,
gitRevision,
}) => {
if (repoIds.length > 0) {
query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`;
Expand All @@ -70,13 +75,17 @@ server.tool(
query += ` ( lang:${languages.join(' or lang:')} )`;
}

if (gitRevision) {
query += ` ( rev:${gitRevision} )`;
}

const response = await search({
query,
matches: env.DEFAULT_MATCHES,
contextLines: env.DEFAULT_CONTEXT_LINES,
isRegexEnabled: true,
isCaseSensitivityEnabled: caseSensitive,
source: 'mcp'
source: 'mcp',
});

if (isServiceError(response)) {
Expand Down Expand Up @@ -162,9 +171,43 @@ server.tool(
}
);

// Registers the `search_commits` MCP tool. The handler forwards its arguments to
// `searchCommits` (the tool's `repoId` is sent as `repository`) and returns either
// the pretty-printed JSON result or an error message flagged with `isError`.
// NOTE(review): `searchCommitsRequestSchema` caps `maxCount` at 500, but this tool
// schema has no upper bound — confirm whether larger values should be rejected here.
server.tool(
    "search_commits",
    `Searches for commits in a specific repository based on actual commit time. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
    {
        repoId: z.string().describe(`The repository to search commits in. This is the Sourcebot compatible repository ID as returned by 'list_repos'.`),
        query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(),
        since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(),
        until: z.string().describe(`Show commits older than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(),
        author: z.string().describe(`Filter commits by author name or email (supports partial matches and patterns).`).optional(),
        maxCount: z.number().int().positive().default(50).describe(`Maximum number of commits to return (default: 50).`),
    },
    async ({ repoId, query, since, until, author, maxCount }) => {
        const outcome = await searchCommits({
            repository: repoId,
            query,
            since,
            until,
            author,
            maxCount,
        });

        // Success path first: hand the commits back as pretty-printed JSON.
        if (!isServiceError(outcome)) {
            return {
                content: [{ type: "text", text: JSON.stringify(outcome, null, 2) }],
            };
        }

        // Otherwise surface the service error's message and mark the result as an error.
        return {
            content: [{ type: "text", text: `Error: ${outcome.message}` }],
            isError: true,
        };
    }
);

server.tool(
"list_repos",
"Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.",
`Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`,
listReposRequestSchema.shape,
async ({ query, pageNumber = 1, limit = 50 }: {
query?: string;
Expand Down
21 changes: 20 additions & 1 deletion packages/mcp/src/schemas.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/schemas.ts
// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/types.ts
// At some point, we should move these to a shared package...
import { z } from "zod";

Expand Down Expand Up @@ -193,3 +193,22 @@ export const serviceErrorSchema = z.object({
errorCode: z.string(),
message: z.string(),
});

/**
 * Shape of a commit search request. Only `repository` is required; every
 * other field is an optional filter. `maxCount`, when given, must be a
 * positive integer no greater than 500.
 */
export const searchCommitsRequestSchema = z.object({
    repository: z.string(),
    query: z.optional(z.string()),
    since: z.optional(z.string()),
    until: z.optional(z.string()),
    author: z.optional(z.string()),
    maxCount: z.optional(z.number().int().positive().max(500)),
});

/**
 * Shape of a commit search response: an array of commit entries in which
 * every field is a required string.
 */
export const searchCommitsResponseSchema = z
    .object({
        hash: z.string(),
        date: z.string(),
        message: z.string(),
        refs: z.string(),
        body: z.string(),
        author_name: z.string(),
        author_email: z.string(),
    })
    .array();
5 changes: 5 additions & 0 deletions packages/mcp/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import {
fileSourceRequestSchema,
symbolSchema,
serviceErrorSchema,
searchCommitsRequestSchema,
searchCommitsResponseSchema,
} from "./schemas.js";
import { z } from "zod";

Expand All @@ -29,3 +31,6 @@ export type FileSourceResponse = z.infer<typeof fileSourceResponseSchema>;
export type TextContent = { type: "text", text: string };

export type ServiceError = z.infer<typeof serviceErrorSchema>;

/** Commit search request payload, inferred from `searchCommitsRequestSchema`. */
export type SearchCommitsRequest = z.infer<typeof searchCommitsRequestSchema>;
/** Commit search response payload, inferred from `searchCommitsResponseSchema`. */
export type SearchCommitsResponse = z.infer<typeof searchCommitsResponseSchema>;
73 changes: 70 additions & 3 deletions packages/shared/src/env.server.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import { indexSchema } from "@sourcebot/schemas/v3/index.schema";
import { SourcebotConfig } from "@sourcebot/schemas/v3/index.type";
import { createEnv } from "@t3-oss/env-core";
import { Ajv } from "ajv";
import { readFile } from 'fs/promises';
import stripJsonComments from "strip-json-comments";
import { z } from "zod";
import { loadConfig } from "./utils.js";
import { tenancyModeSchema } from "./types.js";
import { SourcebotConfig } from "@sourcebot/schemas/v3/index.type";
import { getTokenFromConfig } from "./crypto.js";
import { tenancyModeSchema } from "./types.js";

// Booleans are specified as 'true' or 'false' strings.
const booleanSchema = z.enum(["true", "false"]);
Expand All @@ -13,6 +16,10 @@ const booleanSchema = z.enum(["true", "false"]);
// @see: https://zod.dev/?id=coercion-for-primitives
const numberSchema = z.coerce.number();

// Module-level Ajv instance used by `loadConfig` to validate the parsed config
// against `indexSchema`. It is constructed with `validateFormats: false`.
const ajv = new Ajv({
validateFormats: false,
});

export const resolveEnvironmentVariableOverridesFromConfig = async (config: SourcebotConfig): Promise<Record<string, string>> => {
if (!config.environmentOverrides) {
return {};
Expand Down Expand Up @@ -45,6 +52,66 @@ export const resolveEnvironmentVariableOverridesFromConfig = async (config: Sour
return resolved;
}

/**
 * Reports whether `path` points at a remote location, i.e. whether it begins
 * with `https://` or `http://`. The check is case-sensitive.
 */
export const isRemotePath = (path: string) => {
    const remotePrefixes = ['https://', 'http://'];
    return remotePrefixes.some((prefix) => path.startsWith(prefix));
}

/**
 * Downloads the raw config text from a remote URL.
 *
 * @throws If the response status is not OK.
 */
const fetchRemoteConfig = async (configUrl: string): Promise<string> => {
    const response = await fetch(configUrl);
    if (!response.ok) {
        throw new Error(`Failed to fetch config file ${configUrl}: ${response.statusText}`);
    }
    return response.text();
}

/**
 * Reads the raw config text from the local filesystem, retrying while the
 * file does not exist yet (handles race conditions with mounted volumes).
 *
 * @throws Immediately for any non-ENOENT error, or with the last ENOENT error
 * once all attempts are exhausted.
 */
const readLocalConfigWithRetry = async (configPath: string): Promise<string> => {
    const maxAttempts = 5;
    const retryDelayMs = 2000;

    // The loop has no exit condition on purpose: every iteration either returns
    // the file contents or throws, so no unreachable fallback is needed after it.
    for (let attempt = 1; ; attempt++) {
        try {
            return await readFile(configPath, {
                encoding: 'utf-8',
            });
        } catch (error) {
            // Only a missing file is worth retrying, and only while attempts remain.
            const isMissingFile = (error as NodeJS.ErrnoException)?.code === 'ENOENT';
            if (!isMissingFile || attempt >= maxAttempts) {
                throw error;
            }

            // The delay shown in the message is derived from `retryDelayMs` so the two cannot drift apart.
            console.warn(`Config file not found, retrying in ${retryDelayMs / 1000}s... (Attempt ${attempt}/${maxAttempts})`);
            await new Promise(resolve => setTimeout(resolve, retryDelayMs));
        }
    }
}

/**
 * Loads the Sourcebot config from a local path or a remote (http/https) URL,
 * strips JSON comments, parses it, and validates it against `indexSchema`.
 *
 * @param configPath - Local file path or remote URL of the config file.
 * @returns The parsed and validated config.
 * @throws If `configPath` is missing, the config cannot be fetched or read,
 * the content is not valid JSON, or it fails schema validation.
 */
export const loadConfig = async (configPath?: string): Promise<SourcebotConfig> => {
    if (!configPath) {
        throw new Error('CONFIG_PATH is required but not provided');
    }

    const configContent = isRemotePath(configPath)
        ? await fetchRemoteConfig(configPath)
        : await readLocalConfigWithRetry(configPath);

    const config = JSON.parse(stripJsonComments(configContent)) as SourcebotConfig;
    const isValidConfig = ajv.validate(indexSchema, config);
    if (!isValidConfig) {
        throw new Error(`Config file '${configPath}' is invalid: ${ajv.errorsText(ajv.errors)}`);
    }
    return config;
}

// Merge process.env with environment variables resolved from config.json
const runtimeEnv = await (async () => {
const configPath = process.env.CONFIG_PATH;
Expand Down
5 changes: 3 additions & 2 deletions packages/shared/src/index.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,16 @@ export {
} from "./types.js";
export {
base64Decode,
loadConfig,
loadJsonFile,
isRemotePath,
getConfigSettings,
getRepoPath,
} from "./utils.js";
export * from "./constants.js";
export {
env,
resolveEnvironmentVariableOverridesFromConfig,
loadConfig,
isRemotePath,
} from "./env.server.js";
export {
createLogger,
Expand Down
Loading