diff --git a/CHANGELOG.md b/CHANGELOG.md index bb5440f47..4b3206dde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added ask sidebar to homepage. [#721](https://github.com/sourcebot-dev/sourcebot/pull/721) +- Added endpoint for searching commit history for a git repository. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625) ## [4.10.17] - 2026-01-23 diff --git a/packages/backend/src/repoIndexManager.ts b/packages/backend/src/repoIndexManager.ts index e83a84d61..9f03c1d2b 100644 --- a/packages/backend/src/repoIndexManager.ts +++ b/packages/backend/src/repoIndexManager.ts @@ -1,7 +1,7 @@ import * as Sentry from '@sentry/node'; import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db"; import { createLogger, Logger } from "@sourcebot/shared"; -import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema } from '@sourcebot/shared'; +import { env, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata, repoMetadataSchema, getRepoPath } from '@sourcebot/shared'; import { existsSync } from 'fs'; import { readdir, rm } from 'fs/promises'; import { Job, Queue, ReservedJob, Worker } from "groupmq"; @@ -12,7 +12,7 @@ import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, import { captureEvent } from './posthog.js'; import { PromClient } from './promClient.js'; import { RepoWithConnections, Settings } from "./types.js"; -import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js'; +import { getAuthCredentialsForRepo, getShardPrefix, groupmqLifecycleExceptionWrapper, measure, setIntervalAsync } from './utils.js'; import { indexGitRepository } from './zoekt.js'; const LOG_TAG = 'repo-index-manager'; diff --git a/packages/backend/src/utils.ts 
b/packages/backend/src/utils.ts index c2e4c4959..163641420 100644 --- a/packages/backend/src/utils.ts +++ b/packages/backend/src/utils.ts @@ -53,25 +53,6 @@ export const arraysEqualShallow = (a?: readonly T[], b?: readonly T[]) => { return true; } -// @note: this function is duplicated in `packages/web/src/features/fileTree/actions.ts`. -// @todo: we should move this to a shared package. -export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => { - // If we are dealing with a local repository, then use that as the path. - // Mark as read-only since we aren't guaranteed to have write access to the local filesystem. - const cloneUrl = new URL(repo.cloneUrl); - if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') { - return { - path: cloneUrl.pathname, - isReadOnly: true, - } - } - - return { - path: path.join(REPOS_CACHE_DIR, repo.id.toString()), - isReadOnly: false, - } -} - export const getShardPrefix = (orgId: number, repoId: number) => { return `${orgId}_${repoId}`; } diff --git a/packages/backend/src/zoekt.ts b/packages/backend/src/zoekt.ts index 7c802d89f..27f17d711 100644 --- a/packages/backend/src/zoekt.ts +++ b/packages/backend/src/zoekt.ts @@ -1,9 +1,9 @@ import { Repo } from "@sourcebot/db"; -import { createLogger, env } from "@sourcebot/shared"; +import { createLogger, env, getRepoPath } from "@sourcebot/shared"; import { exec } from "child_process"; import { INDEX_CACHE_DIR } from "./constants.js"; import { Settings } from "./types.js"; -import { getRepoPath, getShardPrefix } from "./utils.js"; +import { getShardPrefix } from "./utils.js"; const logger = createLogger('zoekt'); diff --git a/packages/mcp/CHANGELOG.md b/packages/mcp/CHANGELOG.md index 1a8d2cbd7..5eb028c03 100644 --- a/packages/mcp/CHANGELOG.md +++ b/packages/mcp/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added `search_commits` tool to 
search a repo's commit history. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625) +- Added `gitRevision` parameter to the `search_code` tool to allow for searching on different branches. [#625](https://github.com/sourcebot-dev/sourcebot/pull/625) + ## [1.0.12] - 2026-01-13 ### Fixed diff --git a/packages/mcp/README.md b/packages/mcp/README.md index a0a875a0f..cb718a8d7 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -208,6 +208,24 @@ Fetches the source code for a given file. | `repoId` | yes | The Sourcebot repository ID. | +### search_commits + +Searches for commits in a specific repository based on actual commit time. + +
+Parameters + +| Name | Required | Description | +|:-----------|:---------|:-----------------------------------------------------------------------------------------------| +| `repoId` | yes | Repository identifier: either numeric database ID (e.g., 123) or full repository name (e.g., "github.com/owner/repo") as returned by `list_repos`. | +| `query` | no | Search query to filter commits by message (case-insensitive). | +| `since` | no | Show commits after this date (by commit time). Supports ISO 8601 or relative formats. | +| `until` | no | Show commits before this date (by commit time). Supports ISO 8601 or relative formats. | +| `author` | no | Filter by author name or email (supports partial matches). | +| `maxCount` | no | Maximum number of commits to return (default: 50). | + +
+ ## Supported Code Hosts Sourcebot supports the following code hosts: diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index fdb3440e2..bf2a2c192 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,6 @@ import { env } from './env.js'; -import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema } from './schemas.js'; -import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError } from './types.js'; +import { listRepositoriesResponseSchema, searchResponseSchema, fileSourceResponseSchema, searchCommitsResponseSchema } from './schemas.js'; +import { FileSourceRequest, FileSourceResponse, ListRepositoriesResponse, SearchRequest, SearchResponse, ServiceError, SearchCommitsRequest, SearchCommitsResponse } from './types.js'; import { isServiceError } from './utils.js'; export const search = async (request: SearchRequest): Promise => { @@ -52,3 +52,21 @@ export const getFileSource = async (request: FileSourceRequest): Promise => { + const result = await fetch(`${env.SOURCEBOT_HOST}/api/commits`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-Org-Domain': '~', + ...(env.SOURCEBOT_API_KEY ? 
{ 'X-Sourcebot-Api-Key': env.SOURCEBOT_API_KEY } : {}) + }, + body: JSON.stringify(request) + }).then(response => response.json()); + + if (isServiceError(result)) { + return result; + } + + return searchCommitsResponseSchema.parse(result); +} diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 43c08f248..d30c79cef 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -5,7 +5,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import escapeStringRegexp from 'escape-string-regexp'; import { z } from 'zod'; -import { listRepos, search, getFileSource } from './client.js'; +import { getFileSource, listRepos, search, searchCommits } from './client.js'; import { env, numberSchema } from './env.js'; import { listReposRequestSchema } from './schemas.js'; import { TextContent } from './types.js'; @@ -49,6 +49,10 @@ server.tool( .boolean() .describe(`Whether to include the code snippets in the response (default: false). If false, only the file's URL, repository, and language will be returned. Set to false to get a more concise response.`) .optional(), + gitRevision: z + .string() + .describe(`The git revision to search in (e.g., 'main', 'HEAD', 'v1.0.0', 'a1b2c3d'). If not provided, defaults to the default branch (usually 'main' or 'master').`) + .optional(), maxTokens: numberSchema .describe(`The maximum number of tokens to return (default: ${env.DEFAULT_MINIMUM_TOKENS}). Higher values provide more context but consume more tokens. Values less than ${env.DEFAULT_MINIMUM_TOKENS} will be ignored.`) .transform((val) => (val < env.DEFAULT_MINIMUM_TOKENS ? 
env.DEFAULT_MINIMUM_TOKENS : val)) @@ -61,6 +65,7 @@ server.tool( maxTokens = env.DEFAULT_MINIMUM_TOKENS, includeCodeSnippets = false, caseSensitive = false, + gitRevision, }) => { if (repoIds.length > 0) { query += ` ( repo:${repoIds.map(id => escapeStringRegexp(id)).join(' or repo:')} )`; @@ -70,13 +75,17 @@ server.tool( query += ` ( lang:${languages.join(' or lang:')} )`; } + if (gitRevision) { + query += ` ( rev:${gitRevision} )`; + } + const response = await search({ query, matches: env.DEFAULT_MATCHES, contextLines: env.DEFAULT_CONTEXT_LINES, isRegexEnabled: true, isCaseSensitivityEnabled: caseSensitive, - source: 'mcp' + source: 'mcp', }); if (isServiceError(response)) { @@ -162,9 +171,43 @@ server.tool( } ); +server.tool( + "search_commits", + `Searches for commits in a specific repository based on actual commit time. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, + { + repoId: z.string().describe(`The repository to search commits in. This is the Sourcebot compatible repository ID as returned by 'list_repos'.`), + query: z.string().describe(`Search query to filter commits by message content (case-insensitive).`).optional(), + since: z.string().describe(`Show commits more recent than this date. Filters by actual commit time. Supports ISO 8601 (e.g., '2024-01-01') or relative formats (e.g., '30 days ago', 'last week').`).optional(), + until: z.string().describe(`Show commits older than this date. Filters by actual commit time. 
Supports ISO 8601 (e.g., '2024-12-31') or relative formats (e.g., 'yesterday').`).optional(), + author: z.string().describe(`Filter commits by author name or email (supports partial matches and patterns).`).optional(), + maxCount: z.number().int().positive().default(50).describe(`Maximum number of commits to return (default: 50).`), + }, + async ({ repoId, query, since, until, author, maxCount }) => { + const result = await searchCommits({ + repository: repoId, + query, + since, + until, + author, + maxCount, + }); + + if (isServiceError(result)) { + return { + content: [{ type: "text", text: `Error: ${result.message}` }], + isError: true, + }; + } + + return { + content: [{ type: "text", text: JSON.stringify(result, null, 2) }], + }; + } +); + server.tool( "list_repos", - "Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.", + `Lists repositories in the organization with optional filtering and pagination. If you receive an error that indicates that you're not authenticated, please inform the user to set the SOURCEBOT_API_KEY environment variable.`, listReposRequestSchema.shape, async ({ query, pageNumber = 1, limit = 50 }: { query?: string; diff --git a/packages/mcp/src/schemas.ts b/packages/mcp/src/schemas.ts index 510635792..00d9877b8 100644 --- a/packages/mcp/src/schemas.ts +++ b/packages/mcp/src/schemas.ts @@ -1,4 +1,4 @@ -// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/schemas.ts +// @NOTE : Please keep this file in sync with @sourcebot/web/src/features/search/types.ts // At some point, we should move these to a shared package... 
import { z } from "zod"; @@ -193,3 +193,22 @@ export const serviceErrorSchema = z.object({ errorCode: z.string(), message: z.string(), }); + +export const searchCommitsRequestSchema = z.object({ + repository: z.string(), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().int().positive().max(500).optional(), +}); + +export const searchCommitsResponseSchema = z.array(z.object({ + hash: z.string(), + date: z.string(), + message: z.string(), + refs: z.string(), + body: z.string(), + author_name: z.string(), + author_email: z.string(), +})); diff --git a/packages/mcp/src/types.ts b/packages/mcp/src/types.ts index 9c858fe5b..720867a8f 100644 --- a/packages/mcp/src/types.ts +++ b/packages/mcp/src/types.ts @@ -10,6 +10,8 @@ import { fileSourceRequestSchema, symbolSchema, serviceErrorSchema, + searchCommitsRequestSchema, + searchCommitsResponseSchema, } from "./schemas.js"; import { z } from "zod"; @@ -29,3 +31,6 @@ export type FileSourceResponse = z.infer; export type TextContent = { type: "text", text: string }; export type ServiceError = z.infer; + +export type SearchCommitsRequest = z.infer; +export type SearchCommitsResponse = z.infer; diff --git a/packages/shared/src/env.server.ts b/packages/shared/src/env.server.ts index b0e5066a4..4da23b3ab 100644 --- a/packages/shared/src/env.server.ts +++ b/packages/shared/src/env.server.ts @@ -1,9 +1,12 @@ +import { indexSchema } from "@sourcebot/schemas/v3/index.schema"; +import { SourcebotConfig } from "@sourcebot/schemas/v3/index.type"; import { createEnv } from "@t3-oss/env-core"; +import { Ajv } from "ajv"; +import { readFile } from 'fs/promises'; +import stripJsonComments from "strip-json-comments"; import { z } from "zod"; -import { loadConfig } from "./utils.js"; -import { tenancyModeSchema } from "./types.js"; -import { SourcebotConfig } from "@sourcebot/schemas/v3/index.type"; import { getTokenFromConfig } from 
"./crypto.js"; +import { tenancyModeSchema } from "./types.js"; // Booleans are specified as 'true' or 'false' strings. const booleanSchema = z.enum(["true", "false"]); @@ -13,6 +16,10 @@ const booleanSchema = z.enum(["true", "false"]); // @see: https://zod.dev/?id=coercion-for-primitives const numberSchema = z.coerce.number(); +const ajv = new Ajv({ + validateFormats: false, +}); + export const resolveEnvironmentVariableOverridesFromConfig = async (config: SourcebotConfig): Promise> => { if (!config.environmentOverrides) { return {}; @@ -45,6 +52,66 @@ export const resolveEnvironmentVariableOverridesFromConfig = async (config: Sour return resolved; } +export const isRemotePath = (path: string) => { + return path.startsWith('https://') || path.startsWith('http://'); +} + +export const loadConfig = async (configPath?: string): Promise => { + if (!configPath) { + throw new Error('CONFIG_PATH is required but not provided'); + } + + const configContent = await (async () => { + if (isRemotePath(configPath)) { + const response = await fetch(configPath); + if (!response.ok) { + throw new Error(`Failed to fetch config file ${configPath}: ${response.statusText}`); + } + return response.text(); + } else { + // Retry logic for handling race conditions with mounted volumes + const maxAttempts = 5; + const retryDelayMs = 2000; + let lastError: Error | null = null; + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + return await readFile(configPath, { + encoding: 'utf-8', + }); + } catch (error) { + lastError = error as Error; + + // Only retry on ENOENT errors (file not found) + if ((error as NodeJS.ErrnoException)?.code !== 'ENOENT') { + throw error; // Throw immediately for non-ENOENT errors + } + + // Log warning before retry (except on the last attempt) + if (attempt < maxAttempts) { + console.warn(`Config file not found, retrying in 2s... 
(Attempt ${attempt}/${maxAttempts})`); + await new Promise(resolve => setTimeout(resolve, retryDelayMs)); + } + } + } + + // If we've exhausted all retries, throw the last ENOENT error + if (lastError) { + throw lastError; + } + + throw new Error('Failed to load config after all retry attempts'); + } + })(); + + const config = JSON.parse(stripJsonComments(configContent)) as SourcebotConfig; + const isValidConfig = ajv.validate(indexSchema, config); + if (!isValidConfig) { + throw new Error(`Config file '${configPath}' is invalid: ${ajv.errorsText(ajv.errors)}`); + } + return config; +} + // Merge process.env with environment variables resolved from config.json const runtimeEnv = await (async () => { const configPath = process.env.CONFIG_PATH; diff --git a/packages/shared/src/index.server.ts b/packages/shared/src/index.server.ts index 0bd73a089..1002400b3 100644 --- a/packages/shared/src/index.server.ts +++ b/packages/shared/src/index.server.ts @@ -20,15 +20,16 @@ export { } from "./types.js"; export { base64Decode, - loadConfig, loadJsonFile, - isRemotePath, getConfigSettings, + getRepoPath, } from "./utils.js"; export * from "./constants.js"; export { env, resolveEnvironmentVariableOverridesFromConfig, + loadConfig, + isRemotePath, } from "./env.server.js"; export { createLogger, diff --git a/packages/shared/src/utils.ts b/packages/shared/src/utils.ts index 4eb454bdb..1574e1d4f 100644 --- a/packages/shared/src/utils.ts +++ b/packages/shared/src/utils.ts @@ -1,15 +1,11 @@ -import { SourcebotConfig } from "@sourcebot/schemas/v3/index.type"; -import { indexSchema } from "@sourcebot/schemas/v3/index.schema"; import { readFile } from 'fs/promises'; import stripJsonComments from 'strip-json-comments'; -import { Ajv } from "ajv"; import { z } from "zod"; import { DEFAULT_CONFIG_SETTINGS } from "./constants.js"; import { ConfigSettings } from "./types.js"; - -const ajv = new Ajv({ - validateFormats: false, -}); +import { Repo } from "@sourcebot/db"; +import path from 
"path"; +import { env, isRemotePath, loadConfig } from "./env.server.js"; // From https://developer.mozilla.org/en-US/docs/Glossary/Base64#the_unicode_problem export const base64Decode = (base64: string): string => { @@ -17,10 +13,6 @@ export const base64Decode = (base64: string): string => { return Buffer.from(Uint8Array.from(binString, (m) => m.codePointAt(0)!).buffer).toString(); } -export const isRemotePath = (path: string) => { - return path.startsWith('https://') || path.startsWith('http://'); -} - // TODO: Merge this with config loading logic which uses AJV export const loadJsonFile = async ( filePath: string, @@ -81,61 +73,6 @@ export const loadJsonFile = async ( } } -export const loadConfig = async (configPath?: string): Promise => { - if (!configPath) { - throw new Error('CONFIG_PATH is required but not provided'); - } - - const configContent = await (async () => { - if (isRemotePath(configPath)) { - const response = await fetch(configPath); - if (!response.ok) { - throw new Error(`Failed to fetch config file ${configPath}: ${response.statusText}`); - } - return response.text(); - } else { - // Retry logic for handling race conditions with mounted volumes - const maxAttempts = 5; - const retryDelayMs = 2000; - let lastError: Error | null = null; - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - try { - return await readFile(configPath, { - encoding: 'utf-8', - }); - } catch (error) { - lastError = error as Error; - - // Only retry on ENOENT errors (file not found) - if ((error as NodeJS.ErrnoException)?.code !== 'ENOENT') { - throw error; // Throw immediately for non-ENOENT errors - } - - // Log warning before retry (except on the last attempt) - if (attempt < maxAttempts) { - console.warn(`Config file not found, retrying in 2s... 
(Attempt ${attempt}/${maxAttempts})`); - await new Promise(resolve => setTimeout(resolve, retryDelayMs)); - } - } - } - - // If we've exhausted all retries, throw the last ENOENT error - if (lastError) { - throw lastError; - } - - throw new Error('Failed to load config after all retry attempts'); - } - })(); - - const config = JSON.parse(stripJsonComments(configContent)) as SourcebotConfig; - const isValidConfig = ajv.validate(indexSchema, config); - if (!isValidConfig) { - throw new Error(`Config file '${configPath}' is invalid: ${ajv.errorsText(ajv.errors)}`); - } - return config; -} export const getConfigSettings = async (configPath?: string): Promise => { if (!configPath) { @@ -148,4 +85,23 @@ export const getConfigSettings = async (configPath?: string): Promise { + // If we are dealing with a local repository, then use that as the path. + // Mark as read-only since we aren't guaranteed to have write access to the local filesystem. + const cloneUrl = new URL(repo.cloneUrl); + if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') { + return { + path: cloneUrl.pathname, + isReadOnly: true, + } + } + + const reposPath = path.join(env.DATA_CACHE_DIR, 'repos'); + + return { + path: path.join(reposPath, repo.id.toString()), + isReadOnly: false, + } } \ No newline at end of file diff --git a/packages/web/src/app/api/(server)/commits/route.ts b/packages/web/src/app/api/(server)/commits/route.ts new file mode 100644 index 000000000..941ca8605 --- /dev/null +++ b/packages/web/src/app/api/(server)/commits/route.ts @@ -0,0 +1,24 @@ +import { searchCommits } from "@/features/search/gitApi"; +import { serviceErrorResponse, schemaValidationError } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { NextRequest } from "next/server"; +import { searchCommitsRequestSchema } from "@/features/search/types"; + +export async function POST(request: NextRequest): Promise { + const body = await request.json(); + const 
parsed = await searchCommitsRequestSchema.safeParseAsync(body); + + if (!parsed.success) { + return serviceErrorResponse( + schemaValidationError(parsed.error) + ); + } + + const result = await searchCommits(parsed.data); + + if (isServiceError(result)) { + return serviceErrorResponse(result); + } + + return Response.json(result); +} diff --git a/packages/web/src/features/fileTree/api.ts b/packages/web/src/features/fileTree/api.ts index 0e9ea5111..6b5ab574f 100644 --- a/packages/web/src/features/fileTree/api.ts +++ b/packages/web/src/features/fileTree/api.ts @@ -1,12 +1,9 @@ import 'server-only'; import { sew } from '@/actions'; -import { env } from '@sourcebot/shared'; import { notFound, unexpectedError } from '@/lib/serviceError'; import { withOptionalAuthV2 } from '@/withAuthV2'; -import { Repo } from '@sourcebot/db'; -import { createLogger } from '@sourcebot/shared'; -import path from 'path'; +import { createLogger, getRepoPath } from '@sourcebot/shared'; import { simpleGit } from 'simple-git'; import { FileTreeItem } from './types'; import { buildFileTree, isPathValid, normalizePath } from './utils'; @@ -195,24 +192,3 @@ export const getFiles = async (params: { repoName: string, revisionName: string })); -// @todo: this is duplicated from the `getRepoPath` function in the -// backend's `utils.ts` file. Eventually we should move this to a shared -// package. -const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => { - // If we are dealing with a local repository, then use that as the path. - // Mark as read-only since we aren't guaranteed to have write access to the local filesystem. 
- const cloneUrl = new URL(repo.cloneUrl); - if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') { - return { - path: cloneUrl.pathname, - isReadOnly: true, - } - } - - const reposPath = path.join(env.DATA_CACHE_DIR, 'repos'); - - return { - path: path.join(reposPath, repo.id.toString()), - isReadOnly: false, - } -} diff --git a/packages/web/src/features/search/dateUtils.test.ts b/packages/web/src/features/search/dateUtils.test.ts new file mode 100644 index 000000000..a64a83c14 --- /dev/null +++ b/packages/web/src/features/search/dateUtils.test.ts @@ -0,0 +1,379 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { + parseTemporalDate, + validateDateRange, + toDbDate, + toGitDate, +} from './dateUtils'; + +describe('dateUtils', () => { + // Mock the current time for consistent testing + const MOCK_NOW = new Date('2024-06-15T12:00:00.000Z'); + + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(MOCK_NOW); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe('parseTemporalDate', () => { + describe('ISO 8601 dates', () => { + it('should parse ISO date (YYYY-MM-DD)', () => { + const result = parseTemporalDate('2024-01-01'); + expect(result).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should parse ISO datetime with timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00.000Z'); + }); + + it('should parse ISO datetime without timezone', () => { + const result = parseTemporalDate('2024-01-01T12:30:00'); + expect(result).toBeDefined(); + expect(result).toContain('2024-01-01'); + }); + + it('should return undefined for undefined input', () => { + const result = parseTemporalDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = parseTemporalDate(''); + expect(result).toBeUndefined(); + }); + }); + + describe('relative dates - 
yesterday', () => { + it('should parse "yesterday"', () => { + const result = parseTemporalDate('yesterday'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "YESTERDAY" (case insensitive)', () => { + const result = parseTemporalDate('YESTERDAY'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + }); + + describe('relative dates - N units ago', () => { + it('should parse "1 day ago"', () => { + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should parse "30 days ago"', () => { + const result = parseTemporalDate('30 days ago'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should parse "1 week ago"', () => { + const result = parseTemporalDate('1 week ago'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "2 weeks ago"', () => { + const result = parseTemporalDate('2 weeks ago'); + expect(result).toBe('2024-06-01T12:00:00.000Z'); + }); + + it('should parse "1 month ago"', () => { + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "3 months ago"', () => { + const result = parseTemporalDate('3 months ago'); + expect(result).toBe('2024-03-15T12:00:00.000Z'); + }); + + it('should parse "1 year ago"', () => { + const result = parseTemporalDate('1 year ago'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should parse "2 hours ago"', () => { + const result = parseTemporalDate('2 hours ago'); + expect(result).toBe('2024-06-15T10:00:00.000Z'); + }); + + it('should parse "30 minutes ago"', () => { + const result = parseTemporalDate('30 minutes ago'); + expect(result).toBe('2024-06-15T11:30:00.000Z'); + }); + + it('should parse "45 seconds ago"', () => { + const result = parseTemporalDate('45 seconds ago'); + expect(result).toBe('2024-06-15T11:59:15.000Z'); + }); + + it('should handle singular "day" without "s"', () => { + const 
result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should be case insensitive', () => { + const result = parseTemporalDate('30 DAYS AGO'); + expect(result).toBe('2024-05-16T12:00:00.000Z'); + }); + }); + + describe('relative dates - last unit', () => { + it('should parse "last week"', () => { + const result = parseTemporalDate('last week'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + + it('should parse "last month"', () => { + const result = parseTemporalDate('last month'); + expect(result).toBe('2024-05-15T12:00:00.000Z'); + }); + + it('should parse "last year"', () => { + const result = parseTemporalDate('last year'); + expect(result).toBe('2023-06-15T12:00:00.000Z'); + }); + + it('should be case insensitive', () => { + const result = parseTemporalDate('LAST WEEK'); + expect(result).toBe('2024-06-08T12:00:00.000Z'); + }); + }); + + describe('invalid or unknown formats', () => { + it('should return original string for unrecognized format', () => { + const result = parseTemporalDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should return original string for git-specific formats', () => { + // Git understands these but our parser doesn't convert them + const result = parseTemporalDate('2 weeks 3 days ago'); + expect(result).toBe('2 weeks 3 days ago'); + }); + }); + }); + + describe('validateDateRange', () => { + it('should return null for valid date range', () => { + const error = validateDateRange('2024-01-01', '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when only since is provided', () => { + const error = validateDateRange('2024-01-01', undefined); + expect(error).toBeNull(); + }); + + it('should return null when only until is provided', () => { + const error = validateDateRange(undefined, '2024-12-31'); + expect(error).toBeNull(); + }); + + it('should return null when both are undefined', () => { + const error = 
validateDateRange(undefined, undefined); + expect(error).toBeNull(); + }); + + it('should return error when since > until', () => { + const error = validateDateRange('2024-12-31', '2024-01-01'); + expect(error).toContain('since'); + expect(error).toContain('until'); + expect(error).toContain('before'); + }); + + it('should validate relative dates', () => { + const error = validateDateRange('30 days ago', '1 day ago'); + expect(error).toBeNull(); + }); + + it('should return error for invalid relative date range', () => { + const error = validateDateRange('1 day ago', '30 days ago'); + expect(error).toContain('since'); + expect(error).toContain('until'); + }); + + it('should handle mixed ISO and relative dates', () => { + const error = validateDateRange('2024-01-01', '30 days ago'); + expect(error).toBeNull(); // 2024-01-01 is before 30 days ago + }); + + it('should return null for same date', () => { + const error = validateDateRange('2024-06-15', '2024-06-15'); + expect(error).toBeNull(); + }); + }); + + describe('toDbDate', () => { + it('should convert ISO date to Date object', () => { + const result = toDbDate('2024-01-01'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-01-01T00:00:00.000Z'); + }); + + it('should convert relative date to Date object', () => { + const result = toDbDate('30 days ago'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-05-16T12:00:00.000Z'); + }); + + it('should return undefined for undefined input', () => { + const result = toDbDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should return undefined for empty string', () => { + const result = toDbDate(''); + expect(result).toBeUndefined(); + }); + + it('should handle "yesterday"', () => { + const result = toDbDate('yesterday'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-14T12:00:00.000Z'); + }); + + it('should handle "last week"', () => { + const result = 
toDbDate('last week'); + expect(result).toBeInstanceOf(Date); + expect(result?.toISOString()).toBe('2024-06-08T12:00:00.000Z'); + }); + }); + + describe('toGitDate', () => { + it('should preserve ISO date format', () => { + const result = toGitDate('2024-01-01'); + expect(result).toBe('2024-01-01'); + }); + + it('should preserve ISO datetime format', () => { + const result = toGitDate('2024-01-01T12:30:00Z'); + expect(result).toBe('2024-01-01T12:30:00Z'); + }); + + it('should preserve "N days ago" format', () => { + const result = toGitDate('30 days ago'); + expect(result).toBe('30 days ago'); + }); + + it('should preserve "yesterday" format', () => { + const result = toGitDate('yesterday'); + expect(result).toBe('yesterday'); + }); + + it('should preserve "last week" format', () => { + const result = toGitDate('last week'); + expect(result).toBe('last week'); + }); + + it('should preserve "last month" format', () => { + const result = toGitDate('last month'); + expect(result).toBe('last month'); + }); + + it('should preserve "last year" format', () => { + const result = toGitDate('last year'); + expect(result).toBe('last year'); + }); + + it('should return undefined for undefined input', () => { + const result = toGitDate(undefined); + expect(result).toBeUndefined(); + }); + + it('should pass through unrecognized format unchanged', () => { + // For formats git doesn't natively understand, pass through to git + const result = toGitDate('some random string'); + expect(result).toBe('some random string'); + }); + + it('should preserve relative time formats', () => { + const result = toGitDate('2 weeks ago'); + expect(result).toBe('2 weeks ago'); + }); + }); + + describe('edge cases', () => { + it('should handle dates at month boundaries', () => { + vi.setSystemTime(new Date('2024-03-31T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + // JavaScript Date handles month rollover + expect(result).toBeDefined(); + }); + + it('should handle dates at 
year boundaries', () => { + vi.setSystemTime(new Date('2024-01-15T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2023-12-15T12:00:00.000Z'); + }); + + it('should handle leap year February', () => { + vi.setSystemTime(new Date('2024-03-01T12:00:00.000Z')); + const result = parseTemporalDate('1 month ago'); + expect(result).toBe('2024-02-01T12:00:00.000Z'); + }); + + it('should handle midnight times', () => { + vi.setSystemTime(new Date('2024-06-15T00:00:00.000Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T00:00:00.000Z'); + }); + + it('should handle end of day times', () => { + vi.setSystemTime(new Date('2024-06-15T23:59:59.999Z')); + const result = parseTemporalDate('1 day ago'); + expect(result).toBe('2024-06-14T23:59:59.999Z'); + }); + }); + + describe('integration scenarios', () => { + it('should correctly validate a typical user query range', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const parsedSince = parseTemporalDate(since); + const parsedUntil = parseTemporalDate(until); + const validationError = validateDateRange(since, until); + + expect(parsedSince).toBe('2024-05-16T12:00:00.000Z'); + expect(parsedUntil).toBe('2024-06-14T12:00:00.000Z'); + expect(validationError).toBeNull(); + }); + + it('should correctly convert for database queries', () => { + const since = '7 days ago'; + const until = 'yesterday'; + + const dbSince = toDbDate(since); + const dbUntil = toDbDate(until); + + expect(dbSince).toBeInstanceOf(Date); + expect(dbUntil).toBeInstanceOf(Date); + expect(dbSince!.getTime()).toBeLessThan(dbUntil!.getTime()); + }); + + it('should correctly preserve for git commands', () => { + const since = '30 days ago'; + const until = 'yesterday'; + + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + // Git natively understands these, so they're preserved + expect(gitSince).toBe('30 days ago'); + 
expect(gitUntil).toBe('yesterday');
+        });
+
+        it('should handle mixed ISO and relative dates in range validation', () => {
+            const since = '2024-01-01';
+            const until = '7 days ago';
+
+            const validationError = validateDateRange(since, until);
+            expect(validationError).toBeNull();
+        });
+    });
+});
diff --git a/packages/web/src/features/search/dateUtils.ts b/packages/web/src/features/search/dateUtils.ts
new file mode 100644
index 000000000..f28e9a5bd
--- /dev/null
+++ b/packages/web/src/features/search/dateUtils.ts
@@ -0,0 +1,190 @@
+/**
+ * Utilities for parsing and validating date parameters for temporal queries.
+ * Supports both absolute (ISO 8601) and relative date formats.
+ */
+
+/**
+ * Compute the instant that lies `amount` units before the current time.
+ *
+ * The UTC accessors of `Date` are used so the result does not depend on the
+ * time zone or DST rules of the machine running this code. Overflowing
+ * day-of-month values roll over the way `Date` normally does (one month before
+ * March 31st lands in early March, because February 31st does not exist).
+ *
+ * @param amount - How many units to go back
+ * @param unit - Singular, lower-case unit name ("second" through "year"); any other value leaves the time unchanged
+ * @returns The shifted Date, which is invalid when the shift exceeds the range Date can represent
+ */
+function shiftNowBackUtc(amount: number, unit: string): Date {
+    const date = new Date();
+
+    switch (unit) {
+        case 'second':
+            date.setUTCSeconds(date.getUTCSeconds() - amount);
+            break;
+        case 'minute':
+            date.setUTCMinutes(date.getUTCMinutes() - amount);
+            break;
+        case 'hour':
+            date.setUTCHours(date.getUTCHours() - amount);
+            break;
+        case 'day':
+            date.setUTCDate(date.getUTCDate() - amount);
+            break;
+        case 'week':
+            date.setUTCDate(date.getUTCDate() - (amount * 7));
+            break;
+        case 'month':
+            date.setUTCMonth(date.getUTCMonth() - amount);
+            break;
+        case 'year':
+            date.setUTCFullYear(date.getUTCFullYear() - amount);
+            break;
+    }
+
+    return date;
+}
+
+/**
+ * Parse a date string that can be either:
+ * - ISO 8601 format (e.g., "2024-01-01", "2024-01-01T12:00:00Z")
+ * - Relative format (e.g., "30 days ago", "1 week ago", "yesterday", "last week")
+ *
+ * Relative formats are resolved against the current time using UTC arithmetic.
+ *
+ * @param dateStr - The date string to parse
+ * @returns ISO 8601 string if successfully parsed, original string if not parseable (to allow git to try), or undefined if input is falsy
+ *
+ * @example
+ * parseTemporalDate('2024-01-01') // '2024-01-01T00:00:00.000Z'
+ * parseTemporalDate('30 days ago') // Calculates and returns ISO string
+ * parseTemporalDate('yesterday') // Yesterday's date as ISO string
+ * parseTemporalDate('some-git-format') // 'some-git-format' (passed through)
+ * parseTemporalDate(undefined) // undefined
+ */
+export function parseTemporalDate(dateStr: string | undefined): string | undefined {
+    if (!dateStr) {
+        return undefined;
+    }
+
+    // Try parsing as ISO date first
+    const isoDate = new Date(dateStr);
+    if (!isNaN(isoDate.getTime())) {
+        return isoDate.toISOString();
+    }
+
+    // Parse relative dates (Git-compatible format)
+    // Git accepts these natively, but we normalize to ISO for consistency
+    const lowerStr = dateStr.toLowerCase().trim();
+
+    let relativeDate: Date | undefined;
+
+    // Handle "N <unit>s ago" format
+    const matchRelative = lowerStr.match(/^(\d+)\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i);
+    // Handle "last <unit>" format
+    const matchLast = lowerStr.match(/^last\s+(week|month|year)$/i);
+
+    if (lowerStr === 'yesterday') {
+        relativeDate = shiftNowBackUtc(1, 'day');
+    } else if (matchRelative) {
+        relativeDate = shiftNowBackUtc(parseInt(matchRelative[1], 10), matchRelative[2]);
+    } else if (matchLast) {
+        // "last week" / "last month" / "last year" mean exactly one unit back
+        relativeDate = shiftNowBackUtc(1, matchLast[1]);
+    }
+
+    // A huge amount (e.g. "99999999999 days ago") overflows Date, and toISOString()
+    // throws on an invalid Date, so such input is treated as unparseable instead.
+    if (relativeDate && !isNaN(relativeDate.getTime())) {
+        return relativeDate.toISOString();
+    }
+
+    // If we can't parse it, return the original string
+    // This allows git log to try parsing it with its own logic
+    return dateStr;
+}
+
+/**
+ * Validate that a date range is consistent (since < until).
+ *
+ * @param since - Start date (inclusive); the check is skipped when it is missing or cannot be parsed
+ * @param until - End date (inclusive); the check is skipped when it is missing or cannot be parsed
+ * @returns An error message when `since` is later than `until`, null otherwise
+ */
+export function validateDateRange(since: string | undefined, until: string | undefined): string | null {
+    // Nothing to compare unless both endpoints were supplied
+    if (!(since && until)) {
+        return null;
+    }
+
+    // Normalize an endpoint to epoch milliseconds; NaN marks one that cannot be interpreted
+    const toEpochMs = (value: string): number => {
+        const normalized = parseTemporalDate(value);
+        return normalized ? new Date(normalized).getTime() : NaN;
+    };
+
+    const sinceMs = toEpochMs(since);
+    const untilMs = toEpochMs(until);
+
+    // Endpoints that do not resolve to a timestamp are not range-checked here
+    if (isNaN(sinceMs) || isNaN(untilMs)) {
+        return null;
+    }
+
+    // Identical endpoints are allowed; only a start after the end is rejected
+    return sinceMs <= untilMs
+        ? null
+        : `Invalid date range: 'since' (${since}) must be before 'until' (${until})`;
+}
+
+/**
+ * Turn a date string into a Date for use in Prisma database queries.
+ * Accepts the same absolute and relative formats as parseTemporalDate.
+ *
+ * @param dateStr - The date string to convert
+ * @returns A valid Date, or undefined when the input is empty or cannot be parsed
+ */
+export function toDbDate(dateStr: string | undefined): Date | undefined {
+    // parseTemporalDate yields undefined for empty input and echoes back
+    // any string it cannot interpret.
+    const normalized = dateStr ? parseTemporalDate(dateStr) : undefined;
+    if (!normalized) {
+        return undefined;
+    }
+
+    // An echoed-back, unparseable string produces an invalid Date, which is
+    // reported as undefined instead of being returned to the caller.
+    const candidate = new Date(normalized);
+    return isNaN(candidate.getTime()) ? undefined : candidate;
+}
+
+/**
+ * Convert a date to a format suitable for git log commands.
+ * Git accepts relative formats directly, so we preserve them when possible.
+ *
+ * @param dateStr - The date string to convert
+ * @returns The input unchanged when it matches a format listed below, otherwise the result of parseTemporalDate; undefined for empty input
+ */
+export function toGitDate(dateStr: string | undefined): string | undefined {
+    if (!dateStr) {
+        return undefined;
+    }
+
+    // Formats git understands natively; these are handed over untouched
+    const gitNativeFormats: RegExp[] = [
+        /^\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago$/i,
+        /^yesterday$/i,
+        /^last\s+(week|month|year)$/i,
+        /^\d{4}-\d{2}-\d{2}$/, // ISO date
+        /^\d{4}-\d{2}-\d{2}T/, // ISO datetime
+    ];
+
+    const isGitNative = gitNativeFormats.some((pattern) => pattern.test(dateStr));
+
+    // Anything else goes through parseTemporalDate, which converts what it
+    // recognizes to ISO 8601 and returns other strings unchanged.
+    if (!isGitNative) {
+        return parseTemporalDate(dateStr);
+    }
+    return dateStr;
+}
diff --git a/packages/web/src/features/search/gitApi.test.ts b/packages/web/src/features/search/gitApi.test.ts
new file mode 100644
index 000000000..ca5ac403e
--- /dev/null
+++ b/packages/web/src/features/search/gitApi.test.ts
@@ -0,0 +1,560 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { searchCommits } from './gitApi';
+import * as dateUtils from './dateUtils';
+
+// Mock dependencies
+vi.mock('simple-git');
+vi.mock('fs');
+vi.mock('@sourcebot/shared', () => ({
+    REPOS_CACHE_DIR: '/mock/cache/dir',
+    getRepoPath: (repo: { id: number }) => ({
+        path: `/mock/cache/dir/${repo.id}`,
+    }),
+}));
+vi.mock('@/lib/serviceError', () => ({
+    unexpectedError: (message: string) => ({
+        errorCode: 'UNEXPECTED_ERROR',
+        message,
+    }),
+    notFound: (message: string) => ({
+        errorCode: 'NOT_FOUND',
+        message,
+    }),
+}));
+vi.mock('@/actions', () => ({
+    sew: async <T>(fn: () => Promise<T> | T): Promise<T> => {
+        try {
+            return await fn();
+        } catch (error) {
+            // Mock sew to convert thrown errors to ServiceError
+            return {
+                errorCode: 'UNEXPECTED_ERROR',
+                message: error instanceof Error ?
error.message : String(error),
+            } as T;
+        }
+    },
+}));
+// Create a mock findFirst function that we can configure per-test
+const mockFindFirst = vi.fn();
+
+vi.mock('@/withAuthV2', () => ({
+    withOptionalAuthV2: async <T>(fn: (args: { org: { id: number; name: string }; prisma: unknown }) => Promise<T>): Promise<T> => {
+        // Mock withOptionalAuthV2 to provide org and prisma context
+        const mockOrg = { id: 1, name: 'test-org' };
+        const mockPrisma = {
+            repo: {
+                findFirst: mockFindFirst,
+            },
+        };
+        return await fn({ org: mockOrg, prisma: mockPrisma });
+    },
+}));
+vi.mock('@/lib/utils', () => ({
+    isServiceError: (obj: unknown): obj is { errorCode: string } => {
+        return obj !== null && typeof obj === 'object' && 'errorCode' in obj;
+    },
+}));
+
+// Import mocked modules
+import { simpleGit } from 'simple-git';
+import { existsSync } from 'fs';
+
+describe('searchCommits', () => {
+    const mockGitLog = vi.fn();
+    const mockCwd = vi.fn();
+    const mockSimpleGit = simpleGit as unknown as ReturnType<typeof vi.fn>;
+    const mockExistsSync = existsSync as unknown as ReturnType<typeof vi.fn>;
+
+    beforeEach(() => {
+        vi.clearAllMocks();
+
+        // Reset mockFindFirst before each test
+        mockFindFirst.mockReset();
+
+        // Setup default mocks
+        mockExistsSync.mockReturnValue(true);
+        mockCwd.mockReturnValue({
+            log: mockGitLog,
+        });
+        mockSimpleGit.mockReturnValue({
+            cwd: mockCwd,
+        });
+
+        // Setup default repo mock
+        mockFindFirst.mockResolvedValue({ id: 123, name: 'github.com/test/repo' });
+    });
+
+    describe('repository validation', () => {
+        it('should return error when repository is not found in database', async () => {
+            mockFindFirst.mockResolvedValue(null);
+
+            const result = await searchCommits({
+                repository: 'github.com/nonexistent/repo',
+            });
+
+            expect(result).toMatchObject({
+                errorCode: 'NOT_FOUND',
+                message: expect.stringContaining('Repository "github.com/nonexistent/repo" not found'),
+            });
+        });
+
+        it('should query database with correct repository name', async () => {
+
mockFindFirst.mockResolvedValue({ id: 456, name: 'github.com/test/repo' }); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/test/repo', + orgId: 1, + }, + }); + }); + }); + + describe('date range validation', () => { + it('should validate date range and return error for invalid range', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue( + 'Invalid date range: since must be before until' + ); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + since: '2024-12-31', + until: '2024-01-01', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: 'Invalid date range: since must be before until', + }); + }); + + it('should proceed when date range is valid', async () => { + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + since: '2024-01-01', + until: '2024-12-31', + }); + + expect(Array.isArray(result)).toBe(true); + }); + }); + + describe('date parsing', () => { + it('should parse dates using toGitDate', async () => { + const toGitDateSpy = vi.spyOn(dateUtils, 'toGitDate'); + toGitDateSpy.mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repository: 'github.com/test/repo', + since: '30 days ago', + until: 'yesterday', + }); + + expect(toGitDateSpy).toHaveBeenCalledWith('30 days ago'); + expect(toGitDateSpy).toHaveBeenCalledWith('yesterday'); + }); + + it('should pass parsed dates to git log', async () => { + vi.spyOn(dateUtils, 'toGitDate') + .mockReturnValueOnce('2024-01-01') + .mockReturnValueOnce('2024-12-31'); + mockGitLog.mockResolvedValue({ all: [] }); + + await 
searchCommits({ + repository: 'github.com/test/repo', + since: '30 days ago', + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '2024-01-01', + '--until': '2024-12-31', + }) + ); + }); + }); + + describe('git log options', () => { + beforeEach(() => { + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: [] }); + }); + + it('should set default maxCount', async () => { + await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 50, + }) + ); + }); + + it('should use custom maxCount', async () => { + await searchCommits({ + repository: 'github.com/test/repo', + maxCount: 100, + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + maxCount: 100, + }) + ); + }); + + it('should add --since when since is provided', async () => { + await searchCommits({ + repository: 'github.com/test/repo', + since: '30 days ago', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--since': '30 days ago', + }) + ); + }); + + it('should add --until when until is provided', async () => { + await searchCommits({ + repository: 'github.com/test/repo', + until: 'yesterday', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--until': 'yesterday', + }) + ); + }); + + it('should add --author when author is provided', async () => { + await searchCommits({ + repository: 'github.com/test/repo', + author: 'john@example.com', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + '--author': 'john@example.com', + }) + ); + }); + + it('should add --grep and --regexp-ignore-case when query is provided', async () => { + await searchCommits({ + repository: 'github.com/test/repo', + query: 'fix bug', + }); + + expect(mockGitLog).toHaveBeenCalledWith( + expect.objectContaining({ + 
'--grep': 'fix bug', + '--regexp-ignore-case': null, + }) + ); + }); + + it('should combine all options', async () => { + await searchCommits({ + repository: 'github.com/test/repo', + query: 'feature', + since: '2024-01-01', + until: '2024-12-31', + author: 'jane@example.com', + maxCount: 25, + }); + + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 25, + '--since': '2024-01-01', + '--until': '2024-12-31', + '--author': 'jane@example.com', + '--grep': 'feature', + '--regexp-ignore-case': null, + }); + }); + }); + + describe('successful responses', () => { + it('should return commit array from git log', async () => { + const mockCommits = [ + { + hash: 'abc123', + date: '2024-06-15', + message: 'feat: add feature', + refs: 'HEAD -> main', + body: '', + author_name: 'John Doe', + author_email: 'john@example.com', + }, + { + hash: 'def456', + date: '2024-06-14', + message: 'fix: bug fix', + refs: '', + body: '', + author_name: 'Jane Smith', + author_email: 'jane@example.com', + }, + ]; + + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(result).toEqual(mockCommits); + }); + + it('should return empty array when no commits match', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + query: 'nonexistent', + }); + + expect(result).toEqual([]); + }); + }); + + describe('error handling', () => { + it('should return error for "not a git repository"', async () => { + mockGitLog.mockRejectedValue(new Error('not a git repository')); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('not a valid git repository'), + }); + }); + + it('should return error for "ambiguous argument"', async () => { + mockGitLog.mockRejectedValue(new Error('ambiguous 
argument')); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + since: 'invalid-date', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Invalid git reference or date format'), + }); + }); + + it('should return error for timeout', async () => { + mockGitLog.mockRejectedValue(new Error('timeout exceeded')); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('timed out'), + }); + }); + + it('should return ServiceError for other Error instances', async () => { + mockGitLog.mockRejectedValue(new Error('some other error')); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository github.com/test/repo'), + }); + }); + + it('should return ServiceError for non-Error exceptions', async () => { + mockGitLog.mockRejectedValue('string error'); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'UNEXPECTED_ERROR', + message: expect.stringContaining('Failed to search commits in repository github.com/test/repo'), + }); + }); + }); + + describe('git client configuration', () => { + it('should set working directory using cwd', async () => { + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repository: 'github.com/test/repo', + }); + + expect(mockCwd).toHaveBeenCalledWith('/mock/cache/dir/123'); + }); + + it('should use correct repository path from database', async () => { + mockFindFirst.mockResolvedValue({ id: 456, name: 'github.com/other/repo' }); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repository: 'github.com/other/repo', + }); + + 
expect(mockCwd).toHaveBeenCalledWith('/mock/cache/dir/456'); + }); + }); + + describe('integration scenarios', () => { + it('should handle a typical commit search with filters', async () => { + const mockCommits = [ + { + hash: 'abc123', + date: '2024-06-10T14:30:00Z', + message: 'fix: resolve authentication bug', + refs: 'HEAD -> main', + body: 'Fixed issue with JWT token validation', + author_name: 'Security Team', + author_email: 'security@example.com', + }, + ]; + + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repository: 'github.com/test/repo', + query: 'authentication', + since: '30 days ago', + until: 'yesterday', + author: 'security', + maxCount: 20, + }); + + expect(result).toEqual(mockCommits); + expect(mockGitLog).toHaveBeenCalledWith({ + maxCount: 20, + '--since': '30 days ago', + '--until': 'yesterday', + '--author': 'security', + '--grep': 'authentication', + '--regexp-ignore-case': null, + }); + }); + + it('should handle repository not found in database', async () => { + mockFindFirst.mockResolvedValue(null); + + const result = await searchCommits({ + repository: 'github.com/nonexistent/repo', + query: 'feature', + }); + + expect(result).toMatchObject({ + errorCode: 'NOT_FOUND', + }); + expect(result).toHaveProperty('message'); + const message = (result as { message: string }).message; + expect(message).toContain('github.com/nonexistent/repo'); + expect(message).toContain('not found'); + }); + }); + + describe('repository lookup', () => { + beforeEach(() => { + // Reset mockFindFirst before each test in this suite + mockFindFirst.mockReset(); + }); + + it('should query database for repository by name', async () => { + mockFindFirst.mockResolvedValue({ id: 456, name: 'github.com/owner/repo' }); + mockGitLog.mockResolvedValue({ all: [] }); + + const result = await 
searchCommits({ + repository: 'github.com/owner/repo', + }); + + expect(Array.isArray(result)).toBe(true); + expect(mockFindFirst).toHaveBeenCalledWith({ + where: { + name: 'github.com/owner/repo', + orgId: 1, + }, + }); + }); + + it('should return NOT_FOUND error when repository is not found', async () => { + mockFindFirst.mockResolvedValue(null); + + const result = await searchCommits({ + repository: 'github.com/nonexistent/repo', + }); + + expect(result).toMatchObject({ + errorCode: 'NOT_FOUND', + message: expect.stringContaining('Repository "github.com/nonexistent/repo" not found'), + }); + }); + + it('should use repository ID from database to determine path', async () => { + mockFindFirst.mockResolvedValue({ id: 789, name: 'github.com/example/project' }); + mockGitLog.mockResolvedValue({ all: [] }); + + await searchCommits({ + repository: 'github.com/example/project', + }); + + expect(mockCwd).toHaveBeenCalledWith('/mock/cache/dir/789'); + }); + + it('should work end-to-end with repository lookup', async () => { + const mockCommits = [ + { + hash: 'xyz789', + date: '2024-06-20T10:00:00Z', + message: 'feat: new feature', + refs: 'main', + body: 'Added new functionality', + author_name: 'Developer', + author_email: 'dev@example.com', + }, + ]; + + mockFindFirst.mockResolvedValue({ id: 555, name: 'github.com/test/repository' }); + vi.spyOn(dateUtils, 'validateDateRange').mockReturnValue(null); + vi.spyOn(dateUtils, 'toGitDate').mockImplementation((date) => date); + mockGitLog.mockResolvedValue({ all: mockCommits }); + + const result = await searchCommits({ + repository: 'github.com/test/repository', + query: 'feature', + since: '7 days ago', + author: 'Developer', + }); + + expect(result).toEqual(mockCommits); + expect(mockCwd).toHaveBeenCalledWith('/mock/cache/dir/555'); + }); + }); +}); diff --git a/packages/web/src/features/search/gitApi.ts b/packages/web/src/features/search/gitApi.ts new file mode 100644 index 000000000..d1274ba6a --- /dev/null +++ 
b/packages/web/src/features/search/gitApi.ts @@ -0,0 +1,120 @@ +import { sew } from '@/actions'; +import { notFound, ServiceError, unexpectedError } from '@/lib/serviceError'; +import { withOptionalAuthV2 } from '@/withAuthV2'; +import { getRepoPath } from '@sourcebot/shared'; +import { simpleGit } from 'simple-git'; +import { toGitDate, validateDateRange } from './dateUtils'; +import { SearchCommitsRequest } from './types'; + +export interface Commit { + hash: string; + date: string; + message: string; + refs: string; + body: string; + author_name: string; + author_email: string; +} + +/** + * Search commits in a repository using git log. + * + * **Date Formats**: Supports both ISO 8601 dates and relative formats + * (e.g., "30 days ago", "last week", "yesterday"). Git natively handles + * these formats in the --since and --until flags. + */ +export const searchCommits = async ({ + repository, + query, + since, + until, + author, + maxCount = 50, +}: SearchCommitsRequest): Promise => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + const repo = await prisma.repo.findFirst({ + where: { + name: repository, + orgId: org.id, + }, + }); + + if (!repo) { + return notFound(`Repository "${repository}" not found.`); + } + + const { path: repoPath } = getRepoPath(repo); + + // Validate date range if both since and until are provided + const dateRangeError = validateDateRange(since, until); + if (dateRangeError) { + return unexpectedError(dateRangeError); + } + + // Parse dates to git-compatible format + const gitSince = toGitDate(since); + const gitUntil = toGitDate(until); + + const git = simpleGit().cwd(repoPath); + + try { + const logOptions: Record = { + maxCount, + }; + + if (gitSince) { + logOptions['--since'] = gitSince; + } + + if (gitUntil) { + logOptions['--until'] = gitUntil; + } + + if (author) { + logOptions['--author'] = author; + } + + if (query) { + logOptions['--grep'] = query; + logOptions['--regexp-ignore-case'] = null; // Case insensitive + 
} + + const log = await git.log(logOptions); + return log.all as unknown as Commit[]; + } catch (error: unknown) { + // Provide detailed error messages for common git errors + const errorMessage = error instanceof Error ? error.message : String(error); + + if (errorMessage.includes('not a git repository')) { + return unexpectedError( + `Invalid git repository at ${repoPath}. ` + + `The directory exists but is not a valid git repository.` + ); + } + + if (errorMessage.includes('ambiguous argument')) { + return unexpectedError( + `Invalid git reference or date format. ` + + `Please check your date parameters: since="${since}", until="${until}"` + ); + } + + if (errorMessage.includes('timeout')) { + return unexpectedError( + `Git operation timed out after 30 seconds for repository ${repository}. ` + + `The repository may be too large or the git operation is taking too long.` + ); + } + + // Generic error fallback + if (error instanceof Error) { + throw new Error( + `Failed to search commits in repository ${repository}: ${error.message}` + ); + } else { + throw new Error( + `Failed to search commits in repository ${repository}: ${errorMessage}` + ); + } + } + })); diff --git a/packages/web/src/features/search/types.ts b/packages/web/src/features/search/types.ts index c90cfdd14..65a32a7a3 100644 --- a/packages/web/src/features/search/types.ts +++ b/packages/web/src/features/search/types.ts @@ -163,3 +163,13 @@ export const fileSourceResponseSchema = z.object({ webUrl: z.string().optional(), }); export type FileSourceResponse = z.infer; + +export const searchCommitsRequestSchema = z.object({ + repository: z.string(), + query: z.string().optional(), + since: z.string().optional(), + until: z.string().optional(), + author: z.string().optional(), + maxCount: z.number().int().positive().max(500).optional(), +}); +export type SearchCommitsRequest = z.infer;