POST /v1/findsimilar
Discover content semantically related to a specific document. Find related discussions, similar implementations, or connected topics.
Endpoint
POST https://lightfast.ai/api/v1/findsimilar
Authentication
See Authentication for details.
```bash
Authorization: Bearer YOUR_API_KEY
X-Workspace-ID: ws_abc123
```
Request Body
```typescript
{
  id?: string               // Content ID to find similar items for
  url?: string              // URL alternative to ID
  limit?: number            // Results to return (1-50, default: 10)
  threshold?: number        // Minimum similarity (0-1, default: 0.5)
  sameSourceOnly?: boolean  // Only same source type (default: false)
  excludeIds?: string[]     // IDs to exclude from results
  filters?: {               // Same filters as search
    sourceTypes?: string[]
    observationTypes?: string[]
    actorNames?: string[]
    dateRange?: { start?: string; end?: string }
  }
}
```
Note: Either id or url must be provided, but not both.
Response
```typescript
{
  source: {
    id: string                 // Source content ID
    title: string              // Source title
    type: string               // Source content type
    cluster?: {                // Cluster info if available
      topic: string | null
      memberCount: number
    }
  }
  similar: Array<{
    id: string                 // Similar content ID
    title: string              // Content title
    url: string                // Link to source
    snippet?: string           // Content preview
    score: number              // Combined similarity score (0-1)
    vectorSimilarity: number   // Raw vector similarity
    entityOverlap?: number     // Entity overlap ratio (0-1)
    sameCluster: boolean       // In same cluster as source
    source: string             // Source system
    type: string               // Content type
    occurredAt?: string        // When content occurred (ISO 8601)
  }>
  meta: {
    total: number              // Total similar items found
    took: number               // Processing time in ms
    inputEmbedding: {
      found: boolean           // Embedding found in storage
      generated: boolean       // Embedding generated on-the-fly
    }
  }
  requestId: string            // Request ID for debugging
}
```
Example Request
```bash
curl -X POST https://lightfast.ai/api/v1/findsimilar \
  -H "Authorization: Bearer $LIGHTFAST_API_KEY" \
  -H "X-Workspace-ID: $LIGHTFAST_WORKSPACE_ID" \
  -H "Content-Type: application/json" \
  -d '{
    "id": "obs_abc123",
    "limit": 10,
    "threshold": 0.6,
    "filters": {
      "observationTypes": ["pull_request", "issue"]
    }
  }'
```
Example Response
```json
{
  "source": {
    "id": "obs_abc123",
    "title": "Implement OAuth 2.0 authentication",
    "type": "pull_request",
    "cluster": {
      "topic": "Authentication & Security",
      "memberCount": 15
    }
  },
  "similar": [
    {
      "id": "obs_def456",
      "title": "Add JWT refresh token rotation",
      "url": "https://github.com/org/repo/pull/456",
      "snippet": "Implements automatic refresh token rotation for improved security...",
      "score": 0.89,
      "vectorSimilarity": 0.92,
      "entityOverlap": 0.75,
      "sameCluster": true,
      "source": "github",
      "type": "pull_request",
      "occurredAt": "2024-04-10T09:20:00Z"
    },
    {
      "id": "obs_ghi789",
      "title": "Security review: Authentication tokens",
      "url": "https://github.com/org/repo/issues/789",
      "snippet": "We need to review token expiration and refresh mechanisms...",
      "score": 0.84,
      "vectorSimilarity": 0.86,
      "entityOverlap": 0.60,
      "sameCluster": true,
      "source": "github",
      "type": "issue",
      "occurredAt": "2024-03-01T14:15:00Z"
    }
  ],
  "meta": {
    "total": 27,
    "took": 89,
    "inputEmbedding": {
      "found": true,
      "generated": false
    }
  },
  "requestId": "req_abc123def456"
}
```
Understanding Similarity Scores
Score Fields
| Field | Description |
|---|---|
| `score` | Combined similarity score (0-1), weighted blend of all factors |
| `vectorSimilarity` | Raw semantic similarity from embeddings |
| `entityOverlap` | Ratio of shared entities (people, repos, topics) |
| `sameCluster` | Whether result belongs to same topic cluster |
Score Interpretation
- 0.90-1.00: Nearly identical or duplicate content
- 0.75-0.89: Highly related, same topic/concept
- 0.60-0.74: Moderately related, overlapping themes
- 0.50-0.59: Loosely related, some common elements
Use threshold to filter out results below a minimum similarity.
Filtering Results
By Similarity Threshold
Only return results above a minimum score:
```typescript
{
  "id": "obs_abc123",
  "threshold": 0.7  // Only results with score >= 0.7
}
```
Same Source Only
Restrict to same source type:
```typescript
{
  "id": "obs_abc123",
  "sameSourceOnly": true  // Only GitHub if source is GitHub
}
```
Exclude Specific IDs
Exclude certain results:
```typescript
{
  "id": "obs_abc123",
  "excludeIds": ["obs_def456", "obs_ghi789"]
}
```
By Content Type
Use the same filters as search:
```typescript
{
  "id": "obs_abc123",
  "filters": {
    "observationTypes": ["pull_request"],
    "sourceTypes": ["github"],
    "dateRange": {
      "start": "2024-01-01T00:00:00Z"
    }
  }
}
```
Use Cases
"More Like This" Feature
```typescript
async function findRelated(documentId: string) {
  const response = await fetch('https://lightfast.ai/api/v1/findsimilar', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.LIGHTFAST_API_KEY}`,
      'X-Workspace-ID': process.env.LIGHTFAST_WORKSPACE_ID!,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      id: documentId,
      limit: 5,
      threshold: 0.6
    })
  })

  const data = await response.json()

  console.log('Similar to:', data.source.title)
  data.similar.forEach(item => {
    console.log(`- ${item.title} (score: ${item.score})`)
  })
}
```
Find Duplicate Issues
```typescript
async function checkForDuplicates(issueId: string) {
  const response = await fetch('https://lightfast.ai/api/v1/findsimilar', {
    method: 'POST',
    headers: { /* ... */ },
    body: JSON.stringify({
      id: issueId,
      threshold: 0.9, // High threshold for duplicates
      filters: {
        observationTypes: ['issue']
      }
    })
  })

  const data = await response.json()

  if (data.similar.length > 0) {
    console.warn('Potential duplicates found:')
    data.similar.forEach(dup => {
      console.log(`- ${dup.title} (score: ${dup.score})`)
    })
  }
}
```
Explore Topic Clusters
```typescript
async function exploreCluster(documentId: string) {
  const response = await fetch('https://lightfast.ai/api/v1/findsimilar', {
    method: 'POST',
    headers: { /* ... */ },
    body: JSON.stringify({
      id: documentId,
      limit: 20
    })
  })

  const data = await response.json()

  // Show cluster info
  if (data.source.cluster) {
    console.log('Cluster:', data.source.cluster.topic)
    console.log('Members:', data.source.cluster.memberCount)
  }

  // Group by whether they're in same cluster
  const inCluster = data.similar.filter(s => s.sameCluster)
  const outside = data.similar.filter(s => !s.sameCluster)

  console.log('In same cluster:', inCluster.length)
  console.log('Related but outside cluster:', outside.length)
}
```
Error Handling
```typescript
const response = await fetch('https://lightfast.ai/api/v1/findsimilar', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${apiKey}`,
    'X-Workspace-ID': workspaceId,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({ id: 'obs_abc123' })
})

if (!response.ok) {
  const error = await response.json()
  console.error('Request failed:', error.error, error.message)
}
```
See Error Reference for all error codes.
Next Steps
- POST /v1/search - Search to find initial content
- POST /v1/contents - Get full content for similar items
- Authentication - API authentication details