From 701869f73b0358d0b715fef8336f020d922b905d Mon Sep 17 00:00:00 2001 From: Rudraksh Tank <24ucs052@lnmiit.ac.in> Date: Sat, 4 Jul 2026 01:16:02 +0530 Subject: [PATCH] Refactor analytics functions for improved clarity --- src/services/analytics.js | 446 +++++++++++++++++++++++++++++--------- 1 file changed, 338 insertions(+), 108 deletions(-) diff --git a/src/services/analytics.js b/src/services/analytics.js index 2ca5969..ce14364 100644 --- a/src/services/analytics.js +++ b/src/services/analytics.js @@ -1,147 +1,286 @@ // Repo Health Indicator // Activity (40%) + Issue Health (30%) + Diversity (30%) -export function computeHealthScore(repo, contributorCount = 0) { - const daysSince = (Date.now() - new Date(repo.pushed_at)) / 86_400_000 - const activity = Math.max(0, 100 - daysSince) - const total = (repo.open_issues_count || 0) + 10 - const issueHealth = Math.max(0, 100 - (repo.open_issues_count / total) * 100) - const diversity = Math.min(100, contributorCount * 10) - return Math.round(activity * 0.4 + issueHealth * 0.3 + diversity * 0.3) + +const MS_PER_DAY = 86_400_000 + +function daysSince(dateStr, now = Date.now()) { + const time = Date.parse(dateStr) + + if (!Number.isFinite(time)) return Infinity + + return Math.max(0, (now - time) / MS_PER_DAY) +} + +function safeNumber(value, fallback = 0) { + const number = Number(value) + + return Number.isFinite(number) ? number : fallback +} + +function enrichRepo(repo, orgLogin, contributors = [], now = Date.now()) { + const healthScore = computeHealthScore(repo, contributors.length, now) + const activityClassification = computeActivityClassification(repo, now) + const busFactor = computeBusFactor(contributors) + + return { + ...repo, + orgLogin, + contributors, + healthScore, + activityClassification, + busFactor, + } +} + +export function computeHealthScore(repo, contributorCount = 0, now = Date.now()) { + const days = daysSince(repo?.pushed_at, now) + + const activity = Math.max(0, 100 - days) + + const openIssues = Math.max(0, safeNumber(repo?.open_issues_count)) + const total = openIssues + 10 + const issueHealth = Math.max(0, 100 - (openIssues / total) * 100) + + const diversity = Math.min(100, Math.max(0, safeNumber(contributorCount)) * 10) + + return Math.round( + activity * 0.4 + + issueHealth * 0.3 + + diversity * 0.3 + ) } // Repo Lifecycle — Thriving, Active, Dormant, Hibernating based on recency of last push -export function computeActivityClassification(repo) { - const days = (Date.now() - new Date(repo.pushed_at)) / 86_400_000 - if (days <= 30) return 'Thriving' - if (days <= 90) return 'Active' +export function computeActivityClassification(repo, now = Date.now()) { + const days = daysSince(repo?.pushed_at, now) + + if (days <= 30) return 'Thriving' + if (days <= 90) return 'Active' if (days <= 180) return 'Dormant' + return 'Hibernating' } -// Bus Factor +// Bus Factor export function computeBusFactor(contributors = []) { - if (!contributors.length) return { factor: 0, risk: 'unknown' } - const total = contributors.reduce((s, c) => s + c.contributions, 0) - if (!total) return { factor: 0, risk: 'unknown' } - let cum = 0 - for (let i = 0; i < contributors.length; i++) { - cum += contributors[i].contributions - if (cum / total > 0.5) { - const f = i + 1 - return { factor: f, risk: f <= 1 ? 'critical' : f <= 2 ? 'high' : 'healthy' } + const sorted = [...contributors] + .map(c => ({ + ...c, + contributions: Math.max(0, safeNumber(c?.contributions)), + })) + .filter(c => c.contributions > 0) + .sort((a, b) => b.contributions - a.contributions) + + if (!sorted.length) { + return { factor: 0, risk: 'unknown' } + } + + const total = sorted.reduce((sum, c) => sum + c.contributions, 0) + + if (!total) { + return { factor: 0, risk: 'unknown' } + } + + let cumulative = 0 + + for (let i = 0; i < sorted.length; i++) { + cumulative += sorted[i].contributions + + if (cumulative / total >= 0.5) { + const factor = i + 1 + + return { + factor, + risk: + factor <= 1 ? 'critical' : + factor <= 2 ? 'high' : + 'healthy', + } } } - return { factor: contributors.length, risk: 'healthy' } + + return { + factor: sorted.length, + risk: 'healthy', + } } // Unified Analytical Data Model // Merges multiple orgs into one normalized graph: // Organization → Repositories → Contributors → Issues/PRs -export function buildAnalyticalModel(orgs, reposPerOrg, contribsPerRepo, totalReposPerOrg) { - const allRepos = [] +export function buildAnalyticalModel( + orgs = [], + reposPerOrg = {}, + contribsPerRepo = {}, + totalReposPerOrg = {} +) { + const now = Date.now() + + const allRepos = [] const contributorMap = {} - const totalRepos = []; + const totalRepos = [] orgs.forEach(org => { - const repos = reposPerOrg[org.login] || [] - const total = totalReposPerOrg[org.login] || []; + const orgLogin = org?.login + + if (!orgLogin) return + + const repos = reposPerOrg[orgLogin] || [] + const total = totalReposPerOrg[orgLogin] || [] total.forEach(repo => { - const key = `${org.login}/${repo.name}` - const contribs = contribsPerRepo[key] || [] - const health = computeHealthScore(repo, contribs.length) - const activityClassification = computeActivityClassification(repo) - const bf = computeBusFactor(contribs) - totalRepos.push({ ...repo, orgLogin: org.login, contributors: contribs, healthScore: health, activityClassification: activityClassification, busFactor: bf }) + const key = `${orgLogin}/${repo.name}` + const contributors = contribsPerRepo[key] || [] + + totalRepos.push(enrichRepo(repo, orgLogin, contributors, now)) }) repos.forEach(repo => { - const key = `${org.login}/${repo.name}` - const contribs = contribsPerRepo[key] || [] - allRepos.push({ ...repo, orgLogin: org.login }); + const key = `${orgLogin}/${repo.name}` + const contributors = contribsPerRepo[key] || [] + + allRepos.push(enrichRepo(repo, orgLogin, contributors, now)) // Build contributor map — deduplicated by login across orgs - contribs.forEach(c => { - if (!contributorMap[c.login]) { - contributorMap[c.login] = { - login: c.login, - avatar_url: c.avatar_url, + contributors.forEach(contributor => { + if (!contributor?.login) return + + const contributionCount = Math.max(0, safeNumber(contributor.contributions)) + + if (!contributorMap[contributor.login]) { + contributorMap[contributor.login] = { + login: contributor.login, + avatar_url: contributor.avatar_url, totalContribs: 0, repos: [], orgs: new Set(), lastActive: null, + lastActiveMs: null, } } - const entry = contributorMap[c.login] - entry.totalContribs += c.contributions - entry.repos.push({ name: repo.name, org: org.login, count: c.contributions }) - entry.orgs.add(org.login) - if (!entry.lastActive || repo.pushed_at > entry.lastActive) { - entry.lastActive = repo.pushed_at + + const entry = contributorMap[contributor.login] + + entry.totalContribs += contributionCount + entry.repos.push({ + name: repo.name, + org: orgLogin, + count: contributionCount, + }) + entry.orgs.add(orgLogin) + + const pushedAtMs = Date.parse(repo.pushed_at) + + if (Number.isFinite(pushedAtMs)) { + if (!entry.lastActiveMs || pushedAtMs > entry.lastActiveMs) { + entry.lastActiveMs = pushedAtMs + entry.lastActive = repo.pushed_at + } } }) }) }) // Finalize contributors: compute signals - const contributors = Object.values(contributorMap).map(c => ({ - ...c, - orgs: Array.from(c.orgs), - isConnector: c.repos.length >= 3, - isCrossOrg: c.orgs.size > 1, - freshness: c.lastActive - ? Math.max(0, 100 - (Date.now() - new Date(c.lastActive)) / 86_400_000) - : 0, - })).sort((a, b) => b.totalContribs - a.totalContribs) + const contributors = Object.values(contributorMap) + .map(({ orgs, lastActiveMs, ...contributor }) => ({ + ...contributor, + orgs: Array.from(orgs), + isConnector: contributor.repos.length >= 3, + isCrossOrg: orgs.size > 1, + freshness: contributor.lastActive + ? Math.round(Math.max(0, 100 - daysSince(contributor.lastActive, now))) + : 0, + })) + .sort((a, b) => b.totalContribs - a.totalContribs) // Graph is constructed here and persisted through cache layers - return { allRepos, contributors, totalRepos } + return { + allRepos, + contributors, + totalRepos, + } } // Time-Series Bucketing // Parses created_at, closed_at, merged_at into weekly/monthly bins +function getISOWeekKey(dateStr) { + const d = new Date(dateStr) + + if (Number.isNaN(d.getTime())) return null + + const date = new Date(Date.UTC( + d.getUTCFullYear(), + d.getUTCMonth(), + d.getUTCDate() + )) + + const day = date.getUTCDay() || 7 + + date.setUTCDate(date.getUTCDate() + 4 - day) + + const yearStart = new Date(Date.UTC(date.getUTCFullYear(), 0, 1)) + const week = Math.ceil((((date - yearStart) / MS_PER_DAY) + 1) / 7) + + return `${date.getUTCFullYear()}-W${String(week).padStart(2, '0')}` +} + export function buildTimeSeries(issues = [], granularity = 'monthly') { const buckets = {} const toKey = dateStr => { if (!dateStr) return null + const d = new Date(dateStr) + + if (Number.isNaN(d.getTime())) return null + if (granularity === 'weekly') { - const jan1 = new Date(d.getFullYear(), 0, 1) - const week = Math.ceil(((d - jan1) / 86_400_000 + jan1.getDay() + 1) / 7) - return `${d.getFullYear()}-W${String(week).padStart(2, '0')}` + return getISOWeekKey(dateStr) } - return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, '0')}` + + return `${d.getUTCFullYear()}-${String(d.getUTCMonth() + 1).padStart(2, '0')}` } const ensure = key => { if (!buckets[key]) { - buckets[key] = { date: key, prs_created: 0, prs_merged: 0, prs_closed: 0, issues_created: 0, issues_closed: 0 } + buckets[key] = { + date: key, + prs_created: 0, + prs_merged: 0, + prs_closed: 0, + issues_created: 0, + issues_closed: 0, + } } } issues.forEach(item => { - const isPR = Boolean(item.pull_request) + const isPR = Boolean(item?.pull_request) - const ck = toKey(item.created_at) - if (ck) { - ensure(ck) - if (isPR) buckets[ck].prs_created++ - else buckets[ck].issues_created++ + const createdKey = toKey(item?.created_at) + + if (createdKey) { + ensure(createdKey) + + if (isPR) buckets[createdKey].prs_created++ + else buckets[createdKey].issues_created++ } - if (item.closed_at) { - const xk = toKey(item.closed_at) - if (xk) { - ensure(xk) - if (isPR) buckets[xk].prs_closed++ - else buckets[xk].issues_closed++ - } + const closedKey = toKey(item?.closed_at) + + if (closedKey) { + ensure(closedKey) + + if (isPR) buckets[closedKey].prs_closed++ + else buckets[closedKey].issues_closed++ } - if (isPR && item.pull_request?.merged_at) { - const mk = toKey(item.pull_request.merged_at) - if (mk) { ensure(mk); buckets[mk].prs_merged++ } + const mergedKey = isPR ? toKey(item?.pull_request?.merged_at) : null + + if (mergedKey) { + ensure(mergedKey) + buckets[mergedKey].prs_merged++ } }) @@ -151,53 +290,144 @@ export function buildTimeSeries(issues = [], granularity = 'monthly') { } // CSV Export -function download(content, filename, type = 'text/csv') { +function csvCell(value) { + if (value === null || value === undefined) return '' + + let cell = String(value) + + // Prevent CSV formula injection in spreadsheet apps + if (/^[=+\-@]/.test(cell)) { + cell = `'${cell}` + } + + if (/[",\n\r]/.test(cell)) { + cell = `"${cell.replaceAll('"', '""')}"` + } + + return cell +} + +function toCSV(rows) { + return rows + .map(row => row.map(csvCell).join(',')) + .join('\n') +} + +function download(content, filename, type = 'text/csv;charset=utf-8') { const blob = new Blob([content], { type }) - const url = URL.createObjectURL(blob) - const a = Object.assign(document.createElement('a'), { href: url, download: filename }) + const url = URL.createObjectURL(blob) + + const a = Object.assign(document.createElement('a'), { + href: url, + download: filename, + }) + + document.body.appendChild(a) a.click() - URL.revokeObjectURL(url) -} + a.remove() -export function exportReposCSV(repos) { - const header = ['Repository','Org','Stars','Forks','Open Issues','Health Score','Activity Classification','Language','Last Active'] - const rows = repos.map(r => [r.name, r.orgLogin, r.stargazers_count, r.forks_count, r.open_issues_count, r.healthScore, r.activityClassification, r.language || 'N/A', r.pushed_at?.slice(0, 10)]) - download([header, ...rows].map(r => r.join(',')).join('\n'), 'orgexplorer-repos.csv') + setTimeout(() => URL.revokeObjectURL(url), 0) } -export function exportContributorsCSV(contributors) { - const header = ['Login','Total Contributions','Repos','Orgs','Last Active','Connector','Cross-Org'] - const rows = contributors.map(c => [c.login, c.totalContribs, c.repos.length, c.orgs.length, c.lastActive?.slice(0, 10) || '', c.isConnector, c.isCrossOrg]) - download([header, ...rows].map(r => r.join(',')).join('\n'), 'orgexplorer-contributors.csv') +export function exportReposCSV(repos = []) { + const header = [ + 'Repository', + 'Org', + 'Stars', + 'Forks', + 'Open Issues', + 'Health Score', + 'Activity Classification', + 'Language', + 'Last Active', + ] + + const rows = repos.map(repo => [ + repo.name, + repo.orgLogin, + repo.stargazers_count ?? 0, + repo.forks_count ?? 0, + repo.open_issues_count ?? 0, + repo.healthScore ?? '', + repo.activityClassification ?? '', + repo.language || 'N/A', + repo.pushed_at?.slice(0, 10) || '', + ]) + + download(toCSV([header, ...rows]), 'orgexplorer-repos.csv') } -export function exportTrendsCSV(series) { - const header = ['Date','PRs Created','PRs Merged','PRs Closed','Issues Created','Issues Closed'] - const rows = series.map(s => [s.date, s.prs_created, s.prs_merged, s.prs_closed, s.issues_created, s.issues_closed]) - download([header, ...rows].map(r => r.join(',')).join('\n'), 'orgexplorer-trends.csv') +export function exportContributorsCSV(contributors = []) { + const header = [ + 'Login', + 'Total Contributions', + 'Repos', + 'Orgs', + 'Last Active', + 'Connector', + 'Cross-Org', + ] + + const rows = contributors.map(contributor => [ + contributor.login, + contributor.totalContribs ?? 0, + contributor.repos?.length ?? 0, + contributor.orgs?.length ?? 0, + contributor.lastActive?.slice(0, 10) || '', + Boolean(contributor.isConnector), + Boolean(contributor.isCrossOrg), + ]) + + download(toCSV([header, ...rows]), 'orgexplorer-contributors.csv') } -export function getTopRepositories(repos, limit = 10) { - const MS_PER_DAY = 1000 * 60 * 60 * 24; +export function exportTrendsCSV(series = []) { + const header = [ + 'Date', + 'PRs Created', + 'PRs Merged', + 'PRs Closed', + 'Issues Created', + 'Issues Closed', + ] + const rows = series.map(item => [ + item.date, + item.prs_created ?? 0, + item.prs_merged ?? 0, + item.prs_closed ?? 0, + item.issues_created ?? 0, + item.issues_closed ?? 0, + ]) + + download(toCSV([header, ...rows]), 'orgexplorer-trends.csv') +} + +export function getTopRepositories(repos = [], limit = 10, now = Date.now()) { return [...repos] .map(repo => { - const pushedAtMs = Date.parse(repo.pushed_at); - const daysSinceLastPush = Number.isFinite(pushedAtMs) ? (Date.now() - pushedAtMs) / MS_PER_DAY : Infinity; + const daysSinceLastPush = daysSince(repo.pushed_at, now) - const activityBonus = 0.5 * Math.max(0, 365 - daysSinceLastPush); + const stars = Math.max(0, safeNumber(repo.stargazers_count)) + const forks = Math.max(0, safeNumber(repo.forks_count)) + const openIssues = Math.max(0, safeNumber(repo.open_issues_count)) + const healthScore = Math.max(0, safeNumber(repo.healthScore)) - const score = - (repo.stargazers_count ?? 0) + - (repo.forks_count ?? 0) * 2 + - (repo.watchers_count ?? 0) * 1.5 + - activityBonus; + const activityBonus = 0.5 * Math.max(0, 365 - daysSinceLastPush) + const issuePenalty = Math.min(openIssues, 100) * 0.2 + + const score = + stars + + forks * 2 + + healthScore * 0.5 + + activityBonus - + issuePenalty return { ...repo, - score, - }; + score: Math.round(score * 100) / 100, + } }) .sort((a, b) => b.score - a.score) - .slice(0, limit); + .slice(0, limit) }