Skip to content

Commit

Permalink
FAI-14913: GitHub source enable fetching public organizations (#1907)
Browse files Browse the repository at this point in the history
  • Loading branch information
chalenge authored Feb 4, 2025
1 parent 26c6f21 commit 457db00
Show file tree
Hide file tree
Showing 11 changed files with 291 additions and 49 deletions.
7 changes: 7 additions & 0 deletions sources/github-source/resources/spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,13 @@
"type": "integer",
"default": 0,
"order": 33
},
"fetch_public_organizations": {
"type": "boolean",
"title": "Fetch Public Organizations",
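"description": "Fetch all public organizations in addition to the organizations visible to the authenticated user. This may result in a large number of requests.",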
"default": false,
"order": 34
}
}
}
Expand Down
38 changes: 37 additions & 1 deletion sources/github-source/src/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ export const DEFAULT_CONCURRENCY = 4;
export const DEFAULT_BACKFILL = false;
export const DEFAULT_FETCH_PR_DIFF_COVERAGE = false;
/** Fallback used when `pull_request_cutoff_lag_seconds` is not set in the config. */
export const DEFAULT_PR_CUTOFF_LAG_SECONDS = 0;
/** Fallback used when `fetch_public_organizations` is not set in the config. */
export const DEFAULT_FETCH_PUBLIC_ORGANIZATIONS = false;

type TeamMemberTimestamps = {
[user: string]: {
Expand Down Expand Up @@ -147,6 +148,7 @@ export abstract class GitHub {
protected readonly fetchPullRequestDiffCoverage: boolean;
protected readonly pullRequestCutoffLagSeconds: number;
protected readonly useEnterpriseAPIs: boolean;
protected readonly fetchPublicOrganizations: boolean;

constructor(
config: GitHubConfig,
Expand All @@ -171,6 +173,8 @@ export abstract class GitHub {
this.pullRequestCutoffLagSeconds =
config.pull_request_cutoff_lag_seconds ?? DEFAULT_PR_CUTOFF_LAG_SECONDS;
this.useEnterpriseAPIs = config.enterprises?.length > 0;
this.fetchPublicOrganizations =
config.fetch_public_organizations ?? DEFAULT_FETCH_PUBLIC_ORGANIZATIONS;
}

static async instance(
Expand Down Expand Up @@ -214,6 +218,7 @@ export abstract class GitHub {
return orgs;
}

@Memoize()
async getOrganization(orgLogin: string): Promise<Organization> {
const org = await this.octokit(orgLogin).orgs.get({org: orgLogin});
return pick(org.data, [
Expand Down Expand Up @@ -2055,13 +2060,44 @@ export class GitHubToken extends GitHub {
}
}

if (this.fetchPublicOrganizations) {
for await (const org of this.getPublicOrganizations()) {
empty = false;
yield org;
}
}

if (!empty) {
return;
}

// Fine-grained tokens return an empty list for visible orgs,
// so if we get to this point, we're possibly using a fine-grained token.
// In order to determine which orgs are visible, check visible repos and track their orgs
yield* this.getOrganizationsByRepositories();
}

/**
 * Streams organization logins returned by the `orgs.list` endpoint.
 *
 * An informational notice is logged before the first page is requested,
 * since the listing may require many requests.
 *
 * @returns An async generator yielding one organization login at a time.
 */
private async *getPublicOrganizations(): AsyncGenerator<string> {
  this.logger.info(
    `Fetching public organizations enabled. ` +
      `This may result in a large number of requests.`
  );

  const pages = this.baseOctokit.paginate.iterator(
    this.baseOctokit.orgs.list,
    {per_page: this.pageSize}
  );

  // Walk the result set page by page, emitting logins as they arrive.
  for await (const {data} of pages) {
    for (const {login} of data) {
      yield login;
    }
  }
}

/*
* In order to determine which orgs are visible, check visible repos and track their orgs
*/
private async *getOrganizationsByRepositories(): AsyncGenerator<string> {
const seenOrgs = new Set<string>();
const reposIter = this.baseOctokit.paginate.iterator(
this.baseOctokit.repos.listForAuthenticatedUser,
Expand Down
88 changes: 61 additions & 27 deletions sources/github-source/src/org-repo-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,43 +93,77 @@ export class OrgRepoFilter {
/**
 * Resolves the organizations to sync, computing and caching them on first use.
 *
 * The organizations reported by GitHub are lower-cased and then passed
 * through the configured inclusion and exclusion filters.
 *
 * @returns The lower-cased logins of the organizations to sync.
 * @throws VError when no organization remains after filtering, unless the
 * run mode is `EnterpriseCopilotOnly`.
 */
@Memoize()
async getOrganizations(): Promise<ReadonlyArray<string>> {
  if (!this.organizations) {
    const github = await GitHub.instance(this.config, this.logger);
    // Normalize to lower case once so membership checks are case-insensitive.
    const visibleOrgs = new Set(
      (await github.getOrganizations()).map((o) => toLower(o))
    );

    if (!visibleOrgs.size) {
      this.logger.warn('No visible organizations found');
    }

    this.organizations = await this.filterOrganizations(visibleOrgs, github);
  }

  if (
    this.config.run_mode !== RunMode.EnterpriseCopilotOnly &&
    this.organizations.size === 0
  ) {
    throw new VError(
      'No visible organizations remain after applying inclusion and exclusion filters'
    );
  }

  return Array.from(this.organizations);
}

/**
 * Applies the configured organization filters to the visible organizations.
 *
 * - Without an inclusion list, every visible organization is kept unless it
 *   appears in the exclusion list.
 * - With an inclusion list, each listed organization is kept when it is
 *   either in `visibleOrgs` or can be fetched directly from GitHub.
 *
 * @param visibleOrgs Logins of the organizations reported as visible.
 * @param github Client used to look up organizations missing from
 * `visibleOrgs`.
 * @returns The lower-cased logins of the organizations that passed the filters.
 */
private async filterOrganizations(
  visibleOrgs: Set<string>,
  github: GitHub
): Promise<Set<string>> {
  const organizations = new Set<string>();

  if (!this.filterConfig.organizations) {
    for (const org of visibleOrgs) {
      const lowerOrg = toLower(org);
      if (!this.filterConfig.excludedOrganizations?.has(lowerOrg)) {
        organizations.add(lowerOrg);
      } else {
        this.logger.info(`Skipping excluded organization ${lowerOrg}`);
      }
    }
  } else {
    for (const organization of this.filterConfig.organizations) {
      const lowerOrg = toLower(organization);
      // Organizations that cannot be confirmed are skipped; the lookup helper
      // logs the reason.
      if (await this.isVisibleOrganization(visibleOrgs, lowerOrg, github)) {
        organizations.add(lowerOrg);
      }
    }
  }

  return organizations;
}

/**
 * Determines whether an organization can be synced.
 *
 * An organization qualifies when it is already in `visibleOrgs`, or when a
 * direct lookup through the GitHub client succeeds. A failed lookup is logged
 * as a warning and treated as "not visible" rather than rethrown.
 *
 * @param visibleOrgs Logins of the organizations reported as visible.
 * @param lowerOrg Lower-cased login of the organization to check.
 * @param github Client used for the direct organization lookup.
 * @returns `true` when the organization is visible or the lookup succeeds,
 * `false` when the lookup fails.
 */
private async isVisibleOrganization(
  visibleOrgs: Set<string>,
  lowerOrg: string,
  github: GitHub
): Promise<boolean> {
  if (visibleOrgs.has(lowerOrg)) {
    return true;
  }

  // Attempt direct organization lookup if not in visibleOrgs
  try {
    await github.getOrganization(lowerOrg);
    return true;
  } catch (error: any) {
    this.logger.warn(
      `Fetching organization ${lowerOrg} failed with error: ` +
        `${error.status} - ${error.message}. Skipping.`
    );
    return false;
  }
}

@Memoize()
Expand Down
1 change: 1 addition & 0 deletions sources/github-source/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export interface GitHubConfig extends AirbyteConfig, RoundRobinConfig {
readonly end_date?: string;
readonly fetch_pull_request_diff_coverage?: boolean;
readonly pull_request_cutoff_lag_seconds?: number;
readonly fetch_public_organizations?: boolean;
// startDate and endDate are calculated from start_date, end_date, and cutoff_days
startDate?: Date;
endDate?: Date;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ exports[`OrgRepoFilter getOrganizations - all - no list 1`] = `
]
`;

exports[`OrgRepoFilter getOrganizations - fetch public orgs 1`] = `
[
"org-1",
"org-2",
]
`;

exports[`OrgRepoFilter getOrganizations - fine-grained token - no list 1`] = `
[
"org-1",
Expand All @@ -36,6 +43,13 @@ exports[`OrgRepoFilter getOrganizations - specific organizations included 1`] =
]
`;

exports[`OrgRepoFilter getOrganizations - visible organization not in listForAuthenticatedUser 1`] = `
[
"org-1",
"org-oss",
]
`;

exports[`OrgRepoFilter getRepositories (FarosGraph) - nothing included - nothing excluded 1`] = `
[
{
Expand Down
46 changes: 46 additions & 0 deletions sources/github-source/test/org-repo-filter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ describe('OrgRepoFilter', () => {
{login: 'Org-2'},
{login: 'Org-3'},
]),
get: jest.fn().mockRejectedValue(new Error('404')),
},
repos: {
listForOrg: jest
Expand Down Expand Up @@ -69,6 +70,51 @@ describe('OrgRepoFilter', () => {
expect(organizations).toMatchSnapshot();
});

test('getOrganizations - fetch public orgs', async () => {
  // Base config with the public-organization listing switched on.
  const cfg = {...config, fetch_public_organizations: true};

  // One org comes from the authenticated-user listing, another from the
  // public listing; the snapshot expects both.
  const orgs = {
    listForAuthenticatedUser: jest.fn().mockReturnValue([{login: 'Org-1'}]),
    list: jest.fn().mockReturnValue([{login: 'Org-2'}]),
  };
  setupGitHubInstance({orgs}, logger, cfg);

  const filter = new OrgRepoFilter(cfg, logger);
  const result = await filter.getOrganizations();
  expect(result).toMatchSnapshot();
});

test('getOrganizations - visible organization not in listForAuthenticatedUser', async () => {
  // Only Org-1 is listed for the authenticated user; the direct `get` lookup
  // resolves, so the explicitly configured org-oss is expected in the
  // snapshot as well.
  const orgs = {
    listForAuthenticatedUser: jest.fn().mockReturnValue([{login: 'Org-1'}]),
    get: jest.fn().mockResolvedValue({login: 'Org-OSS'}),
  };
  setupGitHubInstance({orgs}, logger);

  const cfg = {...config, organizations: ['org-1', 'org-oss']};
  const filter = new OrgRepoFilter(cfg, logger);
  const result = await filter.getOrganizations();
  expect(result).toMatchSnapshot();
});

test('getOrganizations - specific organizations excluded', async () => {
const orgRepoFilter = new OrgRepoFilter(
{
Expand Down
1 change: 1 addition & 0 deletions sources/jira-source/src/jira.ts
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ export class Jira {
for await (const project of this.getProjectsIterator(keys)) {
projects.push(project);
}
this.logger?.debug(`Found ${projects.length} browseable projects from Jira instance: ${projects.map(p => p.key).join(', ')}`);
return projects;
}

Expand Down
35 changes: 30 additions & 5 deletions sources/jira-source/src/project-board-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ export class ProjectBoardFilter {
@Memoize()
async getProjects(): Promise<ReadonlyArray<string>> {
if (!this.projects) {
this.logger.info('Generating list of projects to sync');
this.projects = new Set();

const jira = await Jira.instance(this.config, this.logger);
Expand All @@ -109,19 +110,38 @@ export class ProjectBoardFilter {
}
}
} else {
for (const project of this.filterConfig.projects) {
if (jira.isProjectInBucket(project)) {
this.projects.add(toUpper(project));
}
}
await this.getProjectsFromConfig();
}
this.logger.info(
`Will sync ${this.projects.size} projects: ` +
`${Array.from(this.projects).join(', ')}`
);
}
return Array.from(this.projects);
}

/**
 * Adds the projects listed in the filter config to `this.projects`.
 *
 * A configured project is matched against both the keys and the ids of the
 * projects returned by `jira.getProjects()`. Entries that match neither are
 * skipped with a warning. Matching entries are added upper-cased, and only
 * when `jira.isProjectInBucket` accepts them.
 */
private async getProjectsFromConfig(): Promise<void> {
  const jira = await Jira.instance(this.config, this.logger);
  const available = await jira.getProjects();
  const availableKeys = new Set(available.map(({key}) => key));
  const availableIds = new Set(available.map(({id}) => id));

  for (const configured of this.filterConfig.projects) {
    if (availableKeys.has(configured) || availableIds.has(configured)) {
      if (jira.isProjectInBucket(configured)) {
        this.projects.add(toUpper(configured));
      }
    } else {
      this.logger.warn(
        `Project ${configured} defined in config is not visible in Jira instance. Skipping.`
      );
    }
  }
}

@Memoize()
async getBoards(): Promise<ReadonlyArray<BoardInclusion>> {
if (!this.boards) {
this.logger.info('Generating list of boards to sync.');
this.boards = new Map();

const jira = await Jira.instance(this.config, this.logger);
Expand All @@ -137,6 +157,8 @@ export class ProjectBoardFilter {
} else {
await this.getBoardsFromJira(jira);
}
this.logger.info(`Will sync ${this.boards.size} boards.`);
this.logger.debug(`Boards to sync: ${Array.from(this.boards.keys()).join(', ')}`);
}
return Array.from(this.boards.values());
}
Expand Down Expand Up @@ -187,7 +209,9 @@ export class ProjectBoardFilter {
* @returns A Promise that resolves when all boards have been processed.
*/
private async getBoardsFromJira(jira: Jira): Promise<void> {
this.logger.info('Fetching boards to sync from Jira.');
for (const project of this.projects) {
this.logger.info(`Fetching boards to sync for project ${project}`);
for (const board of await jira.getProjectBoards(project)) {
const boardId = toString(board.id);
const {included, issueSync} = await this.getBoardInclusion(boardId);
Expand All @@ -199,6 +223,7 @@ export class ProjectBoardFilter {
}

private async getBoardsFromFaros(jira: Jira): Promise<void> {
this.logger.info('Fetching boards to sync from Faros Graph.');
const projects = jira.getProjectBoardsFromGraph(
this.farosClient,
this.config.graph ?? DEFAULT_GRAPH,
Expand Down
Loading

0 comments on commit 457db00

Please sign in to comment.