diff --git a/.eslintrc.js b/.eslintrc.js
index 096be64..b6acebc 100644
--- a/.eslintrc.js
+++ b/.eslintrc.js
@@ -8,4 +8,7 @@ module.exports = {
   plugins: [
     '@typescript-eslint',
   ],
+  rules: {
+    'linebreak-style': 'off',
+  },
 };
diff --git a/src/match/findMatching.ts b/src/match/findMatching.ts
new file mode 100644
index 0000000..e8ac4e6
--- /dev/null
+++ b/src/match/findMatching.ts
@@ -0,0 +1,70 @@
+import { MatchingExternal, MatchingInternal, ProjectData } from './matchingTypes';
+import {
+  matchingStats,
+  placeStudentsOfChoicesBalanced,
+  range,
+} from './matchingHelpers';
+
+/**
+ * Assigns students of choice starting from start going to limit in batches of size batch using the balanced
+ * algorithm to break ties. Examples of use:
+ * matchChoices(sampleData, 1, 1, 1) - Matches all first choice students it can
+ * matchChoices(sampleData, 2, 2, 1) - Matches all second choice students it can
+ * matchChoices(sampleData, 3, 20, 3) - Matches 3,4,5 then 6,7,8 then 9,10,11... etc until 20.
+ *
+ * @param allProjectData
+ * @param start - Starting number, inclusive
+ * @param end - The number to process until, inclusive
+ * @param batch - The size of the batches of choices to work on at once.
+ */
+function matchChoices(allProjectData: ProjectData, start: number, end: number, batch: number): void {
+  for (let startChoice = start; startChoice <= end; startChoice += batch) {
+    // Clamp the batch so the last iteration never runs past `end` (inclusive)
+    const choices = range(startChoice, Math.min(startChoice + batch, end + 1));
+    Object.values(allProjectData)
+      .forEach((project) => {
+        placeStudentsOfChoicesBalanced(allProjectData, project.projectId, choices, project.projSizeRemaining);
+      });
+  }
+}
+
+/**
+ * Generates a single match of all students to projects. May have missing students.
+ * @param data
+ */
+function generateMatch(data: ProjectData): MatchingInternal {
+  matchChoices(data, 1, 1, 1);
+  // Start at 2 so that second-choice votes are not skipped
+  matchChoices(data, 2, 20, 1);
+  return {
+    match: data,
+    stats: matchingStats(data),
+  };
+}
+
+/**
+ * Generates a match that probably has no unassigned students (very likely but not guaranteed, call again if it fails)
+ * @param {ProjectData} data - The project information to create a match for. Will not be mutated.
+ */
+export function generateReliableMatch(data: ProjectData): MatchingExternal {
+  const startTime = process.hrtime();
+  let copyOfData = JSON.parse(JSON.stringify(data));
+  let bestMatch: MatchingInternal = generateMatch(copyOfData);
+  for (let i = 0; i < 50; i += 1) {
+    copyOfData = JSON.parse(JSON.stringify(data));
+    const match = generateMatch(copyOfData);
+    if (match.stats.unassignedStudents < bestMatch.stats.unassignedStudents
+      || (match.stats.unassignedStudents === bestMatch.stats.unassignedStudents
+        && match.stats.matchingScore < bestMatch.stats.matchingScore)) {
+      bestMatch = match;
+    }
+  }
+  const endTime = process.hrtime(startTime);
+  return {
+    match: bestMatch.match,
+    stats: {
+      ...bestMatch.stats,
+      runtimeMs: endTime[0] * 1000 + endTime[1] / 1000000,
+    },
+  };
+}
diff --git a/src/match/matchingHelpers.ts b/src/match/matchingHelpers.ts
new file mode 100644
index 0000000..ba9f7ae
--- /dev/null
+++ b/src/match/matchingHelpers.ts
@@ -0,0 +1,264 @@
+import assert from 'assert';
+import { Project, ProjectPreference } from '@prisma/client';
+import {
+  MatchingExternal,
+  MatchingStatsInternal,
+  ProjectData,
+  StudentChoice,
+  StudentChoices,
+} from './matchingTypes';
+import { MatchTuple, MatchingResult } from '../types/Matching';
+
+/* Randomize array in-place using Durstenfeld shuffle algorithm https://stackoverflow.com/a/12646864 */
+function shuffleArray<T>(array: T[]): void {
+  for (let i = array.length - 1; i > 0; i -= 1) {
+    const j = Math.floor(Math.random() * (i + 1));
+    // eslint-disable-next-line no-param-reassign
+    [array[i], array[j]] = [array[j], array[i]];
+  }
+}
+
+/**
+ * A very basic implementation of python's range function, used to generate an array of numbers from a start
+ * (inclusive) and end (exclusive)
+ * @param start - The start of the range, inclusive
+ * @param stop - The end of the range, exclusive
+ */
+export function range(start: number, stop: number): number[] {
+  const arr = [];
+  for (let i = start; i < stop; i += 1) {
+    arr.push(i);
+  }
+  return arr;
+}
+
+/**
+ * Checks to see if a StudentChoice matches either a choice number or an array of choice numbers
+ * @param choice
+ * @param studentChoice
+ */
+function compareChoice(choice: number[] | number, studentChoice: StudentChoice): boolean {
+  return Array.isArray(choice)
+    ? (choice as number[]).includes(studentChoice.choice)
+    : studentChoice.choice === (choice as number);
+}
+
+/**
+ * Marks a student in all project's `studentsSelected` as having been matched somewhere already
+ * @param projectData - The project data, mutated in place
+ * @param studentId - Student to mark
+ */
+function markStudent(projectData: ProjectData, studentId: string): void {
+  Object.values(projectData)
+    .forEach((value) => {
+      if (value.studentsSelected[studentId] !== undefined) {
+        // eslint-disable-next-line no-param-reassign
+        value.studentsSelected[studentId].matched = true;
+      }
+    });
+}
+
+/**
+ * Places a student into a project and then marks them as matched in all projects. Handles correctly updating the
+ * number of needed students and decrementing num_first_choice
+ * @param projectData
+ * @param projectId
+ * @param studentId
+ */
+function placeStudent(projectData: ProjectData, projectId: string, studentId: string): void {
+  const project = projectData[projectId];
+  const student = project.studentsSelected[studentId];
+  const firstChoice: boolean = student.choice === 1;
+  // We should never try to match a student that's already been matched
+  assert(
+    student.matched !== true,
+    `Tried to place a student that's already been matched ${JSON.stringify(student)}`,
+  );
+  // eslint-disable-next-line no-param-reassign
+  projectData[projectId].studentsMatched[student.studentId] = student;
+  markStudent(projectData, studentId);
+  project.projSizeRemaining -= 1;
+  if (firstChoice) project.numFirstChoice -= 1;
+}
+
+/**
+ * Counts the number of votes a certain student has remaining in non-filled projects.
+ * @param projectData
+ * @param studentId
+ */
+function countStudentVotes(projectData: ProjectData, studentId: string): number {
+  return Object.values(projectData)
+    // Only count non-filled projects whose votes contain the given studentId
+    .filter((project) => project.projSizeRemaining > 0
+      && Object.prototype.hasOwnProperty.call(project.studentsSelected, studentId))
+    .length;
+}
+
+/**
+ * Counts the number of unmarked students on a project who voted for it with a given choice.
+ * @param studentsSelected
+ * @param choice
+ */
+export function countStudentsOfChoices(studentsSelected: StudentChoices, choice: number[] | number): number {
+  return Object.values(studentsSelected)
+    .filter((student) => student.matched !== true && compareChoice(choice, student))
+    .length;
+}
+
+/**
+ * Places students similar to placeStudentsOfChoice, however it is smarter and will break ties by removing the
+ * student who appears least frequently in other remaining votes. This is based on the assumption that this person
+ * will be the most likely to accidentally have all their voted projects filled up on them.
+ * @param projectData
+ * @param projectId
+ * @param choice
+ * @param count
+ */
+export function placeStudentsOfChoicesBalanced(
+  projectData: ProjectData,
+  projectId: string,
+  choice: number[] | number,
+  count: number,
+): void {
+  // An array of students on this project who have only one remaining project
+  const singleStudents: [string, number][] = Object.values(projectData[projectId].studentsSelected)
+    .filter((student) => student.matched !== true)
+    .map(
+      (student): [string, number] => [student.studentId, countStudentVotes(projectData, student.studentId)],
+    )
+    .filter(
+      (studentVotes) => studentVotes[1] === 1,
+    );
+
+  // Randomize the array and then match as many single students as possible.
+  let counter = 0;
+  shuffleArray(singleStudents);
+  singleStudents.slice(0, count)
+    .forEach((student) => {
+      placeStudent(projectData, projectId, student[0]);
+      counter += 1;
+    });
+
+  // An array of mappings from student IDs to the number of votes they have on non-filled projects for unmarked
+  // students matching the choice
+  const studentFrequency: [string, number][] = Object.values(projectData[projectId].studentsSelected)
+    .filter((student) => compareChoice(choice, student) && student.matched !== true)
+    .map(
+      (student): [string, number] => [student.studentId, countStudentVotes(projectData, student.studentId)],
+    );
+  // Shuffle first, then stable-sort by vote count: least occurrences come first and ties keep their random order.
+  // (A comparator that returns random values is inconsistent and gives a biased, engine-dependent order.)
+  shuffleArray(studentFrequency);
+  studentFrequency.sort((a, b) => a[1] - b[1]);
+
+  // Get the first few count students and apply them to any remaining slots. This count and counter thing will always
+  // work, slice handles all this very nicely.
+  studentFrequency.slice(0, count - counter)
+    .forEach((student) => placeStudent(projectData, projectId, student[0]));
+}
+
+/**
+ * Counts the number of open spots left in projects
+ * @param projectData
+ */
+function countUnfilled(projectData: ProjectData) {
+  return Object.values(projectData)
+    .reduce((prevVal, currentVal) => prevVal + currentVal.projSizeRemaining, 0);
+}
+
+/**
+ * Counts the number of unmarked students (students that didn't get applied to anything)
+ * @param projectData
+ */
+function unassignedStudentsCount(projectData: ProjectData): number {
+  // A Set de-duplicates students who voted for several projects
+  const unmatchedStudentIds = new Set<string>();
+  Object.values(projectData)
+    .forEach((project) => {
+      Object.values(project.studentsSelected)
+        .filter((student) => student.matched !== true)
+        .forEach((student) => unmatchedStudentIds.add(student.studentId));
+    });
+  return unmatchedStudentIds.size;
+}
+
+/**
+ * Measures the effectiveness of a match, or the choice rank number that all students got divided by the number of
+ * students
+ * @param projectData
+ * @param totalStudents
+ */
+function measureMatchEffectiveness(projectData: ProjectData, totalStudents: number) {
+  const rawScore = Object.values(projectData)
+    .reduce((sumScoreOverall, currentProject) => sumScoreOverall
+      + Object.values(currentProject.studentsMatched)
+        .reduce((sumScore, currentStudent) => sumScore + currentStudent.choice, 0), 0);
+  // Avoid NaN (0 / 0) when there are no students at all
+  return totalStudents === 0 ? 0 : rawScore / totalStudents;
+}
+
+/**
+ * Combines a bunch of relevant stats about a matching to check how it's doing
+ * @param projectData
+ */
+export function matchingStats(projectData: ProjectData): MatchingStatsInternal {
+  // eslint-disable-next-line func-names
+  const totalStudents = (function () {
+    const totalStudentsSet = new Set<string>();
+    Object.values(projectData)
+      .forEach((project) => {
+        Object.values(project.studentsSelected)
+          .forEach((student) => {
+            totalStudentsSet.add(student.studentId);
+          });
+      });
+    return totalStudentsSet.size;
+  }());
+  return {
+    totalProjects: Object.keys(projectData).length,
+    totalStudents,
+    unassignedStudents: unassignedStudentsCount(projectData),
+    unfilledSlots: countUnfilled(projectData),
+    matchingScore: measureMatchEffectiveness(projectData, totalStudents),
+  };
+}
+
+export function parsePrismaData(prismaData: (Project & { projectPreferences: ProjectPreference[] })[]): ProjectData {
+  const projectData: ProjectData = {};
+  prismaData.forEach((prismaProject) => {
+    // Generate the student choices
+    const studentsSelected: StudentChoices = {};
+    prismaProject.projectPreferences.forEach((prismaStudentChoice) => {
+      studentsSelected[prismaStudentChoice.studentId] = {
+        studentId: prismaStudentChoice.studentId,
+        choice: prismaStudentChoice.ranking,
+      };
+    });
+    // Fill in the rest of the data
+    projectData[prismaProject.id] = {
+      studentsSelected,
+      projectId: prismaProject.id,
+      numFirstChoice: countStudentsOfChoices(studentsSelected, 1),
+      // NOTE(review): capacity is taken from the number of voters, not from a real project size - confirm
+      projSizeRemaining: Object.keys(studentsSelected).length,
+      studentsMatched: {},
+    };
+  });
+  return projectData;
+}
+
+/**
+ * Marshals data from internal structure to datatype required for export
+ */
+export function prepareDataForExport(matchingData: MatchingExternal): MatchingResult {
+  const matchingProjectData: MatchTuple[] = Object
+    .values(matchingData.match)
+    .flatMap((projectData) => Object.keys(projectData.studentsMatched).map((student) => ({
+      studentId: student,
+      projectId: projectData.projectId,
+    })));
+  return {
+    stats: matchingData.stats,
+    match: matchingProjectData,
+  };
+}
diff --git a/src/match/matchingTypes.ts b/src/match/matchingTypes.ts
new file mode 100644
index 0000000..f925656
--- /dev/null
+++ b/src/match/matchingTypes.ts
@@ -0,0 +1,51 @@
+export interface ProjectData {
+  [projectId: string]: ProjectDataDictElement;
+}
+
+export interface ProjectDataDictElementCore {
+  projectId: string;
+  studentsSelected: StudentChoices;
+  studentsMatched: StudentChoices;
+  projSizeRemaining: number;
+  numFirstChoice: number;
+}
+
+/// This gross thing is a match for having too many unnecessary fields in my testing data.
+export interface ProjectDataDictElement extends ProjectDataDictElementCore {
+  [x: string]: any;
+}
+
+export interface StudentChoices {
+  [studentId: string]: StudentChoice;
+}
+
+export interface Student {
+  studentId: string;
+}
+
+export interface StudentChoice extends Student {
+  choice: number;
+  matched?: true | undefined; // This works as a kind of default
+}
+
+export interface MatchingStatsInternal {
+  totalProjects: number;
+  totalStudents: number;
+  unassignedStudents: number;
+  unfilledSlots: number;
+  matchingScore: number;
+}
+
+export interface MatchingStatsExternal extends MatchingStatsInternal {
+  runtimeMs: number;
+}
+
+export interface MatchingInternal {
+  match: ProjectData;
+  stats: MatchingStatsInternal;
+}
+
+export interface MatchingExternal {
+  match: ProjectData;
+  stats: MatchingStatsExternal;
+}
diff --git a/src/match/testMatching.ts b/src/match/testMatching.ts
new file mode 100644
index 0000000..f24686c
--- /dev/null
+++ b/src/match/testMatching.ts
@@ -0,0 +1,5 @@
+import { generateReliableMatch } from './findMatching';
+import { sampleData } from './matchingData';
+
+const { stats } = generateReliableMatch(sampleData);
+console.log(stats);
diff --git a/src/resolvers/Match.ts b/src/resolvers/Match.ts
index 5855d7a..1ae39ae 100644
--- a/src/resolvers/Match.ts
+++ b/src/resolvers/Match.ts
@@ -1,34 +1,55 @@
-import {
-  Resolver, Authorized, Query, Mutation, Arg, Ctx,
-} from 'type-graphql';
-import {
-  PrismaClient,
-} from '@prisma/client';
+import { Arg, Authorized, Ctx, Mutation, Query, Resolver, } from 'type-graphql';
+import { PrismaClient } from '@prisma/client';
 import { Inject, Service } from 'typedi';
-import { Track, StudentStatus } from '../enums';
-import { Context, AuthRole } from '../context';
+import { ProjectStatus, StudentStatus, Track } from '../enums';
+import { AuthRole, Context } from '../context';
 import {
-  Student, Tag, Match, Preference, Project,
+  Preference,
+  Recommendation,
+  Student,
+  Tag,
 } from '../types';
-import { getProjectMatches } from '../search';
+import { getProjectRecs } from '../search';
+import { parsePrismaData, prepareDataForExport } from '../match/matchingHelpers';
+import { generateReliableMatch } from '../match/findMatching';
+import { MatchingResult } from '../types/Matching';
 
 @Service()
-@Resolver(Match)
+@Resolver(Recommendation)
 export class MatchResolver {
   @Inject(() => PrismaClient)
-  private readonly prisma : PrismaClient;
+  private readonly prisma: PrismaClient;
+
+  @Authorized(AuthRole.ADMIN)
+  @Query(() => MatchingResult)
+  async matchStudents(): Promise<MatchingResult> {
+    const prismaProjectData = await this.prisma.project.findMany({
+      where: {
+        status: {
+          not: ProjectStatus.DRAFT,
+        },
+      },
+      include: {
+        projectPreferences: true,
+      },
+    });
+    const projectData = parsePrismaData(prismaProjectData);
+    const matching = generateReliableMatch(projectData);
+    return prepareDataForExport(matching);
+  }
 
   @Authorized(AuthRole.STUDENT)
-  @Query(() => [Match], { nullable: true })
-  async projectMatches(
+  @Query(() => [Recommendation], { nullable: true })
+  // TODO: Check how to rename endpoints like this. This is really getting recs not matches.
+  async projectRecs(
     @Ctx() { auth }: Context,
     @Arg('tags', () => [String]) tagIds: string[],
-  ): Promise {
+  ): Promise {
     const student = await this.prisma.student.findUnique({ where: auth.toWhere() });
     const tags = await this.prisma.tag.findMany({ where: { id: { in: tagIds } } });
     if (!student || student.status !== StudentStatus.ACCEPTED) throw Error('You have not been accepted.');
 
-    return getProjectMatches(student, tags);
+    return getProjectRecs(student, tags);
   }
 
   @Authorized(AuthRole.STUDENT)
@@ -36,9 +57,16 @@ export class MatchResolver {
   async projectPreferences(
     @Ctx() { auth }: Context,
   ): Promise {
-    return this.prisma.projectPreference.findMany({
+    return this.prisma.projectPreference.findMany({
       where: { student: auth.toWhere() },
-      include: { project: { include: { tags: true, mentors: true } } },
+      include: {
+        project: {
+          include: {
+            tags: true,
+            mentors: true
+          }
+        }
+      },
       orderBy: [{ ranking: 'asc' }],
     });
   }
@@ -54,17 +82,25 @@ export class MatchResolver {
     const student = await this.prisma.student.findUnique({ where: auth.toWhere() });
     const projects = await this.prisma.project.findMany({
       where: { id: { in: projectIds } },
-      include: { tags: true, mentors: true },
+      include: {
+        tags: true,
+        mentors: true
+      },
     });
     if (!student || student.status !== StudentStatus.ACCEPTED) throw Error('You have not been accepted.');
     if (projectIds.length < 3) throw Error('You must select at least 3 project preferences.');
     if (projects.length !== projectIds.length) throw Error('You selected a project which does not exist.');
 
-    projects.forEach(({ id, track }) => {
+    projects.forEach(({
+      id,
+      track
+    }) => {
       if (
         (student.track === Track.BEGINNER && track !== Track.BEGINNER)
         || (student.track !== Track.BEGINNER && track === Track.BEGINNER)
-      ) throw Error(`You cannot select project ID ${id} because it is not in your track.`);
+      ) {
+        throw Error(`You cannot select project ID ${id} because it is not in your track.`);
+      }
     });
 
     await this.prisma.projectPreference.deleteMany({ where: { student: auth.toWhere() } });
diff --git a/src/search/getProjectMatches.ts b/src/search/getProjectRecs.ts
similarity index 97%
rename from src/search/getProjectMatches.ts
rename to src/search/getProjectRecs.ts
index 029faad..9cdd5e4 100644
--- a/src/search/getProjectMatches.ts
+++ b/src/search/getProjectRecs.ts
@@ -7,7 +7,7 @@ import { PrismaClient } from '@prisma/client';
 import Container from 'typedi';
 import config from '../config';
 import {
-  Student, Match, Project, Tag,
+  Student, Recommendation, Project, Tag,
 } from '../types';
 import { Track, TagType } from '../enums';
 import { geoToTimezone } from '../utils';
@@ -122,7 +122,7 @@ async function buildQueryFor(student: Student, tags: Tag[]): Promise {
+export async function getProjectRecs(student: Student, tags: Tag[]): Promise {
   const prisma = Container.get(PrismaClient);
   const elastic = Container.get(Client);
diff --git a/src/search/index.ts b/src/search/index.ts
index cc423b1..1724af8 100644
--- a/src/search/index.ts
+++ b/src/search/index.ts
@@ -7,4 +7,4 @@ export default function searchSyncHandler(): void {
 
 export * from './ElasticEntry';
 export * from './sync';
-export * from './getProjectMatches';
+export * from './getProjectRecs';
diff --git a/src/types/Matching.ts b/src/types/Matching.ts
new file mode 100644
index 0000000..e92fa5b
--- /dev/null
+++ b/src/types/Matching.ts
@@ -0,0 +1,42 @@
+// eslint-disable-next-line max-classes-per-file
+import { Field, ObjectType } from 'type-graphql';
+import { MatchingStatsExternal } from '../match/matchingTypes';
+
+@ObjectType()
+export class MatchingStats implements MatchingStatsExternal {
+  @Field(() => Number)
+  totalProjects: number;
+
+  @Field(() => Number)
+  totalStudents: number;
+
+  @Field(() => Number)
+  unassignedStudents: number;
+
+  @Field(() => Number)
+  unfilledSlots: number;
+
+  @Field(() => Number)
+  matchingScore: number;
+
+  @Field(() => Number)
+  runtimeMs: number;
+}
+
+@ObjectType()
+export class MatchTuple {
+  @Field(() => String)
+  studentId: string;
+
+  @Field(() => String)
+  projectId: string;
+}
+
+@ObjectType()
+export class MatchingResult {
+  @Field(() => [MatchTuple])
+  match: MatchTuple[];
+
+  @Field(() => MatchingStats)
+  stats: MatchingStats;
+}
diff --git a/src/types/Match.ts b/src/types/Recommendation.ts
similarity index 86%
rename from src/types/Match.ts
rename to src/types/Recommendation.ts
index ec646de..c4301b4 100644
--- a/src/types/Match.ts
+++ b/src/types/Recommendation.ts
@@ -2,7 +2,7 @@ import { ObjectType, Field } from 'type-graphql';
 import { Project } from './Project';
 
 @ObjectType()
-export class Match {
+export class Recommendation {
   @Field(() => Number)
   score: number
 
diff --git a/src/types/index.ts b/src/types/index.ts
index 599c335..69bd9d4 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -2,5 +2,5 @@ export * from './Mentor';
 export * from './Student';
 export * from './Project';
 export * from './Tag';
-export * from './Match';
+export * from './Recommendation';
 export * from './Preference';