From 6d8c8b71773788b5a2a81c668e8c0ac8248ee89e Mon Sep 17 00:00:00 2001 From: Jacqueline Date: Thu, 31 Oct 2024 18:33:45 -0400 Subject: [PATCH] Add IDF vector to db --- client/src/modules/Admin/Components/Admin.tsx | 27 ++++++- server/db/schema.ts | 5 +- server/scripts/index.ts | 3 +- server/scripts/populate-courses.ts | 53 +------------- server/scripts/populate-recdata.ts | 72 +++++++++++++++++++ server/src/admin/admin.controller.ts | 11 +++ server/src/admin/admin.router.ts | 25 ++++++- server/src/course/course.controller.ts | 7 +- server/src/course/course.data-access.ts | 6 +- server/src/course/course.router.ts | 14 +++- 10 files changed, 161 insertions(+), 62 deletions(-) create mode 100644 server/scripts/populate-recdata.ts diff --git a/client/src/modules/Admin/Components/Admin.tsx b/client/src/modules/Admin/Components/Admin.tsx index 7c4eb6d1..56911bf9 100644 --- a/client/src/modules/Admin/Components/Admin.tsx +++ b/client/src/modules/Admin/Components/Admin.tsx @@ -23,7 +23,7 @@ export const Admin = () => { const [doubleClick, setDoubleClick] = useState(false) const [updating, setUpdating] = useState(false); - type updatedStates = 'empty' | 'semester' | 'profsReset' | 'profsUpdate' | 'subjects' | 'database' | 'description' | 'processed'; + type updatedStates = 'empty' | 'semester' | 'profsReset' | 'profsUpdate' | 'subjects' | 'database' | 'description' | 'processed' | 'idf'; const [updated, setUpdated] = useState('empty'); const successMessages = { 'empty': '', @@ -33,7 +33,8 @@ export const Admin = () => { 'subjects': "Subject full name data successfully updated", 'database': "Database successfully initialized", 'description': "Course description data successfully added", - 'processed': "Processed course descriptino data successfully added" + 'processed': "Processed course descriptino data successfully added", + 'idf': "IDF vector data successfully added" }; const [updatingField, setUpdatingField] = useState(""); @@ -299,6 +300,20 @@ export const Admin = () => { } } + async function updateIdfVector() { + console.log('Updatng IDF vector') + setUpdating(true) + setUpdatingField("IDF vector") + const response = await axios.post('/api/admin/rec/idf', { token: token }); + if (response.status === 200) { + console.log('Updated IDF vector') + setUpdating(false) + setUpdated('idf') + } else { + console.log('Error at updateIdfVector') + } + } + /** * Handle the first click to the "Initialize Database" button. Show an alert * and update state to remember the next click will be a double click. @@ -411,6 +426,14 @@ export const Admin = () => { > Update Processed Descriptions + {renderInitButton(doubleClick)} diff --git a/server/db/schema.ts b/server/db/schema.ts index 12798a93..0cb47adc 100644 --- a/server/db/schema.ts +++ b/server/db/schema.ts @@ -1,5 +1,6 @@ import mongoose, { Schema } from "mongoose"; import { Class, Student, Subject, Review, Professor } from "common"; +import { object } from "joi"; /* @@ -180,11 +181,11 @@ export const RecommendationMetadata = mongoose.model; + idfVector: Object; } export const GlobalMetadata = mongoose.model( diff --git a/server/scripts/index.ts b/server/scripts/index.ts index 5c1e5dd8..d5c64f8f 100644 --- a/server/scripts/index.ts +++ b/server/scripts/index.ts @@ -4,9 +4,10 @@ export { addNewSemester, addAllCourses, addAllDescriptions, - addAllProcessedDescriptions } from './populate-courses'; export { addAllProfessors, resetProfessors } from './populate-professors'; +export { addAllProcessedDescriptions, addIdfVector } from './populate-recdata'; + export { findAllSemesters } from './utils'; diff --git a/server/scripts/populate-courses.ts b/server/scripts/populate-courses.ts index 8a55b1a8..56a06fce 100644 --- a/server/scripts/populate-courses.ts +++ b/server/scripts/populate-courses.ts @@ -6,11 +6,10 @@ import axios from 'axios'; import shortid from 'shortid'; import { ScrapingSubject, ScrapingClass } from './types'; -import { Classes, Professors, Subjects, RecommendationMetadata } from '../db/schema'; +import { Classes, Professors, Subjects } from '../db/schema'; import { extractProfessors } from './populate-professors'; import { fetchSubjects } from './populate-subjects'; import { addStudentReview } from '../src/review/review.controller'; -import { preprocess } from '../src/course/course.recalgo'; /** * Adds all possible crosslisted classes retrieved from Course API to crosslisted list in Courses database for all semesters. @@ -571,56 +570,6 @@ export const addCourseDescription = async (course): Promise => { return false; } -export const addAllProcessedDescriptions = async (): Promise => { - try { - const courses = await Classes.find().exec(); - if (courses) { - for (const course of courses) { - await addProcessedDescription(course); - } - } - return true; - } catch (err) { - console.log(`Error in adding processed descriptions: ${err}`); - } -} - -const addProcessedDescription = async (course): Promise => { - const courseId = course._id; - const description = course.classDescription; - const processed = preprocess(description); - const subject = course.classSub; - const num = course.classNum; - try { - console.log(`${subject} ${num}: ${processed}`) - const rec = await RecommendationMetadata.findOne({ _id: courseId }); - if (rec) { - await RecommendationMetadata.updateOne( - { _id: courseId }, - { $set: { processedDescription: processed } } - ); - } else { - const res = await new RecommendationMetadata({ - _id: courseId, - classSub: subject, - classNum: num, - processedDescription: processed - }) - .save() - .catch((err) => { - console.log(err); - return null; - }); - if (!res) { - throw new Error(); - } - } - return true; - } catch (err) { - console.log(`Error in adding processed description for ${subject} ${num}: ${err}`); - } -} - // export const addAllSimilarityData = async (): Promise => { // try { // const courses = await Classes.find().exec(); diff --git a/server/scripts/populate-recdata.ts b/server/scripts/populate-recdata.ts new file mode 100644 index 00000000..3f3c5eaa --- /dev/null +++ b/server/scripts/populate-recdata.ts @@ -0,0 +1,72 @@ +import { Classes, RecommendationMetadata, GlobalMetadata } from '../db/schema'; +import { preprocess, idf } from '../src/course/course.recalgo'; + +export const addAllProcessedDescriptions = async (): Promise => { + try { + const courses = await Classes.find().exec(); + if (courses) { + for (const course of courses) { + await addProcessedDescription(course); + } + } + return true; + } catch (err) { + console.log(`Error in adding processed descriptions: ${err}`); + } +} + +const addProcessedDescription = async (course): Promise => { + const courseId = course._id; + const description = course.classDescription; + const processed = preprocess(description); + const subject = course.classSub; + const num = course.classNum; + try { + console.log(`${subject} ${num}: ${processed}`) + const rec = await RecommendationMetadata.findOne({ _id: courseId }); + if (rec) { + await RecommendationMetadata.updateOne( + { _id: courseId }, + { $set: { processedDescription: processed } } + ); + } else { + const res = await new RecommendationMetadata({ + _id: courseId, + classSub: subject, + classNum: num, + processedDescription: processed + }) + .save() + .catch((err) => { + console.log(err); + return null; + }); + if (!res) { + throw new Error(); + } + } + return true; + } catch (err) { + console.log(`Error in adding processed description for ${subject} ${num}: ${err}`); + } +} + +export const addIdfVector = async (): Promise => { + try { + const metadata = await RecommendationMetadata.find().exec(); + const descriptions = metadata.map(course => course.processedDescription.split(' ')); + const allTerms = [...new Set(descriptions.flat())]; + const idfValues = idf(allTerms, descriptions); + const res = await new GlobalMetadata({ + idfVector: idfValues + }).save(); + + if (!res) { + throw new Error(); + } + return true; + } catch (err) { + console.log(`Error in adding IDF Vector to Global Metadata database: ${err}`); + return false; + } +} \ No newline at end of file diff --git a/server/src/admin/admin.controller.ts b/server/src/admin/admin.controller.ts index 86c1c09a..9a06a15d 100644 --- a/server/src/admin/admin.controller.ts +++ b/server/src/admin/admin.controller.ts @@ -42,6 +42,7 @@ import { addNewSemester, addAllDescriptions, addAllProcessedDescriptions, + addIdfVector, } from '../../scripts'; import { fetchAddSubjects } from '../../scripts/populate-subjects'; @@ -429,4 +430,14 @@ export const addProcessedDescriptionsDb = async ({ auth }: VerifyAdminType) => { const descriptionResult = await addAllProcessedDescriptions(); return descriptionResult; +} + +export const addIdfVectorDb = async ({ auth }: VerifyAdminType) => { + const userIsAdmin = verifyTokenAdmin({ auth }); + if (!userIsAdmin) { + return null; + } + + const idfResult = await addIdfVector(); + return idfResult; } \ No newline at end of file diff --git a/server/src/admin/admin.router.ts b/server/src/admin/admin.router.ts index 0dea19c3..0f4e5912 100644 --- a/server/src/admin/admin.router.ts +++ b/server/src/admin/admin.router.ts @@ -27,7 +27,8 @@ import { addAdmin, approveReviews, addCourseDescriptionsDb, - addProcessedDescriptionsDb + addProcessedDescriptionsDb, + addIdfVectorDb } from './admin.controller'; export const adminRouter = express.Router(); @@ -531,3 +532,25 @@ adminRouter.post('/rec/desc', async (req, res) => { return res.status(500).json({ error: `Internal Server Error: ${err}` }); } }); + +adminRouter.post('/rec/idf', async (req, res) => { + const { token }: AdminRequestType = req.body; + try { + const auth = new Auth({ token }); + const result = await addIdfVectorDb({ auth }); + console.log(result) + + if (result) { + res.status(200); + res.set('Connection', 'close'); + res.json({ message: 'IDF vector added!' }); + return res; + } + + return res + .status(400) + .json({ error: 'IDF vector was unable to be added!' }); + } catch (err) { + return res.status(500).json({ error: `Internal Server Error: ${err}` }); + } +}); diff --git a/server/src/course/course.controller.ts b/server/src/course/course.controller.ts index f4a09623..b668b9fe 100644 --- a/server/src/course/course.controller.ts +++ b/server/src/course/course.controller.ts @@ -1,4 +1,4 @@ -import { findCourseById, findCourseByInfo, findRecommendationByInfo } from './course.data-access'; +import { findCourseById, findCourseByInfo, findRecommendationByInfo, findGlobalMetadata } from './course.data-access'; import { CourseIdRequestType, CourseInfoRequestType, CourseDescriptionRequestType } from './course.type'; import { preprocess, tfidf, cosineSimilarity, idf } from './course.recalgo'; @@ -90,6 +90,11 @@ export const getRecommendationData = async ( return course; } +export const getGlobalMetadata = async () => { + const global = await findGlobalMetadata(); + return global; +} + export const getProcessedDescription = (text) => { const processed = preprocess(text); return processed; diff --git a/server/src/course/course.data-access.ts b/server/src/course/course.data-access.ts index a62b4516..06df72ca 100644 --- a/server/src/course/course.data-access.ts +++ b/server/src/course/course.data-access.ts @@ -1,4 +1,4 @@ -import { Classes, RecommendationMetadata } from "../../db/schema"; +import { Classes, RecommendationMetadata, GlobalMetadata } from "../../db/schema"; export const findCourseById = async (courseId: string) => await Classes.findOne({ _id: courseId }).exec(); @@ -16,4 +16,6 @@ export const findRecommendationByInfo = async ( ) => await RecommendationMetadata.findOne({ classSub: courseSubject, classNum: courseNumber, -}).exec(); \ No newline at end of file +}).exec(); + +export const findGlobalMetadata = async () => await GlobalMetadata.find().exec(); \ No newline at end of file diff --git a/server/src/course/course.router.ts b/server/src/course/course.router.ts index 494ae9c6..b1f58116 100644 --- a/server/src/course/course.router.ts +++ b/server/src/course/course.router.ts @@ -1,7 +1,7 @@ import express from 'express'; import { CourseIdRequestType, CourseInfoRequestType, CourseDescriptionRequestType } from './course.type'; -import { getCourseByInfo, getReviewsCrossListOR, getRecommendationData, getProcessedDescription, getSimilarity } from './course.controller'; +import { getCourseByInfo, getReviewsCrossListOR, getRecommendationData, getProcessedDescription, getSimilarity, getGlobalMetadata } from './course.controller'; import { getCourseById } from '../utils'; @@ -89,6 +89,18 @@ courseRouter.post('/getRecData', async (req, res) => { } }); +courseRouter.post('/getGlobal', async (req, res) => { + try { + const global = await getGlobalMetadata(); + + return res.status(200).json({ result: global }); + } catch (err) { + return res + .status(500) + .json({ error: `Internal Server Error: ${err.message}` }); + } +}); + /** Reachable at POST /api/courses/getPreDesc * @body description: a course description * Gets the processed description to use for the similarity algorithm