-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
49 lines (44 loc) · 1.41 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import PdfTextExtractor from "./scripts/pdf_extract.js";
import DocxTextExtractor from "./scripts/docx_extract.js";
import PptxTextExtractor from "./scripts/pptx_extract.js";
/**
 * Facade that selects a text extractor based on a file's extension and
 * forwards extraction requests to it.
 *
 * Supported extensions (matched case-insensitively): `pdf`, `docx`, `pptx`.
 */
class Parser {
  /**
   * @param {string} filePath - Path to the document to parse.
   * @throws {TypeError} If `filePath` is not a string.
   * @throws {Error} If the file has no extension or an unsupported one.
   */
  constructor(filePath) {
    if (typeof filePath !== "string") {
      throw new TypeError(
        `filePath must be a string, received ${
          filePath === null ? "null" : typeof filePath
        }`
      );
    }

    this.filePath = filePath;

    // Look for the extension in the final path segment only, so that a dot in
    // a directory name (e.g. "/data/v1.2/README") is not taken for one.
    const fileName = filePath.split(/[\\/]/).pop();
    const dotIndex = fileName.lastIndexOf(".");
    this.extension =
      dotIndex === -1 ? "" : fileName.slice(dotIndex + 1).toLowerCase();

    switch (this.extension) {
      case "pdf":
        this.extractor = new PdfTextExtractor();
        break;
      case "docx":
        this.extractor = new DocxTextExtractor();
        break;
      case "pptx":
        this.extractor = new PptxTextExtractor();
        break;
      default:
        throw new Error(
          `Unsupported file type: ${this.extension || "(no extension)"}`
        );
    }
  }

  /**
   * Extracts the whole document by calling the extractor with the file path
   * only.
   *
   * @returns {Promise<*>} Whatever the selected extractor's `extract` yields.
   */
  async extractAll() {
    return this.extractor.extract(this.filePath);
  }

  /**
   * Extracts a single page (PDF) or slide (PPTX) by forwarding `pageNumber`
   * to the extractor unchanged.
   *
   * @param {number} pageNumber - Page/slide selector passed to the extractor.
   *   NOTE(review): whether this is 0- or 1-based is decided by the extractor
   *   implementations, which are not visible here; confirm before relying on it.
   * @returns {Promise<*>} Whatever the selected extractor's `extract` yields.
   * @throws {Error} (as a rejected promise) If the file is not a PDF or PPTX.
   */
  async extractPage(pageNumber) {
    if (this.extension !== "pdf" && this.extension !== "pptx") {
      throw new Error(
        "Page extraction is only supported for PDF and pptx files."
      );
    }
    return this.extractor.extract(this.filePath, pageNumber);
  }
}
export default Parser;