Skip to content

Commit a8b0150

Browse files
committed
Support page index for sentences
1 parent cd740f1 commit a8b0150

File tree

3 files changed

+20
-8
lines changed

3 files changed

+20
-8
lines changed

Sources/TextRank/Sentence.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ public struct Sentence: Hashable {
1414
words.count
1515
}
1616

17+
public let pageIndex: Int
1718
public let originalTextIndex: Int
1819

19-
public init(text: String, originalTextIndex: Int, additionalStopwords: [String] = [String]()) {
20+
public init(text: String, originalTextIndex: Int, pageIndex: Int = 0, additionalStopwords: [String] = [String]()) {
2021
self.text = text
22+
self.pageIndex = pageIndex
2123
self.originalTextIndex = originalTextIndex
2224
words = Sentence.removeStopWords(from: Sentence.clean(self.text),
2325
additionalStopwords: additionalStopwords)

Sources/TextRank/TextRank.swift

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import Foundation
99

1010
public class TextRank {
11-
public var text: String {
11+
public var pages: [String] = [String]() {
1212
didSet {
1313
textToSentences()
1414
}
@@ -25,26 +25,35 @@ public class TextRank {
2525
}
2626

2727
public init() {
28-
text = ""
28+
pages = [""]
2929
graph = TextGraph(damping: graphDamping)
3030
}
3131

32+
public init(pages: [String]) {
33+
self.pages = pages
34+
graph = TextGraph(damping: graphDamping)
35+
textToSentences()
36+
}
37+
3238
public init(text: String) {
33-
self.text = text
39+
self.pages = [text]
3440
graph = TextGraph(damping: graphDamping)
3541
textToSentences()
3642
}
3743

3844
public init(text: String, summarizationFraction: Float = 0.2, graphDamping: Float = 0.85) {
39-
self.text = text
45+
self.pages = [text]
4046
self.summarizationFraction = summarizationFraction
4147
self.graphDamping = graphDamping
4248
graph = TextGraph(damping: graphDamping)
4349
textToSentences()
4450
}
4551

4652
func textToSentences() {
47-
sentences = TextRank.splitIntoSentences(text, additionalStopwords: stopwords).filter { $0.length > 0 }
53+
sentences = [];
54+
for (pageIndex, page) in pages.enumerated() {
55+
sentences.append(contentsOf: TextRank.splitIntoSentences(page, pageIndex: pageIndex, additionalStopwords: stopwords).filter { $0.length > 0 })
56+
}
4857
}
4958
}
5059

@@ -87,7 +96,7 @@ extension TextRank {
8796
/// Split text into sentences.
8897
/// - Parameter text: Original text.
8998
/// - Returns: An array of sentences.
90-
static func splitIntoSentences(_ text: String, additionalStopwords stopwords: [String] = [String]()) -> [Sentence] {
99+
static func splitIntoSentences(_ text: String, pageIndex: Int = 0, additionalStopwords stopwords: [String] = [String]()) -> [Sentence] {
91100
if text.isEmpty { return [] }
92101

93102
var x = [Sentence]()
@@ -96,6 +105,7 @@ extension TextRank {
96105
x.append(
97106
Sentence(text: substring.trimmingCharacters(in: .whitespacesAndNewlines),
98107
originalTextIndex: x.count,
108+
pageIndex: pageIndex,
99109
additionalStopwords: stopwords)
100110
)
101111
}

Tests/TextRankTests/TextRankTests.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ class TextRankTests: XCTestCase {
4949
XCTAssertEqual(textRank.graph.edges.count, 2)
5050

5151
text = "Dog cat bird. Sheep dog cat peacock. Horse cow fish dog chicken."
52-
textRank.text = text
52+
textRank.pages = [text]
5353
textRank.buildGraph()
5454
XCTAssertEqual(textRank.graph.nodes.count, 3)
5555
XCTAssertEqual(textRank.graph.edges.count, 3)

0 commit comments

Comments
 (0)