Skip to content

Commit 6ba963a

Browse files
committed
replace pageIndex with pageID in TextRank and Sentence
1 parent 64f56e2 commit 6ba963a

File tree

3 files changed

+52
-43
lines changed

3 files changed

+52
-43
lines changed

Sources/TextRank/Sentence.swift

+3-3
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,12 @@ public struct Sentence: Hashable {
1414
words.count
1515
}
1616

17-
public let pageIndex: Int
17+
public let pageID: String
1818
public let originalTextIndex: Int
1919

20-
public init(text: String, originalTextIndex: Int, pageIndex: Int = 0, additionalStopwords: [String] = [String]()) {
20+
public init(text: String, originalTextIndex: Int, pageID: String = "", additionalStopwords: [String] = [String]()) {
2121
self.text = text
22-
self.pageIndex = pageIndex
22+
self.pageID = pageID
2323
self.originalTextIndex = originalTextIndex
2424
words = Sentence.removeStopWords(from: Sentence.clean(self.text),
2525
additionalStopwords: additionalStopwords)

Sources/TextRank/TextRank.swift

+14-15
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,13 @@
88
import Foundation
99

1010
public class TextRank {
11-
public var pages: [String] = [String]() {
11+
public var pages: [(pageId: String, chunks: String)] = [(pageId: String, chunks: String)]() {
1212
didSet {
1313
textToSentences()
1414
}
1515
}
1616

17-
public var chunks: [[String]] = [[String]]() {
17+
public var chunks: [(pageId: String, chunks: [String])] = [(pageId: String, chunks: [String])]() {
1818
didSet {
1919
chunksToSentences()
2020
}
@@ -31,30 +31,30 @@ public class TextRank {
3131
}
3232

3333
public init() {
34-
pages = [""]
34+
pages = [("", "")]
3535
graph = TextGraph(damping: graphDamping)
3636
}
3737

38-
public init(pages: [String]) {
38+
public init(pages: [(pageId: String, chunks: String)]) {
3939
self.pages = pages
4040
graph = TextGraph(damping: graphDamping)
4141
textToSentences()
4242
}
43-
44-
public init(chunksPerPage: [[String]]) {
43+
44+
public init(chunksPerPage: [(pageId: String, chunks: [String])]) {
4545
self.chunks = chunksPerPage
4646
graph = TextGraph(damping: graphDamping)
4747
chunksToSentences()
4848
}
4949

5050
public init(text: String) {
51-
self.pages = [text]
51+
self.pages = [("", text)]
5252
graph = TextGraph(damping: graphDamping)
5353
textToSentences()
5454
}
5555

5656
public init(text: String, summarizationFraction: Float = 0.2, graphDamping: Float = 0.85) {
57-
self.pages = [text]
57+
self.pages = [("", text)]
5858
self.summarizationFraction = summarizationFraction
5959
self.graphDamping = graphDamping
6060
graph = TextGraph(damping: graphDamping)
@@ -63,23 +63,22 @@ public class TextRank {
6363

6464
func chunksToSentences() {
6565
sentences = []
66-
for (pageIndex, pageChunks) in chunks.enumerated() {
66+
for (pageId, pageChunks) in chunks {
6767
for (chunkIndex, chunk) in pageChunks.enumerated() {
6868
sentences.append(
6969
Sentence(text: chunk.trimmingCharacters(in: .whitespacesAndNewlines),
7070
originalTextIndex: chunkIndex,
71-
pageIndex: pageIndex,
71+
pageID: pageId,
7272
additionalStopwords: stopwords)
7373
)
74-
7574
}
7675
}
7776
}
7877

7978
func textToSentences() {
8079
sentences = [];
81-
for (pageIndex, page) in pages.enumerated() {
82-
sentences.append(contentsOf: TextRank.splitIntoSentences(page, pageIndex: pageIndex, additionalStopwords: stopwords).filter { $0.length > 0 })
80+
for (pageId, page) in pages {
81+
sentences.append(contentsOf: TextRank.splitIntoSentences(page, pageID: pageId, additionalStopwords: stopwords).filter { $0.length > 0 })
8382
}
8483
}
8584
}
@@ -123,7 +122,7 @@ extension TextRank {
123122
/// Split text into sentences.
124123
/// - Parameter text: Original text.
125124
/// - Returns: An array of sentences.
126-
static func splitIntoSentences(_ text: String, pageIndex: Int = 0, additionalStopwords stopwords: [String] = [String]()) -> [Sentence] {
125+
static func splitIntoSentences(_ text: String, pageID: String = "", additionalStopwords stopwords: [String] = [String]()) -> [Sentence] {
127126
if text.isEmpty { return [] }
128127

129128
var x = [Sentence]()
@@ -132,7 +131,7 @@ extension TextRank {
132131
x.append(
133132
Sentence(text: substring.trimmingCharacters(in: .whitespacesAndNewlines),
134133
originalTextIndex: x.count,
135-
pageIndex: pageIndex,
134+
pageID: pageID,
136135
additionalStopwords: stopwords)
137136
)
138137
}

Tests/TextRankTests/TextRankTests.swift

+35-25
Original file line number | Diff line number | Diff line change
@@ -43,38 +43,48 @@ class TextRankTests: XCTestCase {
4343

4444
func testCreatingSentencesFromChunks() {
4545
let testChunks = [
46-
[
47-
"Unveilin"
48-
],
49-
[
50-
"Table of content\nTable of content 2 Introduction 3 The Historical Importance of \"Romeo and Juliet\" 3 The Year and Time of Creation 3 The Author: William Shakespeare 3 Part One: Exploring \"Romeo and Juliet\" 4 Chapter 1: The Feuding Families of Verona 4 Chapter 2: Love Blossoms Amidst Conflict 4 Chapter 3: A Secret Union of Hearts 5 Chapter 4: A Duel\'s Dark Consequences 5 Chapter 5: Love Tested by Separation 6 Chapter 6: A Friar\'s Bold Plan Unfolds 6 Chapter 7: Missteps and Fateful Discoveries 7 ",
51-
"Chapter 8: A Sleep of Death 7 Chapter 9: The Tragic Farewell 8 Overview 8 Possible Exam Questions and Answers 10\n2"
52-
],
53-
[
54-
"Introduction\nWelcome to the world of \"Romeo and Juliet,\" a literary gem that has captivated hearts for centuries. In this study guide, we\'ll journey through the pages of this remarkable work by William Shakespeare, exploring its historical importance, the year and time of its creation, and why it continues to be celebrated as one of the greatest literary achievements of all time.\n",
55-
"The Historical Importance of \"Romeo and Juliet\"\n\"Romeo and Juliet\" is not just a play; it\'s a cultural touchstone that has left an indelible mark on literature, theater, and society. Written during the Renaissance period in England, between 1594 and 1596, this tragic love story emerged during a time of immense creativity and innovation. It was a time when art, science, and exploration flourished, and Shakespeare\'s works played a pivotal role in shaping the literary landscape of the era.\n",
56-
"The Year and Time of Creation\nImagine the cobblestone streets of Elizabethan England, a bustling setting where the ink flowed freely from quills onto parchment. It was during this vibrant period that Shakespeare penned \"Romeo and Juliet.\" While the exact year of its composition remains debated, it is widely believed to have been written around 1595. The play\'s timeless themes of love, conflict, and fate resonated then, just as they continue to resonate today.\n",
57-
"The Author: William Shakespeare\nAt the heart of this enduring masterpiece stands the genius of William Shakespeare, a playwright and poet whose literary contributions have left an indelible impact on human culture. Born in 1564, Shakespeare crafted stories that transcend time and language barriers. ",
58-
"His ability to delve into the complexities of human nature, his skill in capturing the range of human emotions, and his knack for weaving captivating narratives have solidified his place as one of history\'s greatest storytellers.\n",
59-
"\"Romeo and Juliet\" stands as a testament to Shakespeare\'s unparalleled craftsmanship. As we embark on this journey through the play\'s pages, let us marvel at its historical significance, appreciate the genius of its creator, and discover why this timeless tale of love and tragedy continues to resonate with readers and audiences across the world.\n3"
60-
]
46+
(
47+
pageId: "0123",
48+
chunks: [
49+
"Unveilin"
50+
]
51+
),
52+
(
53+
pageId: "4567",
54+
chunks: [
55+
"Table of content\nTable of content 2 Introduction 3 The Historical Importance of \"Romeo and Juliet\" 3 The Year and Time of Creation 3 The Author: William Shakespeare 3 Part One: Exploring \"Romeo and Juliet\" 4 Chapter 1: The Feuding Families of Verona 4 Chapter 2: Love Blossoms Amidst Conflict 4 Chapter 3: A Secret Union of Hearts 5 Chapter 4: A Duel\'s Dark Consequences 5 Chapter 5: Love Tested by Separation 6 Chapter 6: A Friar\'s Bold Plan Unfolds 6 Chapter 7: Missteps and Fateful Discoveries 7 ",
56+
"Chapter 8: A Sleep of Death 7 Chapter 9: The Tragic Farewell 8 Overview 8 Possible Exam Questions and Answers 10\n2"
57+
]
58+
),
59+
(
60+
pageId: "7890",
61+
chunks: [
62+
"Introduction\nWelcome to the world of \"Romeo and Juliet,\" a literary gem that has captivated hearts for centuries. In this study guide, we\'ll journey through the pages of this remarkable work by William Shakespeare, exploring its historical importance, the year and time of its creation, and why it continues to be celebrated as one of the greatest literary achievements of all time.\n",
63+
"The Historical Importance of \"Romeo and Juliet\"\n\"Romeo and Juliet\" is not just a play; it\'s a cultural touchstone that has left an indelible mark on literature, theater, and society. Written during the Renaissance period in England, between 1594 and 1596, this tragic love story emerged during a time of immense creativity and innovation. It was a time when art, science, and exploration flourished, and Shakespeare\'s works played a pivotal role in shaping the literary landscape of the era.\n",
64+
"The Year and Time of Creation\nImagine the cobblestone streets of Elizabethan England, a bustling setting where the ink flowed freely from quills onto parchment. It was during this vibrant period that Shakespeare penned \"Romeo and Juliet.\" While the exact year of its composition remains debated, it is widely believed to have been written around 1595. The play\'s timeless themes of love, conflict, and fate resonated then, just as they continue to resonate today.\n",
65+
"The Author: William Shakespeare\nAt the heart of this enduring masterpiece stands the genius of William Shakespeare, a playwright and poet whose literary contributions have left an indelible impact on human culture. Born in 1564, Shakespeare crafted stories that transcend time and language barriers. ",
66+
"His ability to delve into the complexities of human nature, his skill in capturing the range of human emotions, and his knack for weaving captivating narratives have solidified his place as one of history\'s greatest storytellers.\n",
67+
"\"Romeo and Juliet\" stands as a testament to Shakespeare\'s unparalleled craftsmanship. As we embark on this journey through the play\'s pages, let us marvel at its historical significance, appreciate the genius of its creator, and discover why this timeless tale of love and tragedy continues to resonate with readers and audiences across the world.\n3"
68+
]
69+
)
6170
]
62-
let textRank = TextRank(chunks: testChunks)
71+
72+
let textRank = TextRank(chunksPerPage: testChunks)
6373
print(textRank)
64-
XCTAssertEqual(textRank.sentences[0].text, testChunks[0][0].trimmingCharacters(in: .whitespacesAndNewlines))
65-
XCTAssertEqual(textRank.sentences[0].pageIndex, 0)
74+
XCTAssertEqual(textRank.sentences[0].text, testChunks[0].chunks[0].trimmingCharacters(in: .whitespacesAndNewlines))
75+
XCTAssertEqual(textRank.sentences[0].pageID, "0123")
6676
XCTAssertEqual(textRank.sentences[0].originalTextIndex, 0)
6777

68-
XCTAssertEqual(textRank.sentences[1].text, testChunks[1][0].trimmingCharacters(in: .whitespacesAndNewlines))
69-
XCTAssertEqual(textRank.sentences[1].pageIndex, 1)
78+
XCTAssertEqual(textRank.sentences[1].text, testChunks[1].chunks[0].trimmingCharacters(in: .whitespacesAndNewlines))
79+
XCTAssertEqual(textRank.sentences[1].pageID, "4567")
7080
XCTAssertEqual(textRank.sentences[1].originalTextIndex, 0)
7181

72-
XCTAssertEqual(textRank.sentences[2].text, testChunks[1][1].trimmingCharacters(in: .whitespacesAndNewlines))
73-
XCTAssertEqual(textRank.sentences[2].pageIndex, 1)
82+
XCTAssertEqual(textRank.sentences[2].text, testChunks[1].chunks[1].trimmingCharacters(in: .whitespacesAndNewlines))
83+
XCTAssertEqual(textRank.sentences[2].pageID, "4567")
7484
XCTAssertEqual(textRank.sentences[2].originalTextIndex, 1)
7585

76-
XCTAssertEqual(textRank.sentences[8].text, testChunks[2][5].trimmingCharacters(in: .whitespacesAndNewlines))
77-
XCTAssertEqual(textRank.sentences[8].pageIndex, 2)
86+
XCTAssertEqual(textRank.sentences[8].text, testChunks[2].chunks[5].trimmingCharacters(in: .whitespacesAndNewlines))
87+
XCTAssertEqual(textRank.sentences[8].pageID, "7890")
7888
XCTAssertEqual(textRank.sentences[8].originalTextIndex, 5)
7989
}
8090

@@ -86,7 +96,7 @@ class TextRankTests: XCTestCase {
8696
XCTAssertEqual(textRank.graph.edges.count, 2)
8797

8898
text = "Dog cat bird. Sheep dog cat peacock. Horse cow fish dog chicken."
89-
textRank.pages = [text]
99+
textRank.pages = [("0123", text)]
90100
textRank.buildGraph()
91101
XCTAssertEqual(textRank.graph.nodes.count, 3)
92102
XCTAssertEqual(textRank.graph.edges.count, 3)

0 commit comments

Comments (0)