@@ -12,14 +12,12 @@ import kotlinx.coroutines.coroutineScope
12
12
import kotlinx.coroutines.withContext
13
13
import kotlinx.coroutines.yield
14
14
import org.jsoup.Jsoup
15
- import ua.acclorite.book_story.R
16
15
import ua.acclorite.book_story.data.parser.DocumentParser
16
+ import ua.acclorite.book_story.data.parser.MarkdownParser
17
17
import ua.acclorite.book_story.data.parser.TextParser
18
- import ua.acclorite.book_story.domain.reader.Chapter
19
- import ua.acclorite.book_story.domain.reader.ChapterWithText
20
- import ua.acclorite.book_story.domain.ui.UIText
21
- import ua.acclorite.book_story.domain.util.Resource
18
+ import ua.acclorite.book_story.domain.reader.ReaderText
22
19
import ua.acclorite.book_story.presentation.core.util.addAll
20
+ import ua.acclorite.book_story.presentation.core.util.clearAllMarkdown
23
21
import ua.acclorite.book_story.presentation.core.util.clearMarkdown
24
22
import java.io.File
25
23
import java.util.concurrent.ConcurrentLinkedQueue
@@ -34,16 +32,16 @@ private typealias Title = String
34
32
private val dispatcher = Dispatchers .IO .limitedParallelism(2 )
35
33
36
34
class EpubTextParser @Inject constructor(
35
+ private val markdownParser : MarkdownParser ,
37
36
private val documentParser : DocumentParser
38
37
) : TextParser {
39
38
40
- override suspend fun parse (file : File ): Resource < List <ChapterWithText > > {
39
+ override suspend fun parse (file : File ): List <ReaderText > {
41
40
Log .i(EPUB_TAG , " Started EPUB parsing: ${file.name} ." )
42
41
43
42
return try {
44
- val chapters = mutableListOf<ChapterWithText >()
45
-
46
43
yield ()
44
+ var readerText = listOf<ReaderText >()
47
45
48
46
withContext(Dispatchers .IO ) {
49
47
ZipFile (file).use { zip ->
@@ -62,35 +60,28 @@ class EpubTextParser @Inject constructor(
62
60
Log .i(EPUB_TAG , " Chapter entries, size: ${chapterEntries.size} " )
63
61
Log .i(EPUB_TAG , " Title entries, size: ${chapterTitleEntries?.size} " )
64
62
65
- zip.parseEpub(
63
+ readerText = zip.parseEpub(
66
64
chapterEntries = chapterEntries,
67
65
chapterTitleEntries = chapterTitleEntries
68
- ).let {
69
- if (it == null || it.isEmpty()) {
70
- Log .e(EPUB_TAG , " Could not parse EPUB (null or empty)." )
71
- return @withContext
72
- }
73
- chapters.addAll(it)
74
- }
66
+ )
75
67
}
76
68
}
77
69
78
70
yield ()
79
71
80
- if (chapters.isEmpty()) {
81
- return Resource .Error (UIText .StringResource (R .string.error_file_empty))
72
+ if (
73
+ readerText.filterIsInstance<ReaderText .Text >().isEmpty() ||
74
+ readerText.filterIsInstance<ReaderText .Chapter >().isEmpty()
75
+ ) {
76
+ Log .e(EPUB_TAG , " Could not extract text from EPUB." )
77
+ return emptyList()
82
78
}
83
79
84
80
Log .i(EPUB_TAG , " Successfully finished EPUB parsing." )
85
- Resource . Success (chapters)
81
+ readerText
86
82
} catch (e: Exception ) {
87
83
e.printStackTrace()
88
- Resource .Error (
89
- UIText .StringResource (
90
- R .string.error_query,
91
- e.message?.take(40 )?.trim() ? : " "
92
- )
93
- )
84
+ emptyList()
94
85
}
95
86
}
96
87
@@ -107,18 +98,18 @@ class EpubTextParser @Inject constructor(
107
98
private suspend fun ZipFile.parseEpub (
108
99
chapterEntries : List <ZipEntry >,
109
100
chapterTitleEntries : Map <Title , List <String >>?
110
- ): List <ChapterWithText > ? {
101
+ ): List <ReaderText > {
111
102
112
- val chapters = mutableListOf<ChapterWithText >()
103
+ val readerText = mutableListOf<ReaderText >()
113
104
coroutineScope {
114
- val unformattedChapters = ConcurrentLinkedQueue <ChapterWithText >()
105
+ val unformattedText = ConcurrentLinkedQueue <Pair < Int , List < ReaderText >> >()
115
106
116
107
// Asynchronously getting all chapters with text
117
108
val jobs = chapterEntries.mapIndexed { index, entry ->
118
109
async(dispatcher) {
119
110
yield ()
120
111
121
- unformattedChapters .parseZipEntry(
112
+ unformattedText .parseZipEntry(
122
113
zip = this @parseEpub,
123
114
index = index,
124
115
entry = entry,
@@ -131,27 +122,15 @@ class EpubTextParser @Inject constructor(
131
122
jobs.awaitAll()
132
123
133
124
// Sorting chapters in correct order
134
- chapters.addAll {
135
- var textIndex = - 1
136
- unformattedChapters.toList()
137
- .sortedBy { it.chapter.index }
138
- .mapIndexed { index, item ->
139
- item.copy(
140
- chapter = item.chapter.copy(
141
- index = index,
142
- startIndex = textIndex + 1 ,
143
- endIndex = textIndex + item.text.size
144
- )
145
- ).also { textIndex + = item.text.size }
146
- }
125
+ readerText.addAll {
126
+ unformattedText.toList()
127
+ .sortedBy { (index, _) -> index }
128
+ .map { it.second }
129
+ .flatten()
147
130
}
148
131
}
149
132
150
- if (chapters.isEmpty()) {
151
- return null
152
- }
153
-
154
- return chapters
133
+ return readerText
155
134
}
156
135
157
136
/* *
@@ -163,54 +142,75 @@ class EpubTextParser @Inject constructor(
163
142
* @param entry [ZipEntry].
164
143
* @param chapterTitleMap Titles from [getChapterTitleMapFromToc].
165
144
*/
166
- private suspend fun ConcurrentLinkedQueue<ChapterWithText >.parseZipEntry (
145
+ private suspend fun ConcurrentLinkedQueue <Pair < Int , List < ReaderText >> >.parseZipEntry (
167
146
zip : ZipFile ,
168
147
index : Int ,
169
148
entry : ZipEntry ,
170
149
chapterTitleMap : Map <Title , List <String >>?
171
150
) {
172
151
// Getting all text
173
152
val content = zip.getInputStream(entry).bufferedReader().use { it.readText() }
174
- var chapter = documentParser.run {
153
+ var text = documentParser.run {
175
154
Jsoup .parse(content).parseDocument()
176
155
}
156
+ val readerText = mutableListOf<ReaderText >()
177
157
178
- if (chapter.isEmpty()) {
179
- Log .w(EPUB_TAG , " Chapter ${entry.name} is empty." )
180
- return
181
- }
182
-
183
- // Getting title and removing first line (if matches title)
184
- val chapterTitle = getChapterTitleFromToc(
158
+ // Adding chapter title from TOC if found
159
+ var chapterAdded = false
160
+ getChapterTitleFromToc(
185
161
chapterSource = entry.name,
186
162
chapterTitleMap = chapterTitleMap
187
- ).run {
188
- if (this != null ) {
189
- return @run this
163
+ ).apply {
164
+ if (this == null ) return @apply
165
+ readerText.add(
166
+ ReaderText .Chapter (
167
+ title = this
168
+ )
169
+ )
170
+ chapterAdded = true
171
+
172
+ text = text.dropWhile { line ->
173
+ line.clearMarkdown().lowercase() == this .lowercase()
190
174
}
191
- chapter.first().clearMarkdown()
192
- }.also { title ->
193
- chapter = chapter.dropWhile { line ->
194
- line.clearMarkdown().lowercase() == title.lowercase()
175
+ }
176
+
177
+ // Format and add text
178
+ text.forEach { line ->
179
+ yield ()
180
+
181
+ if (line.isNotBlank()) {
182
+ when (line) {
183
+ " ***" , " ---" -> readerText.add(
184
+ ReaderText .Separator
185
+ )
186
+
187
+ else -> {
188
+ if (! chapterAdded && line.clearAllMarkdown().isNotBlank()) {
189
+ readerText.add(
190
+ 0 , ReaderText .Chapter (
191
+ title = line.clearAllMarkdown()
192
+ )
193
+ )
194
+ chapterAdded = true
195
+ } else readerText.add(
196
+ ReaderText .Text (
197
+ line = markdownParser.parse(line)
198
+ )
199
+ )
200
+ }
201
+ }
195
202
}
196
203
}
197
204
198
- if (chapter.isEmpty()) {
199
- Log .w(EPUB_TAG , " Chapter ${entry.name} is empty." )
205
+ if (
206
+ readerText.filterIsInstance<ReaderText .Text >().isEmpty() ||
207
+ readerText.filterIsInstance<ReaderText .Chapter >().isEmpty()
208
+ ) {
209
+ Log .w(EPUB_TAG , " Could not extract text from [${entry.name} ]." )
200
210
return
201
211
}
202
212
203
- add(
204
- ChapterWithText (
205
- Chapter (
206
- index = index,
207
- title = chapterTitle,
208
- startIndex = 0 ,
209
- endIndex = 0
210
- ),
211
- text = chapter
212
- )
213
- )
213
+ add(index to readerText)
214
214
}
215
215
216
216
/* *
0 commit comments