Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: smarter indentation detection. #9

Merged
merged 1 commit into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ dependencies {

api(libs.antlr.runtime)
api(libs.kotlinStdLib)

testImplementation(libs.junit.jupiter.params)
}
41 changes: 41 additions & 0 deletions core/src/main/kotlin/cash/grammar/kotlindsl/utils/Whitespace.kt
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package cash.grammar.kotlindsl.utils

import com.squareup.cash.grammar.KotlinLexer
import org.antlr.v4.runtime.CharStream
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.ParserRuleContext
import org.antlr.v4.runtime.Token
import org.antlr.v4.runtime.misc.Interval

/**
* Utilities for working with whitespace, including newlines, carriage returns, etc.
Expand Down Expand Up @@ -112,6 +114,45 @@ public object Whitespace {
return tokens.getHiddenTokensToRight(before.tokenIndex, KotlinLexer.WHITESPACE)
}

/**
* Returns the indentation in this input, based on the assumption that the first indentation
* discovered is the common indent level. If no indent discovered (which could happen if this
* input contains only top-level statements), defaults to [min].
*/
public fun computeIndent(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really like this function because it feels like a poor man's regex, and the whole point of creating a parse tree is avoiding regex -- nonetheless, dealing with whitespace has proven to be a huge pain, this works, and I think we ought to be able to further improve it.

tokens: CommonTokenStream,
input: CharStream,
min: String = " ",
): String {
// We need at least two tokens for this to make sense.
if (tokens.size() < 2) return min

val start = tokens.get(0).startIndex
val stop = tokens.get(tokens.size() - 1).stopIndex

if (start == -1 || stop == -1) return min

// Kind of a gross implementation, but a starting point that works -- can optimize later.
input.getText(Interval.of(start, stop)).lineSequence().forEach { line ->
var indent = ""
// a line might contain JUST whitespace -- we don't want to count these.
var nonEmptyLine = false

for (c in line.toCharArray()) {
if (c == ' ' || c == '\t') {
indent += c
} else {
nonEmptyLine = true
break
}
}

if (nonEmptyLine && indent.isNotEmpty()) return indent
}

return min
}

/**
* Use this in conjunction with [trimGently] to maintain original end-of-file formatting.
*/
Expand Down
104 changes: 98 additions & 6 deletions core/src/test/kotlin/cash/grammar/kotlindsl/utils/WhitespaceTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@ import cash.grammar.kotlindsl.parse.Parser
import cash.grammar.kotlindsl.utils.Blocks.isBuildscript
import cash.grammar.kotlindsl.utils.Blocks.isSubprojects
import cash.grammar.kotlindsl.utils.test.TestErrorListener
import com.squareup.cash.grammar.KotlinParser
import com.squareup.cash.grammar.KotlinParser.NamedBlockContext
import com.squareup.cash.grammar.KotlinParserBaseListener
import org.antlr.v4.runtime.CharStream
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.Token
import org.assertj.core.api.Assertions.assertThat
import org.assertj.core.api.Assertions.tuple
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource

internal class WhitespaceTest {

Expand All @@ -34,7 +36,7 @@ internal class WhitespaceTest {
errorListener = TestErrorListener {
throw RuntimeException("Syntax error: ${it?.message}", it)
},
listenerFactory = { _, tokens, _ -> TestListener(tokens) }
listenerFactory = { input, tokens, _ -> TestListener(input, tokens) }
).listener()

// There are two newlines preceding the 'subprojects' block
Expand Down Expand Up @@ -64,7 +66,7 @@ internal class WhitespaceTest {
errorListener = TestErrorListener {
throw RuntimeException("Syntax error: ${it?.message}", it)
},
listenerFactory = { _, tokens, _ -> TestListener(tokens) }
listenerFactory = { input, tokens, _ -> TestListener(input, tokens) }
).listener()

// There are two spaces preceding the 'buildscript' block (on the same line)
Expand Down Expand Up @@ -95,7 +97,7 @@ internal class WhitespaceTest {
errorListener = TestErrorListener {
throw RuntimeException("Syntax error: ${it?.message}", it)
},
listenerFactory = { _, tokens, _ -> TestListener(tokens) }
listenerFactory = { input, tokens, _ -> TestListener(input, tokens) }
).listener()

assertThat(scriptListener.trailingBuildscriptNewlines).isEqualTo(1)
Expand All @@ -115,20 +117,101 @@ internal class WhitespaceTest {
throw RuntimeException("Syntax error: ${it?.message}", it)
},
startRule = { parser -> parser.kotlinFile() },
listenerFactory = { _, tokens, _ -> TestListener(tokens) }
listenerFactory = { input, tokens, _ -> TestListener(input, tokens) }
).listener()

assertThat(scriptListener.trailingKotlinFileNewlines).isEqualTo(1)
}

@ParameterizedTest(name = "{0}")
@MethodSource("indentationCases")
fun `can discover indentation`(testCase: TestCase) {
val parser = Parser(
file = testCase.sourceText,
errorListener = TestErrorListener {
throw RuntimeException("Syntax error: ${it?.message}", it)
},
listenerFactory = { input, tokens, _ ->
TestListener(
input = input,
tokens = tokens,
defaultIndent = testCase.defaultIndent,
)
}
).listener()

assertThat(parser.indent).isEqualTo(testCase.expectedIndent)
}

private companion object {
@JvmStatic fun indentationCases() = listOf(
TestCase(
displayName = "two spaces",
sourceText = """
plugins {
id("kotlin")
}
""".trimIndent(),
expectedIndent = " ",
),
TestCase(
displayName = "four spaces",
sourceText = """
plugins {
id("kotlin")
}
""".trimIndent(),
expectedIndent = " ",
),
TestCase(
displayName = "tab",
sourceText = "plugins {\n\tid(\"kotlin\")\n}",
expectedIndent = "\t",
),
TestCase(
displayName = "mixed spaces and tab",
sourceText = "plugins {\n\t id(\"kotlin\")\n}",
expectedIndent = "\t ",
),
TestCase(
displayName = "defaults to two spaces",
sourceText = """
package com.example

class Foo
""".trimIndent(),
expectedIndent = " ",
),
TestCase(
displayName = "ignores empty lines",
// the line between `package...` and `class...` contains a single space -- don't count this
sourceText = "package com.example\n \nclass Foo",
expectedIndent = " ",
),
TestCase(
displayName = "can change default to tab",
sourceText = """
package com.example

class Foo
""".trimIndent(),
defaultIndent = "\t",
expectedIndent = "\t",
),
)
}

private class TestListener(
private val tokens: CommonTokenStream
private val input: CharStream,
private val tokens: CommonTokenStream,
private val defaultIndent: String = " ",
) : KotlinParserBaseListener() {

var newlines: List<Token>? = null
var whitespace: List<Token>? = null
val trailingBuildscriptNewlines = Whitespace.countTerminalNewlines(tokens)
val trailingKotlinFileNewlines = Whitespace.countTerminalNewlines(tokens)
val indent = Whitespace.computeIndent(tokens, input, defaultIndent)

override fun exitNamedBlock(ctx: NamedBlockContext) {
if (ctx.isSubprojects) {
Expand All @@ -139,4 +222,13 @@ internal class WhitespaceTest {
}
}
}

internal class TestCase(
val displayName: String,
val sourceText: String,
val defaultIndent: String = " ",
val expectedIndent: String,
) {
override fun toString(): String = displayName
}
}