-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathBblfsh.scala
123 lines (101 loc) · 3.57 KB
/
Bblfsh.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package tech.sourced.engine.util
import java.nio.charset.StandardCharsets
import gopkg.in.bblfsh.sdk.v1.protocol.generated.Status
import gopkg.in.bblfsh.sdk.v1.uast.generated.Node
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.bblfsh.client.BblfshClient
import tech.sourced.engine.util.Bblfsh.client
/**
 * Helpers around the bblfsh gRPC service: configuration lookup, a lazily
 * created client shared by the whole object, and UAST extraction/filtering.
 */
object Bblfsh extends Logging {

  /** Host and port of the bblfsh gRPC service. */
  case class Config(host: String, port: Int)

  /** Key used for the option to specify the host of the bblfsh grpc service. */
  val hostKey = "spark.tech.sourced.bblfsh.grpc.host"

  /** Key used for the option to specify the port of the bblfsh grpc service. */
  val portKey = "spark.tech.sourced.bblfsh.grpc.port"

  /** Default bblfsh host. */
  val defaultHost = "0.0.0.0"

  /** Default bblfsh port. */
  val defaultPort = 9432

  /** Languages reported by the bblfsh service; stays empty until first fetched. */
  var supportedLanguages: Set[String] = Set()

  /** Maps human-readable language names returned by enry to the names bblfsh understands. */
  private val languagesMapping = Map[String, String](
    "C#" -> "csharp"
  )

  // Both are created lazily on first use and then reused for every later call.
  private var config: Config = _
  private var client: BblfshClient = _

  /**
   * Returns the configuration for bblfsh.
   *
   * The configuration is read from the session only on the first call and
   * cached afterwards; later calls return the cached value regardless of the
   * session they are given.
   *
   * @param session Spark session
   * @return bblfsh configuration
   * @throws NumberFormatException if the configured port is not a valid integer
   */
  def getConfig(session: SparkSession): Config = {
    if (config == null) {
      val host = session.conf.get(hostKey, Bblfsh.defaultHost)
      val port = session.conf.get(portKey, Bblfsh.defaultPort.toString).toInt
      config = Config(host, port)
    }
    config
  }

  /**
   * Returns the shared bblfsh client, creating it on first use.
   *
   * Only the configuration passed on the first call is used to build the
   * client; the same instance is returned afterwards.
   *
   * @param config bblfsh configuration
   * @return bblfsh client
   */
  private def getClient(config: Config): BblfshClient = synchronized {
    if (client == null) {
      client = BblfshClient(config.host, config.port)
    }
    client
  }

  /**
   * Returns the languages supported by the bblfsh service, asking the service
   * whenever the cached set is still empty.
   *
   * @param config bblfsh configuration
   * @return set of language names as reported by the service
   */
  private def getSupportedLanguages(config: Config): Set[String] = synchronized {
    if (supportedLanguages.isEmpty) {
      supportedLanguages = getClient(config)
        .supportedLanguages()
        .languages
        .map(_.language)
        .toSet
    }
    supportedLanguages
  }

  /**
   * Tells whether the given language is among the ones supported by bblfsh.
   *
   * @param config bblfsh configuration
   * @param lang   language name (must not be null)
   * @return true if the lower-cased name is in the supported set
   */
  private def shouldExtractLanguage(config: Config, lang: String): Boolean = {
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale
    // (e.g. a Turkish locale would turn "I" into a dotless "ı").
    val normalized = lang.toLowerCase(java.util.Locale.ROOT)
    getSupportedLanguages(config).contains(normalized)
  }

  /**
   * Extracts the UAST using bblfsh.
   *
   * @param path    File path
   * @param content File content
   * @param lang    File language; null means the file has not been classified yet
   * @param config  bblfsh configuration
   * @return List of uast nodes binary-encoded as a byte array; empty when the
   *         language is not supported, the parse did not succeed or the
   *         response carries no UAST
   */
  def extractUAST(path: String,
                  content: Array[Byte],
                  lang: String,
                  config: Config): Seq[Array[Byte]] = {
    val bblfshLang = languagesMapping.getOrElse(lang, lang)
    // if lang == null, it hasn't been classified yet
    // so rely on bblfsh to guess this file's language
    if (bblfshLang != null && !shouldExtractLanguage(config, bblfshLang)) {
      Seq()
    } else {
      //FIXME(bzz): not everything is UTF-8 encoded :/
      val contentStr = new String(content, StandardCharsets.UTF_8)
      val parsed = getClient(config).parse(path, content = contentStr, lang = bblfshLang)
      if (parsed.status == Status.OK) {
        // The UAST is optional in the response: yield nothing instead of
        // throwing when an OK response comes back without a tree.
        parsed.uast.map(_.toByteArray).toList
      } else {
        logWarning(s"${parsed.status} $path: ${parsed.errors.mkString("; ")}")
        Seq()
      }
    }
  }

  /**
   * Filter an UAST node using the given query.
   *
   * @param node   An UAST node
   * @param query  XPath expression
   * @param config bblfsh configuration
   * @return UAST list of filtered nodes
   */
  def filter(node: Node, query: String, config: Config): List[Node] = {
    getClient(config).filter(node, query)
  }
}