@@ -20,11 +20,11 @@ import com.typesafe.config.Config
2020import org .apache .hadoop .fs .Path
2121import org .apache .spark .sql .{DataFrame , SaveMode , SparkSession }
2222import org .slf4j .LoggerFactory
23- import za .co .absa .pramen .api .{ExternalChannelFactory , MetastoreReader , Sink , SinkResult }
23+ import za .co .absa .pramen .api .{DataFormat , ExternalChannelFactory , MetaTableDef , MetastoreReader , Query , Sink , SinkResult }
2424import za .co .absa .pramen .core .exceptions .CmdFailedException
2525import za .co .absa .pramen .core .process .{ProcessRunner , ProcessRunnerImpl }
2626import za .co .absa .pramen .core .sink .CmdLineSink .{CMD_LINE_KEY , CmdLineDataParams }
27- import za .co .absa .pramen .core .utils .{ConfigUtils , FsUtils }
27+ import za .co .absa .pramen .core .utils .{ConfigUtils , FsUtils , SparkUtils }
2828
2929import java .time .LocalDate
3030import java .time .format .DateTimeFormatter
@@ -51,7 +51,7 @@ import scala.util.control.NonFatal
5151 *
5252 * Otherwise, the data can be accessed by the command line tool directly from the metastore.
5353 *
54- * Example sink definition:
54+ * ==Example sink definition:==
5555 * {{{
5656 * {
5757 * name = "cmd_line"
@@ -73,6 +73,7 @@ import scala.util.control.NonFatal
7373 * Here is an example of a sink definition in a pipeline. As for any other operation you can specify
7474 * dependencies, transformations, filters and columns to select.
7575 *
76+ * ==Example operation:==
7677 * {{{
7778 * {
7879 * name = "Command Line sink"
@@ -154,15 +155,18 @@ class CmdLineSink(sinkConfig: Config,
154155
155156 log.info(s " $count records saved to $tempPath. " )
156157
157- val cmdLine = getCmdLine(cmdLineTemplate, Option (tempPath), infoDate)
158+ val cmdLine = getCmdLine(cmdLineTemplate, Option (tempPath), Option (tempPath), infoDate)
158159
159160 runCmd(cmdLine)
160161
161162 log.info(s " $count records sent to the cmd line sink ( $cmdLine). " )
162163 }
163164 SinkResult (count)
164165 case None =>
165- val cmdLine = getCmdLine(cmdLineTemplate, None , infoDate)
166+ val metaTable = metastore.getTableDef(tableName)
167+ val (dataPath, partitionPath) = getPaths(metaTable, infoDate)
168+
169+ val cmdLine = getCmdLine(cmdLineTemplate, dataPath, partitionPath, infoDate)
166170
167171 runCmd(cmdLine)
168172
@@ -173,21 +177,80 @@ class CmdLineSink(sinkConfig: Config,
173177 }
174178 }
175179
/**
  * Resolves the filesystem locations of a metastore table for a given information date.
  *
  * Only formats that are backed by a concrete path (Parquet, or Delta defined by a path
  * query) expose a location; other formats yield no paths.
  *
  * @param metaTable the metastore table definition.
  * @param infoDate  the information date used to derive the partition subdirectory.
  * @return a pair of (base data path, partition path for the info date); both are None
  *         when the table format has no filesystem location.
  */
private[core] def getPaths(metaTable: MetaTableDef, infoDate: LocalDate): (Option[Path], Option[Path]) = {
  val basePathOpt = metaTable.format match {
    case DataFormat.Parquet(path, _)           => Option(path)
    case DataFormat.Delta(Query.Path(path), _) => Option(path)
    case _                                     => None
  }

  basePathOpt
    .map { basePath =>
      val dataPath = new Path(basePath)
      val partitionPath = SparkUtils.getPartitionPath(infoDate, metaTable.infoDateColumn, metaTable.infoDateFormat, basePath)
      (Option(dataPath), Option(partitionPath))
    }
    .getOrElse((None, None))
}
201+
/**
  * Builds the concrete command line from a template by substituting placeholder tokens.
  *
  * Supported tokens:
  *  - `@infoDate`, `@infoMonth` — always substituted from the information date.
  *  - `@dataPath`, `@dataUri` — substituted when a data path is available.
  *  - `@partitionPath` — substituted when a partition path is available.
  *  - `@bucket`, `@prefix`, `@partitionPrefix` — substituted only when the corresponding
  *    path URI has an authority (e.g. an S3 bucket in `s3://bucket/prefix`).
  *
  * Tokens without a corresponding value are left in the command line untouched.
  *
  * @param cmdLineTemplate the command line template containing placeholder tokens.
  * @param dataPath        the base data path, if known.
  * @param partitionPath   the partition path for the information date, if known.
  * @param infoDate        the information date of the data being processed.
  * @return the command line with all resolvable tokens substituted.
  */
private[core] def getCmdLine(cmdLineTemplate: String,
                             dataPath: Option[Path],
                             partitionPath: Option[Path],
                             infoDate: LocalDate): String = {
  log.info(s"CmdLine template: $cmdLineTemplate")

  val cmdWithDates = cmdLineTemplate.replace("@infoDate", infoDate.toString)
    .replace("@infoMonth", infoDate.format(DateTimeFormatter.ofPattern("yyyy-MM")))

  val cmdWithDataPath = dataPath match {
    case Some(path) =>
      replaceBucketTokens(cmdWithDates, path, "@prefix")
        .replace("@dataPath", path.toString)
        .replace("@dataUri", path.toUri.toString)
    case None =>
      cmdWithDates
  }

  partitionPath match {
    case Some(path) =>
      replaceBucketTokens(cmdWithDataPath, path, "@partitionPrefix")
        .replace("@partitionPath", path.toString)
    case None =>
      cmdWithDataPath
  }
}

/**
  * Substitutes `@bucket` and the given prefix token when the path URI has an authority
  * (object-store style paths such as `s3://bucket/prefix`); otherwise returns the command
  * line unchanged.
  *
  * The prefix is made bucket-relative by stripping the leading slash of the URI path.
  */
private def replaceBucketTokens(cmdLine: String, path: Path, prefixToken: String): String = {
  Option(path.toUri.getAuthority) match {
    case Some(bucket) =>
      val prefix = path.toUri.getPath.stripPrefix("/")
      cmdLine
        .replace("@bucket", bucket)
        .replace(prefixToken, prefix)
    case None =>
      cmdLine
  }
}
192255
193256 private [core] def runCmd (cmdLine : String ): Unit = {
0 commit comments