#!/usr/bin/env groovy
import groovy.xml.XmlUtil
import java.nio.charset.StandardCharsets
import groovy.xml.QName

/**
 * Copies a Kettle job file, together with the transformation files it references,
 * into a target directory.
 *
 * All work happens in the constructor: it parses the command line, loads the job XML,
 * loads every job entry of type 'TRANS' (plus the 'Mapping' steps found directly inside
 * those transformations), optionally strips or adds connections and parameters, rewrites
 * the file references to be relative to the job directory, and writes the results.
 *
 * Referenced files are always looked up by base name in the directory of the job file.
 */
class CopyKettleJob {
    def stripParams = false
    def stripConnections = false
    def addParams = false
    def addConnections = false
    def jobFile
    def toDir
    // De-duplicated list of [transformation, fileName, entry] maps that get written out.
    def transFiles = []
    def dryRun = false

    /**
     * Parses the command line and performs the copy.
     *
     * Positional arguments: the job file, then optionally the target directory
     * (defaults to the directory containing the job file).
     *
     * @param args command line arguments as passed to the script
     */
    def CopyKettleJob(args) {
        def cli = new CliBuilder(usage: 'copy_kettlejob.groovy [options] []')
        cli.with {
            h(longOpt: 'help', 'get usage info')
            p(longOpt: 'strip-parameters', 'Strip all parameters from transformations')
            P(longOpt: 'add-parameters', 'Add parameters to transformation')
            c(longOpt: 'strip-connections', 'Strip all connections from transformations')
            C(longOpt: 'add-connections', 'Add connections to transformation. If no given, modify files in place.')
            d(longOpt: 'dry-run' , 'Just print out what would happen, don\'t actually do anything')
            configFile(args:1, argName: 'myconfigfile', 'Use this config file for kettle parameters and database connection')
            // NOTE(review): --jobfile and --to are declared but never read below; only the
            // positional arguments are used.
            cli._(longOpt:'jobfile', args:1, argName:'jobfile', 'Job to copy')
            cli._(longOpt:'to', args:1, argName:'to dir', 'Directory to copy job to')
        }

        def options = cli.parse(args)
        if (!options) {
            // parse() has already reported the malformed command line.
            return
        }
        if (!args || options.h) {
            cli.usage()
            return
        }

        def extraArgs = options.arguments()
        stripParams = options.p
        stripConnections = options.c
        addParams = options.P
        addConnections = options.C
        dryRun = options.d

        // Report every conflicting pair, then stop instead of carrying on with both set.
        def conflictingOptions = false
        if (stripParams && addParams) {
            println "Cannot use parameters 'p' and 'P' at the same time"
            conflictingOptions = true
        }
        if (stripConnections && addConnections) {
            println "Cannot use parameters 'c' and 'C' at the same time"
            conflictingOptions = true
        }
        if (conflictingOptions) {
            return
        }

        // Without a positional job file there is nothing to copy.
        if (!extraArgs) {
            cli.usage()
            return
        }

        jobFile = new File(extraArgs[0])
        if (extraArgs.size() > 1 && extraArgs[1] != null) {
            toDir = new File(extraArgs[1])
        } else {
            toDir = jobFile.getAbsoluteFile().getParentFile()
        }

        def configFilePath = options.configFile ?: getConfigFilePath()
        def configFile = new File(configFilePath)

        println "Copying ${jobFile} to ${toDir} strip params: ${stripParams}, strip connections : ${stripConnections}"
        if (!jobFile.canRead()) {
            println "Cannot open ${jobFile}"
            return
        }
        if (toDir != null && !toDir.isDirectory()) {
            println "Cannot open ${toDir} or no directory"
            return
        }

        def parser = new XmlParser()
        def job = parser.parse(jobFile)

        // Every reference found, duplicates included, so that each referencing node can be
        // rewritten later even when its target file is only written once.
        def allReferences = []
        job.entries.entry.findAll { it.type.text() == 'TRANS' }.each { entry ->
            def transFile = loadReference(entry, parser)
            allReferences << transFile
            allReferences.addAll(getSubtransformations(transFile, parser))
        }

        // remove duplicate transformations (non-mutating, allReferences stays complete)
        transFiles = allReferences.unique(false) { a, b -> a.fileName <=> b.fileName }

        // strip params and connections if wanted
        transFiles.each {
            def transformation = it.transformation
            applyConnectionOptions(transformation, configFile)
            applyParameterOptions(transformation, configFile)
            // add empty chars for xml-tags
            transformation.breadthFirst().each { node -> fixWhitespacePaddings(node) }
        }

        // Print the original references first, before any of them is replaced.
        transFiles.each { println it.entry.filename.text() }

        // insert transformation file name with job relative path (${Internal.Job.Filename.Directory}).
        // This is done for ALL references before anything is written: a Mapping step lives inside
        // its parent transformation, so the parent must not be serialized before the step is updated.
        allReferences.each {
            def filePath = '\${Internal.Job.Filename.Directory}' + "/" + it.fileName
            it.entry.filename.replaceNode { filename(filePath) }
        }

        // write destination transformation files
        transFiles.each {
            def transOut = new File(toDir.absolutePath + File.separator + it.fileName)
            if (!dryRun) writeXmlToFile(it.transformation, transOut)
        }

        applyConnectionOptions(job, configFile)

        def jobOut = new File(toDir.absolutePath + File.separator + jobFile.name)
        if (!dryRun) writeXmlToFile(job, jobOut)
    }

    /**
     * Loads the file a job entry or step points to.
     *
     * Only the base name of the referenced path is used; the file is read from the
     * directory of the job file.
     *
     * @param referencingNode node with a 'filename' child (job entry or Mapping step)
     * @param parser XML parser used to read the referenced file
     * @return map with keys "transformation" (parsed root), "fileName" (base name)
     *         and "entry" (the referencing node)
     */
    private loadReference(referencingNode, parser) {
        def file = (new File(referencingNode.filename.text())).name
        def path = (new File(jobFile.absolutePath)).parentFile
        def realFile = new File(path.getAbsolutePath() + File.separator + file)
        return ["transformation" : parser.parse(realFile), "fileName" : file, "entry": referencingNode]
    }

    /**
     * Removes the 'connection' children of root when stripping or adding connections is
     * requested, and appends the configured connections when adding is requested.
     */
    private void applyConnectionOptions(root, configFile) {
        if (stripConnections || addConnections) {
            root.connection.each { connection -> connection.parent().remove(connection) }
        }
        if (addConnections) {
            getConnectionConfig(configFile).each { con -> root.append(con) }
        }
    }

    /**
     * Removes all info/parameters/parameter nodes when stripping is requested; otherwise,
     * when adding is requested, appends the configured parameters to the first
     * info/parameters node.
     */
    private void applyParameterOptions(transformation, configFile) {
        if (stripParams) {
            transformation.info.parameters.parameter.each { param -> param.parent().remove(param) }
        } else if (addParams) {
            def paramsNode = transformation.info.parameters.getAt(0)
            getParamsConfig(configFile).each { paramsNode.append(it) }
        }
    }

    /**
     * For a non-empty 'field' node that has a pad_len value but an empty pad_char,
     * sets pad_char to a single space.
     *
     * @param node any node of a transformation tree
     */
    def fixWhitespacePaddings(node) {
        if (node.name() == "field" && node.children() != []) {
            def padChars = node.pad_char
            def padLengths = node.pad_len
            // this one's kinda dirty, 'cause it only considers the first childnodes, but...this seems to be the only case
            def hasBoth = padLengths != [] && padChars != []
            if (hasBoth && padLengths[0].value() != [] && padChars[0].value() == []) {
                padChars[0].setValue(" ")
            }
        }
    }

    /**
     * Serializes node to file as UTF-8, preserving whitespace.
     *
     * The writer is always closed, so the output is flushed and the file handle released
     * even when printing fails.
     *
     * @param node root node to print
     * @param file destination file (overwritten)
     */
    def writeXmlToFile(node, file) {
        def pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))
        try {
            // NOTE(review): appending an empty string writes nothing; presumably a prolog
            // was meant to be emitted here - confirm against the intended output.
            pw.append("")
            def nodePrinter = new XmlNodePrinter(pw)
            nodePrinter.setPreserveWhitespace(true)
            nodePrinter.print(node)
        } finally {
            pw.close()
        }
    }

    /**
     * Returns the config file path taken from the environment variable
     * COPY_KETTLEJOB_CONFIG_FILE_PATH, or an empty string when it is not set.
     * The resolved value is printed.
     */
    def getConfigFilePath() {
        def cfp = System.getenv()['COPY_KETTLEJOB_CONFIG_FILE_PATH']
        if (cfp == null) {
            cfp = ""
        }
        println "Config file: " + cfp
        return cfp
    }

    /**
     * Returns the connection configuration: the 'connection' children of the config file
     * root, or the parsed built-in default when no readable config file is given.
     *
     * @param configFile config file, may be null or unreadable
     */
    def getConnectionConfig(configFile = null) {
        def parser = new XmlParser()
        def connectionNode
        if (configFile == null || !configFile.canRead()) {
            // NOTE(review): as it appears here this literal carries no XML markup, so
            // parseText would reject it - verify against the original file. It also seems
            // to embed a credential.
            def connectionXml = ''' eduetl localhost POSTGRESQL Native dev_java8_eduetl 5432 postgres Encrypted 2be98afc86aa7f2e4bb16bd64d980aac9 FORCE_IDENTIFIERS_TO_LOWERCASEN FORCE_IDENTIFIERS_TO_UPPERCASEN IS_CLUSTEREDN PORT_NUMBER5432 PRESERVE_RESERVED_WORD_CASEN QUOTE_ALL_FIELDSN SUPPORTS_BOOLEAN_DATA_TYPEY SUPPORTS_TIMESTAMP_DATA_TYPEY USE_POOLINGN '''
            connectionNode = parser.parseText(connectionXml)
        } else {
            def config = parser.parse(configFile)
            connectionNode = config.connection
        }
        return connectionNode
    }

    /**
     * Returns the parameter nodes: the parameters/parameter nodes of the config file, or
     * the 'parameter' children of the parsed built-in default when no readable config
     * file is given (a warning is printed in that case).
     *
     * @param configFile config file, may be null or unreadable
     */
    def getParamsConfig(configFile = null) {
        def parser = new XmlParser()
        def paramsNodeList
        if (configFile == null || !configFile.canRead()) {
            println "Warning: No configFile. Did you forget to set the environment variable COPY_KETTLEJOB_CONFIG_FILE_PATH?"
            // NOTE(review): as it appears here this literal carries no XML markup, so
            // parseText would reject it - verify against the original file.
            def paramsXml = ''' bundesland 3 fsem_pred 1=1 hs_nr 9991 hssem_pred 1=1 landesspezifika NW lieferungart mtk_pred0 matrikel_nr=11228729 mtk_pred 1=1 mtk_pred1 matrikel_nr=897190 mtk_pred2 matrikel_nr=11230911 mtk_pred4 matrikel_nr=11230825 mtk_pred5 matrikel_nr=11200110 semester 20181 semester2 20161 semester_zuvor 20172 semester_zuvor2 20152 stg_pred 1=1 lab_vorsemester 2 trimester false stichtagsart_stud_vorsem 6 stichtagsart_stud_berichtssem 1 stichtagsart_pruef_vorsem 2 stichtagsart_pruef_berichtssem 4 vorsem_exmas_start 20172 exma_max_rueck_beur_ein_pred sem_rueck_beur_ein=20172 '''
            def params = parser.parseText(paramsXml)
            paramsNodeList = params.parameter
        } else {
            def content = parser.parse(configFile)
            paramsNodeList = content.get("parameters").parameter
        }
        return paramsNodeList
    }

    /**
     * Loads the files referenced by the 'Mapping' steps directly inside the given
     * transformation. Steps of the loaded sub-transformations are not followed further.
     *
     * @param transFile map as produced for a transformation (key "transformation" is read)
     * @param parser XML parser used to read the referenced files
     * @return list of maps with keys "transformation", "fileName" and "entry"
     */
    def getSubtransformations(transFile, parser) {
        return transFile.transformation.step
                .findAll { it.type.text() == 'Mapping' }
                .collect { step -> loadReference(step, parser) }
    }
}

copy = new CopyKettleJob(args)