Minimalistisches Beispiel-Modul mit generierten Modulscripten für Kettle-Jobs http://www.superx-projekt.de/doku/kurse/kettle_monitoring/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

372 lines
14 KiB

#!/usr/bin/env groovy
import groovy.xml.XmlUtil
import java.nio.charset.StandardCharsets
import groovy.xml.QName
class CopyKettleJob {
// Run settings, assigned by the constructor from the command line:
//   stripParams / addParams           - remove or add <parameter> entries in the transformations
//   stripConnections / addConnections - remove or replace <connection> entries
//   jobFile                           - the job file (.kjb) to copy
//   toDir                             - destination directory
//   dryRun                            - when true, no files are written
// transFiles is initialised to an empty list.
def stripParams = false , stripConnections = false , addParams = false , addConnections = false , jobFile, toDir, transFiles = [], dryRun = false
/**
 * Parses the command line and performs the complete copy.
 *
 * Steps:
 *  1. read the options and resolve the job file, destination directory and config file;
 *  2. parse the job and every transformation it references (entries of type TRANS),
 *     plus the transformations referenced by their Mapping steps;
 *  3. rewrite every reference to ${Internal.Job.Filename.Directory}/<file name>;
 *  4. strip/add connections and parameters as requested;
 *  5. write the transformations and the job to the destination directory
 *     (skipped when --dry-run is given).
 *
 * The job file and destination may be given positionally or through --jobfile / --to;
 * positional arguments win. Without a destination the files are modified in place.
 *
 * @param args command line arguments of the script
 */
def CopyKettleJob(args) {
    def cli = new CliBuilder(usage: 'copy_kettlejob.groovy [options] <job.kjb> [<to_dir>]')
    cli.with {
        h(longOpt: 'help', 'get usage info')
        p(longOpt: 'strip-parameters', 'Strip all parameters from transformations')
        P(longOpt: 'add-parameters', 'Add parameters to transformation')
        c(longOpt: 'strip-connections', 'Strip all connections from transformations')
        C(longOpt: 'add-connections', 'Add connections to transformation. If no <to_dir> given, modify files in place.')
        d(longOpt: 'dry-run' , 'Just print out what would happen, don\'t actually do anything')
        configFile(args:1, argName: 'myconfigfile', 'Use this config file for kettle parameters and database connection')
        cli._(longOpt:'jobfile', args:1, argName:'jobfile', 'Job to copy')
        cli._(longOpt:'to', args:1, argName:'to dir', 'Directory to copy job to')
    }
    def options = cli.parse(args)
    // cli.parse() yields null when the arguments are invalid; stop here
    // instead of failing with a NullPointerException on options.h.
    if (!options) {
        return
    }
    if (!args || options.h) {
        cli.usage()
        return
    }
    def extraArgs = options.arguments()
    stripParams = options.p
    stripConnections = options.c
    addParams = options.P
    addConnections = options.C
    dryRun = options.d
    // Contradictory options are an error: abort instead of silently picking one.
    if (stripParams && addParams) {
        println "Cannot use parameters 'p' and 'P' at the same time"
        return
    }
    if (stripConnections && addConnections) {
        println "Cannot use parameters 'c' and 'C' at the same time"
        return
    }

    // Positional arguments take precedence, --jobfile / --to are the fallback.
    def jobPath = extraArgs[0] ?: options.jobfile
    if (!jobPath) {
        println "No job file given"
        cli.usage()
        return
    }
    jobFile = new File(jobPath)
    def toPath = extraArgs[1] ?: options.to
    toDir = toPath ? new File(toPath) : jobFile.getAbsoluteFile().getParentFile()

    def configFilePath = options.configFile ?: getConfigFilePath()
    def configFile = new File(configFilePath)
    println "Copying ${jobFile} to ${toDir} strip params: ${stripParams}, strip connections : ${stripConnections}"
    if (! jobFile.canRead()) {
        println "Cannot open ${jobFile}"
        return
    }
    if (toDir != null && !toDir.isDirectory()) {
        println "Cannot open ${toDir} or no directory"
        return
    }

    def parser = new XmlParser()
    def job = parser.parse(jobFile)
    def transforms = job.entries.entry.findAll { it.type.text() == 'TRANS' }

    // Transformations are looked up next to the job file, by base name only.
    def jobDir = jobFile.getAbsoluteFile().getParentFile()
    def topLevel = []
    def nested = []
    transforms.each { entry ->
        def baseName = (new File(entry.filename.text())).name
        def realFile = new File(jobDir.getAbsolutePath() + File.separator + baseName)
        def transFile = ["transformation" : parser.parse(realFile), "fileName" : baseName, "entry": entry]
        topLevel << transFile
        nested.addAll(getSubtransformations(transFile, parser))
    }
    // Top-level transformations first, so that their parse tree (the one whose
    // Mapping steps are tracked in 'nested') survives the de-duplication below.
    def references = topLevel + nested

    // Insert the job relative path (${Internal.Job.Filename.Directory}) into EVERY
    // referencing job entry / Mapping step. This has to happen before de-duplication
    // (otherwise second references to the same file keep their old path) and before
    // anything is written (otherwise a parent transformation is written with the old
    // path of its sub-transformation).
    references.each { ref ->
        println ref.entry.filename.text()
        def filePath = '\${Internal.Job.Filename.Directory}' + "/" + ref.fileName
        ref.entry.filename.replaceNode {
            filename(filePath)
        }
    }

    // remove duplicate transformations (the first occurrence of each file name is kept)
    def uniqueTransFiles = references.unique { a, b -> a.fileName <=> b.fileName }

    // Removes the existing <connection> children of root and, with -C, appends
    // the configured ones. Used for the transformations and for the job itself.
    def replaceConnections = { root ->
        if (stripConnections || addConnections) {
            root.connection.each { connection ->
                connection.parent().remove(connection)
            }
        }
        if (addConnections) {
            def connectionNodes = getConnectionConfig(configFile)
            // Accept a single node as well as a list of nodes; iterating a
            // Node directly would walk its children instead.
            if (connectionNodes instanceof Node) {
                connectionNodes = [connectionNodes]
            }
            connectionNodes.each { con ->
                root.append(con)
            }
        }
    }

    // strip/add params and connections if wanted
    uniqueTransFiles.each { transFile ->
        def transformation = transFile.transformation
        replaceConnections(transformation)
        if (stripParams) {
            transformation.info.parameters.parameter.each { param ->
                param.parent().remove(param)
            }
        } else if (addParams) {
            def paramsNode = transformation.info.parameters.getAt(0)
            if (paramsNode == null) {
                // no <parameters> element yet: create it below <info> when possible
                def infoNode = transformation.info.getAt(0)
                paramsNode = infoNode == null ? null : infoNode.appendNode('parameters')
            }
            if (paramsNode == null) {
                println "Warning: ${transFile.fileName} has no <info> element, cannot add parameters"
            } else {
                getParamsConfig(configFile).each { param ->
                    paramsNode.append(param)
                }
            }
        }
        // add empty chars for <pad_char /> xml-tags
        transformation.breadthFirst().each { node ->
            fixWhitespacePaddings(node)
        }
    }

    // write destination transformation files
    uniqueTransFiles.each { transFile ->
        def transOut = new File(toDir.absolutePath + File.separator + transFile.fileName)
        if (!dryRun) writeXmlToFile(transFile.transformation, transOut)
    }

    replaceConnections(job)
    def jobOut = new File(toDir.absolutePath + File.separator + jobFile.name)
    if (!dryRun) writeXmlToFile(job, jobOut)
}
/**
 * Gives an empty <pad_char/> of a <field> element a single blank as value
 * when the same field has a non-empty <pad_len>.
 *
 * Only the first <pad_len> and the first <pad_char> child of the field are
 * looked at. Nodes that are not named "field", or that have no children,
 * are left untouched.
 *
 * @param node the node to inspect (and possibly modify in place)
 */
def fixWhitespacePaddings(node) {
    if (node.name() != "field" || node.children() == []) {
        return
    }
    def lengths = node.pad_len
    def padChars = node.pad_char
    if (lengths == [] || padChars == []) {
        return
    }
    def lengthGiven = lengths[0].value() != []
    def padCharEmpty = padChars[0].value() == []
    if (lengthGiven && padCharEmpty) {
        padChars[0].setValue(" ")
    }
}
/**
 * Serialises an XML node tree to a file as UTF-8, preceded by an XML declaration.
 *
 * Whitespace inside the nodes is preserved by the printer. The writer (and with
 * it the underlying file stream) is always closed, also when printing fails, so
 * the output is flushed and no file handle is leaked.
 *
 * @param node root node to write
 * @param file destination file; an existing file is overwritten
 */
def writeXmlToFile(node, file) {
    def pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))
    try {
        pw.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
        def nodePrinter = new XmlNodePrinter(pw)
        nodePrinter.setPreserveWhitespace(true)
        nodePrinter.print(node)
    } finally {
        pw.close()
    }
}
// Looks up the path of the config file in the environment variable
// COPY_KETTLEJOB_CONFIG_FILE_PATH and prints it.
// Returns the value of the variable, or an empty string when it is not set.
def getConfigFilePath() {
    String configured = System.getenv('COPY_KETTLEJOB_CONFIG_FILE_PATH')
    def path = (configured == null) ? "" : configured
    println "Config file: " + path
    return path
}
/**
 * Returns the <connection> nodes that are to be inserted into jobs and
 * transformations.
 *
 * When configFile is readable, these are the <connection> children of its root
 * element. Otherwise a built-in default PostgreSQL connection is used.
 *
 * The result is always a list of nodes. The built-in default used to be
 * returned as a bare Node while the config file branch returned a node list;
 * callers iterate the result, and iterating a Node walks its children, so the
 * children of the default <connection> (name, server, ...) were appended
 * instead of the <connection> element itself.
 *
 * @param configFile config file to read, may be null
 * @return list of <connection> nodes (a one-element list for the default)
 */
def getConnectionConfig(configFile = null) {
    def parser = new XmlParser()
    if (configFile != null && configFile.canRead()) {
        def config = parser.parse(configFile)
        return config.connection
    }
    def connectionXml = '''
<connection>
<name>eduetl</name>
<server>localhost</server>
<type>POSTGRESQL</type>
<access>Native</access>
<database>dev_java8_eduetl</database>
<port>5432</port>
<username>postgres</username>
<password>Encrypted 2be98afc86aa7f2e4bb16bd64d980aac9</password>
<servername/>
<data_tablespace/>
<index_tablespace/>
<attributes>
<attribute><code>FORCE_IDENTIFIERS_TO_LOWERCASE</code><attribute>N</attribute></attribute>
<attribute><code>FORCE_IDENTIFIERS_TO_UPPERCASE</code><attribute>N</attribute></attribute>
<attribute><code>IS_CLUSTERED</code><attribute>N</attribute></attribute>
<attribute><code>PORT_NUMBER</code><attribute>5432</attribute></attribute>
<attribute><code>PRESERVE_RESERVED_WORD_CASE</code><attribute>N</attribute></attribute>
<attribute><code>QUOTE_ALL_FIELDS</code><attribute>N</attribute></attribute>
<attribute><code>SUPPORTS_BOOLEAN_DATA_TYPE</code><attribute>Y</attribute></attribute>
<attribute><code>SUPPORTS_TIMESTAMP_DATA_TYPE</code><attribute>Y</attribute></attribute>
<attribute><code>USE_POOLING</code><attribute>N</attribute></attribute>
</attributes>
</connection>
'''
    // wrap the single root node so that both branches return a list of nodes
    return [parser.parseText(connectionXml)]
}
/**
 * Returns the <parameter> nodes that are to be added to the transformations.
 *
 * When configFile is readable, these are the <parameter> elements below the
 * <parameters> children of its root element. Otherwise a warning is printed
 * and a built-in default parameter set is returned.
 *
 * The warning now names the environment variable that is actually read for the
 * config file path, COPY_KETTLEJOB_CONFIG_FILE_PATH (it used to say
 * COPY_KETTLE_JOB_CONFIG_FILE_PATH, which is never evaluated).
 *
 * @param configFile config file to read, may be null
 * @return node list of <parameter> nodes
 */
def getParamsConfig(configFile = null) {
    def parser = new XmlParser()
    def paramsNodeList
    if (configFile == null || ! configFile.canRead()) {
        println "Warning: No configFile. Did you forget to set the environment variable COPY_KETTLEJOB_CONFIG_FILE_PATH?"
        def paramsXml = '''
<parameters>
<parameter>
<name>bundesland</name>
<default_value>3</default_value>
<description/>
</parameter>
<parameter>
<name>fsem_pred</name>
<default_value>1&#x3d;1</default_value>
<description/>
</parameter>
<parameter>
<name>hs_nr</name>
<default_value>9991</default_value>
<description/>
</parameter>
<parameter>
<name>hssem_pred</name>
<default_value>1&#x3d;1</default_value>
<description/>
</parameter>
<parameter>
<name>landesspezifika</name>
<default_value>NW</default_value>
<description/>
</parameter>
<parameter>
<name>lieferungart</name>
<default_value/>
<description/>
</parameter>
<parameter>
<name>mtk_pred0</name>
<default_value>matrikel_nr&#x3d;11228729</default_value>
<description/>
</parameter>
<parameter>
<name>mtk_pred</name>
<default_value>1&#x3d;1</default_value>
<description/>
</parameter>
<parameter>
<name>mtk_pred1</name>
<default_value>matrikel_nr&#x3d;897190</default_value>
<description/>
</parameter>
<parameter>
<name>mtk_pred2</name>
<default_value>matrikel_nr&#x3d;11230911</default_value>
<description/>
</parameter>
<parameter>
<name>mtk_pred4</name>
<default_value>matrikel_nr&#x3d;11230825</default_value>
<description/>
</parameter>
<parameter>
<name>mtk_pred5</name>
<default_value>matrikel_nr&#x3d;11200110</default_value>
<description/>
</parameter>
<parameter>
<name>semester</name>
<default_value>20181</default_value>
<description/>
</parameter>
<parameter>
<name>semester2</name>
<default_value>20161</default_value>
<description/>
</parameter>
<parameter>
<name>semester_zuvor</name>
<default_value>20172</default_value>
<description/>
</parameter>
<parameter>
<name>semester_zuvor2</name>
<default_value>20152</default_value>
<description/>
</parameter>
<parameter>
<name>stg_pred</name>
<default_value>1&#x3d;1</default_value>
<description/>
</parameter>
<parameter>
<name>lab_vorsemester</name>
<default_value>2</default_value>
<description/>
</parameter>
<parameter>
<name>trimester</name>
<default_value>false</default_value>
<description/>
</parameter>
<parameter>
<name>stichtagsart_stud_vorsem</name>
<default_value>6</default_value>
<description/>
</parameter>
<parameter>
<name>stichtagsart_stud_berichtssem</name>
<default_value>1</default_value>
<description/>
</parameter>
<parameter>
<name>stichtagsart_pruef_vorsem</name>
<default_value>2</default_value>
<description/>
</parameter>
<parameter>
<name>stichtagsart_pruef_berichtssem</name>
<default_value>4</default_value>
<description/>
</parameter>
<parameter>
<name>vorsem_exmas_start</name>
<default_value>20172</default_value>
<description/>
</parameter>
<parameter>
<name>exma_max_rueck_beur_ein_pred</name>
<default_value>sem_rueck_beur_ein=20172</default_value>
<description/>
</parameter>
</parameters>
'''
        def params = parser.parseText(paramsXml)
        paramsNodeList = params.parameter
    } else {
        def content = parser.parse(configFile)
        paramsNodeList = content.get("parameters").parameter
    }
    return paramsNodeList
}
/**
 * Collects the transformations referenced by the Mapping steps of a
 * transformation.
 *
 * For every <step> whose <type> is 'Mapping', the base name of the step's
 * <filename> is resolved against the directory of the job file and parsed.
 * Only the steps of the given transformation are examined; the parsed
 * sub-transformations are not searched again.
 *
 * @param transFile map with the parsed tree under the key "transformation"
 * @param parser    XML parser used to read the referenced files
 * @return list of maps with the keys "transformation" (parsed tree),
 *         "fileName" (base name) and "entry" (the referencing step)
 */
def getSubtransformations(transFile, parser) {
    def mappingSteps = transFile.transformation.step.findAll { candidate ->
        candidate.type.text() == 'Mapping'
    }
    return mappingSteps.collect { mappingStep ->
        def baseName = new File(mappingStep.filename.text()).name
        def jobDirectory = new File(jobFile.absolutePath).parentFile
        def source = new File(jobDirectory.getAbsolutePath() + File.separator + baseName)
        ["transformation" : parser.parse(source), "fileName" : baseName, "entry": mappingStep]
    }
}
}
// Script entry point: all work is done inside the CopyKettleJob constructor,
// which receives the script's command line arguments.
copy = new CopyKettleJob(args)