Minimalistisches Beispiel-Modul mit generierten Modulscripten für Kettle-Jobs
http://www.superx-projekt.de/doku/kurse/kettle_monitoring/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
372 lines
14 KiB
372 lines
14 KiB
#!/usr/bin/env groovy |
|
|
|
import groovy.xml.XmlUtil |
|
|
|
import java.nio.charset.StandardCharsets |
|
|
|
import groovy.xml.QName |
|
|
|
class CopyKettleJob { |
|
|
|
def stripParams = false , stripConnections = false , addParams = false , addConnections = false , jobFile, toDir, transFiles = [], dryRun = false |
|
|
|
/**
 * Parses the command line and copies a Kettle job, together with the
 * transformations it references, into the target directory. Depending on the
 * options, parameters and database connections are stripped from or added to
 * the copied files.
 *
 * The constructor returns early, after printing a message, when help is
 * requested, when the arguments cannot be parsed, when mutually exclusive
 * options are combined, when no job file is given, or when the job file or
 * the target directory is not usable.
 *
 * The job file is taken from --jobfile or, failing that, from the first
 * positional argument; the target directory from --to or the next positional
 * argument. Without a target directory the files are modified in place.
 *
 * @param args command line arguments as passed to the script
 */
def CopyKettleJob(args) {
    def cli = new CliBuilder(usage: 'copy_kettlejob.groovy [options] <job.kjb> [<to_dir>]')
    cli.with {
        h(longOpt: 'help', 'get usage info')
        p(longOpt: 'strip-parameters', 'Strip all parameters from transformations')
        P(longOpt: 'add-parameters', 'Add parameters to transformation')
        c(longOpt: 'strip-connections', 'Strip all connections from transformations')
        C(longOpt: 'add-connections', 'Add connections to transformation. If no <to_dir> given, modify files in place.')
        d(longOpt: 'dry-run' , 'Just print out what would happen, don\'t actually do anything')
        configFile(args:1, argName: 'myconfigfile', 'Use this config file for kettle parameters and database connection')
        cli._(longOpt:'jobfile', args:1, argName:'jobfile', 'Job to copy')
        cli._(longOpt:'to', args:1, argName:'to dir', 'Directory to copy job to')
    }

    def options = cli.parse(args)
    // cli.parse() reports the problem itself and yields null for unparsable
    // input; dereferencing it below would only add a NullPointerException.
    if (options == null) {
        return
    }
    if (!args || options.h) {
        cli.usage()
        return
    }

    stripParams = options.p
    stripConnections = options.c
    addParams = options.P
    addConnections = options.C
    dryRun = options.d

    // Contradictory switches are an error: report all of them, then stop.
    def conflicting = false
    if (stripParams && addParams) {
        println "Cannot use parameters 'p' and 'P' at the same time"
        conflicting = true
    }
    if (stripConnections && addConnections) {
        println "Cannot use parameters 'c' and 'C' at the same time"
        conflicting = true
    }
    if (conflicting) {
        return
    }

    // Named options win; positional arguments fill whatever is still missing.
    // Indexing past the end of a Groovy list yields null.
    def positional = options.arguments()
    int next = 0
    def jobPath = options.jobfile ?: positional[next++]
    def toPath = options.to ?: positional[next++]
    if (!jobPath) {
        println "No job file given"
        cli.usage()
        return
    }
    jobFile = new File(jobPath)
    toDir = (toPath != null) ? new File(toPath) : jobFile.getAbsoluteFile().getParentFile()

    def configFilePath = options.configFile ?: getConfigFilePath()
    def configFile = new File(configFilePath)

    println "Copying ${jobFile} to ${toDir} strip params: ${stripParams}, strip connections : ${stripConnections}"

    if (! jobFile.canRead()) {
        println "Cannot open ${jobFile}"
        return
    }
    if (toDir != null && !toDir.isDirectory()) {
        println "Cannot open ${toDir} or no directory"
        return
    }

    def parser = new XmlParser()
    def job = parser.parse(jobFile)
    def transforms = job.entries.entry.findAll { it.type.text() == 'TRANS' }

    // Load every transformation referenced by the job plus the mappings
    // (sub-transformations) those reference. All files are looked up next to
    // the job file, using only the base name of the stored path.
    def jobDir = jobFile.getAbsoluteFile().getParentFile()
    def collected = []
    transforms.each { entry ->
        def file = (new File(entry.filename.text())).name
        def realFile = new File(jobDir.getAbsolutePath() + File.separator + file)
        def transFile = ["transformation" : parser.parse(realFile), "fileName" : file, "entry": entry]
        collected << transFile
        collected.addAll(getSubtransformations(transFile, parser))
    }

    // Insert the transformation file name with a job relative path
    // (${Internal.Job.Filename.Directory}) into every referencing entry.
    // This happens for all collected references, before duplicates are
    // dropped and before anything is written, so that a mapping step inside
    // a parent transformation is already rewritten when the parent is saved.
    collected.each {
        println it.entry.filename.text()
        def filePath = '\${Internal.Job.Filename.Directory}' + "/" + it.fileName
        it.entry.filename.replaceNode {
            filename(filePath)
        }
    }

    // remove duplicate transformations; the result is kept in the field
    transFiles = collected.unique { a,b -> a.fileName <=> b.fileName }

    // strip/add params and connections if wanted
    transFiles.each {
        def transformation = it.transformation
        if (stripConnections || addConnections) {
            // existing connections are always dropped first, also when adding
            transformation.connection.each { connection ->
                connection.parent().remove(connection)
            }
        }
        if (addConnections) {
            getConnectionConfig(configFile).each { con ->
                transformation.append(con)
            }
        }
        if (stripParams) {
            transformation.info.parameters.parameter.each { param ->
                param.parent().remove(param)
            }
        } else if ( addParams ) {
            def paramsNode = transformation.info.parameters.getAt(0)
            if (paramsNode == null) {
                // no <parameters> element yet: create one instead of failing
                def infoNode = transformation.info.getAt(0) ?: transformation.appendNode('info')
                paramsNode = infoNode.appendNode('parameters')
            }
            getParamsConfig(configFile).each {
                paramsNode.append(it)
            }
        }
        // add empty chars for <pad_char /> xml-tags
        transformation.breadthFirst().each { node ->
            fixWhitespacePaddings(node)
        }
    }

    // write destination transformation files
    transFiles.each {
        def transOut = new File(toDir.absolutePath + File.separator + it.fileName)
        if (!dryRun) writeXmlToFile(it.transformation, transOut)
    }

    // same connection handling for the job itself
    if (stripConnections || addConnections) {
        job.connection.each { connection ->
            connection.parent().remove(connection)
        }
    }
    if (addConnections) {
        getConnectionConfig(configFile).each { con ->
            job.append(con)
        }
    }

    def jobOut = new File(toDir.absolutePath + File.separator + jobFile.name)
    if (!dryRun) writeXmlToFile(job, jobOut)
}
|
|
|
/**
 * Gives an empty pad_char child of a "field" node a single blank as value,
 * provided the field also has a pad_len child that is not empty.
 * Nodes with any other name, and fields without children, are left alone.
 *
 * @param node the node to inspect; modified in place
 */
def fixWhitespacePaddings(node) {
    if (node.name() != "field" || node.children() == []) {
        return
    }
    def padLengths = node.pad_len
    def padChars = node.pad_char
    if (padLengths == [] || padChars == []) {
        return
    }
    // Only the first pad_len / pad_char child is looked at.
    def firstLength = padLengths[0]
    def firstChar = padChars[0]
    if (firstLength.value() != [] && firstChar.value() == []) {
        firstChar.setValue(" ")
    }
}
|
|
|
/**
 * Serializes a node tree as UTF-8 XML into the given file, preceded by an
 * XML declaration. Whitespace inside the tree is preserved.
 *
 * The writer is closed when printing has finished or failed, so the file
 * handle is released instead of staying open until garbage collection.
 *
 * @param node root node to print
 * @param file destination file; an existing file is overwritten
 */
def writeXmlToFile(node, file) {
    def pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))
    // withWriter flushes and closes the writer, also when the closure throws
    pw.withWriter { out ->
        out.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
        def nodePrinter = new XmlNodePrinter(out)
        nodePrinter.setPreserveWhitespace(true)
        nodePrinter.print(node)
    }
}
|
|
|
/**
 * Looks up the config file path in the environment variable
 * COPY_KETTLEJOB_CONFIG_FILE_PATH and prints the result.
 *
 * @return the value of the variable, or an empty string when it is not set
 */
def getConfigFilePath() {
    def configured = System.getenv().get('COPY_KETTLEJOB_CONFIG_FILE_PATH')
    def path = (configured != null) ? configured : ""
    println "Config file: " + path
    return path
}
|
|
|
/**
 * Returns the connection nodes to insert into jobs and transformations.
 *
 * When the config file is readable, the connection children of its root
 * element are returned. Otherwise a warning is printed and a built-in
 * default connection is used.
 *
 * Both branches return a list of connection nodes. The default is wrapped in
 * a list on purpose: iterating a single Node with each() walks its children,
 * which would hand out name, server, ... instead of the connection itself.
 *
 * @param configFile XML config file, may be null or unreadable
 * @return list of connection nodes
 */
def getConnectionConfig(configFile = null) {
    def parser = new XmlParser()
    if (configFile != null && configFile.canRead()) {
        return parser.parse(configFile).connection
    }

    println "Warning: No configFile. Using the built-in default connection."
    def connectionXml = '''
        <connection>
            <name>eduetl</name>
            <server>localhost</server>
            <type>POSTGRESQL</type>
            <access>Native</access>
            <database>dev_java8_eduetl</database>
            <port>5432</port>
            <username>postgres</username>
            <password>Encrypted 2be98afc86aa7f2e4bb16bd64d980aac9</password>
            <servername/>
            <data_tablespace/>
            <index_tablespace/>
            <attributes>
                <attribute><code>FORCE_IDENTIFIERS_TO_LOWERCASE</code><attribute>N</attribute></attribute>
                <attribute><code>FORCE_IDENTIFIERS_TO_UPPERCASE</code><attribute>N</attribute></attribute>
                <attribute><code>IS_CLUSTERED</code><attribute>N</attribute></attribute>
                <attribute><code>PORT_NUMBER</code><attribute>5432</attribute></attribute>
                <attribute><code>PRESERVE_RESERVED_WORD_CASE</code><attribute>N</attribute></attribute>
                <attribute><code>QUOTE_ALL_FIELDS</code><attribute>N</attribute></attribute>
                <attribute><code>SUPPORTS_BOOLEAN_DATA_TYPE</code><attribute>Y</attribute></attribute>
                <attribute><code>SUPPORTS_TIMESTAMP_DATA_TYPE</code><attribute>Y</attribute></attribute>
                <attribute><code>USE_POOLING</code><attribute>N</attribute></attribute>
            </attributes>
        </connection>
    '''
    return [parser.parseText(connectionXml)]
}
|
|
|
/**
 * Returns the parameter nodes to insert into a transformation.
 *
 * When the config file is readable, the parameter children of its
 * parameters element are returned. Otherwise a warning is printed and a
 * built-in default set of parameters is used.
 *
 * The warning names COPY_KETTLEJOB_CONFIG_FILE_PATH, the variable that
 * getConfigFilePath() actually reads.
 *
 * @param configFile XML config file, may be null or unreadable
 * @return list of parameter nodes
 */
def getParamsConfig(configFile = null) {
    def parser = new XmlParser()
    def paramsNodeList
    if (configFile == null || ! configFile.canRead()) {
        println "Warning: No configFile. Did you forget to set the environment variable COPY_KETTLEJOB_CONFIG_FILE_PATH?"
        def paramsXml = '''
            <parameters>
                <parameter>
                    <name>bundesland</name>
                    <default_value>3</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>fsem_pred</name>
                    <default_value>1=1</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>hs_nr</name>
                    <default_value>9991</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>hssem_pred</name>
                    <default_value>1=1</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>landesspezifika</name>
                    <default_value>NW</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>lieferungart</name>
                    <default_value/>
                    <description/>
                </parameter>
                <parameter>
                    <name>mtk_pred0</name>
                    <default_value>matrikel_nr=11228729</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>mtk_pred</name>
                    <default_value>1=1</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>mtk_pred1</name>
                    <default_value>matrikel_nr=897190</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>mtk_pred2</name>
                    <default_value>matrikel_nr=11230911</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>mtk_pred4</name>
                    <default_value>matrikel_nr=11230825</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>mtk_pred5</name>
                    <default_value>matrikel_nr=11200110</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>semester</name>
                    <default_value>20181</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>semester2</name>
                    <default_value>20161</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>semester_zuvor</name>
                    <default_value>20172</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>semester_zuvor2</name>
                    <default_value>20152</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>stg_pred</name>
                    <default_value>1=1</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>lab_vorsemester</name>
                    <default_value>2</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>trimester</name>
                    <default_value>false</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>stichtagsart_stud_vorsem</name>
                    <default_value>6</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>stichtagsart_stud_berichtssem</name>
                    <default_value>1</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>stichtagsart_pruef_vorsem</name>
                    <default_value>2</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>stichtagsart_pruef_berichtssem</name>
                    <default_value>4</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>vorsem_exmas_start</name>
                    <default_value>20172</default_value>
                    <description/>
                </parameter>
                <parameter>
                    <name>exma_max_rueck_beur_ein_pred</name>
                    <default_value>sem_rueck_beur_ein=20172</default_value>
                    <description/>
                </parameter>
            </parameters>
        '''
        def params = parser.parseText(paramsXml)
        paramsNodeList = params.parameter
    } else {
        def content = parser.parse(configFile)
        paramsNodeList = content.get("parameters").parameter
    }
    return paramsNodeList
}
|
|
|
/**
 * Collects the transformations referenced by the steps of type 'Mapping'
 * in the given transformation.
 *
 * Each referenced file is looked up by its base name in the directory of the
 * job file and parsed. Only the steps of the given transformation are
 * examined; the parsed sub-transformations are not searched again.
 *
 * @param transFile map holding the parsed parent under "transformation"
 * @param parser XML parser used to read the referenced files
 * @return list of maps with the keys "transformation", "fileName" and "entry"
 */
def getSubtransformations(transFile, parser) {
    def mappingSteps = transFile.transformation.step.findAll { step ->
        step.type.text() == 'Mapping'
    }
    return mappingSteps.collect { step ->
        def baseName = new File(step.filename.text()).name
        def jobDir = new File(jobFile.absolutePath).parentFile
        def located = new File(jobDir.getAbsolutePath() + File.separator + baseName)
        ["transformation" : parser.parse(located), "fileName" : baseName, "entry": step]
    }
}
|
} |
|
|
|
// Script entry point: all work happens inside the CopyKettleJob constructor.
// 'copy' is assigned without 'def', so it is stored in the script binding.
copy = new CopyKettleJob(args)
|
|
|