Myjobs Grundstruktur
This commit is contained in:
372
scripts/groovy/copy_kettlejob.groovy
Executable file
372
scripts/groovy/copy_kettlejob.groovy
Executable file
@@ -0,0 +1,372 @@
|
||||
#!/usr/bin/env groovy
|
||||
|
||||
import groovy.xml.XmlUtil
|
||||
|
||||
import java.nio.charset.StandardCharsets
|
||||
|
||||
import groovy.xml.QName
|
||||
|
||||
class CopyKettleJob {
|
||||
|
||||
def stripParams = false , stripConnections = false , addParams = false , addConnections = false , jobFile, toDir, transFiles = [], dryRun = false
|
||||
|
||||
/**
 * Parses the command line and immediately performs the copy.
 *
 * Steps: read the job file, collect every transformation referenced by a
 * 'TRANS' job entry (plus the sub-transformations returned by
 * getSubtransformations), optionally strip/add connections and parameters,
 * rewrite the file references to be relative to the job directory and write
 * the job and all transformations to the target directory.
 *
 * The job file is taken from --jobfile or the first positional argument; the
 * target directory from --to or the next positional argument. Without a
 * target directory the files are rewritten next to the job file.
 *
 * On invalid input the constructor prints a message and returns early
 * without copying anything.
 *
 * @param args the raw command line arguments of the script
 */
def CopyKettleJob(args) {
    def cli = new CliBuilder(usage: 'copy_kettlejob.groovy [options] <job.kjb> [<to_dir>]')
    cli.with {
        h(longOpt: 'help', 'get usage info')
        p(longOpt: 'strip-parameters', 'Strip all parameters from transformations')
        P(longOpt: 'add-parameters', 'Add parameters to transformation')
        c(longOpt: 'strip-connections', 'Strip all connections from transformations')
        C(longOpt: 'add-connections', 'Add connections to transformation. If no <to_dir> given, modify files in place.')
        d(longOpt: 'dry-run' , 'Just print out what would happen, don\'t actually do anything')
        configFile(args:1, argName: 'myconfigfile', 'Use this config file for kettle parameters and database connection')
        cli._(longOpt:'jobfile', args:1, argName:'jobfile', 'Job to copy')
        cli._(longOpt:'to', args:1, argName:'to dir', 'Directory to copy job to')
    }

    def options = cli.parse(args)
    // CliBuilder.parse() has already reported the problem when it returns null.
    if (options == null) {
        return
    }
    if (!args || options.h) {
        cli.usage()
        return
    }

    stripParams = options.p
    stripConnections = options.c
    addParams = options.P
    addConnections = options.C
    dryRun = options.d

    // Contradicting switches are an error: stop instead of silently picking one.
    if (stripParams && addParams) {
        println "Cannot use parameters 'p' and 'P' at the same time"
        return
    }
    if (stripConnections && addConnections) {
        println "Cannot use parameters 'c' and 'C' at the same time"
        return
    }

    // --jobfile / --to take precedence; otherwise fall back to the positional arguments.
    def positional = new ArrayList(options.arguments())
    def jobPath = options.jobfile ?: (positional ? positional.remove(0) : null)
    def toPath = options.to ?: (positional ? positional.remove(0) : null)
    if (jobPath == null) {
        println "No job file given"
        cli.usage()
        return
    }

    jobFile = new File(jobPath)
    if (toPath != null) {
        toDir = new File(toPath)
    } else {
        // no target directory: files are modified in place next to the job
        toDir = jobFile.getAbsoluteFile().getParentFile()
    }

    def configFilePath = options.configFile ?: getConfigFilePath()
    def configFile = new File(configFilePath)

    println "Copying ${jobFile} to ${toDir} strip params: ${stripParams}, strip connections : ${stripConnections}"

    if (! jobFile.canRead()) {
        println "Cannot open ${jobFile}"
        return
    }
    if (toDir != null && !toDir.isDirectory()) {
        println "Cannot open ${toDir} or no directory"
        return
    }

    def parser = new XmlParser()
    def job = parser.parse(jobFile)

    // only job entries of type TRANS reference transformation files
    def transforms = job.entries.entry.findAll { it.type.text() == 'TRANS' }

    // Transformations are looked up by bare file name in the job's own directory.
    def collected = []
    transforms.each {
        def file = (new File(it.filename.text())).name
        def path = (new File(jobFile.absolutePath)).parentFile
        def realFile = new File(path.getAbsolutePath() + File.separator + file)
        def transFile = ["transformation" : parser.parse(realFile), "fileName" : file, "entry": it]
        collected << transFile
        collected.addAll(getSubtransformations(transFile, parser))
    }

    // remove duplicate transformations (first occurrence of each file name wins)
    // and publish the result in the transFiles field
    transFiles = collected.unique { a,b -> a.fileName <=> b.fileName }

    // strip params and connections if wanted
    transFiles.each {
        def transformation = it.transformation

        // adding connections replaces the existing ones, so strip in both cases
        if (stripConnections || addConnections) {
            transformation.connection.each { connection ->
                connection.parent().remove(connection)
            }
        }
        if (addConnections) {
            getConnectionConfig(configFile).each { con ->
                transformation.append(con)
            }
        }

        if (stripParams) {
            transformation.info.parameters.parameter.each { param ->
                param.parent().remove(param)
            }
        } else if ( addParams ) {
            def paramsNode = transformation.info.parameters.getAt(0)
            if (paramsNode == null) {
                // transformation has no <parameters> element yet: create one under <info>
                paramsNode = transformation.info.getAt(0)?.appendNode('parameters')
            }
            if (paramsNode == null) {
                println "Warning: ${it.fileName} has no <info> element, cannot add parameters"
            } else {
                getParamsConfig(configFile).each { param ->
                    paramsNode.append(param)
                }
            }
        }

        // add empty chars for <pad_char /> xml-tags
        transformation.breadthFirst().each { node ->
            fixWhitespacePaddings(node)
        }
    }

    // insert transformation file name with job relative path (${Internal.Job.Filename.Directory})
    // and write destination transformation files
    transFiles.each {
        println it.entry.filename.text()
        def fileName = it.fileName
        def filePath = '\${Internal.Job.Filename.Directory}' + "/" + fileName
        it.entry.filename.replaceNode {
            filename(filePath)
        }
        def transOut = new File(toDir.absolutePath + File.separator + fileName)
        if (!dryRun) writeXmlToFile(it.transformation, transOut)
    }

    // same connection handling for the job itself
    if (stripConnections || addConnections) {
        job.connection.each { connection ->
            connection.parent().remove(connection)
        }
    }
    if (addConnections) {
        getConnectionConfig(configFile).each { con ->
            job.append(con)
        }
    }

    def jobOut = new File(toDir.absolutePath + File.separator + jobFile.name)
    if (!dryRun) writeXmlToFile(job, jobOut)
}
|
||||
|
||||
/**
 * Restores a single blank as padding character on a "field" node.
 *
 * When the node has a non-empty <pad_len> but its <pad_char> has no value,
 * the value of <pad_char> is set to " ". Only the first pad_len / pad_char
 * child is looked at. All other nodes are left untouched.
 *
 * @param node the XML node to inspect (modified in place)
 */
def fixWhitespacePaddings(node) {
    // only non-empty <field> elements are of interest
    if (node.name() != "field" || node.children() == []) {
        return
    }

    def padLengths = node.pad_len
    def padChars = node.pad_char
    if (padLengths == [] || padChars == []) {
        return
    }

    // a length is given but the padding character is empty
    def lengthIsSet = padLengths[0].value() != []
    def charIsMissing = padChars[0].value() == []
    if (lengthIsSet && charIsMissing) {
        padChars[0].setValue(" ")
    }
}
|
||||
|
||||
/**
 * Serializes an XML node tree to a file as UTF-8.
 *
 * The output starts with an XML declaration, directly followed by the node
 * printed with whitespace preservation enabled. The writer is closed when
 * printing is finished or fails, so no file handle is left open.
 *
 * @param node the root node to print
 * @param file the destination file (created or overwritten)
 */
def writeXmlToFile(node, file) {
    // withPrintWriter closes the underlying stream even if printing throws
    file.withPrintWriter('UTF-8') { pw ->
        pw.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
        def nodePrinter = new XmlNodePrinter(pw)
        nodePrinter.setPreserveWhitespace(true)
        nodePrinter.print(node)
    }
}
|
||||
|
||||
/**
 * Determines the path of the config file from the environment.
 *
 * Reads the environment variable COPY_KETTLEJOB_CONFIG_FILE_PATH and prints
 * the result. An unset variable yields the empty string.
 *
 * @return the configured path, or "" if the variable is not set
 */
def getConfigFilePath() {
    def configuredPath = System.getenv('COPY_KETTLEJOB_CONFIG_FILE_PATH') ?: ""
    println "Config file: " + configuredPath
    return configuredPath
}
|
||||
|
||||
/**
 * Returns the database connection nodes to insert into jobs/transformations.
 *
 * If configFile is readable, all <connection> children of its root element
 * are returned. Otherwise a built-in default connection is used.
 *
 * Both paths return a list of <connection> nodes. A bare Node must not be
 * returned here: iterating a Node walks its children, so callers using
 * each() would append <name>, <server>, ... instead of the connection itself.
 *
 * @param configFile the config file, may be null or unreadable
 * @return a list of connection nodes (possibly empty when the config file
 *         contains no connection)
 */
def getConnectionConfig(configFile = null) {
    def parser = new XmlParser()

    if (configFile != null && configFile.canRead()) {
        return parser.parse(configFile).connection
    }

    println "Warning: No configFile. Using the built-in default connection."
    // NOTE(review): hard-coded development credentials; prefer supplying a config file.
    def connectionXml = '''
<connection>
  <name>eduetl</name>
  <server>localhost</server>
  <type>POSTGRESQL</type>
  <access>Native</access>
  <database>dev_java8_eduetl</database>
  <port>5432</port>
  <username>postgres</username>
  <password>Encrypted 2be98afc86aa7f2e4bb16bd64d980aac9</password>
  <servername/>
  <data_tablespace/>
  <index_tablespace/>
  <attributes>
    <attribute><code>FORCE_IDENTIFIERS_TO_LOWERCASE</code><attribute>N</attribute></attribute>
    <attribute><code>FORCE_IDENTIFIERS_TO_UPPERCASE</code><attribute>N</attribute></attribute>
    <attribute><code>IS_CLUSTERED</code><attribute>N</attribute></attribute>
    <attribute><code>PORT_NUMBER</code><attribute>5432</attribute></attribute>
    <attribute><code>PRESERVE_RESERVED_WORD_CASE</code><attribute>N</attribute></attribute>
    <attribute><code>QUOTE_ALL_FIELDS</code><attribute>N</attribute></attribute>
    <attribute><code>SUPPORTS_BOOLEAN_DATA_TYPE</code><attribute>Y</attribute></attribute>
    <attribute><code>SUPPORTS_TIMESTAMP_DATA_TYPE</code><attribute>Y</attribute></attribute>
    <attribute><code>USE_POOLING</code><attribute>N</attribute></attribute>
  </attributes>
</connection>
'''
    // wrap the single node so callers iterate over connections, not over its children
    return [parser.parseText(connectionXml)]
}
|
||||
|
||||
/**
 * Returns the <parameter> nodes to insert into transformations.
 *
 * If configFile is readable, the <parameter> children of its <parameters>
 * element are returned. Otherwise a warning is printed and a built-in
 * default parameter set is used.
 *
 * @param configFile the config file, may be null or unreadable
 * @return a list of parameter nodes
 */
def getParamsConfig(configFile = null) {
    def parser = new XmlParser()

    if (configFile != null && configFile.canRead()) {
        def content = parser.parse(configFile)
        return content.get("parameters").parameter
    }

    // the variable name must match the one read by getConfigFilePath()
    println "Warning: No configFile. Did you forget to set the environment variable COPY_KETTLEJOB_CONFIG_FILE_PATH?"
    def paramsXml = '''
<parameters>
  <parameter>
    <name>bundesland</name>
    <default_value>3</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>fsem_pred</name>
    <default_value>1=1</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>hs_nr</name>
    <default_value>9991</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>hssem_pred</name>
    <default_value>1=1</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>landesspezifika</name>
    <default_value>NW</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>lieferungart</name>
    <default_value/>
    <description/>
  </parameter>
  <parameter>
    <name>mtk_pred0</name>
    <default_value>matrikel_nr=11228729</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>mtk_pred</name>
    <default_value>1=1</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>mtk_pred1</name>
    <default_value>matrikel_nr=897190</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>mtk_pred2</name>
    <default_value>matrikel_nr=11230911</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>mtk_pred4</name>
    <default_value>matrikel_nr=11230825</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>mtk_pred5</name>
    <default_value>matrikel_nr=11200110</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>semester</name>
    <default_value>20181</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>semester2</name>
    <default_value>20161</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>semester_zuvor</name>
    <default_value>20172</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>semester_zuvor2</name>
    <default_value>20152</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>stg_pred</name>
    <default_value>1=1</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>lab_vorsemester</name>
    <default_value>2</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>trimester</name>
    <default_value>false</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>stichtagsart_stud_vorsem</name>
    <default_value>6</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>stichtagsart_stud_berichtssem</name>
    <default_value>1</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>stichtagsart_pruef_vorsem</name>
    <default_value>2</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>stichtagsart_pruef_berichtssem</name>
    <default_value>4</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>vorsem_exmas_start</name>
    <default_value>20172</default_value>
    <description/>
  </parameter>
  <parameter>
    <name>exma_max_rueck_beur_ein_pred</name>
    <default_value>sem_rueck_beur_ein=20172</default_value>
    <description/>
  </parameter>
</parameters>
'''
    return parser.parseText(paramsXml).parameter
}
|
||||
|
||||
/**
 * Collects the transformations referenced by 'Mapping' steps of a transformation.
 *
 * For every step of type 'Mapping' the referenced file is looked up by its
 * bare file name in the directory of the job file and parsed. Only the direct
 * steps of the given transformation are examined.
 *
 * @param transFile map with at least the key "transformation" (parsed root node)
 * @param parser    the XML parser used to read the referenced files
 * @return a list of maps with the keys "transformation", "fileName" and "entry"
 *         (the step that references the file)
 */
def getSubtransformations(transFile, parser) {
    def mappingSteps = transFile.transformation.step.findAll { step ->
        step.type.text() == 'Mapping'
    }

    return mappingSteps.collect { step ->
        // reference is resolved by file name only, relative to the job's directory
        def name = new File(step.filename.text()).name
        def jobDir = new File(jobFile.absolutePath).parentFile
        def target = new File(jobDir.getAbsolutePath() + File.separator + name)
        ["transformation" : parser.parse(target), "fileName" : name, "entry": step]
    }
}
|
||||
}
|
||||
|
||||
// Script entry point: the constructor parses the command line arguments and runs the whole copy.
copy = new CopyKettleJob(args)
|
||||
Reference in New Issue
Block a user