How to execute parallel jobs in Oozie (tags: oozie, shell)

How to execute parallel jobs in oozie


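To run jobs in parallel, write a workflow.xml that contains a fork: every path listed in the fork is started at the same time, and a matching join waits for all of them to finish before the workflow continues. The example below shows how this works.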

In the XML below, notice that every action runs the same script and only the config file passed to it differs. In your case, pass the table names you want instead, either through the config file or directly in workflow.xml.

Taking a Sqoop job as an example, the Sqoop command in your .sh script should look like this:

# Define a saved Sqoop job, named after the source table, that incrementally
# imports ${database}.${table} into the Hive table ${hivedatabase}.${hivetable}
# as Parquet files.
#
# Expected variables (supplied by the caller, e.g. as env-var entries of the
# workflow action): table, domain, port, database, username, password,
# hivedatabase, hivetable, last_val.
#
# Every expansion is double-quoted so that a value containing whitespace or
# glob characters is passed to sqoop as a single argument.
# \$CONDITIONS stays escaped: sqoop itself, not the shell, substitutes it.
#
# NOTE(review): --password puts the secret on the command line, where it is
# visible in process listings; Sqoop also offers --password-file. Kept as-is
# so the script's inputs do not change.
# NOTE(review): per the Sqoop documentation "sqoop job --create" only saves the
# job definition; it is run with "sqoop job --exec <name>". Confirm the script
# executes the job afterwards.
sqoop job --create "${table}" -- import \
  --connect "${domain}:${port}/${database}" \
  --username "${username}" \
  --password "${password}" \
  --query "SELECT * from ${database}.${table} WHERE \$CONDITIONS" \
  -m 1 \
  --hive-import \
  --hive-database "${hivedatabase}" \
  --hive-table "${hivetable}" \
  --as-parquetfile \
  --incremental append \
  --check-column id \
  --last-value "${last_val}" \
  --target-dir "/user/xxxxx/hive/${hivedatabase}.db/${table}" \
  --outdir "/home/$USER/logs/outdir"

In short, write your Sqoop job as generically as you can, so that it takes the Hive table, Hive database, source table and source database names from workflow.xml. That way every action calls the same script, and only the env-var values in each workflow action change. See the env-var entries I added to the first two actions (shell-8f63 and shell-8f64) below.

 <!--
   Oozie workflow that runs six shell actions in parallel.

   Flow: "start" goes to the fork "forking", which starts all six shell
   actions at once. Each action goes to the join "joining" on success and
   to "sendEmail" on error. "joining" goes to "end"; "sendEmail" always
   goes to the "kill" node.

   Values such as ${jobTracker}, ${nameNode}, ${input_file}, ${queueName}
   and ${projectPath} are not defined here and must be supplied by the
   job configuration.
 -->
 <workflow-app xmlns="uri:oozie:workflow:0.5" name="Workflow_Name">
   <start to="forking"/>

   <!-- Every path listed here is started concurrently. -->
   <fork name="forking">
     <path start="shell-8f63"/>
     <path start="shell-8f64"/>
     <path start="SCRIPT3CONFIG3"/>
     <path start="SCRIPT4CONFIG4"/>
     <path start="SCRIPT5CONFIG5"/>
     <path start="script6config6"/>
   </fork>

   <!-- First table: shell.sh is run with the names passed as environment variables. -->
   <action name="shell-8f63">
     <shell xmlns="uri:oozie:shell-action:0.1">
       <job-tracker>${jobTracker}</job-tracker>
       <name-node>${nameNode}</name-node>
       <exec>shell.sh</exec>
       <argument>${input_file}</argument>
       <!--
         Each env-var holds exactly one VAR=VALUE pair. Add as many env-var
         elements as the script needs. Explanatory text belongs in a comment
         like this one, never inside an env-var element.
         Author's note: parameters should be passed with double quotes in
         order to work through shell actions.
       -->
       <env-var>database=sourcedatabase</env-var>
       <env-var>table=sourcetablename</env-var>
       <env-var>hivedatabase=yourhivedataabsename</env-var>
       <env-var>hivetable=yourhivetablename</env-var>
       <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
       <!-- "path#name" makes the file available to the action under "name". -->
       <file>/user/xxxx/shell_script/lib/shell.sh#shell.sh</file>
       <file>/user/xxxx/args/${input_file}#${input_file}</file>
     </shell>
     <ok to="joining"/>
     <error to="sendEmail"/>
   </action>

   <!-- Second table: same script as shell-8f63, different env-var values. -->
   <action name="shell-8f64">
     <shell xmlns="uri:oozie:shell-action:0.1">
       <job-tracker>${jobTracker}</job-tracker>
       <name-node>${nameNode}</name-node>
       <exec>shell.sh</exec>
       <argument>${input_file}</argument>
       <!-- One VAR=VALUE pair per env-var element; add more as needed. -->
       <env-var>database=sourcedatabase1</env-var>
       <env-var>table=sourcetablename1</env-var>
       <env-var>hivedatabase=yourhivedataabsename1</env-var>
       <env-var>hivetable=yourhivetablename2</env-var>
       <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
       <file>/user/xxxx/shell_script/lib/shell.sh#shell.sh</file>
       <file>/user/xxxx/args/${input_file}#${input_file}</file>
     </shell>
     <ok to="joining"/>
     <error to="sendEmail"/>
   </action>

   <!--
     The four actions below run one common script; only the config file
     shipped alongside it differs (THIRD_CONFIG to SIXTH_CONFIG).
   -->
   <action name="SCRIPT3CONFIG3">
     <shell xmlns="uri:oozie:shell-action:0.1">
       <job-tracker>${jobTracker}</job-tracker>
       <name-node>${nameNode}</name-node>
       <configuration>
         <property>
           <name>mapred.job.queue.name</name>
           <value>${queueName}</value>
         </property>
       </configuration>
       <exec>COMMON_SCRIPT_YOU_WANT_TO_USE.sh</exec>
       <argument>SQOOP_2</argument>
       <!-- NOTE(review): the original had no "=value" part; the value is a guess, confirm what the script expects. -->
       <env-var>UserName=${wf:user()}</env-var>
       <file>${nameNode}/${projectPath}/COMMON_SCRIPT_YOU_WANT_TO_USE.sh#COMMON_SCRIPT_YOU_WANT_TO_USE.sh</file>
       <file>${nameNode}/${projectPath}/THIRD_CONFIG</file>
     </shell>
     <ok to="joining"/>
     <error to="sendEmail"/>
   </action>

   <action name="SCRIPT4CONFIG4">
     <shell xmlns="uri:oozie:shell-action:0.1">
       <job-tracker>${jobTracker}</job-tracker>
       <name-node>${nameNode}</name-node>
       <configuration>
         <property>
           <name>mapred.job.queue.name</name>
           <value>${queueName}</value>
         </property>
       </configuration>
       <exec>COMMON_SCRIPT_YOU_WANT_TO_USE.sh</exec>
       <argument>SQOOP_2</argument>
       <!-- NOTE(review): the original had no "=value" part; the value is a guess, confirm what the script expects. -->
       <env-var>UserName=${wf:user()}</env-var>
       <file>${nameNode}/${projectPath}/COMMON_SCRIPT_YOU_WANT_TO_USE.sh#COMMON_SCRIPT_YOU_WANT_TO_USE.sh</file>
       <file>${nameNode}/${projectPath}/FOURTH_CONFIG</file>
     </shell>
     <ok to="joining"/>
     <error to="sendEmail"/>
   </action>

   <action name="SCRIPT5CONFIG5">
     <shell xmlns="uri:oozie:shell-action:0.1">
       <job-tracker>${jobTracker}</job-tracker>
       <name-node>${nameNode}</name-node>
       <configuration>
         <property>
           <name>mapred.job.queue.name</name>
           <value>${queueName}</value>
         </property>
       </configuration>
       <exec>COMMON_SCRIPT_YOU_WANT_TO_USE.sh</exec>
       <argument>SQOOP_2</argument>
       <!-- NOTE(review): the original had no "=value" part; the value is a guess, confirm what the script expects. -->
       <env-var>UserName=${wf:user()}</env-var>
       <file>${nameNode}/${projectPath}/COMMON_SCRIPT_YOU_WANT_TO_USE.sh#COMMON_SCRIPT_YOU_WANT_TO_USE.sh</file>
       <file>${nameNode}/${projectPath}/FIFTH_CONFIG</file>
     </shell>
     <ok to="joining"/>
     <error to="sendEmail"/>
   </action>

   <action name="script6config6">
     <shell xmlns="uri:oozie:shell-action:0.1">
       <job-tracker>${jobTracker}</job-tracker>
       <name-node>${nameNode}</name-node>
       <configuration>
         <property>
           <name>mapred.job.queue.name</name>
           <value>${queueName}</value>
         </property>
       </configuration>
       <exec>COMMON_SCRIPT_YOU_WANT_TO_USE.sh</exec>
       <argument>SQOOP_2</argument>
       <!-- NOTE(review): the original had no "=value" part; the value is a guess, confirm what the script expects. -->
       <env-var>UserName=${wf:user()}</env-var>
       <file>${nameNode}/${projectPath}/COMMON_SCRIPT_YOU_WANT_TO_USE.sh#COMMON_SCRIPT_YOU_WANT_TO_USE.sh</file>
       <file>${nameNode}/${projectPath}/SIXTH_CONFIG</file>
     </shell>
     <ok to="joining"/>
     <error to="sendEmail"/>
   </action>

   <!-- Continues to "end" once every forked path has arrived here. -->
   <join name="joining" to="end"/>

   <!-- Error handler shared by all shell actions; the workflow is killed whether or not the mail is sent. -->
   <action name="sendEmail">
     <email xmlns="uri:oozie:email-action:0.1">
       <!-- Placeholder: replace with a real recipient address. -->
       <to>youremail@example.com</to>
       <subject>your subject</subject>
       <body>your email body</body>
     </email>
     <ok to="kill"/>
     <error to="kill"/>
   </action>

   <kill name="kill">
     <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
   </kill>

   <end name="end"/>
 </workflow-app>