Description: CloudFormation template for creating an EMR cluster
Outputs:
  # The output key is kept as "IPAddress" so existing consumers keep working,
  # but the value exported is the master node's public DNS name
  # (the MasterPublicDNS attribute), not an IP address.
  IPAddress:
    Description: Public DNS name of the EMR cluster master node
    Value: !GetAtt 'EMRSampleCluster.MasterPublicDNS'
Parameters:
  # Bind account used for the LDAP lookups configured on the cluster
  # (Hue LDAP settings and the Hadoop LDAP group mapping).
  # Only a single value is allowed.
  myDirectoryBindUser:
    Description: BindUser Simple AD server
    Type: String
    AllowedValues:
      - binduser@corp.emr.local
    Default: binduser@corp.emr.local
  # Password for the bind account above.
  # NoEcho keeps the value masked when the stack's parameters are displayed.
  # NOTE(review): the value is still passed into the EMR cluster
  # configuration (hue-ini and core-site), so NoEcho alone does not keep it
  # out of the cluster's own configuration - confirm this is acceptable.
  myDirectoryBindPassword:
    Description: BindPassword Simple AD server
    Type: String
    NoEcho: true
  # Sizing of the core instance group.
  CoreInstanceCount:
    Type: Number
    Description: Number of core instances
    Default: '3'
  CoreInstanceType:
    Type: String
    Description: Instance Type of the core node
    Default: m5.2xlarge
  # Used as the Name property of the EMR cluster.
  EMRClusterName:
    Type: String
    Description: Cluster name for the EMR
    Default: EMR-EMRSecurityWithRangerV1
  # Used as the LogUri property of the EMR cluster; no default, must be supplied.
  EMRLogDir:
    Type: String
    Description: Log Dir for the EMR cluster
  # EC2 key pair attached to the cluster instances (Ec2KeyName).
  KeyName:
    Type: AWS::EC2::KeyPair::KeyName
    Description: Name of an existing EC2 KeyPair to enable SSH to the instances
  # Host part of the ldap:// URL used by Hue and the Hadoop group mapping.
  LDAPServerIP:
    Type: String
    Description: IP address of the LDAP server
  MasterInstanceType:
    Type: String
    Description: Instance Type of the master node
    Default: m5.2xlarge
  # Passed as an argument to the Ranger plugin/policy install steps.
  RangerHostname:
    Type: String
    Description: Internal IP address of the Ranger Server
  # Subnet the cluster is launched into (Ec2SubnetId).
  Subnet:
    Type: AWS::EC2::Subnet::Id
    Description: Subnet ID for creating the EMR cluster
  # NOTE(review): this parameter is not referenced by any resource in this
  # template as shown - confirm whether it is still needed.
  VPC:
    Type: AWS::EC2::VPC::Id
    Description: VPC ID for creating the EMR cluster
  # EMR release used for the cluster; also forwarded to the Presto plugin
  # install step and compared against in the Conditions section.
  emrReleaseLabel:
    Type: String
    Description: Release label for the EMR cluster
    Default: emr-5.29.0
    AllowedValues:
      - emr-5.0.0
      - emr-5.4.0
      - emr-5.16.0
      - emr-5.17.0
      - emr-5.26.0
      - emr-5.27.0
      - emr-5.28.0
      - emr-5.28.1
      - emr-5.29.0
      - emr-5.30.0
  # Ranger server version; forwarded to the Ranger plugin install steps.
  # Values are quoted so they stay strings ('2.0', not the number 2).
  rangerVersion:
    Type: String
    Description: Version of the Ranger Server.
    Default: '2.0'
    AllowedValues:
      - '0.6'
      - '0.7'
      - '1.0'
      - '2.0'
  # S3 prefix holding the scripts and input data used by this template: it is
  # the base of the bootstrap script path and of the "/inputdata" argument
  # passed to the policy install steps.
  s3artifactsRepo:
    Default: s3://aws-bigdata-blog/artifacts/aws-blog-emr-ranger
    Description: S3 location of the artifacts (scripts and input data) for this blog.
    Type: String
  # Feature flag driving the InstallPrestoRangerPlugin condition.
  # The parameter is a String, so the allowed values and the default are
  # written as quoted strings rather than YAML booleans.
  InstallPrestoRangerPlugin:
    Description: Install the Ranger Presto Plugin. NOTE - This needs Ranger 2.0 and has not been tested with all versions
    Default: 'false'
    Type: String
    AllowedValues:
      - 'true'
      - 'false'
#  InstallSparkRangerPlugin:
#    Description: Install the Ranger Spark Plugin. NOTE - This needs Ranger 2.0 and has not been tested with all versions
#    Default: false
#    Type: String
#    AllowedValues: [true, false]
Conditions:
  # One flag per EMR release label: true when emrReleaseLabel equals the
  # given label. Within this template only emr-5.30 (directly) and
  # emr-5.0 / emr-5.4 (through OlderEmrVersion) are referenced.
  emr-5.0: !Equals [!Ref emrReleaseLabel, emr-5.0.0]
  emr-5.4: !Equals [!Ref emrReleaseLabel, emr-5.4.0]
  emr-5.16: !Equals [!Ref emrReleaseLabel, emr-5.16.0]
  emr-5.17: !Equals [!Ref emrReleaseLabel, emr-5.17.0]
  # NOTE(review): emr-5.20.0 and emr-6.0.0 are not among the AllowedValues of
  # emrReleaseLabel in this template, so emr-5.20 and emr-6.0 cannot evaluate
  # to true as written.
  emr-5.20: !Equals [!Ref emrReleaseLabel, emr-5.20.0]
  emr-5.29: !Equals [!Ref emrReleaseLabel, emr-5.29.0]
  emr-5.30: !Equals [!Ref emrReleaseLabel, emr-5.30.0]
  emr-6.0: !Equals [!Ref emrReleaseLabel, emr-6.0.0]
  # True when the InstallPrestoRangerPlugin parameter is set to true; gates
  # the two Presto Ranger steps.
  InstallPrestoRangerPlugin: !Equals [true, !Ref InstallPrestoRangerPlugin]
#  InstallSparkRangerPlugin:
#    !Equals [true, !Ref InstallSparkRangerPlugin]
  # True for the two oldest selectable releases (emr-5.0.0 and emr-5.4.0);
  # selects the form of the Hue LDAP bind_dn.
  OlderEmrVersion: !Or [!Condition emr-5.0, !Condition emr-5.4]
Resources:
  # The EMR cluster itself: one master group, one core group, LDAP-backed Hue
  # login and Hadoop group mapping, plus a bootstrap action that runs
  # download-scripts.sh from the artifacts location.
  EMRSampleCluster:
    Type: AWS::EMR::Cluster
    Properties:
      Name: !Ref EMRClusterName
      ReleaseLabel: !Ref emrReleaseLabel
      LogUri: !Ref EMRLogDir
      JobFlowRole: EMR_EC2_DefaultRole
      ServiceRole: EMR_DefaultRole
      VisibleToAllUsers: 'true'
      Tags:
        - Key: Name
          Value: EMR Sample Cluster
      Applications:
        - Name: Hive
        - Name: Hadoop
        - Name: Hue
        - Name: Presto
        - Name: Spark
        - Name: Livy
      BootstrapActions:
        # Runs <s3artifactsRepo>/scripts/download-scripts.sh with the
        # artifacts location as its only argument.
        - Name: Download scripts
          ScriptBootstrapAction:
            Path: !Sub '${s3artifactsRepo}/scripts/download-scripts.sh'
            Args:
              - !Ref s3artifactsRepo
      Instances:
        Ec2KeyName: !Ref KeyName
        Ec2SubnetId: !Ref Subnet
        TerminationProtected: 'false'
        MasterInstanceGroup:
          Name: Master Instance
          Market: ON_DEMAND
          InstanceType: !Ref MasterInstanceType
          InstanceCount: '1'
          EbsConfiguration:
            EbsOptimized: 'true'
            EbsBlockDeviceConfigs:
              # One 100 GB gp2 volume per instance.
              - VolumesPerInstance: '1'
                VolumeSpecification:
                  VolumeType: gp2
                  SizeInGB: '100'
        CoreInstanceGroup:
          Name: Core Instance
          Market: ON_DEMAND
          InstanceType: !Ref CoreInstanceType
          InstanceCount: !Ref CoreInstanceCount
          EbsConfiguration:
            EbsOptimized: 'true'
            EbsBlockDeviceConfigs:
              # One 100 GB gp2 volume per instance.
              - VolumesPerInstance: '1'
                VolumeSpecification:
                  VolumeType: gp2
                  SizeInGB: '100'
      Configurations:
        # Hue: authenticate users against the LDAP server.
        - Classification: hue-ini
          Configurations:
            - Classification: desktop
              Configurations:
                - Classification: auth
                  ConfigurationProperties:
                    backend: desktop.auth.backend.LdapBackend
                - Classification: ldap
                  ConfigurationProperties:
                    ldap_url: !Sub 'ldap://${LDAPServerIP}'
                    base_dn: dc=corp,dc=emr,dc=local
                    nt_domain: corp.emr.local
                    ldap_username_pattern: uid=<username>,cn=users,dc=corp,dc=emr,dc=local
                    # Older releases (emr-5.0.0 / emr-5.4.0) get the full bind
                    # user; every other release gets only the part before '@'.
                    bind_dn: !If
                      - OlderEmrVersion
                      - !Ref myDirectoryBindUser
                      - !Select ['0', !Split ['@', !Ref myDirectoryBindUser]]
                    bind_password: !Ref myDirectoryBindPassword
                    search_bind_authentication: 'false'
                    create_users_on_login: 'true'
                    force_username_lowercase: 'true'
                    ignore_username_case: 'true'
                    debug: 'true'
                    trace_level: '0'
                  Configurations:
                    - Classification: groups
                      ConfigurationProperties:
                        group_filter: objectclass=*
                        group_name_attr: cn
                    - Classification: users
                      ConfigurationProperties:
                        user_filter: objectclass=*
                        user_name_attr: sAMAccountName
        # Hadoop: resolve group membership through LDAP.
        - Classification: core-site
          ConfigurationProperties:
            hadoop.security.group.mapping: org.apache.hadoop.security.LdapGroupsMapping
            hadoop.security.group.mapping.ldap.url: !Sub 'ldap://${LDAPServerIP}'
            hadoop.security.group.mapping.ldap.base: dc=corp,dc=emr,dc=local
            hadoop.security.group.mapping.ldap.bind.user: !Ref myDirectoryBindUser
            hadoop.security.group.mapping.ldap.bind.password: !Ref myDirectoryBindPassword
            hadoop.security.group.mapping.ldap.search.filter.user: (objectclass=*)
            hadoop.security.group.mapping.ldap.search.filter.group: (objectclass=*)
            hadoop.security.group.mapping.ldap.search.attr.member: member
            hadoop.security.group.mapping.ldap.search.attr.group.name: cn
        # Quoted so the umask stays the string '066' rather than a number.
        - Classification: hdfs-site
          ConfigurationProperties:
            fs.permissions.umask-mode: '066'
  # EMR step that runs loadDataIntoHDFS.sh through script-runner.jar, passing
  # the stack's region as the only argument.
  # NOTE(review): the script path under /mnt/tmp is presumably populated by
  # the cluster's "Download scripts" bootstrap action - confirm.
  LoadHDFSData:
    Type: AWS::EMR::Step
    Properties:
      Name: LoadHDFSData
      JobFlowId: !Ref EMRSampleCluster
      ActionOnFailure: CONTINUE
      HadoopJarStep:
        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
        MainClass: ''
        Args:
          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/loadDataIntoHDFS.sh
          - !Ref AWS::Region
  # EMR step that runs createHiveTables.sh through script-runner.jar, passing
  # the stack's region as the only argument.
  # NOTE(review): no DependsOn is declared, so the template does not fix the
  # order of this step relative to the other steps - confirm that is intended.
  CreateHiveTables:
    Type: AWS::EMR::Step
    Properties:
      Name: CreateHiveTables
      JobFlowId: !Ref EMRSampleCluster
      ActionOnFailure: CONTINUE
      HadoopJarStep:
        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
        MainClass: ''
        Args:
          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/createHiveTables.sh
          - !Ref AWS::Region
#  SparkStep:
#    Properties:
#      ActionOnFailure: CONTINUE
#      HadoopJarStep:
#        Args:
#          - spark-submit
#          - --deploy-mode
#          - cluster
#          - --class
#          - org.apache.spark.examples.SparkPi
#          - /usr/lib/spark/examples/jars/spark-examples.jar
#          - '10'
#        Jar: command-runner.jar
#        MainClass: ''
#      JobFlowId: !Ref 'EMRSampleCluster'
#      Name: SparkStep
#    Type: AWS::EMR::Step
  # EMR step that runs install-hive-hdfs-ranger-plugin.sh through
  # script-runner.jar. Arguments, in order: Ranger host, Ranger version,
  # artifacts location.
  InstallRangerPlugin:
    Type: AWS::EMR::Step
    Properties:
      Name: InstallRangerPlugin
      JobFlowId: !Ref EMRSampleCluster
      ActionOnFailure: CONTINUE
      HadoopJarStep:
        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
        MainClass: ''
        Args:
          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-hive-hdfs-ranger-plugin.sh
          - !Ref RangerHostname
          - !Ref rangerVersion
          - !Ref s3artifactsRepo
  # EMR step that runs install-hive-hdfs-ranger-policies.sh through
  # script-runner.jar. Arguments, in order: Ranger host and the
  # "<s3artifactsRepo>/inputdata" location.
  InstallRangerPolicies:
    Type: AWS::EMR::Step
    # Without an explicit dependency CloudFormation may create the step
    # resources in any order; this makes the policy step get submitted only
    # after the Hive/HDFS plugin install step.
    # NOTE(review): assumes policies should be installed after the plugin -
    # confirm against the scripts.
    DependsOn: InstallRangerPlugin
    Properties:
      ActionOnFailure: CONTINUE
      HadoopJarStep:
        Args:
          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-hive-hdfs-ranger-policies.sh
          - !Ref 'RangerHostname'
          - !Join
            - ''
            - - !Ref 's3artifactsRepo'
              - /inputdata
        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
        MainClass: ''
      JobFlowId: !Ref 'EMRSampleCluster'
      Name: InstallRangerPolicies
  # EMR step that runs install-presto-ranger-plugin.sh through
  # script-runner.jar. Only created when the InstallPrestoRangerPlugin
  # condition holds. Arguments, in order: Ranger host, Ranger version,
  # artifacts location, EMR release label.
  InstallRangerPrestoPlugin:
    Type: AWS::EMR::Step
    Condition: InstallPrestoRangerPlugin
    Properties:
      Name: InstallRangerPrestoPlugin
      JobFlowId: !Ref EMRSampleCluster
      ActionOnFailure: CONTINUE
      HadoopJarStep:
        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
        MainClass: ''
        Args:
          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-presto-ranger-plugin.sh
          - !Ref RangerHostname
          - !Ref rangerVersion
          - !Ref s3artifactsRepo
          - !Ref emrReleaseLabel
  # EMR step that runs install-presto-ranger-policies.sh through
  # script-runner.jar. Only created when the InstallPrestoRangerPlugin
  # condition holds. Arguments, in order: Ranger host and the
  # "<s3artifactsRepo>/inputdata" location.
  InstallRangerPrestoPolicies:
    Type: AWS::EMR::Step
    Condition: InstallPrestoRangerPlugin
    # Without an explicit dependency CloudFormation may create the step
    # resources in any order; this makes the policy step get submitted only
    # after the Presto plugin install step. Both resources share the same
    # Condition, so the dependency target exists whenever this step does.
    # NOTE(review): assumes policies should be installed after the plugin -
    # confirm against the scripts.
    DependsOn: InstallRangerPrestoPlugin
    Properties:
      ActionOnFailure: CONTINUE
      HadoopJarStep:
        Args:
          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-presto-ranger-policies.sh
          - !Ref 'RangerHostname'
          - !Join
            - ''
            - - !Ref 's3artifactsRepo'
              - /inputdata
        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
        MainClass: ''
      JobFlowId: !Ref 'EMRSampleCluster'
      Name: InstallRangerPrestoPolicies
  # EMR step that runs hue-update.sh (no arguments) through script-runner.jar.
  # Only created when the emr-5.30 condition holds, i.e. when emrReleaseLabel
  # is emr-5.30.0.
  UpdateHueConfiguration:
    Type: AWS::EMR::Step
    Condition: emr-5.30
    Properties:
      Name: UpdateHueConfiguration
      JobFlowId: !Ref EMRSampleCluster
      ActionOnFailure: CONTINUE
      HadoopJarStep:
        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
        MainClass: ''
        Args:
          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/hue-update.sh
#  InstallRangerSparkPlugin:
#    Properties:
#      ActionOnFailure: CONTINUE
#      HadoopJarStep:
#        Args:
#          - /mnt/tmp/aws-blog-emr-ranger/scripts/emr-steps/install-spark-ranger-plugin.sh
#          - !Ref 'RangerHostname'
#          - !Ref 'rangerVersion'
#          - !Ref 's3artifactsRepo'
#        Jar: s3://elasticmapreduce/libs/script-runner/script-runner.jar
#        MainClass: ''
#      JobFlowId: !Ref 'EMRSampleCluster'
#      Name: InstallRangerSparkPlugin
#    Condition: InstallSparkRangerPlugin
#    Type: AWS::EMR::Step
