images/blog/202107_ecomp/accumulo-compactor-muchos.yaml:

# The following instance properties are mapped into the containers as
# accumulo.properties and will be picked up by compactor processes. It's
# important that the instance props exactly match those of all other Accumulo
# processes in the cluster in order for them to be able to communicate with
# each other. So if all other processes use ect2-0 as the zookeeper host, we
# could not use the IP addr here and still authenticate with other processes
# because the values for the prop would not match exactly. See the comment
# about hostnames further down.
apiVersion: v1
kind: ConfigMap
metadata:
  name: accumulo-properties
data:
  accumulo.properties: |
    coordinator.port.client=30100
    general.rpc.timeout=240s
    instance.secret=muchos
    instance.volumes=hdfs://accucluster/accumulo
    instance.zookeeper.host=ect2-0:2181
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: accumulo-logging
data:
  log4j2-service.properties: |
    status = info
    dest = err
    name = AccumuloCompactorLoggingProperties
    appender.console.type = Console
    appender.console.name = STDOUT
    appender.console.target = SYSTEM_OUT
    appender.console.layout.type = PatternLayout
    appender.console.layout.pattern = %d{ISO8601} [%-8c{2}] %-5p: %m%n
    appender.console.filter.threshold.type = ThresholdFilter
    appender.console.filter.threshold.level = info
    logger.hadoop.name = org.apache.hadoop
    logger.hadoop.level = debug
    logger.zookeeper.name = org.apache.zookeeper
    logger.zookeeper.level = error
    logger.accumulo.name = org.apache.accumulo
    logger.accumulo.level = debug
    rootLogger.level = info
    rootLogger.appenderRef.console.ref = STDOUT
---
# Muchos configured a nameservice of accucluster for HDFS. URLs sent to
# compactors from the coordinator would be of the form hdfs://accucluster/... .
# Therefore compactors need to be able to resolve the accucluster nameservice.
# The following data is mapped into the containers as the hdfs-site.xml file in
# order to support resolving accucluster.
apiVersion: v1
kind: ConfigMap
metadata:
  name: hdfs-site
data:
  hdfs-site.xml: |
    <configuration>
      <property>
        <name>dfs.datanode.synconclose</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.nameservices</name>
        <value>accucluster</value>
      </property>
      <property>
        <name>dfs.internal.nameservices</name>
        <value>accucluster</value>
      </property>
      <property>
        <name>dfs.namenode.secondary.http-address.accucluster</name>
        <value>ect2-0:50090</value>
      </property>
      <property>
        <name>dfs.namenode.secondary.https-address.accucluster</name>
        <value>ect2-0:50091</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-address.accucluster</name>
        <value>ect2-0:8020</value>
      </property>
      <property>
        <name>dfs.namenode.http-address.accucluster</name>
        <value>ect2-0:50070</value>
      </property>
      <property>
        <name>dfs.namenode.https-address.accucluster</name>
        <value>ect2-0:50071</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-address.accucluster</name>
        <value>ect2-0:8025</value>
      </property>
      <property>
        <name>dfs.namenode.lifeline.rpc-address.accucluster</name>
        <value>ect2-0:8050</value>
      </property>
      <property>
        <name>dfs.client.failover.proxy.provider.accucluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
      </property>
    </configuration>
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: accumulo-compactor
  labels:
    app: accumulo-compactor
spec:
  replicas: 10
  selector:
    matchLabels:
      app: accumulo-compactor
  template:
    metadata:
      labels:
        app: accumulo-compactor
    spec:
      # There was no shared DNS setup between the Muchos cluster and the
      # Kubernetes cluster, however they did share a private vnet. Also,
      # Kubernetes was configured to give each pod an IP addr on the vnet,
      # making the pods' IP addrs visible to any process on the Muchos cluster.
      # So the two clusters could see each other's IP addrs, but did not share
      # a common DNS. Compactors need to access Zookeeper and the Coordinator,
      # and both of these ran on host ect2-0, a hostname defined on the Muchos
      # cluster. Therefore the hostname ect2-0 is defined for each pod below so
      # that it can resolve this VM running in the Muchos cluster. Given time,
      # a solution using DNS between the two clusters could probably be figured
      # out. That solution would probably be specific to an environment, and
      # Muchos tries to make very few assumptions.
      hostNetwork: false
      hostAliases:
        - ip: "10.1.0.5"
          hostnames:
            - "ect2-0"
      containers:
        - name: accumulo-compactor
          image: extcompne.azurecr.io/accumulo
          command: ["/bin/bash", "-c"]
          # Since the Kubernetes and Muchos clusters do not share a common DNS,
          # it's very important that compactor processes advertise themselves
          # in Zookeeper using IP addrs. In the following command,
          # -a $(hostname -I) accomplishes this.
          args: ["accumulo compactor -q DCQ1 -a $(hostname -I)"]
          ports:
            - containerPort: 9101
          resources:
            requests:
              cpu: .9
              memory: "2048Mi"
            limits:
              cpu: 1
              memory: "2048Mi"
          env:
            - name: HADOOP_USER_NAME
              value: centos
            - name: ACCUMULO_JAVA_OPTS
              value: "-Xmx1g"
          volumeMounts:
            - name: "config"
              mountPath: "/opt/accumulo/conf/accumulo.properties"
              subPath: "accumulo.properties"
            - name: "logging"
              mountPath: "/opt/accumulo/conf/log4j2-service.properties"
              subPath: "log4j2-service.properties"
            - name: "hdfs"
              mountPath: "/opt/hadoop/etc/hadoop/hdfs-site.xml"
              subPath: "hdfs-site.xml"
      volumes:
        - name: "config"
          configMap:
            name: "accumulo-properties"
        - name: "logging"
          configMap:
            name: "accumulo-logging"
        - name: "hdfs"
          configMap:
            name: "hdfs-site"
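# A minimal usage sketch, not part of the original Muchos setup: assuming
# kubectl is already configured against the Kubernetes cluster that will host
# the compactor pods, the ConfigMaps and Deployment above could be applied and
# inspected with commands like the following. The replica count of 20 is only
# an illustrative value for scaling the compactor pool up or down.
#
#   kubectl apply -f accumulo-compactor-muchos.yaml
#   kubectl get pods -l app=accumulo-compactor -o wide   # shows each pod's vnet IP
#   kubectl scale deployment accumulo-compactor --replicas=20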