Kubernetes-在Kubernetes集群上搭建Hadoop集群

  1. 云栖社区>
  2. 博客>
  3. 正文

Kubernetes-在Kubernetes集群上搭建Hadoop集群

statmoon 2018-07-31 23:04:00 浏览3041
展开阅读全文

准备工作


  1. Hadoop镜像,到docker hub上拉取
docker pull kubeguide/hadoop:latest
  1. Kubernetes集群
    参考:Kubernetes-离线部署Kubernetes 1.9.0

开始搭建


  1. 编写好hadoop.yaml
hadoop.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: kube-hadoop-conf
  namespace: default
data:
  HDFS_MASTER_SERVICE: hadoop-hdfs-master
  HDOOP_YARN_MASTER: hadoop-yarn-master
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hdfs-master
spec:
  type: NodePort
  selector:
    app: hdfs-master
  ports:
    - name: rpc
      port: 9000
      targetPort: 9000
    - name: http
      port: 50070
      targetPort: 50070
      nodePort: 32007
---
apiVersion: v1
kind: Pod
metadata:
  name: hdfs-master
  labels:
    app: hdfs-master
spec:
  containers:
    - name: hdfs-master
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 9000
        - containerPort: 50070    
      env:
        - name: HADOOP_NODE_TYPE
          value: namenode
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER
  restartPolicy: Always
---
apiVersion: v1
kind: Pod
metadata:
    name: hadoop-datanode-1
    labels:
      app: hadoop-datanode-1
spec:
  containers:
    - name: hadoop-datanode-1
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 9000
        - containerPort: 50070    
      env:
        - name: HADOOP_NODE_TYPE
          value: datanode
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER        
  restartPolicy: Always
---
apiVersion: v1
kind: Pod
metadata:
    name: hadoop-datanode-2
    labels:
      app: hadoop-datanode-2
spec:
  containers:
    - name: hadoop-datanode-2
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 9000
        - containerPort: 50070    
      env:
        - name: HADOOP_NODE_TYPE
          value: datanode
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER        
  restartPolicy: Always
---
apiVersion: v1
kind: Pod
metadata:
    name: hadoop-datanode-3
    labels:
      app: hadoop-datanode-3
spec:
  containers:
    - name: hadoop-datanode-3
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 9000
        - containerPort: 50070    
      env:
        - name: HADOOP_NODE_TYPE
          value: datanode
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER        
  restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-yarn-master
spec:
  type: NodePort
  selector:
    app: yarn-master
  ports:
     - name: "8030"       
       port: 8030
     - name: "8031"     
       port: 8031
     - name: "8032"
       port: 8032     
     - name: http
       port: 8088
       targetPort: 8088
       nodePort: 32088
---
apiVersion: v1
kind: Pod
metadata:
  name: yarn-master
  labels:
    app: yarn-master
spec:
  containers:
    - name: yarn-master
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 9000
        - containerPort: 50070    
      env:
        - name: HADOOP_NODE_TYPE
          value: resourceman
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER          
  restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
  name: yarn-node-1
spec:
  clusterIP: None
  selector:
    app: yarn-node-1
  ports:
     - port: 8040
---
apiVersion: v1
kind: Service
metadata:
  name: yarn-node-2
spec:
  clusterIP: None
  selector:
    app: yarn-node-2
  ports:
     - port: 8040
---
apiVersion: v1
kind: Service
metadata:
  name: yarn-node-3
spec:
  clusterIP: None
  selector:
    app: yarn-node-3
  ports:
     - port: 8040
---
apiVersion: v1
kind: Pod
metadata:
  name: yarn-node-1
  labels:
    app: yarn-node-1
spec:
  containers:
    - name: yarn-node-1
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 8040
        - containerPort: 8041   
        - containerPort: 8042        
      env:
        - name: HADOOP_NODE_TYPE
          value: yarnnode
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER          
  restartPolicy: Always
---
apiVersion: v1
kind: Pod
metadata:
  name: yarn-node-2
  labels:
    app: yarn-node-2
spec:
  containers:
    - name: yarn-node-2
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 8040
        - containerPort: 8041   
        - containerPort: 8042        
      env:
        - name: HADOOP_NODE_TYPE
          value: yarnnode
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER          
  restartPolicy: Always
---
apiVersion: v1
kind: Pod
metadata:
  name: yarn-node-3
  labels:
    app: yarn-node-3
spec:
  containers:
    - name: yarn-node-3
      image: 192.168.242.132/library/kubernetes-hadoop:latest
      imagePullPolicy: IfNotPresent
      ports:
        - containerPort: 8040
        - containerPort: 8041   
        - containerPort: 8042        
      env:
        - name: HADOOP_NODE_TYPE
          value: yarnnode
        - name: HDFS_MASTER_SERVICE
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDFS_MASTER_SERVICE
        - name: HDOOP_YARN_MASTER
          valueFrom:
            configMapKeyRef:
              name: kube-hadoop-conf
              key: HDOOP_YARN_MASTER          
  restartPolicy: Always

这个yaml文件包含一个ConfigMap,5个Service,8个pod,这里需要注意的是ConfigMap中HDFS_MASTER_SERVICE和HDOOP_YARN_MASTER不要使用IP,使用HDFS service的名称,否则datanode将会连接不上namenode,出现错误【ipc.Client: Retrying connect to server: xxx:9000.】

  1. 执行创建命令
kubectl create -f hadoop.yaml
img_9a789c9c9680d17deff0181eecc36dbd.png
create
  1. 检查是否创建成功
  • 查看config map
kubectl get configmap -o wide
img_0a8e1df53770592890db41ab4cb22f00.png
config map
  • 查看service
kubectl get svc -o wide
img_448f5d9248cf254be7693b5a2bf48224.png
service
  • 查看pod
kubectl get po -o wide
img_08cbcd57d21355d21be684d42980e772.png
pod
  • 通过浏览器访问HDFS管理界面【http://192.168.242.136:32007】
img_bfbd347a19a5770f3bcccff3946a20f8.png
overview

查看datanode

img_ebc353c87140868a4419b477840d3e5a.png
datanodes

全部正常,搭建成功!
但是上面的搭建方式是以单个POD声明的,这种方式不稳定,如果不小心删除后就没有了,其实我们可以使用Replication Controller方式进行搭建,这样的话始终可以确保保留相应数量的POD,具体的yaml文件如下:

apiVersion: v1
kind: ConfigMap
metadata:
  name: kube-hadoop-conf
  namespace: default
data:
  HDFS_MASTER_SERVICE: hadoop-hdfs-master
  HDOOP_YARN_MASTER: hadoop-yarn-master
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-hdfs-master
spec:
  type: NodePort
  selector:
    name: hdfs-master
  ports:
    - name: rpc
      port: 9000
      targetPort: 9000
    - name: http
      port: 50070
      targetPort: 50070
      nodePort: 32007
---
apiVersion: v1
kind: ReplicationController
metadata:
  name: hdfs-master
  labels:
    name: hdfs-master
spec:
  replicas: 1
  selector:
    name: hdfs-master
  template:
    metadata:
      labels:
        name: hdfs-master
    spec:
      containers:
        - name: hdfs-master
          image: 10.3.13.184/library/kubernetes-hadoop:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9000
            - containerPort: 50070    
          env:
            - name: HADOOP_NODE_TYPE
              value: namenode
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDFS_MASTER_SERVICE
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDOOP_YARN_MASTER
      restartPolicy: Always
---
apiVersion: v1
kind: ReplicationController
metadata:
  name: hadoop-datanode
  labels:
    app: hadoop-datanode
spec:
  replicas: 3
  selector:
    name: hadoop-datanode
  template:
    metadata:
      labels:
        name: hadoop-datanode
    spec:
      containers:
        - name: hadoop-datanode
          image: 10.3.13.184/library/kubernetes-hadoop:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9000
            - containerPort: 50070    
          env:
            - name: HADOOP_NODE_TYPE
              value: datanode
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDFS_MASTER_SERVICE
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDOOP_YARN_MASTER
      restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
  name: hadoop-yarn-master
spec:
  type: NodePort
  selector:
    name: yarn-master
  ports:
     - name: "8030"       
       port: 8030
     - name: "8031"     
       port: 8031
     - name: "8032"
       port: 8032     
     - name: http
       port: 8088
       targetPort: 8088
       nodePort: 32088
---
apiVersion: v1
kind: ReplicationController
metadata:
  name: yarn-master
  labels:
    name: yarn-master
spec:
  replicas: 1
  selector:
    name: yarn-master
  template:
    metadata:
      labels:
        name: yarn-master
    spec:
      containers:
        - name: yarn-master
          image: 10.3.13.184/library/kubernetes-hadoop:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 9000
            - containerPort: 50070    
          env:
            - name: HADOOP_NODE_TYPE
              value: resourceman
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDFS_MASTER_SERVICE
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDOOP_YARN_MASTER          
      restartPolicy: Always
---
apiVersion: v1
kind: Service
metadata:
  name: yarn-node
spec:
  clusterIP: None
  selector:
    name: yarn-node
  ports:
     - port: 8040
---
apiVersion: v1
kind: ReplicationController
metadata:
  name: yarn-node
  labels:
    name: yarn-node
spec:
  replicas: 3
  selector:
    name: yarn-node
  template:
    metadata:
      labels:
        name: yarn-node
    spec:
      containers:
        - name: yarn-node
          image: 10.3.13.184/library/kubernetes-hadoop:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 8040
            - containerPort: 8041   
            - containerPort: 8042        
          env:
            - name: HADOOP_NODE_TYPE
              value: yarnnode
            - name: HDFS_MASTER_SERVICE
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDFS_MASTER_SERVICE
            - name: HDOOP_YARN_MASTER
              valueFrom:
                configMapKeyRef:
                  name: kube-hadoop-conf
                  key: HDOOP_YARN_MASTER          
      restartPolicy: Always

使用kubectl命令创建

img_9224413ada8166f6b92a0bff0ad74cec.png
创建

创建完成后就可以通过浏览器看到熟悉的HDFS管理界面了

img_bbf3794a014f83e3326d0aa38c6d4d50.png
HDFS
img_3764a3a588572df7329db33298fd30fd.png
image.png

参考:Hadoop 运行在 Kubernetes平台实践

网友评论

登录后评论
0/500
评论
statmoon
+ 关注