Big Data Deployment

Big Data Single-Node Deployment

Base Environment

Directory Layout

Purpose                                   Directory                         Notes
Application installation path             /data/application/app/
Data storage directory                    /data/application/data/
Data log directory                        /data/application/logs/
Data backup directory (backup server)     /data/application/backup/
Temporary storage on the server           /data/application/tmp/
Tools directory on the server             /data/application/tools/
Monitoring directory on the server        /data/application/prometheus/
mkdir   /data/application/{app,data,logs,backup,tmp,tools,prometheus} -p
Configure passwordless SSH
ssh-keygen
ssh-copy-id root@IP
Add host resolution
echo "IP_ADDRESS  masters  secondarynamenode  node1  zookeeper1  namenode  datanode1" >> /etc/hosts
Install the JDK
tar xvf jdk-8u121-linux-x64.tar.gz -C /data/application/app/

vim /etc/profile 
export JAVA_HOME=/data/application/app/jdk1.8.0_121/
export JRE_HOME=/data/application/app/jdk1.8.0_121/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
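
To confirm the JDK variables take effect, reload the profile and check the version (an assumed verification step, not in the original guide):

source /etc/profile
java -version   # should report java version "1.8.0_121"
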
ZooKeeper Deployment
tar xvf apache-zookeeper-3.8.0-bin.tar.gz -C /data/application/app/
mkdir /data/application/data/zookeeper
Configure the systemd unit file

vim /usr/lib/systemd/system/zookeeper.service

[Unit]
Description=Zookeeper Server Service
After=network.target

[Service]
Type=forking
UMask=0027
User=root
Group=root
ExecStart=/data/application/app/apache-zookeeper-3.8.0-bin/bin/zkServer.sh start
ExecStop=/data/application/app/apache-zookeeper-3.8.0-bin/bin/zkServer.sh stop
Restart=on-failure
RestartSec=10

[Install]
WantedBy=multi-user.target

vim /data/application/app/apache-zookeeper-3.8.0-bin/bin/zkEnv.sh

JAVA_HOME="/data/application/app/jdk1.8.0_121/"
Start the service
systemctl  start     zookeeper
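
systemd only picks up a newly created unit file after a daemon-reload; if the start above fails, reload and retry, then check the status (assumed verification steps):

systemctl daemon-reload
systemctl restart zookeeper
systemctl status zookeeper
/data/application/app/apache-zookeeper-3.8.0-bin/bin/zkServer.sh status   # expect "Mode: standalone"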
Kafka Deployment
cd /data/application/tools/
tar xvf kafka_2.12-2.4.1.tgz -C /data/application/app/
mkdir -p /data/application/data/kafka/kafka-log

vim /data/application/app/kafka_2.12-2.4.1/config/server.properties

broker.id=1   # must be unique for each broker
listeners=PLAINTEXT://IP:9092   # this machine's IP address
num.network.threads=9
num.io.threads=16
socket.send.buffer.bytes=1024000
socket.receive.buffer.bytes=1024000
socket.request.max.bytes=104857600
log.dirs=/data/application/data/kafka/kafka-log  
num.partitions=30
num.recovery.threads.per.data.dir=1
log.retention.hours=24
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
zookeeper.connect=10.1.61.128:2181,10.1.61.129:2181,10.1.61.130:2181    
zookeeper.connection.timeout.ms=6000
delete.topic.enable = true
auto.create.topics.enable=true
message.max.bytes=20000000
replica.fetch.max.bytes=20485760
acks=1
log.cleanup.policy=delete    
num.network.threads handles network I/O (reading and writing the socket buffers) with essentially no I/O wait, so set it to the number of CPU cores plus 1.
num.io.threads performs disk I/O and may see some I/O wait at peak times, so it should be larger: about twice the number of CPU cores, and no more than three times.
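
As a worked example of that sizing rule (an illustrative sketch, not part of the original configuration):

cores=$(nproc)                              # e.g. 8 cores on this machine
echo "num.network.threads=$((cores + 1))"   # 8 + 1 = 9
echo "num.io.threads=$((cores * 2))"        # 2 x 8 = 16; keep it at most 3x cores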
Kafka JVM tuning

(set the heap to roughly 50% of the machine's physical memory)

vim /data/application/app/kafka_2.12-2.4.1/bin/kafka-server-start.sh

if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then
    export KAFKA_HEAP_OPTS="-Xmx8G -Xms8G"
fi
Start the service
/data/application/app/kafka_2.12-2.4.1/bin/kafka-server-start.sh -daemon /data/application/app/kafka_2.12-2.4.1/config/server.properties
Check the listening port
netstat -lntp | grep '9092'
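
A quick functional smoke test with the scripts shipped in Kafka's bin directory (an assumed check, not in the original guide; replace IP with the address set in listeners):

/data/application/app/kafka_2.12-2.4.1/bin/kafka-topics.sh --bootstrap-server IP:9092 --create --topic smoke-test --partitions 1 --replication-factor 1
/data/application/app/kafka_2.12-2.4.1/bin/kafka-topics.sh --bootstrap-server IP:9092 --list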
HBase Deployment
Hadoop Deployment
cd /data/application/tools/
tar xvf hadoop-3.2.2.tar.gz -C /data/application/app/
mkdir -p  /data/application/app/hadoop-3.2.2/conf/
touch     /data/application/app/hadoop-3.2.2/conf/excludes
mkdir -p  /data/application/app/hadoop-3.2.2/log/
mkdir -p  /data/application/app/hadoop-3.2.2/mapred/log/
mkdir -p  /data/application/app/hadoop-3.2.2/pids/

mkdir -p  /data/application/data/hadoop/tmp/
mkdir -p  /data/application/data/hadoop/hdfs/name/coredata/
mkdir -p  /data/application/data/hadoop/hdfs/data-1/coredata/
mkdir -p  /data/application/data/hadoop/hdfs/journal/
mkdir -p  /data/application/data/hadoop/yarn/local-1/coredata/
mkdir -p  /data/application/data/hadoop/namesecondary/coredata 
Add Hadoop environment variables
cat << EOF >> /etc/profile

#HADOOP_HOME
export HADOOP_HOME=/data/application/app/hadoop-3.2.2/
export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=/data/application/app/hadoop-3.2.2/conf

EOF

cp -rp  /data/application/app/hadoop-3.2.2/etc/hadoop/*   /data/application/app/hadoop-3.2.2/conf/

cat << EOF >> /data/application/app/hadoop-3.2.2/conf/hadoop-env.sh
export JAVA_HOME=/data/application/app/jdk1.8.0_121/
export HADOOP_LOG_DIR=/data/application/app/hadoop-3.2.2/log
export HADOOP_PID_DIR=/data/application/app/hadoop-3.2.2/pids
export HADOOP_HEAPSIZE=4096
export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true -XX:+UseCompressedOops -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+DisableExplicitGC -XX:+UseCMSInitiatingOccupancyOnly -XX:CMSInitiatingOccupancyFraction=75 -XX:SoftRefLRUPolicyMSPerMB=0"
EOF
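
At this point the Hadoop installation can be sanity-checked (an assumed step; it requires the /etc/profile changes above to be loaded in the current shell):

source /etc/profile
hadoop version   # should report Hadoop 3.2.2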
Hadoop configuration files

core-site.xml

cat << EOF >  /data/application/app/hadoop-3.2.2/conf/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
   <property>
        <name>fs.defaultFS</name>
        <value>hdfs://masters</value>
    </property>
    <property>
    <name>ha.zookeeper.quorum</name>
    <value>192.168.101.72:2181</value>
</property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/data/application/data/hadoop/tmp/</value>
    </property>
    <property>
        <name>hadoop.logfile.size</name>
        <value>10000000</value>
    </property>
    <property>
        <name>hadoop.logfile.count</name>
        <value>10</value>
    </property>
</configuration>

EOF
hdfs-site.xml
cat << EOF > /data/application/app/hadoop-3.2.2/conf/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/data/application/data/hadoop/hdfs/name/coredata/</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/data/application/data/hadoop/hdfs/data-1/coredata/</value>
    </property>
    <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:/data/application/data/hadoop/namesecondary/coredata</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>namenode:50070</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>secondarynamenode:50090</value>
    </property>
    <property>
        <name>dfs.hosts.exclude</name>
        <value>/data/application/app/hadoop-3.2.2/conf/excludes</value>
</property>
<property>
      <name>dfs.namenode.fs-limits.max-directory-items</name>
      <value>6400000</value>
</property>
</configuration>

EOF
mapred-site.xml
cat << EOF > /data/application/app/hadoop-3.2.2/conf/mapred-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>namenode:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>namenode:19888</value>
    </property>
    <property>
        <name>mapred.hosts.exclude</name>
        <value>/data/application/app/hadoop-3.2.2/conf/excludes</value>
    </property>
    <!-- Windows commit MapReduce job -->
    <property>
        <name>mapreduce.app-submission.cross-platform</name>
        <value>true</value>
    </property>
<!-- For jobs such as copying snapshots between different clusters, both clusters need to add the following parameters -->
<!-- 
 <property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=/data/application/app/hadoop-3.2.2</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=/data/application/app/hadoop-3.2.2</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=/data/application/app/hadoop-3.2.2</value>
</property>
-->
<property> 
<name>mapred.child.java.opts</name>
<value>-Xmx4096m</value>
</property>
</configuration>

EOF
yarn-site.xml
cat << EOF > /data/application/app/hadoop-3.2.2/conf/yarn-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>namenode</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>namenode:8032</value>
    </property>
    <!-- comma-separated directory for round-robin -->
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>file:/data/application/data/hadoop/yarn/local-1/coredata/</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>8192</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-pmem-ratio</name>
        <value>2.1</value>
    </property>
    <property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>8</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>namenode:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.nodes.exclude-path</name>
        <value>/data/application/app/hadoop-3.2.2/conf/excludes</value>
    </property>
</configuration>

EOF
HBase Deployment
Extract
cd /data/application/tools
tar xvf hbase-2.3.5-bin.tar.gz -C /data/application/app/
Environment variables
cat << EOF >> /etc/profile
#HBASE_HOME
export HBASE_HOME=/data/application/app/hbase-2.3.5
export PATH=\$PATH:\$HBASE_HOME/bin
export HBASE_CONF_DIR=/data/application/app/hbase-2.3.5/conf
EOF

cat << EOF >> /data/application/app/hadoop-3.2.2/conf/hadoop-env.sh
export HBASE_HOME=/data/application/app/hbase-2.3.5
for f in \$HBASE_HOME/lib/*.jar; do
HADOOP_CLASSPATH=\${HADOOP_CLASSPATH}:\$f
done
EOF

cat << EOF >> /data/application/app/hbase-2.3.5/conf/hbase-env.sh
export JAVA_HOME=/data/application/app/jdk1.8.0_121/
export HBASE_LOG_DIR=/data/application/app/hbase-2.3.5/logs
export HBASE_PID_DIR=/data/application/app/hbase-2.3.5/pids
export HBASE_HEAPSIZE=2G
export HBASE_MANAGES_ZK=false
export HBASE_OPTS="-XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:GCPauseIntervalMillis=100"
export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=5 -XX:GCLogFileSize=50M"
EOF

cp -rp /data/application/app/hbase-2.3.5/conf/ /data/application/app/hbase-2.3.5/conf-bak
Configuration file: hbase-site.xml
cat << EOF >  /data/application/app/hbase-2.3.5/conf/hbase-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://masters/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
     <name>hbase.unsafe.stream.capability.enforce</name>
     <value>false</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>192.168.101.72</value>
  </property>
  <property>
    <name>zookeeper.session.timeout</name>
    <value>300000</value>
  </property>
  <property>
    <name>hbase.master.info.port</name>
    <value>60010</value>
  </property>
  <property>
    <name>hbase.regionserver.info.port</name>
    <value>60030</value>
  </property>
  <property>
    <name>hbase.regionserver.region.split.policy</name>
    <value>org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy</value>
  </property>
  <property>
    <name>hbase.hregion.max.filesize</name>
    <value>10737418240</value>
  </property>
  <property>
    <name>hbase.regionserver.handler.count</name>
    <value>200</value>
  </property>
  <property>
    <name>zookeeper.znode.parent</name>
    <value>/hbase</value>
  </property>
  <property>
    <name>hbase.table.sanity.checks</name>
    <value>false</value>
  </property>
    <property>
    <name>hbase.regionserver.wal.codec</name>
    <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
  </property>
  <property>
    <name>phoenix.schema.isNamespaceMappingEnabled</name>
    <value>true</value>
  </property>
  <property>
    <name>phoenix.schema.mapSystemTablesToNamespace</name>
    <value>true</value>
  </property>
</configuration>

EOF
Adjust log parameters
sed -i 's/hbase.log.maxfilesize=256MB/hbase.log.maxfilesize=16MB/g' /data/application/app/hbase-2.3.5/conf/log4j.properties

sed -i 's/hbase.security.log.maxfilesize=256MB/hbase.security.log.maxfilesize=16MB/g' /data/application/app/hbase-2.3.5/conf/log4j.properties
Phoenix
cd /data/application/tools
tar xvf phoenix-hbase-2.3-5.1.2-bin.tar.gz  -C /data/application/app/

# Create a symlink
ln -s /data/application/app/phoenix-hbase-2.3-5.1.2-bin /usr/local/phoenix
# Copy the Phoenix server jar into HBase's lib directory
cd /data/application/app/phoenix-hbase-2.3-5.1.2-bin/
cp phoenix-server-hbase-2.3-5.1.2.jar  /data/application/app/hbase-2.3.5/lib/ 
cat << EOF >> /etc/profile
# Add environment variables
export PHOENIX_HOME=/data/application/app/phoenix-hbase-2.3-5.1.2-bin
export PATH=\$PHOENIX_HOME/bin:\$PATH

EOF
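After HBase has been started (see the service-startup section below), the Phoenix installation can be verified with its bundled SQL client; a hedged example, assuming the ZooKeeper address used in hbase-site.xml:

/data/application/app/phoenix-hbase-2.3-5.1.2-bin/bin/sqlline.py 192.168.101.72:2181
# at the sqlline prompt, run !tables to list tables and !quit to exit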
Specify the startup users
cat << EOF >>  /etc/profile
# users that run the Hadoop (HDFS/YARN) daemons
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export HDFS_ZKFC_USER=root 
EOF
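
Since /etc/profile has been appended to several times above, reload it in the current shell before starting the services (an assumed but necessary step):

source /etc/profile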
Start the services
Hadoop initialization
hdfs namenode -format   # format only before the first start; never run it again afterwards
Start the services
# start Hadoop HDFS
start-dfs.sh

# start the HBase master
hbase-daemon.sh --config /data/application/app/hbase-2.3.5/conf start master
# start the HBase regionserver
hbase-daemon.sh --config /data/application/app/hbase-2.3.5/conf start regionserver

# check the running services with jps
jps
81361 QuorumPeerMain
51648 Kafka
113840 DataNode
16132 Jps
114420 SecondaryNameNode
35254 HMaster
113304 NameNode
36170 HRegionServer
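
The HDFS side can also be verified (an assumed check; the web UI port matches dfs.namenode.http-address above):

hdfs dfsadmin -report        # lists live datanodes and capacity
# NameNode web UI: http://namenode:50070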
HBase verification
Enter the hbase shell and run the status command to check the cluster state
hbase(main):004:0* status 
1 active master, 0 backup masters, 1 servers, 0 dead, 2.0000 average load
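
Beyond status, a small write/read round trip in the shell is a useful check (an illustrative example; the table name t1 is arbitrary):

# inside the hbase shell:
create 't1', 'cf'
put 't1', 'row1', 'cf:a', 'value1'
scan 't1'
disable 't1'
drop 't1'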
Compression (zstd)
# run on all nodes
wget https://zgxt-uat-4-0-1258776458.cos.ap-beijing.myqcloud.com/package/HBASE-2.3.5/libzstd-1.5.5-1.el7.x86_64.rpm
rpm -ivh libzstd-1.5.5-1.el7.x86_64.rpm
# check whether zstd compression is supported
hadoop checknative