#!/usr/bin/env bash
# https://github.com/user501254/BD_STTP_2016
#
# InstallHadoop.sh
# Bash Script for rudimentary Hadoop Installation (Single-Node Cluster)
#
# To run:
# open a terminal,
# change directory to this script's location,
# $ cd <path to InstallHadoop.sh parent directory>
# give the script execute permission,
# $ chmod +x InstallHadoop.sh
# then execute the script,
# $ ./InstallHadoop.sh
#
#
# Copyright (C) 2016 Ashesh Kumar Singh <user501254@gmail.com>
#
# This script may be modified and distributed under the terms
# of the MIT license. See the LICENSE file for details.
#
# Make sure that the script is not being run as root
if [[ "$EUID" -eq 0 ]]; then
echo -e "\e[31m
You are running this script as root which can cause problems.
Please run this script as a normal user. See script file.\n
Exiting.
\e[0m"
exit
else
echo -e "\e[34m
This will install the latest stable version of Hadoop on your system.\n
Make sure that you have the following before continuing:
- a working internet connection,
to download any required packages and
the latest stable Hadoop binary if it is
not already in the current directory,
i.e. $PWD
- a fairly up-to-date system
- enough free disk space
I recommend that you also read through the script file once.
\e[0m"
while true
do
read -r -p 'Do you wish to continue (yes/no)? ' choice
case "$choice" in
[Nn]* ) echo 'Exiting.'; exit;;
[Yy]* ) echo ''; break;;
* ) echo 'Response not valid, try again.';;
esac
done
fi
set -euo pipefail
clear
echo -e "\e[32mSTEP (1 of 6): Installing Java, OpenSSH, rsync\e[0m"
echo -e "\e[32m##############################################\n\e[0m"
sleep 2s
if [ -f /etc/redhat-release ]; then
sudo yum clean expire-cache
sudo yum install -y java-1.8.0-openjdk-devel openssh rsync
elif [ -f /etc/debian_version ]; then
sudo apt-get update
sudo apt-get install -y openjdk-8-jdk openssh-server rsync
else
lsb_release -si 2>/dev/null || true
echo -e "\e[31mCan't use yum or apt-get; check the installation script.\n\e[0m"
exit 1
fi
sleep 1s
echo -e "\n\n"
clear
echo -e "\e[32mSTEP (2 of 6): Setting up SSH keys\e[0m"
echo -e "\e[32m###################################\n\e[0m"
sleep 2s
if [[ -d ~/.ssh ]]; then
echo -e "\e[34mBacking up \`~/.ssh' folder contents to \`~/.ssh.old'.\e[0m"
mkdir -p ~/.ssh.old
sudo cp --backup=t ~/.ssh/* ~/.ssh.old 2>/dev/null || true
else
mkdir ~/.ssh
fi
sudo chown "$USER":"$USER" ~/.ssh
chmod 700 ~/.ssh
touch ~/.ssh/known_hosts
if [ ! -f ~/.ssh/id_rsa ]; then
echo -e "\e[34mGenerating new SSH keys.\e[0m"
ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
fi
echo -e "\e[34mAdding \`~/.ssh/id_rsa.pub' to list of authorized keys.\e[0m"
grep -qxF "$(cat ~/.ssh/id_rsa.pub)" ~/.ssh/authorized_keys 2>/dev/null || cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
echo -e "\e[34mEnabling SSH service to start on boot.\e[0m"
sudo systemctl enable ssh.service || sudo systemctl enable sshd.service
sudo systemctl restart ssh.service || sudo systemctl restart sshd.service
echo -e "\e[34mUpdating \`~/.ssh/config'.\e[0m"
if ! grep -q 'StrictHostKeyChecking no' ~/.ssh/config 2>/dev/null; then
cat << EOT >> ~/.ssh/config
Host localhost
StrictHostKeyChecking no
Host 0.0.0.0
StrictHostKeyChecking no
EOT
fi
chmod 600 ~/.ssh/config
chmod 600 ~/.ssh/authorized_keys
chmod 600 ~/.ssh/id_rsa
chmod 644 ~/.ssh/id_rsa.pub
chmod 644 ~/.ssh/known_hosts
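# Optional sanity check (assumption: sshd accepts connections by now).
# Hadoop's start scripts ssh into localhost, so passwordless login must
# work before continuing; uncomment to verify:
# ssh -o BatchMode=yes localhost true && echo 'Passwordless SSH OK'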
sleep 1s
echo -e "\n\n"
clear
echo -e "\e[32mSTEP (3 of 6): Downloading and Extracting Hadoop archive\e[0m"
echo -e "\e[32m#########################################################\n\e[0m"
sleep 2s
FILE=$(wget "http://www.eu.apache.org/dist/hadoop/common/stable/" -O - | grep -Po "hadoop-[0-9]+\.[0-9]+\.[0-9]+\.tar\.gz" | head -n 1 || true)
if [[ -z "$FILE" ]]; then
echo -e "\e[31mCould not determine the latest stable Hadoop release; check your internet connection.\e[0m"
exit 1
fi
URL=http://www.eu.apache.org/dist/hadoop/common/stable/$FILE
if [[ ! -f "$FILE" ]]; then
echo -e "\e[34mDownloading file \`$FILE'; this may take a few minutes.\e[0m"
wget -c "$URL" -O "$FILE"
DEL_FILE=true
else
echo -e "\e[34mFile \`$FILE' already there; not retrieving.\e[0m"
wget -c "$URL.mds" -O - | sed '7,$ d' | tr -d " \t\n\r" | tr ":" " " | awk '{t=$1;$1=$NF;$NF=t}1' | awk '$1=$1' OFS=" " | cut -c 5- | md5sum -c
DEL_FILE=false
fi
if [[ -d /usr/local/hadoop ]]; then
echo -e "\e[34m
Removing previous Hadoop installation directory;
\`/usr/local/hadoop'
\e[0m"
/usr/local/hadoop/sbin/stop-all.sh &>/dev/null || true
sudo rm -rf /usr/local/hadoop
fi
if [[ -d ~/hadoop_store ]]; then
echo -e "\e[34m
Removing previous Hadoop distributed file system directories;
\`~/hadoop_store/hdfs/namenode'
\`~/hadoop_store/hdfs/namenode'
\e[0m"
rm -rf ~/hadoop_store/hdfs/namenode
rm -rf ~/hadoop_store/hdfs/datanode
sudo rm -rf "/tmp/hadoop-$USER"
fi
echo -e "\e[34mExtracting file \`$FILE'; this may take a few minutes.\e[0m"
sudo tar xfz "$FILE" -C /usr/local
if [[ "$DEL_FILE" == "true" ]]; then
echo -e "\e[34mDeleting file \`$FILE'; to save storage space.\e[0m"
rm -rf "$FILE"
fi
sudo mv /usr/local/hadoop-*/ /usr/local/hadoop
sudo chown -R "$USER":"$USER" /usr/local/hadoop
ls -las /usr/local
sleep 1s
echo -e "\n\n"
clear
echo -e "\e[32mSTEP (4 of 6): Editing Configuration Files\e[0m"
echo -e "\e[32m###########################################\n\e[0m"
set -xv
echo -e "\e[34mPlease choose JDK8 as default.\e[0m"
sudo update-alternatives --config java
java -version
javac -version
echo -e "\e[34mAdding Global Variables to ~/.bashrc file.\e[0m"
cp ~/.bashrc ~/.bashrc.bak
# Remove blocks from any previous run so that re-running the script
# does not pile up duplicate entries in ~/.bashrc.
sed -i -e '/#SET JDK/,+1d' -e '/#HADOOP VARIABLES START/,+11d' ~/.bashrc
cat << 'EOT' >> ~/.bashrc
#SET JDK
export JAVA_HOME=$(readlink -f /usr/bin/javac | sed "s:/bin/javac::")
#HADOOP VARIABLES START
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar
#HADOOP VARIABLES END
EOT
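# Note: the exports above only take effect in new shells. Later steps
# therefore call the Hadoop binaries by their absolute paths, and the
# script sources ~/.bashrc once at the end for good measure.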
sed -i.bak -e 's/# export JAVA_HOME=.*/export JAVA_HOME=$(readlink -f \/usr\/bin\/java | sed "s:\/bin\/java::")/g' /usr/local/hadoop/etc/hadoop/hadoop-env.sh
sed -n -i.bak '/<configuration>/q;p' /usr/local/hadoop/etc/hadoop/core-site.xml
cat << EOT >> /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
EOT
sed -n -i.bak '/<configuration>/q;p' /usr/local/hadoop/etc/hadoop/yarn-site.xml
cat << EOT >> /usr/local/hadoop/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
EOT
sed -n -i.bak '/<configuration>/q;p' /usr/local/hadoop/etc/hadoop/mapred-site.xml
cat << EOT >> /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
</property>
</configuration>
EOT
mkdir -p ~/hadoop_store/hdfs/namenode
mkdir -p ~/hadoop_store/hdfs/datanode
sed -n -i.bak '/<configuration>/q;p' /usr/local/hadoop/etc/hadoop/hdfs-site.xml
cat << EOT >> /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:$HOME/hadoop_store/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:$HOME/hadoop_store/hdfs/datanode</value>
</property>
</configuration>
EOT
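# Optional check (uncomment to use): confirm that Hadoop picks up the
# configuration written above; `hdfs getconf' ships with Hadoop.
# /usr/local/hadoop/bin/hdfs getconf -confKey fs.defaultFS  # expect hdfs://localhost:9000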
set +xv
sleep 2s
echo -e "\n\n"
clear
echo -e "\e[32mSTEP (5 of 6): Formatting HDFS (namenode directory)\e[0m"
echo -e "\e[32m####################################################\n\e[0m"
sleep 2s
/usr/local/hadoop/bin/hdfs namenode -format
sleep 1s
echo -e "\n\n"
clear
echo -e "\e[32mSTEP (6 of 6): Strating Hadoop daemons\e[0m"
echo -e "\e[32m#######################################\n\e[0m"
sleep 2s
# start-all.sh is deprecated; start the HDFS and YARN daemons separately.
/usr/local/hadoop/sbin/start-dfs.sh
/usr/local/hadoop/sbin/start-yarn.sh
sleep 1s
echo -e "\n\n"
clear
jps
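# On a healthy single-node setup, jps should list NameNode, DataNode,
# SecondaryNameNode, ResourceManager and NodeManager (plus Jps itself).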
#google-chrome http://$HOSTNAME:50070 || firefox http://$HOSTNAME:50070 || midori http://$HOSTNAME:50070 || true
echo -e "\n\n"
set +euo pipefail
source ~/.bashrc &>/dev/null
clear
echo -e "\e[32m
Hadoop installation was successful!
Open a new terminal and execute:
$ hadoop
Watch the step-by-step video on YouTube:
https://youtu.be/gWkbPVNER5k
\e[0m"
#echo -e "Stopping Hadoop daemons\n"
#/usr/local/hadoop/sbin/stop-dfs.sh
#/usr/local/hadoop/sbin/stop-yarn.sh