** integer_list - The input data file used to count the odd and even numbers in the integer list
spark-submit --master yarn-client --executor-memory 512m --num-executors 3 --executor-cores 1 --driver-memory 512m Odd_Even_Count.py
# Count the odd and even integers in integer_list.txt using PySpark.
# Assumes `sc` (SparkContext) is provided by the Spark runtime.
textFile = sc.textFile("integer_list.txt")
# Strip surrounding whitespace from each line, then parse it as an integer.
ints = textFile.map(lambda x: x.strip()).map(lambda x: int(x))
# Count elements by parity; count() is an action, so each line triggers a job.
odd_count = ints.filter(lambda x: x % 2 != 0).count()
even_count = ints.filter(lambda x: x % 2 == 0).count()
# Parenthesized print works under both Python 2 and Python 3.
print("even number -> %s" % even_count)
print("odd number -> %s" % odd_count)
** dept_salary - The input data file used to compute the salary sum per department
spark-submit --master yarn-client --executor-memory 512m --num-executors 3 --executor-cores 1 --driver-memory 512m Dept_Average_Salary.py
[root@sandbox lab]# hadoop fs -ls /user/root/dept_sum.txt [dept_sum output file initialized from PySpark Script]
** shakespeare_100.txt - The input data file used to count the occurrences of each word
spark-submit --master yarn-client --executor-memory 512m --num-executors 3 --executor-cores 1 --driver-memory 512m Top_Words_Count.py
[root@sandbox lab]# hadoop fs -ls /user/root/Top_word_count_result.txt [output file initialized from PySpark Script]