Connecting Jupyter to Spark 2
At first there is only the python3 kernel; view its kernel spec:
cat $HOME/$USER/delab-venv/share/jupyter/kernels/python3/kernel.json
mkdir $HOME/$USER/delab-venv/share/jupyter/kernels/pyspark2
vi $HOME/$USER/delab-venv/share/jupyter/kernels/pyspark2/kernel.json
{
  "argv": [
    "python",
    "-m",
    "ipykernel_launcher",
    "-f",
    "{connection_file}"
  ],
  "display_name": "Pyspark 2",
  "language": "python",
  "env": {
    "PYSPARK_PYTHON": "/usr/bin/python3",
    "SPARK_HOME": "/opt/spark2/",
    "SPARK_OPTS": "--master yarn --conf spark.ui.port=0",
    "PYTHONPATH": "/opt/spark2/python/lib/py4j-0.10.7-src.zip:/opt/spark2/python/"
  }
}
jupyter kernelspec install $HOME/$USER/delab-venv/share/jupyter/kernels/pyspark2 --user
Open the JupyterLab UI in a browser and you will see the pyspark2 notebook.
from pyspark .sql import SparkSession
# Build (or reuse) a Hive-enabled Spark session named 'Demo' with master 'yarn'.
spark = (
    SparkSession.builder
    .enableHiveSupport()
    .appName('Demo')
    .master('yarn')
    .getOrCreate()
)

# Smoke-test the session: list the databases, then count the rows of
# retail_db.orders, printing each result.
for query in ('SHOW databases', 'SELECT count(1) FROM retail_db.orders'):
    spark.sql(query).show()