Saturday, June 8, 2019

Course : Introduction to Spark SQL and DataFrames




# install python3

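brew install python3   # macOS with Homebrew; elsewhere use your package manager or https://www.python.org/downloads/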

# install spark

Download Spark from https://spark.apache.org/downloads.html and unpack it into the directory used as SPARK_HOME below.

# set environment variables


export SPARK_HOME="/Users/sri/Projects/Playground/spark/"
export PATH=$SPARK_HOME/bin:$PATH
export PYSPARK_DRIVER_PYTHON=jupyter
export PYSPRAK_DRIVER_PYTHON_OPTS='notebook'
export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH


# install pyspark

pip3 install pyspark

# install jupyter notebook
# (pip has to be run as the module: "python3 -m pip install ...";
#  "python3 -m install" fails because there is no module named "install")

python3 -m pip install jupyter

# launch the jupyter notebook


jupyter notebook





https://www.linkedin.com/learning/introduction-to-spark-sql-and-dataframes/load-data-into-dataframes-csv-files