NYC Crime Dataset Analysis
ssh your_netid@hpc.nyu.edu
Enter your_netid@hpc.nyu.edu, and then click “Open” at the bottom.
Use scp to copy the dataset from the local system to dumbo, then load it into HDFS:
hadoop fs -copyFromLocal NYPD_Complaint_Data_Historic.csv
# Locations used by the Hadoop Streaming shortcut below: HAS is the lib
# directory of the CDH 5.9.0 parcel and HSJ is the streaming jar's path
# relative to it. Both are exported so child processes can see them.
HAS='/opt/cloudera/parcels/CDH-5.9.0-1.cdh5.9.0.p0.23/lib'
HSJ='hadoop-mapreduce/hadoop-streaming.jar'
export HAS HSJ

# hfs -> "hadoop fs". The trailing space inside the alias value makes the
# shell also check the word that follows for alias expansion.
alias hfs='/usr/bin/hadoop fs '
# hjs -> "hadoop jar <streaming jar>". Single quotes keep $HAS/$HSJ
# unexpanded until the alias is actually used.
alias hjs='/usr/bin/hadoop jar $HAS/$HSJ'
# Submit cleandata_script.py to Spark with the raw complaint CSV as its
# argument, then merge the HDFS path cleandata.csv into one local file of
# the same name. (Presumably the script writes its result to cleandata.csv
# on HDFS -- confirm against cleandata_script.py.)
spark-submit cleandata_script.py NYPD_Complaint_Data_Historic.csv
hadoop fs -getmerge cleandata.csv cleandata.csv
# Same submit-then-merge pattern for another script. The quoted names look
# like placeholders to be replaced with a real script and output name.
spark-submit 'name_of_the_file.py' NYPD_Complaint_Data_Historic.csv
hadoop fs -getmerge 'output_file_name.out' 'output_file_name.out'
# Run a Python script locally, passing a .png file name as its argument.
# NOTE(review): presumably a plotting script that writes this image -- confirm.
python 'name_of_the_file.py' 'output_file_name.png'
'output_file_name.png'