455 B
455 B
title | updated | created |
---|---|---|
Files | 2021-05-04 14:58:11Z | 2021-05-04 14:58:11Z |
Files
Get remote files
from pyspark import SparkContext
from pyspark import SparkFiles
from pyspark.sql import SQLContext
# Fetch a remote CSV through Spark's file-distribution mechanism and load it
# into a DataFrame.
url = "https://raw.githubusercontent.com/.../data/adult.csv"
# SparkFiles.get() looks a file up by its bare name, so derive that name from
# the URL instead of repeating it as a second literal that could drift.
file_name = url.rsplit("/", 1)[-1]

# Reuse an already-running context (notebook, pyspark shell) when there is
# one; a bare SparkContext() raises ValueError in that situation.
sc = SparkContext.getOrCreate()
sc.addFile(url)

# NOTE(review): SQLContext is deprecated since Spark 3.0 in favour of
# SparkSession; kept here so `spark` stays the same type for any later code.
spark = SQLContext(sc)

# header=True takes column names from the first row; inferSchema=True makes
# Spark scan the data to pick column types instead of reading all as strings.
df = spark.read.csv(
    SparkFiles.get(file_name),
    header=True,
    inferSchema=True,
)
df.printSchema()