1.3 KiB
1.3 KiB
title | updated | created |
---|---|---|
# jdbc | 2022-04-03 15:16:26Z | 2021-05-04 14:58:11Z |
Method A: load the JDBC driver by setting the PYSPARK_SUBMIT_ARGS environment variable
import os
# Pass --jars with the PostgreSQL JDBC jar to PySpark through PYSPARK_SUBMIT_ARGS.
os.environ.update(
    {'PYSPARK_SUBMIT_ARGS': '--jars file:/home/john/opt/jars/postgresql-42.2.5.jar pyspark-shell'}
)
Method B: load the JDBC driver from the command line when launching pyspark
# Start the PySpark shell with the PostgreSQL JDBC driver: --packages names the
# Maven coordinate, --driver-class-path points at the local jar.
PG_JDBC_JAR=/home/john/opt/jars/postgresql-42.2.5.jar
pyspark --driver-class-path "$PG_JDBC_JAR" --packages org.postgresql:postgresql:42.2.5
Using --driver-class-path alone (without --packages) also works.
from pyspark.sql import DataFrameReader, SparkSession
# Build (or fetch, via getOrCreate) a local SparkSession for the JDBC examples,
# with spark.sql.shuffle.partitions set to "4".
spark = (
    SparkSession.builder
    .master("local")
    .appName("jdbc data sources")
    .config("spark.sql.shuffle.partitions", "4")
    .getOrCreate()
)
Method 1: set each JDBC option separately with .option()
# JDBC settings for the public.company table, applied one .option() at a time.
jdbc_options = {
    "url": "jdbc:postgresql://172.17.0.2/postgres",
    "dbtable": "public.company",
    "user": "postgres",
    "password": "qw12aap",
    "driver": "org.postgresql.Driver",
}

company_reader = spark.read.format("jdbc")
for option_name, option_value in jdbc_options.items():
    company_reader = company_reader.option(option_name, option_value)

# Load the table into a DataFrame and print its rows.
df_company = company_reader.load()
df_company.show()
Method 2: pass all JDBC options in one .options() call, with the credentials embedded in the URL
# Read the company table with a single .options(...) call; the user and
# password travel in the JDBC URL query string instead of separate options.
dataframe = (
    spark.read.format("jdbc")
    .options(
        url="jdbc:postgresql://172.17.0.2/postgres?user=postgres&password=qw12aap",
        # Schema-qualify the table: "public" is a PostgreSQL schema, and Spark's
        # JDBC source has no "database" option that would select it.
        dbtable="public.company",
        driver="org.postgresql.Driver",
    )
    .load()
)
# Print the loaded rows.
dataframe.show()