PySpark
Introduction

PySpark is the Python API for Apache Spark, a distributed data-processing engine. The examples below cover its two main entry points: the low-level RDD API via SparkContext, and the DataFrame API via SparkSession.
from pyspark import SparkContext, SparkConf

# Configure and create the SparkContext, the entry point for the RDD API
conf = SparkConf().setAppName("MyApp").setMaster("local")
sc = SparkContext(conf=conf)

# Load a text file as an RDD of lines
rdd = sc.textFile("path/to/file.txt")

# Transformation
rdd2 = rdd.filter(lambda x: "error" in x)
# Action
print(rdd2.count())
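Transformations such as filter are lazy: Spark only records them and computes nothing until an action runs. A minimal sketch chaining a second transformation onto rdd2 before an action; the lambda and take(5) are illustrative additions, not part of the original example:

# map is another lazy transformation; nothing executes yet
upper = rdd2.map(lambda x: x.upper())

# take(5) is an action: it triggers computation and returns the first 5 elements
print(upper.take(5))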
The DataFrame API, built on SparkSession, is the more common entry point for working with structured data:

from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("MyApp").getOrCreate()
df = spark.read.csv("path/to/file.csv", header=True, inferSchema=True)
df.show()
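A loaded DataFrame supports SQL-like operations such as filtering and aggregation. A minimal sketch, assuming hypothetical columns named "level" and "city" that are not in the original CSV example:

from pyspark.sql import functions as F

# Filter rows on an assumed "level" column, then count rows per assumed "city" column
errors = df.filter(df["level"] == "ERROR")
errors.groupBy("city").agg(F.count("*").alias("n")).show()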