// Sample DataFrame with an integer column c1 and a string column c2;
// the filter examples further down operate on c2.
val df = sc.parallelize(Seq(
  (1, "Emailab"), (2, "Phoneab"), (3, "Faxab"), (4, "Mail"), (5, "Other"),
  (6, "MSL12"), (7, "MSL"), (8, "HCP"), (9, "HCP12")
)).toDF("c1", "c2")
I used the code below to filter rows from a DataFrame, and it worked for me (Spark 2.2).
// Entry point for the DataFrame API. SparkSession replaces the SQLContext
// constructor, which is deprecated since Spark 2.0; getOrCreate() returns the
// existing session if there is one instead of building a second one.
val spark = org.apache.spark.sql.SparkSession.builder().getOrCreate()

// Read a pipe-delimited CSV file whose first row holds the column names,
// asking Spark to infer the column types from the data.
// The trailing dots keep the chain valid when pasted line by line into the REPL.
val data = spark.read.format("csv").
  option("header", "true").
  option("delimiter", "|").
  option("inferSchema", "true").
  load("D:\\test.csv")

// Brings the $"columnName" interpolator into scope.
import spark.implicits._

// Keep only the rows whose "dept" column equals "IT".
val filter = data.filter($"dept" === "IT")
OR
// Alternative: keep only the rows whose "dept" column is different from "IT".
// `where` is an alias of `filter`; `=!=` is Column's inequality operator.
val filter = data.where($"dept" =!= "IT")
This works too. Concise and very similar to SQL.
// SQL-expression form: drop every row whose c2 starts with "MSL" or "HCP",
// then print the remaining rows.
df.where("c2 not like 'MSL%' and c2 not like 'HCP%'").show()
+---+-------+
| c1|     c2|
+---+-------+
|  1|Emailab|
|  2|Phoneab|
|  3|  Faxab|
|  4|   Mail|
|  5|  Other|
+---+-------+
// Keep the rows whose c2 matches neither regular expression; the two negated
// predicates are combined with Column's && operator.
// NOTE(review): rlike is a regex match, so "MSL" / "HCP" presumably match anywhere
// in the value, not only at the start — anchor with "^" if only prefixes should be
// excluded; confirm the intent.
val df1 = df.filter(not(df("c2").rlike("MSL")) && not(df("c2").rlike("HCP")))
This worked.
// Keep the rows whose c2 does not begin with "MSL" or "HCP": take the first
// three characters of c2 and reject the row when they equal one of the prefixes.
// Spark's substring position is 1-based, so the prefix starts at position 1.
df.filter(
  not(substring(col("c2"), 1, 3).isin("MSL", "HCP"))
)