È necessario utilizzare il filtro
package dataframe
import org.apache.spark.sql.SparkSession
/**
* @author [email protected]
*/
//
object DataFrameExample{
//
case class Employee(id: Integer, name: String, address: String, salary: Double, state: String,zip:Integer)
//
def main(args: Array[String]) {
val spark =
SparkSession.builder()
.appName("DataFrame-Basic")
.master("local[4]")
.getOrCreate()
import spark.implicits._
// create a sequence of case class objects
// (we defined the case class above)
val emp = Seq(
Employee(1, "vaquar khan", "111 algoinquin road chicago", 120000.00, "AZ",60173),
Employee(2, "Firdos Pasha", "1300 algoinquin road chicago", 2500000.00, "IL",50112),
Employee(3, "Zidan khan", "112 apt abcd timesqure NY", 50000.00, "NY",55490),
Employee(4, "Anwars khan", "washington dc", 120000.00, "VA",33245),
Employee(5, "Deepak sharma ", "rolling edows schumburg", 990090.00, "IL",60172),
Employee(6, "afaq khan", "saeed colony Bhopal", 1000000.00, "AZ",60173)
)
val employee=spark.sparkContext.parallelize(emp, 4).toDF()
employee.printSchema()
employee.show()
employee.select("state", "zip").show()
println("*** use filter() to choose rows")
employee.filter($"state".equalTo("IL")).show()
println("*** multi contidtion in filer || ")
employee.filter($"state".equalTo("IL") || $"state".equalTo("AZ")).show()
println("*** multi contidtion in filer && ")
employee.filter($"state".equalTo("AZ") && $"zip".equalTo("60173")).show()
}
}
fonte
2017-09-23 04:06:35
A giudicare da questa linea: 'scala> da pyspark.sql. colonna Importa colonna 'sembra che tu stia cercando di usare il codice pyspark quando sei in azione y usando scala –
@TonTorres Sì, è stato un errore che ho capito dopo aver postato questa domanda. facendo questo modifica ora. – dheee