sparksession中出现java错误,因为无法解析sparksession
我是新来的火花相关的工作。我试过下面的编码。 包装硬盘。模型
import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.SparkSession;
/*
* Analysis of the data using Spark SQL
*
*/
public class HrtDisDataAnalyze {
public HrtDisDataAnalyze() {
}
public static void main(String[] args) {
SparkConfAndCtxBuilder ctxBuilder = new SparkConfAndCtxBuilder();
JavaSparkContext jctx = ctxBuilder.loadSimpleSparkContext("Heart Disease Data Analysis App", "local");
JavaRDD<String> rows = jctx.textFile("file:///C:/Users/harpr/workspace/HrtDisDetection/src/resources/full_data_cleaned.csv");
String schemaString = "age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal num";
List<StructField> fields = new ArrayList<>();
for (String fieldName : schemaString.split(" ")) {
fields.add(DataTypes.createStructField(fieldName, DataTypes.StringType, true));
}
StructType schema = DataTypes.createStructType(fields);
JavaRDD<Row> rowRdd = rows.map(new Function<String, Row>() {
@Override
public Row call(String record) throws Exception {
String[] fields = record.split(",");
return RowFactory.create(fields[0],fields[1],fields[2],fields[3],fields[4],fields[5],fields[6],fields[7],fields[8],fields[9],fields[10],fields[11],fields[12],fields[13]);
}
});
SparkSession sparkSession = SparkSession.builder().config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").config("spark.kryo.registrator", "org.datasyslab.geospark.serde.GeoSparkKryoRegistrator").master("local[*]").appName("testGeoSpark").getOrCreate();
Dataset df = spark.read().csv("usr/local/eclipse1/eclipse/hrtdisdetection/src/resources/cleveland_data_raw.csv");
df.createOrReplaceTempView("heartDisData");
sparksession中出现以下错误 “无法解析org.apache.spark.sql.SparkSession$Builder类型。它是从必需的.class文件间接引用的。” 注意:我使用spark-2.1.0和scala 2.10。上面的代码是我在java eclipse中尝试的
# 1 楼答案
我为spark会话添加了jar文件。 错误被清除。 https://jar-download.com/?search_box=org.apache.spark%20spark.sql
# 2 楼答案
使用builder是没有意义的。 只需在开始时创建Spark会话,并从会话中调用Spark上下文即可