标记一个博客怕找不到了:http://www.cnblogs.com/shishanyuan/p/4723713.html
第一种:
通过case class
package cn.lijie

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

/**
 * Spark SQL example 1: build a DataFrame from an RDD by mapping each CSV
 * line onto a case class, letting Spark infer the schema via reflection.
 *
 * Fixed: the original header comment was written as "/ * ... * /" which is
 * not a valid Scala comment and would not compile.
 *
 * Created by jie on 2017/7/31.
 */
object SparkSql01 {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // Identify as this user for Hadoop permission checks (useful on Windows).
    System.setProperty("user.name", "bigdata")

    // Each input line is "id,name,level"; split it and build a Game record.
    val rdd = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt").map { line =>
      val fields = line.split(",")
      Game(fields(0).toLong, fields(1), fields(2).toInt)
    }

    // toDF needs the implicit conversions from this SQLContext.
    import sqlContext.implicits._
    val df = rdd.toDF
    // registerTempTable / load are the Spark 1.x APIs (this project pins 1.6.1).
    df.registerTempTable("t_game")
    sqlContext.sql("select * from t_game where id <= 2").write.json("C:\\Users\\jie\\Desktop\\game.json")

    // Read the JSON back to verify the round trip.
    val newDf = sqlContext.load("C:\\Users\\jie\\Desktop\\game.json", "json")
    newDf.show
  }
}

/** One record of the input file: id, player name, level. */
case class Game(id: Long, name: String, level: Int)
第二种:
通过StructType
package cn.lijie

import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark SQL example 2: build a DataFrame by pairing an RDD[Row] with an
 * explicitly declared StructType schema.
 *
 * Fixed: the original mapped the raw text line directly into Row
 * (Row(x(0).toLong, x(1), x(2).toInt)) — but x is the whole line, so x(0)
 * is a Char, not the first CSV field. The line must be split on "," first,
 * exactly as in the case-class example. The original "/ * ... * /" header
 * comment was also not valid Scala.
 *
 * Created by jie on 2017/7/31.
 */
object SparkSql01 {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]")
    // Identify as this user for Hadoop permission checks (useful on Windows).
    System.setProperty("user.name", "bigdata")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    val rdd = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt")

    // Explicit schema; all columns nullable.
    // NOTE(review): the third column is named "age" here but "level" in the
    // case-class example — presumably the same field; confirm intended name.
    val schema = StructType(
      List(
        StructField("id", LongType, true),
        StructField("name", StringType, true),
        StructField("age", IntegerType, true)
      )
    )

    // BUG FIX: split each CSV line into fields before building the Row.
    val rowRDD = rdd.map { line =>
      val fields = line.split(",")
      Row(fields(0).toLong, fields(1), fields(2).toInt)
    }

    val df = sqlContext.createDataFrame(rowRDD, schema)
    // registerTempTable / load are the Spark 1.x APIs (this project pins 1.6.1).
    df.registerTempTable("t_game")
    sqlContext.sql("select * from t_game where id <= 2").write.json("C:\\Users\\jie\\Desktop\\game.json")

    // Read the JSON back to verify the round trip.
    val newDf = sqlContext.load("C:\\Users\\jie\\Desktop\\game.json", "json")
    newDf.show
  }
}

/** One record of the input file: id, player name, level. */
case class Game(id: Long, name: String, level: Int)
pom
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>spark-sparksql</groupId>
    <artifactId>spark-sql</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <scala.version>2.10.6</scala.version>
        <spark.version>1.6.1</spark.version>
        <hadoop.version>2.6.4</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-make:transitive</arg>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <!-- Strip signature files so the shaded jar is not rejected as tampered. -->
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>cn.lijie.SparkSql01</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
发布者:全栈程序员-站长,转载请注明出处:https://javaforall.net/218826.html原文链接:https://javaforall.net
