Spark foreachPartition：把 DataFrame 数据插入到 MySQL

package com.waitingfy
import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import scala.collection.mutable.ListBuffer
/**
 * Example job: loads the `baidusong.gedan` and `baidusong.gedan_detail` tables through
 * Spark's JDBC source, joins them, counts songs per author, keeps the 100 authors with
 * the highest count, and writes those rows into the MySQL table `topSinger` from inside
 * `foreachPartition` using one batched JDBC insert per partition.
 *
 * NOTE(review): the JDBC URL and the root credentials are hard-coded in several places;
 * they are kept verbatim here because callers/deployments may depend on them.
 */
object foreachPartitionTest {

  /** One output row: an author name and the number of songs counted for that author. */
  case class TopSongAuthor(songAuthor:String, songCount:Long)

  /**
   * Opens a new JDBC connection to the `baidusong` database.
   *
   * @return a fresh connection; the caller is responsible for closing it (see [[release]])
   */
  def getConnection(): Connection = {
    DriverManager.getConnection("jdbc:mysql://localhost:3306/baidusong?user=root&password=root&useUnicode=true&characterEncoding=UTF-8")
  }

  /**
   * Closes the statement and then the connection; either argument may be `null`.
   *
   * A failure while closing the statement is printed and does not prevent the
   * connection from being closed.
   *
   * @param connection connection to close, or `null`
   * @param pstmt      prepared statement to close, or `null`
   */
  def release(connection: Connection, pstmt: PreparedStatement): Unit = {
    try {
      if (pstmt != null) {
        pstmt.close()
      }
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      if (connection != null) {
        connection.close()
      }
    }
  }

  /**
   * Inserts every element of `list` into `topSinger` as a single JDBC batch inside one
   * transaction (auto-commit is switched off and the batch is committed at the end).
   *
   * Failures are printed and not rethrown, so this is best-effort from the caller's
   * point of view. Nothing is done (no connection is opened) when `list` is empty.
   *
   * @param list rows to insert
   */
  def insertTopSong(list:ListBuffer[TopSongAuthor]):Unit ={
    // An empty partition has nothing to write; avoid opening a connection for it.
    if (list.nonEmpty) {
      // JDBC handles start as null so that `release` can run safely in `finally`
      // even when opening the connection or preparing the statement fails.
      var connect:Connection = null
      var pstmt:PreparedStatement = null
      try {
        connect = getConnection()
        connect.setAutoCommit(false)
        val sql = "insert into topSinger(song_author, song_count) values(?,?)"
        pstmt = connect.prepareStatement(sql)
        for(ele <- list){
          pstmt.setString(1, ele.songAuthor)
          pstmt.setLong(2,ele.songCount)
          pstmt.addBatch()
        }
        pstmt.executeBatch()
        connect.commit()
      }catch {
        case e:Exception => e.printStackTrace()
      }finally {
        release(connect, pstmt)
      }
    }
  }

  /** Loads one MySQL table as a DataFrame through Spark's JDBC data source. */
  private def readTable(spark: SparkSession, table: String): org.apache.spark.sql.DataFrame = {
    spark.read.format("jdbc")
      .option("url", "jdbc:mysql://localhost:3306")
      .option("dbtable", table)
      .option("user", "root")
      .option("password", "root")
      .option("driver", "com.mysql.jdbc.Driver")
      .load()
  }

  /**
   * Entry point: builds a local two-thread Spark session, computes the top 100 authors
   * by song count and writes them to MySQL, then stops the session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[2]")
      .appName("foreachPartitionTest")
      .getOrCreate()
    try {
      val gedanDF = readTable(spark, "baidusong.gedan")
//    gedanDF.show()
      val detailDF = readTable(spark, "baidusong.gedan_detail")
      val joinDF = gedanDF.join(detailDF, gedanDF.col("id") === detailDF.col("gedan_id"))
//    joinDF.show()
      import spark.implicits._
      val resultDF = joinDF.groupBy("song_author").agg(count("song_name").as("song_count")).orderBy($"song_count".desc).limit(100)
//    resultDF.show()
      // The parameter type is spelled out so the Scala-function overload of
      // foreachPartition is selected unambiguously (the untyped lambda can clash with
      // the Java ForeachPartitionFunction overload on Scala 2.12 builds).
      resultDF.foreachPartition((partitionOfRecords: Iterator[org.apache.spark.sql.Row]) => {
        // Buffer the partition's rows so they go to MySQL as one batch on one connection.
        val list = new ListBuffer[TopSongAuthor]
        partitionOfRecords.foreach(info => {
          val song_author = info.getAs[String]("song_author")
          val song_count = info.getAs[Long]("song_count")
          list.append(TopSongAuthor(song_author, song_count))
        })
        insertTopSong(list)
      })
    } finally {
      // Release Spark resources even when the job fails.
      spark.stop()
    }
  }
}

推荐文章

温暖的雪糕 · zh-cn:安装 WordPress « WordPress Codex

1 周前

大力的松鼠 · Mysql比较日期和时间 -

1 周前

豪情万千的上铺 · mysql 获取当前时间年月日然后进行比较_mysql 比较年月日

1 周前

果断的海豚 · MySQL数据库日期比较大小的实现方法 (msql数据库日期比较大小) – 后浪云

1 周前

曾经爱过的松树 · 批量 kill mysql 中运行时间长的sql - 思齐_

3 天前

傻傻的开水瓶 · 人生须“推敲”·重庆科技报数字报

4 月前

安静的香菇 · Accueil − Insee − Institut national de la statistique et des études économiques | Insee

4 月前

坐怀不乱的红金鱼 · AIGC 生成代码正流行，对程序员是好还是坏？-腾讯云开发者社区-腾讯云

7 月前

眼睛小的乌冬面 · OpenAI’s new GPT 3.5 Instruct | Next Idea Tech Blog

9 月前

豪爽的酱肘子 · Python读取CSV和解析json格式数据_python对csv文件中的数据切片得到什么类型的数据-CSDN博客

9 月前