Spark version: 3.4.4
spark-doris-connector version: spark-doris-connector-spark-3.4-25.2.0
Doris version: v4.0.1
Problem: when saving data, the reported error is so vague that the real cause cannot be determined from it. My guess is that a column in the Doris table is declared NOT NULL while the queried data contains null values for that column, so the rows cannot be written. It would be very helpful if the log were more detailed. The full error output is included below.
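A minimal sketch of how the null hypothesis could be checked on the Spark side before calling save(); the class, method, and column names here are hypothetical and not part of the failing job:

import java.util.Arrays;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.count;
import static org.apache.spark.sql.functions.when;

public class NullCheck {
    // Counts null values in each column that is declared NOT NULL in the target
    // Doris table, so the offending column can be spotted before the DataFrame
    // is handed to DataFrameWriter.save().
    // Assumes at least one column name is passed; names are hypothetical.
    public static void reportNulls(Dataset<Row> df, String... notNullColumns) {
        Column[] aggs = Arrays.stream(notNullColumns)
                .map(c -> count(when(col(c).isNull(), 1)).alias(c + "_null_count"))
                .toArray(Column[]::new);
        df.agg(aggs[0], Arrays.copyOfRange(aggs, 1, aggs.length)).show(false);
    }
}

Running something like reportNulls(resultDf, "id", "create_time") on the DataFrame right before the write would show which NOT NULL column actually contains nulls.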
【ERROR】2026-02-04 20:02:33.624 - Write failure cause:
Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 4) (192.168.31.226 executor 2): org.apache.doris.spark.exception.StreamLoadException: stream load failed, txnId: 155152, status: Fail, msg: [DATA_QUALITY_ERROR]too many filtered rows
    at org.apache.doris.spark.client.write.AbstractStreamLoadProcessor.lambda$buildReqAndExec$6(AbstractStreamLoadProcessor.java:421)
    at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
    at java.base/java.lang.Thread.run(Thread.java:840)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2790)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2726)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2725)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2725)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1211)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1211)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1211)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2989)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2928)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2917)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:976)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2258)
    at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:408)
    at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:382)
    at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:248)
    at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:360)
    at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:359)
    at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:248)
    at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43)
    at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43)
    at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:118)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:827)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488)
    at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
    at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
    at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
    at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:133)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:856)
    at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:311)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:247)
    at com.project.system.spark.etl.writer.DBWriter.processData(DBWriter.java:204)
    at com.project.system.trans.Trans.runTask(Trans.java:214)
    at com.project.system.trans.Trans.runTask(Trans.java:233)
    at com.project.system.trans.Trans.execute(Trans.java:121)
    at com.project.system.trans.Trans.runTask(Trans.java:186)
    at com.project.system.trans.Trans.runTask(Trans.java:233)
    at com.project.system.trans.Trans.execute(Trans.java:121)
    at com.project.system.EtlApplication.main(EtlApplication.java:219)
    at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
    at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:569)
    at org.apache.spark.deploy.worker.DriverWrapper$.main(DriverWrapper.scala:63)
    at org.apache.spark.deploy.worker.DriverWrapper.main(DriverWrapper.scala)
Caused by: org.apache.doris.spark.exception.StreamLoadException: stream load failed, txnId: 155152, status: Fail, msg: [DATA_QUALITY_ERROR]too many filtered rows
    at org.apache.doris.spark.client.write.AbstractStreamLoadProcessor.lambda$buildReqAndExec$6(AbstractStreamLoadProcessor.java:421)
    at java.util.concurrent.FutureTask.run(FutureTask.java:264)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
    at java.lang.Thread.run(Thread.java:840)