Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.NullPointerException
	at org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator.process(VectorMapJoinInnerBigOnlyLongOperator.java:405) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:966) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:939) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:158) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:966) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:939) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:136) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:966) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:939) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:125) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:812) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:845) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27) ~[hive-exec-3.1.2.jar:3.1.2]
	at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85) ~[hive-exec-3.1.2.jar:3.1.2]
	at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:45) ~[scala-library-2.12.15.jar:?]
	at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:179) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	at org.apache.spark.scheduler.Task.run(Task.scala:131) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:506) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1462) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:509) ~[spark-core_2.12-3.2.1.jar:3.2.1]
	... 3 more
The root cause behind this, shown by the underlying stack trace, is:
java.lang.RuntimeException: Map operator initialization failed: java.lang.ClassCastException: org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper cannot be cast to org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerDirectAccess
	at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.init(SparkMapRecordHandler.java:118)
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunction.call(HiveMapFunction.java:55)
	at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunction.call(HiveMapFunction.java:30)
	at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$7$1.apply(JavaRDDLike.scala:186)
	at org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$7$1.apply(JavaRDDLike.scala:186)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:801)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
	at org.apache.spark.scheduler.Task.run(Task.scala:121)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassCastException: org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper cannot be cast to org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerDirectAccess
	at org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashTable.<init>(VectorMapJoinOptimizedHashTable.java:92)
	at org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashMultiSet.<init>(VectorMapJoinOptimizedHashMultiSet.java:101)
	at org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedStringHashMultiSet.<init>(VectorMapJoinOptimizedStringHashMultiSet.java:61)
	at org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedCreateHashTable.createHashTable(VectorMapJoinOptimizedCreateHashTable.java:85)
	at org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinCommonOperator.setUpHashTable(VectorMapJoinCommonOperator.java:483)
	at org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinCommonOperator.completeInitializationOp(VectorMapJoinCommonOperator.java:461)
	at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:470)
	at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:400)
	at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:573)
	at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:525)
	at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:386)
	at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.init(SparkMapRecordHandler.java:109)
	... 18 more
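Reading the "Caused by" frames: the small-table container handed to the vectorized native map join is a row-mode HashMapWrapper, which does not implement MapJoinTableContainerDirectAccess, so the cast in the VectorMapJoinOptimizedHashTable constructor fails and the map operator never finishes initializing; the NullPointerException in the first trace is the downstream symptom. A minimal mitigation sketch follows, assuming the problem is a mismatch between the native vectorized map join path and the hash table implementation chosen at runtime. The three properties are standard Hive settings, but which one unblocks a given query is an assumption to verify against your environment:

    -- Option 1: keep the memory-optimized map join hash table enabled
    -- (its default is true; check whether it was overridden to false),
    -- so the container the vectorized join receives supports direct access:
    SET hive.mapjoin.optimized.hashtable=true;

    -- Option 2: fall back from the native vectorized map join to the
    -- row-mode map join, which can work with a HashMapWrapper:
    SET hive.vectorized.execution.mapjoin.native.enabled=false;

    -- Option 3 (last resort): stop converting this join into a map join,
    -- trading the failure for a slower shuffle join:
    SET hive.auto.convert.join=false;

Option 2 is the least invasive way to unblock the query; whichever setting is changed should be reverted once the underlying configuration mismatch is resolved.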