
Loading Hive Bucketed Table

Question asked by sgudavalliR on Oct 19, 2017
Latest reply on Oct 19, 2017 by takeshi
Hello,

I am trying to populate the Hive clustered (bucketed) ORC table below.

CREATE TABLE llos (
  id string, x string, y string, z string, rg string,
  bucketkey string, cat int, scat int, usr string, org string,
  act int, ctm int, c1 string, c2 string, c3 string,
  d1 int, d2 int, doc binary)
PARTITIONED BY (cdt int, catpartkey string, usrpartkey string)
CLUSTERED BY (bucketkey) SORTED BY (cat, usr) INTO 25 BUCKETS
STORED AS ORC
LOCATION "/apps/spark/llos"
TBLPROPERTIES (
  'orc.compress' = 'ZLIB',
  'orc.create.index' = 'true',
  'orc.bloom.filter.columns' = 'cat,usr',
  'orc.bloom.filter.fpp' = '0.05',
  'orc.stripe.size' = '268435456',
  'orc.row.index.stride' = '50000');
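
For reference, after a successful bucketed load each partition directory under the table LOCATION should contain exactly one file per bucket, 25 here. A quick sanity check from the Hive CLI (a sketch; the partition values are the ones from the insert below):

-- Sketch: list one loaded partition; a correctly bucketed load should show
-- exactly 25 files in it, one per bucket.
dfs -ls /apps/spark/llos/cdt=20171002/catpartkey=others/usrpartkey=ahpUsers;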


And now the insert:

INSERT INTO TABLE auditlogsv3 PARTITION (cdt, catpartkey, usrpartkey)
SELECT id, chn, ht, br, rg,
       CASE WHEN cat > 9 THEN concat(cast(cat as string), "$", usr)
            ELSE concat("0", cast(cat as string), "$", usr) END AS bucketkey,
       cat, scat, usr, org, act, ctm, c1, c2, c3, d1, d2, doc,
       cdt, catpartkey, usrpartkey
FROM auditlogsv2
WHERE cdt = 20171002 AND catpartkey = 'others' AND usrpartkey = 'ahpUsers'
DISTRIBUTE BY bucketkey
SORT BY bucketkey, cat, usr
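
As I understand it (a sketch of the alternative, not something I have verified on this cluster), with hive.enforce.bucketing=true and hive.enforce.sorting=true (see my settings below) Hive derives the reducer count (25, the bucket count) and the distribution and sort order from the table's CLUSTERED BY ... SORTED BY spec, so the same insert could also be written without the manual clauses:

-- Sketch: relies on hive.enforce.bucketing/hive.enforce.sorting being set,
-- in which case Hive itself distributes on the bucket column and sorts each
-- bucket by (cat, usr) per the table definition.
INSERT INTO TABLE auditlogsv3 PARTITION (cdt, catpartkey, usrpartkey)
SELECT id, chn, ht, br, rg,
       CASE WHEN cat > 9 THEN concat(cast(cat as string), "$", usr)
            ELSE concat("0", cast(cat as string), "$", usr) END AS bucketkey,
       cat, scat, usr, org, act, ctm, c1, c2, c3, d1, d2, doc,
       cdt, catpartkey, usrpartkey
FROM auditlogsv2
WHERE cdt = 20171002 AND catpartkey = 'others' AND usrpartkey = 'ahpUsers';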



I am hitting the error below in the last reduce task. Can you please help?
I just added a new disk to the cluster to make more space available, so cluster-wide free space should not be the issue. I am lost here.

Here are my settings:

SET hive.exec.max.dynamic.partitions.pernode=2000;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions=2000;
SET hive.enforce.sorting=true;
SET hive.enforce.bucketing=true;
SET hive.orc.splits.include.file.footer=true;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx5000m;
SET mapred.job.shuffle.input.buffer.percent=0.4;
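
One thing I noticed while collecting these: mapred.job.shuffle.input.buffer.percent is the deprecated Hadoop 1.x name; on Hadoop 2.x the same knob is spelled as follows (identical semantics, only the key is renamed):

-- Sketch: current mapreduce.* name for the deprecated mapred.* key above.
SET mapreduce.reduce.shuffle.input.buffer.percent=0.4;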

2017-10-19 23:26:42,754 INFO [IPC Server handler 18 on 46711] org.apache.hadoop.mapred.TaskAttemptListenerImpl: Progress of TaskAttempt attempt_1508162012352_0153_r_000009_0 is : 0.0
2017-10-19 23:26:42,756 FATAL [IPC Server handler 26 on 46711] org.apache.hadoop.mapred.TaskAttemptListenerImpl: Task: attempt_1508162012352_0153_r_000009_0 - exited : org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in InMemoryMerger - Thread to merge in-memory shuffled map-outputs
    at org.apache.hadoop.mapreduce.task.reduce.DirectShuffle.run(DirectShuffle.java:128)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376)
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
    at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: java.io.IOException: 2065.9237.775826 /var/mapr/local/bossrnd113059/mapred/nodeManager/spill/job_1508162012352_0153/attempt_1508162012352_0153_r_000009_0/map_6.out (No space left on device)
    at com.mapr.fs.Inode.throwIfFailed(Inode.java:387)
    at com.mapr.fs.Inode.flushPages(Inode.java:503)
    at com.mapr.fs.Inode.releaseDirty(Inode.java:581)
    at com.mapr.fs.MapRFsOutStream.dropCurrentPage(MapRFsOutStream.java:73)
    at com.mapr.fs.MapRFsOutStream.write(MapRFsOutStream.java:85)
    at com.mapr.fs.MapRFsDataOutputStream.write(MapRFsDataOutputStream.java:39)
    at org.apache.hadoop.mapred.IFileOutputStream.write(IFileOutputStream.java:94)
    at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.write(FSDataOutputStream.java:58)
    at java.io.DataOutputStream.write(DataOutputStream.java:107)
    at org.apache.hadoop.mapred.IFile$Writer.append(IFile.java:288)
    at org.apache.hadoop.mapred.Merger.writeFile(Merger.java:210)
    at org.apache.hadoop.mapreduce.task.reduce.DirectShuffleMergeManagerImpl$InMemoryMerger.merge(DirectShuffleMergeManagerImpl.java:491)
    at org.apache.hadoop.mapreduce.task.reduce.MergeThread.run(MergeThread.java:94)
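
The failing path sits under /var/mapr/local/bossrnd113059, i.e. the node-local MapR volume that holds shuffle spill for that one node, so my guess is that a new disk elsewhere in the cluster does not relieve this node. A diagnostic sketch from the Hive CLI (host name copied from the trace; whether this volume really is the bottleneck is exactly my question):

-- Sketch: show how much spill data the node-local volume currently holds;
-- a MapR local volume's data lives only on that node's disks, not cluster-wide.
dfs -du -s /var/mapr/local/bossrnd113059;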
