six month dormancy test
source data:
accountleg year_month amount
acc1A 2010-01 100
acc1A 2010-02 100
acc1A 2010-03 100
acc1A 2010-04 100
acc1A 2010-06 100
acc1A 2010-07 100
acc1A 2010-08 100
acc1A 2010-09 100
acc1A 2010-10 100
acc1A 2010-11 100
acc1A 2011-06 100
acc1A 2011-07 100
acc1A 2011-08 100
acc1A 2011-09 100
acc1A 2011-10 100
acc1A 2011-11 100
acc1A 2011-12 100
acc1A 2012-01 100
acc1A 2012-07 100
create table sixdormancy (accountleg string,year_month string,amount double) row format delimited fields terminated by '\t'; load data local inpath '/mnt/data/sixdormancy.txt' into table sixdormancy; --get the last row year_month drop table sixdormancy_lastmonth; create table sixdormancy_lastmonth as select *, lag(year_month) over(partition by accountleg order by year_month) as lastmonth from sixdormancy; create table sixdormancy_monthdiff as select *, (year(concat(year_month,'-01')) - year(concat(lastmonth,'-01')))*12 +month(concat(year_month,'-01'))- month(concat(lastmonth,'-01')) as monthdiff from sixdormancy_lastmonth; select accountleg from sixdormancy_monthdiff where monthdiff>5 group by accountleg; if 0.10 not support lag function, we can write one udf to do this, and then we can combine the calculation and filter and the udf.
package myudf; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import org.apache.hadoop.hive.ql.exec.UDF; public class dormancy extends UDF { String accountleg = ""; String predate = ""; boolean isDormancy = false; public boolean evaluate(String _accountleg, String _date) {
isDormancy=false; if (accountleg.equalsIgnoreCase(_accountleg)) { isDormancy = hasSixMonthsGap(predate, _date); } accountleg = _accountleg; predate = _date; return isDormancy; } boolean hasSixMonthsGap(String _sd, String _bd) { // issue yyyy-MM int year1 = Integer.parseInt(_bd.substring(1, 4)); int year2 = Integer.parseInt(_sd.substring(1, 4)); int month1 = Integer.parseInt(_bd.substring(5, 7)); int month2 = Integer.parseInt(_sd.substring(5, 7)); int cp = (year1 - year2) * 12 + (month1 - month2) + 1; if (cp > 7) // has dormancy return true; else return false; } public static void main(String[] args) { dormancy test = new dormancy(); // read data from source String filepath = "/mnt/data/sixdormancy.txt"; try { BufferedReader br = new BufferedReader(new FileReader(filepath)); String line; line = br.readLine(); String[] items = null; while (line != null) { // handle this line data items = line.split("\t"); System.out.print(line); System.out.print("\t"); System.out.println(test.evaluate(items[0], items[1])); line = br.readLine(); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (Exception ex) { ex.printStackTrace(); } } }
add jar /home/hadoop/workspace/myudf/bin/myudf.jar; create temporary function dormancy as "myudf.dormancy"; select *,dormancy(accountleg,year_month) from (select * from sixdormancy distribute by accountleg sort by accountleg, year_month) a;
Looking for a job working at Home about MSBI