kylin跨segment精确去重count distinct解析
1.kylin是如何实现count distinct操作的?
在kylin元数据的cube_desc中,可以看到COUNT_DISTINCT存储使用的是bitmap
只需要计算bitmap中1的个数就能获取count distinct的值了
构建cube时聚合bitmap
BitmapAggregator
public class BitmapAggregator extends MeasureAggregator<BitmapCounter> { private static final BitmapCounterFactory bitmapFactory = RoaringBitmapCounterFactory.INSTANCE; private BitmapCounter sum; @Override public void reset() { sum = null; } @Override public void aggregate(BitmapCounter value) { // Here we optimize for case when group only has 1 value. In such situation, no // aggregation is needed, so we just keep a reference to the first value, saving // the cost of deserialization and merging. if (sum == null) { sum = value; return; } sum.orWith(value); } //bitmap进行 或 操作,得到聚合后的bitmap,这样即可实现count distinct相加 @Override public BitmapCounter aggregate(BitmapCounter value1, BitmapCounter value2) { BitmapCounter merged = bitmapFactory.newBitmap(); if (value1 != null) { merged.orWith(value1); } if (value2 != null) { merged.orWith(value2); } return merged; } @Override public BitmapCounter getState() { return sum; } @Override public int getMemBytesEstimate() { return sum == null ? 0 : sum.getMemBytes(); } }
RoaringBitmapCounter是BitmapCounter的实现类,RoaringBitmap是一种节约存储空间的bitmap,读者可自行了解
2.String类型如何转化成bitmap去重?
cube构建时会先构建字典,使每个字符串对应到唯一的Integer
3.String类型如何跨segment去重?
可以配置全局字典,全局字典会根据最新版本的字典快照进行增量构建,元数据中会保存全局字典的位置。
GlobalDictionaryBuilder调用AppendTrieDictionaryBuilder构建,使用GlobalDictHDFSStore存储
//全局字典的存储位置:Kylin配置的hdfs工作目录 + resources/GlobalDict + hive表名 + 字段名
String baseDir = hdfsDir + "resources/GlobalDict" + dictInfo.getResourceDir() + "/";
GlobalDictHDFSStore
public class GlobalDictHDFSStore extends GlobalDictStore { static final Logger logger = LoggerFactory.getLogger(GlobalDictHDFSStore.class); static final String V1_INDEX_NAME = ".index"; public static final String V2_INDEX_NAME = ".index_v2"; public static final String VERSION_PREFIX = "version_"; static final int BUFFER_SIZE = 8 * 1024 * 1024; private final Path basePath; private final Configuration conf; private final FileSystem fileSystem; public GlobalDictHDFSStore(String baseDir) throws IOException { super(baseDir); this.basePath = new Path(baseDir); this.conf = HadoopUtil.getCurrentConfiguration(); this.fileSystem = HadoopUtil.getFileSystem(baseDir); } // Previously we put slice files and index file directly in base directory, // should migrate to the new versioned layout private void migrateOldLayout() throws IOException { FileStatus[] sliceFiles = fileSystem.listStatus(basePath, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith(IndexFormatV1.SLICE_PREFIX); } }); Path indexFile = new Path(basePath, V1_INDEX_NAME); if (fileSystem.exists(indexFile) && sliceFiles.length > 0) { // old layout final long version = System.currentTimeMillis(); Path tempDir = new Path(basePath, "tmp_" + VERSION_PREFIX + version); Path versionDir = getVersionDir(version); logger.info("Convert global dict at {} to new layout with version {}", basePath, version); fileSystem.mkdirs(tempDir); // convert to new layout try { // copy index and slice files to temp FileUtil.copy(fileSystem, indexFile, fileSystem, tempDir, false, conf); for (FileStatus sliceFile : sliceFiles) { FileUtil.copy(fileSystem, sliceFile.getPath(), fileSystem, tempDir, false, conf); } // rename fileSystem.rename(tempDir, versionDir); // delete index and slices files in base dir fileSystem.delete(indexFile, false); for (FileStatus sliceFile : sliceFiles) { fileSystem.delete(sliceFile.getPath(), true); } } finally { if (fileSystem.exists(tempDir)) { fileSystem.delete(tempDir, 
true); } } } } @Override void prepareForWrite(String workingDir, boolean isGlobal) throws IOException { if (!fileSystem.exists(basePath)) { logger.info("Global dict at {} doesn't exist, create a new one", basePath); fileSystem.mkdirs(basePath); } migrateOldLayout(); logger.trace("Prepare to write Global dict at {}, isGlobal={}", workingDir, isGlobal); Path working = new Path(workingDir); if (fileSystem.exists(working)) { fileSystem.delete(working, true); logger.trace("Working directory {} exits, delete it first", working); } // when build dict, copy all data into working dir and work on it, avoiding suddenly server crash made data corrupt Long[] versions = listAllVersions(); if (versions.length > 0 && isGlobal) { Path latestVersion = getVersionDir(versions[versions.length - 1]); FileUtil.copy(fileSystem, latestVersion, fileSystem, working, false, true, conf); } else { fileSystem.mkdirs(working); } } //获取目录下的所有版本的字典快照 @Override public Long[] listAllVersions() throws IOException { if (!fileSystem.exists(basePath)) { return new Long[0]; // for the removed SegmentAppendTrieDictBuilder } FileStatus[] versionDirs = fileSystem.listStatus(basePath, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith(VERSION_PREFIX); } }); TreeSet<Long> versions = new TreeSet<>(); for (int i = 0; i < versionDirs.length; i++) { Path path = versionDirs[i].getPath(); versions.add(Long.parseLong(path.getName().substring(VERSION_PREFIX.length()))); } return versions.toArray(new Long[versions.size()]); } @Override public Path getVersionDir(long version) { return new Path(basePath, VERSION_PREFIX + version); } //根据version获取对应版本的字典快照 @Override public GlobalDictMetadata getMetadata(long version) throws IOException { Path versionDir = getVersionDir(version); FileStatus[] indexFiles = fileSystem.listStatus(versionDir, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith(V1_INDEX_NAME); } }); 
checkState(indexFiles.length == 1, "zero or more than one index file found: %s", Arrays.toString(indexFiles)); IndexFormat format; String indexFile = indexFiles[0].getPath().getName(); if (V2_INDEX_NAME.equals(indexFile)) { format = new IndexFormatV2(fileSystem, conf); } else if (V1_INDEX_NAME.equals(indexFile)) { format = new IndexFormatV1(fileSystem, conf); } else { throw new RuntimeException("Unknown index file: " + indexFile); } return format.readIndexFile(versionDir); } @Override public AppendDictSlice readSlice(String directory, String sliceFileName) throws IOException { Path path = new Path(directory, sliceFileName); logger.trace("read slice from {}", path); try (FSDataInputStream input = fileSystem.open(path, BUFFER_SIZE)) { return AppendDictSlice.deserializeFrom(input); } } @Override public String writeSlice(String workingDir, AppendDictSliceKey key, AppendDictNode slice) throws IOException { //write new slice String sliceFile = IndexFormatV2.sliceFileName(key); Path path = new Path(workingDir, sliceFile); logger.trace("write slice with key {} into file {}", key, path); try (FSDataOutputStream out = fileSystem.create(path, true, BUFFER_SIZE)) { byte[] bytes = slice.buildTrieBytes(); out.write(bytes); } return sliceFile; } @Override public void deleteSlice(String workingDir, String sliceFileName) throws IOException { Path path = new Path(workingDir, sliceFileName); logger.trace("delete slice at {}", path); if (fileSystem.exists(path)) { fileSystem.delete(path, false); } } @Override public void commit(String workingDir, GlobalDictMetadata metadata, boolean isAppendDictGlobal) throws IOException { Path workingPath = new Path(workingDir); // delete v1 index file Path oldIndexFile = new Path(workingPath, V1_INDEX_NAME); if (fileSystem.exists(oldIndexFile)) { fileSystem.delete(oldIndexFile, false); } // write v2 index file IndexFormat index = new IndexFormatV2(fileSystem, conf); index.writeIndexFile(workingPath, metadata); index.sanityCheck(workingPath, 
metadata); // copy working dir to newVersion dir Path newVersionPath = new Path(basePath, VERSION_PREFIX + System.currentTimeMillis()); fileSystem.rename(workingPath, newVersionPath); cleanUp(isAppendDictGlobal); } // Check versions count, delete expired versions private void cleanUp(boolean isAppendDictGlobal) throws IOException { long timestamp = System.currentTimeMillis(); if (isAppendDictGlobal) { Long[] versions = listAllVersions(); for (int i = 0; i < versions.length - maxVersions; i++) { if (versions[i] + versionTTL < timestamp) { fileSystem.delete(getVersionDir(versions[i]), true); } } } else { FileStatus[] segmentDictDirs = fileSystem.listStatus(basePath.getParent()); for (FileStatus fileStatus : segmentDictDirs) { String filePath = fileStatus.getPath().getName(); Long version = Long.parseLong(filePath.split("_")[1]); if (version + versionTTL < timestamp) { fileSystem.delete(new Path(basePath.getParent() + "/" + filePath), true); } } } } @Override public String copyToAnotherMeta(KylinConfig srcConfig, KylinConfig dstConfig) throws IOException { if (baseDir.contains("resources/SegmentDict")) { logger.info("SegmentAppendTrieDict needn't to copy"); return baseDir; } checkArgument(baseDir.startsWith(srcConfig.getHdfsWorkingDirectory()), "Please check why current directory {} doesn't belong to source working directory {}", baseDir, srcConfig.getHdfsWorkingDirectory()); final String dstBaseDir = baseDir.replaceFirst(srcConfig.getHdfsWorkingDirectory(), dstConfig.getHdfsWorkingDirectory()); Long[] versions = listAllVersions(); if (versions.length == 0) { // empty dict, nothing to copy return dstBaseDir; } Path srcVersionDir = getVersionDir(versions[versions.length - 1]); Path dstVersionDir = new Path(srcVersionDir.toString().replaceFirst(srcConfig.getHdfsWorkingDirectory(), dstConfig.getHdfsWorkingDirectory())); FileSystem dstFS = dstVersionDir.getFileSystem(conf); if (dstFS.exists(dstVersionDir)) { dstFS.delete(dstVersionDir, true); } FileUtil.copy(fileSystem, 
srcVersionDir, dstFS, dstVersionDir, false, true, conf); return dstBaseDir; } public interface IndexFormat { GlobalDictMetadata readIndexFile(Path dir) throws IOException; void writeIndexFile(Path dir, GlobalDictMetadata metadata) throws IOException; void sanityCheck(Path dir, GlobalDictMetadata metadata) throws IOException; } public static class IndexFormatV1 implements IndexFormat { static final String SLICE_PREFIX = "cached_"; protected final FileSystem fs; protected final Configuration conf; public IndexFormatV1(FileSystem fs, Configuration conf) { this.fs = fs; this.conf = conf; } @Override public GlobalDictMetadata readIndexFile(Path dir) throws IOException { Path indexFile = new Path(dir, V1_INDEX_NAME); try (FSDataInputStream in = fs.open(indexFile)) { int baseId = in.readInt(); int maxId = in.readInt(); int maxValueLength = in.readInt(); int nValues = in.readInt(); String converterName = in.readUTF(); BytesConverter converter; try { converter = ClassUtil.forName(converterName, BytesConverter.class).getDeclaredConstructor().newInstance(); } catch (Exception e) { throw new RuntimeException("Fail to instantiate BytesConverter: " + converterName, e); } int nSlices = in.readInt(); TreeMap<AppendDictSliceKey, String> sliceFileMap = new TreeMap<>(); for (int i = 0; i < nSlices; i++) { AppendDictSliceKey key = new AppendDictSliceKey(); key.readFields(in); sliceFileMap.put(key, sliceFileName(key)); } // make sure first key is always "" String firstFile = sliceFileMap.remove(sliceFileMap.firstKey()); sliceFileMap.put(AppendDictSliceKey.START_KEY, firstFile); return new GlobalDictMetadata(baseId, maxId, maxValueLength, nValues, converter, sliceFileMap); } } //only for test @Override public void writeIndexFile(Path dir, GlobalDictMetadata metadata) throws IOException { Path indexFile = new Path(dir, V1_INDEX_NAME); try (FSDataOutputStream out = fs.create(indexFile, true)) { out.writeInt(metadata.baseId); out.writeInt(metadata.maxId); 
out.writeInt(metadata.maxValueLength); out.writeInt(metadata.nValues); out.writeUTF(metadata.bytesConverter.getClass().getName()); out.writeInt(metadata.sliceFileMap.size()); for (Map.Entry<AppendDictSliceKey, String> entry : metadata.sliceFileMap.entrySet()) { entry.getKey().write(out); } } } @Override public void sanityCheck(Path dir, GlobalDictMetadata metadata) throws IOException { throw new UnsupportedOperationException("sanityCheck V1 format is no longer supported"); } public static String sliceFileName(AppendDictSliceKey key) { return SLICE_PREFIX + key; } } public static class IndexFormatV2 extends IndexFormatV1 { static final String SLICE_PREFIX = "cached_"; static final int MINOR_VERSION_V1 = 0x01; protected IndexFormatV2(FileSystem fs, Configuration conf) { super(fs, conf); } @Override public GlobalDictMetadata readIndexFile(Path dir) throws IOException { Path indexFile = new Path(dir, V2_INDEX_NAME); try (FSDataInputStream in = fs.open(indexFile)) { byte minorVersion = in.readByte(); // include a header to allow minor format changes if (minorVersion != MINOR_VERSION_V1) { throw new RuntimeException("Unsupported minor version " + minorVersion); } int baseId = in.readInt(); int maxId = in.readInt(); int maxValueLength = in.readInt(); int nValues = in.readInt(); String converterName = in.readUTF(); BytesConverter converter; try { converter = ClassUtil.forName(converterName, BytesConverter.class).getDeclaredConstructor().newInstance(); } catch (Exception e) { throw new RuntimeException("Fail to instantiate BytesConverter: " + converterName, e); } int nSlices = in.readInt(); TreeMap<AppendDictSliceKey, String> sliceFileMap = new TreeMap<>(); for (int i = 0; i < nSlices; i++) { AppendDictSliceKey key = new AppendDictSliceKey(); key.readFields(in); String sliceFileName = in.readUTF(); sliceFileMap.put(key, sliceFileName); } return new GlobalDictMetadata(baseId, maxId, maxValueLength, nValues, converter, sliceFileMap); } } @Override public void 
writeIndexFile(Path dir, GlobalDictMetadata metadata) throws IOException { Path indexFile = new Path(dir, V2_INDEX_NAME); try (FSDataOutputStream out = fs.create(indexFile, true)) { out.writeByte(MINOR_VERSION_V1); out.writeInt(metadata.baseId); out.writeInt(metadata.maxId); out.writeInt(metadata.maxValueLength); out.writeInt(metadata.nValues); out.writeUTF(metadata.bytesConverter.getClass().getName()); out.writeInt(metadata.sliceFileMap.size()); for (Map.Entry<AppendDictSliceKey, String> entry : metadata.sliceFileMap.entrySet()) { entry.getKey().write(out); out.writeUTF(entry.getValue()); } } } @Override public void sanityCheck(Path dir, GlobalDictMetadata metadata) throws IOException { for (Map.Entry<AppendDictSliceKey, String> entry : metadata.sliceFileMap.entrySet()) { if (!fs.exists(new Path(dir, entry.getValue()))) { throw new RuntimeException("The slice file " + entry.getValue() + " for the key: " + entry.getKey() + " must be existed!"); } } } public static String sliceFileName(AppendDictSliceKey key) { return String.format(Locale.ROOT, "%s%d_%d", SLICE_PREFIX, System.currentTimeMillis(), key.hashCode()); } } }
笔者水平有限,只能结合自己的理解对大致过程进行简单梳理,如有错误请指出,欢迎交流讨论