HBase保存的各个字段意义解释
/×××××××××××××××××××××××××××××××××××××××××/
Author:xxx0624
HomePage:http://www.cnblogs.com/xxx0624/
/×××××××××××××××××××××××××××××××××××××××××/
Nutch 2.2.1 集成 HBase 0.94.25。原始映射文件可在 Nutch 的 conf 目录下的 gora-hbase-mapping.xml 中查看。
<!--
  Gora mapping for the HBase store: each <table> declares its column
  families, and each <class> maps the fields of a Java class onto a
  family (and, where given, a qualifier) of that table.
  The explanatory notes below are reader annotations written as XML
  comments so that the document stays well-formed.
-->
<gora-orm>

  <table name="webpage">
    <family name="p" maxVersions="1"/> <!-- This can also have params like compression, bloom filters -->
    <family name="f" maxVersions="1"/>
    <family name="s" maxVersions="1"/>
    <family name="il" maxVersions="1"/>
    <family name="ol" maxVersions="1"/>
    <family name="h" maxVersions="1"/>
    <family name="mtdt" maxVersions="1"/>
    <family name="mk" maxVersions="1"/>
  </table>

  <!-- The name attribute identifies each column family of the table. -->
  <!-- For example: f = fetch (download), s = score, il = inlink addresses, ol = outlink addresses, and so on. -->

  <class table="webpage" keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">

    <!-- fetch fields -->
    <field name="baseUrl" family="f" qualifier="bas"/> <!-- source URL -->
    <field name="status" family="f" qualifier="st"/>
    <field name="prevFetchTime" family="f" qualifier="pts"/>
    <field name="fetchTime" family="f" qualifier="ts"/> <!-- fetch (download) time -->
    <field name="fetchInterval" family="f" qualifier="fi"/>
    <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
    <field name="reprUrl" family="f" qualifier="rpr"/>
    <field name="content" family="f" qualifier="cnt"/> <!-- downloaded content -->
    <field name="contentType" family="f" qualifier="typ"/> <!-- type of the downloaded content -->
    <field name="protocolStatus" family="f" qualifier="prot"/>
    <field name="modifiedTime" family="f" qualifier="mod"/>
    <field name="prevModifiedTime" family="f" qualifier="pmod"/>
    <field name="batchId" family="f" qualifier="bid"/>

    <!-- parse fields -->
    <field name="title" family="p" qualifier="t"/> <!-- content title -->
    <field name="text" family="p" qualifier="c"/>
    <field name="parseStatus" family="p" qualifier="st"/>
    <field name="signature" family="p" qualifier="sig"/>
    <field name="prevSignature" family="p" qualifier="psig"/>

    <!-- score fields -->
    <field name="score" family="s" qualifier="s"/>
    <field name="headers" family="h"/>
    <field name="inlinks" family="il"/> <!-- inlink addresses -->
    <field name="outlinks" family="ol"/> <!-- outlink addresses -->
    <field name="metadata" family="mtdt"/>
    <field name="markers" family="mk"/>
  </class>

  <table name="host">
    <family name="mtdt" maxVersions="1"/>
    <family name="il" maxVersions="1"/>
    <family name="ol" maxVersions="1"/>
  </table>

  <class table="host" keyClass="java.lang.String" name="org.apache.nutch.storage.Host">
    <field name="metadata" family="mtdt"/>
    <field name="inlinks" family="il"/>
    <field name="outlinks" family="ol"/>
  </class>

</gora-orm>
keep moving...