数据读取类 JdbcDataSource.java
ResultSetIterator 是 JdbcDataSource 的内部类,用于从数据库读取数据:
private class ResultSetIterator {
ResultSet resultSet;
Statement stmt = null;
List<String> colNames;
Iterator<Map<String, Object>> rSetIterator;
/**
 * Runs {@code query} on a forward-only, read-only statement and prepares the
 * row iterator returned by {@code getIterator()}.
 *
 * <p>When the statement produces no result set (for example it only yields an
 * update count), the statement is closed and an empty iterator is exposed.
 * Previously this path dereferenced the null result set and surfaced as a
 * misleading "Unable to execute query" failure.
 *
 * @param query the SQL text to execute
 */
public ResultSetIterator(String query) {
  try {
    Connection c = getConnection();
    stmt = c.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
    stmt.setFetchSize(batchSize);
    stmt.setMaxRows(maxRows);
    LOG.debug("Executing SQL: " + query);
    long start = System.currentTimeMillis();
    if (stmt.execute(query)) {
      resultSet = stmt.getResultSet();
    }
    LOG.trace("Time taken for sql :"
        + (System.currentTimeMillis() - start));
    // Column metadata only exists when the statement actually returned rows.
    if (resultSet != null) {
      colNames = readFieldNames(resultSet.getMetaData());
    }
  } catch (Exception e) {
    // Release the statement/result set before reporting, so a failed query
    // does not leave JDBC resources open.
    close();
    // NOTE(review): wrapAndThrow is presumably always throwing; if it ever
    // returns, the null check below falls back to an empty iterator.
    wrapAndThrow(SEVERE, e, "Unable to execute query: " + query);
  }
  if (resultSet == null) {
    // Nothing to iterate: free the statement now, since nothing else will.
    close();
    rSetIterator = new ArrayList<Map<String, Object>>().iterator();
    return;
  }
  // Lazy iterator over the open result set. hasNext() moves the JDBC cursor
  // on every call, so callers must pair each hasNext() with exactly one next().
  rSetIterator = new Iterator<Map<String, Object>>() {
    public boolean hasNext() {
      return hasnext();
    }

    public Map<String, Object> next() {
      return getARow();
    }

    public void remove() {
      /* do nothing */
    }
  };
}
/**
 * Exposes the row iterator that the constructor prepared.
 *
 * @return the iterator stored in {@code rSetIterator}
 */
private Iterator<Map<String, Object>> getIterator() {
  return this.rSetIterator;
}
/**
 * Reads the row the result set cursor currently points at.
 *
 * <p>With {@code convertType} off, every column is fetched with
 * {@code getObject}. Otherwise the JDBC type registered for the column in
 * {@code fieldNameVsType} selects the typed getter; columns without a
 * registered type are read as strings.
 *
 * @return a map from column name to value, or {@code null} when the result
 *         set has already been closed
 */
private Map<String, Object> getARow() {
  if (resultSet == null) {
    return null;
  }
  Map<String, Object> row = new HashMap<String, Object>();
  for (String column : colNames) {
    try {
      Object value;
      if (convertType) {
        Integer declared = fieldNameVsType.get(column);
        // Unregistered columns are treated as VARCHAR.
        int sqlType = (declared == null) ? Types.VARCHAR : declared;
        switch (sqlType) {
          case Types.INTEGER:
            value = resultSet.getInt(column);
            break;
          case Types.FLOAT:
            value = resultSet.getFloat(column);
            break;
          case Types.BIGINT:
            value = resultSet.getLong(column);
            break;
          case Types.DOUBLE:
            value = resultSet.getDouble(column);
            break;
          case Types.DATE:
            value = resultSet.getDate(column);
            break;
          case Types.BOOLEAN:
            value = resultSet.getBoolean(column);
            break;
          case Types.BLOB:
            value = resultSet.getBytes(column);
            break;
          default:
            value = resultSet.getString(column);
            break;
        }
      } else {
        // Use underlying database's type information
        value = resultSet.getObject(column);
      }
      row.put(column, value);
    } catch (SQLException e) {
      logError("Error reading data ", e);
      wrapAndThrow(SEVERE, e, "Error reading data from database");
    }
  }
  return row;
}
/**
 * Moves the result set cursor forward by one row.
 *
 * <p>Every call advances the cursor, so calling this twice without reading
 * the row in between skips a row. When the cursor runs past the last row, or
 * advancing fails, the result set and statement are closed.
 *
 * @return {@code true} if the cursor now points at a row, {@code false} if
 *         the result set is closed or exhausted
 */
private boolean hasnext() {
  if (resultSet == null) {
    return false;
  }
  try {
    boolean advanced = resultSet.next();
    if (!advanced) {
      close();
    }
    return advanced;
  } catch (SQLException e) {
    close();
    wrapAndThrow(SEVERE,e);
    return false;
  }
}
/**
 * Closes the result set and the statement and clears both fields.
 *
 * <p>The two resources are closed independently, so a failure while closing
 * the result set no longer prevents the statement from being closed. Failures
 * are logged and not rethrown.
 */
private void close() {
  try {
    if (resultSet != null) {
      resultSet.close();
    }
  } catch (Exception e) {
    logError("Exception while closing result set", e);
  } finally {
    resultSet = null;
  }
  try {
    if (stmt != null) {
      stmt.close();
    }
  } catch (Exception e) {
    logError("Exception while closing statement", e);
  } finally {
    stmt = null;
  }
}
ResultSet resultSet;
Statement stmt = null;
List<String> colNames;
Iterator<Map<String, Object>> rSetIterator;
/**
 * Runs {@code query} on a forward-only, read-only statement and prepares the
 * row iterator returned by {@code getIterator()}.
 *
 * <p>When the statement produces no result set (for example it only yields an
 * update count), the statement is closed and an empty iterator is exposed.
 * Previously this path dereferenced the null result set and surfaced as a
 * misleading "Unable to execute query" failure.
 *
 * @param query the SQL text to execute
 */
public ResultSetIterator(String query) {
  try {
    Connection c = getConnection();
    stmt = c.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
    stmt.setFetchSize(batchSize);
    stmt.setMaxRows(maxRows);
    LOG.debug("Executing SQL: " + query);
    long start = System.currentTimeMillis();
    if (stmt.execute(query)) {
      resultSet = stmt.getResultSet();
    }
    LOG.trace("Time taken for sql :"
        + (System.currentTimeMillis() - start));
    // Column metadata only exists when the statement actually returned rows.
    if (resultSet != null) {
      colNames = readFieldNames(resultSet.getMetaData());
    }
  } catch (Exception e) {
    // Release the statement/result set before reporting, so a failed query
    // does not leave JDBC resources open.
    close();
    // NOTE(review): wrapAndThrow is presumably always throwing; if it ever
    // returns, the null check below falls back to an empty iterator.
    wrapAndThrow(SEVERE, e, "Unable to execute query: " + query);
  }
  if (resultSet == null) {
    // Nothing to iterate: free the statement now, since nothing else will.
    close();
    rSetIterator = new ArrayList<Map<String, Object>>().iterator();
    return;
  }
  // Lazy iterator over the open result set. hasNext() moves the JDBC cursor
  // on every call, so callers must pair each hasNext() with exactly one next().
  rSetIterator = new Iterator<Map<String, Object>>() {
    public boolean hasNext() {
      return hasnext();
    }

    public Map<String, Object> next() {
      return getARow();
    }

    public void remove() {
      /* do nothing */
    }
  };
}
/**
 * Exposes the row iterator that the constructor prepared.
 *
 * @return the iterator stored in {@code rSetIterator}
 */
private Iterator<Map<String, Object>> getIterator() {
  return this.rSetIterator;
}
/**
 * Reads the row the result set cursor currently points at.
 *
 * <p>With {@code convertType} off, every column is fetched with
 * {@code getObject}. Otherwise the JDBC type registered for the column in
 * {@code fieldNameVsType} selects the typed getter; columns without a
 * registered type are read as strings.
 *
 * @return a map from column name to value, or {@code null} when the result
 *         set has already been closed
 */
private Map<String, Object> getARow() {
  if (resultSet == null) {
    return null;
  }
  Map<String, Object> row = new HashMap<String, Object>();
  for (String column : colNames) {
    try {
      Object value;
      if (convertType) {
        Integer declared = fieldNameVsType.get(column);
        // Unregistered columns are treated as VARCHAR.
        int sqlType = (declared == null) ? Types.VARCHAR : declared;
        switch (sqlType) {
          case Types.INTEGER:
            value = resultSet.getInt(column);
            break;
          case Types.FLOAT:
            value = resultSet.getFloat(column);
            break;
          case Types.BIGINT:
            value = resultSet.getLong(column);
            break;
          case Types.DOUBLE:
            value = resultSet.getDouble(column);
            break;
          case Types.DATE:
            value = resultSet.getDate(column);
            break;
          case Types.BOOLEAN:
            value = resultSet.getBoolean(column);
            break;
          case Types.BLOB:
            value = resultSet.getBytes(column);
            break;
          default:
            value = resultSet.getString(column);
            break;
        }
      } else {
        // Use underlying database's type information
        value = resultSet.getObject(column);
      }
      row.put(column, value);
    } catch (SQLException e) {
      logError("Error reading data ", e);
      wrapAndThrow(SEVERE, e, "Error reading data from database");
    }
  }
  return row;
}
/**
 * Moves the result set cursor forward by one row.
 *
 * <p>Every call advances the cursor, so calling this twice without reading
 * the row in between skips a row. When the cursor runs past the last row, or
 * advancing fails, the result set and statement are closed.
 *
 * @return {@code true} if the cursor now points at a row, {@code false} if
 *         the result set is closed or exhausted
 */
private boolean hasnext() {
  if (resultSet == null) {
    return false;
  }
  try {
    boolean advanced = resultSet.next();
    if (!advanced) {
      close();
    }
    return advanced;
  } catch (SQLException e) {
    close();
    wrapAndThrow(SEVERE,e);
    return false;
  }
}
/**
 * Closes the result set and the statement and clears both fields.
 *
 * <p>The two resources are closed independently, so a failure while closing
 * the result set no longer prevents the statement from being closed. Failures
 * are logged and not rethrown.
 */
private void close() {
  try {
    if (resultSet != null) {
      resultSet.close();
    }
  } catch (Exception e) {
    logError("Exception while closing result set", e);
  } finally {
    resultSet = null;
  }
  try {
    if (stmt != null) {
      stmt.close();
    }
  } catch (Exception e) {
    logError("Exception while closing statement", e);
  } finally {
    stmt = null;
  }
}
}
其中 List<String> colNames 来自结果集的元信息(ResultSetMetaData),保存的是各字段的列名(列标签);字段类型信息则由 fieldNameVsType 按列名查得。
colNames = readFieldNames(resultSet.getMetaData());
private List<String> readFieldNames(ResultSetMetaData metaData)
throws SQLException {
List<String> colNames = new ArrayList<String>();
int count = metaData.getColumnCount();
for (int i = 0; i < count; i++) {
colNames.add(metaData.getColumnLabel(i + 1));
}
return colNames;
Iterator<Map<String, Object>> rSetIterator 为数据迭代器:
// Lazy row iterator: hasNext() delegates to hasnext() and next() delegates to
// getARow(); remove() is a deliberate no-op.
rSetIterator = new Iterator<Map<String, Object>>() {
  public Map<String, Object> next() {
    return getARow();
  }

  public boolean hasNext() {
    return hasnext();
  }

  public void remove() {
    /* do nothing */
  }
};
从这里可以看出,Solr 自带的数据导入是采用迭代器的方式逐行读取数据的,可以避免数据表数据量过大时出现 OutOfMemoryError(内存溢出)异常。
网上推荐的那种类似分页的方式(分页读取数据,再添加到 Solr 索引库),个人感觉比较拙劣。当我们以编程的方式从数据库读取数据并添加到 Solr 索引库时,可以参考这种做法,采用原始的 JDBC 数据访问方式。有时间我再把代码贴出来分享。