/** * @author hao.wei */ @Service public class MissingHandleBizImpl implements MissingHandleBiz { private static final Logger logger = LoggerFactory.getLogger(MissingHandleBizImpl.class); /** 缺失值用该属性的平均值填充*/ @Override public Instances missingValuesFilledWithAvg(Instances instances, String incompatible) { try { // 属性个数(列) int dim = instances.numAttributes(); // 实例个数(行) int num = instances.numInstances(); logger.info("开始将平均值填充入缺失值..."); double[] meanV = new double[dim]; for (int line = 0; line < meanV.length; line++) { // 第i列平均值 meanV[line] = 0; // 实例个数 int count = 0; for (int row = 0; row < num; row++) { // 计算第i列平均值(缺失值 和 不合条件的值除外) if (!instances.instance(row).isMissing(line) && !instances.instance(row).toString(line).trim().contains(incompatible)) { meanV[line] += instances.instance(row).value(line); count++; } } meanV[line] = meanV[line] / count; logger.info("属性[{}]的平均值为[{}]", instances.attribute(line).name(), meanV[line]); for (int row = 0; row < num; row++) { // 平均值填充缺失值 和 不符合条件的值 if (instances.instance(row).isMissing(line) || instances.instance(row).toString(line).contains(incompatible)) { instances.instance(row).setValue(line, meanV[line]); } } } } catch (Exception e) { logger.error("将平均值填充入缺失值发生系统异常,错误信息:", e); } logger.info("平均值填充如缺失值结束..."); return instances; } /** 移除掉包含特殊值的属性的实例*/ @Override public Instances removeMismatchConditionData(Instances instances, String attribute, String incompatible) { try { logger.info("删除[{}]属性包含[{}]的实例", attribute, incompatible); // 属性个数(列) int dim = instances.numAttributes(); // 实例个数(行) int num = instances.numInstances(); for (int i = 0; i < dim; i++) { // 属性名称和须要处理的属性名相同 if (instances.attribute(i).name().equals(attribute)) { for (int j = 0; j < num; j++) { // 实例的该属性值包含不合条件值 删除该条实例(行) if (instances.instance(j).isMissing(i)|| instances.instance(j).toString(i).contains(incompatible)) { logger.info("删除的实例属性值为[{}]", instances.instance(j).toStringNoWeight()); instances.remove(j); j--; num--; } } } } } catch (Exception e) { logger.error("删除[{}]属性包含[{}]的实例发生系统异常,错误信息[{}]", attribute, incompatible, e); } return instances; } }