Kaynağa Gözat

搞定指标管理这一块哈

wukai 2 ay önce
ebeveyn
işleme
768df8c3a7

+ 6 - 0
jjt-biz/pom.xml

@@ -30,6 +30,12 @@
             <version>21.3.0</version>
             <scope>runtime</scope>
         </dependency>
+        <!--    数学函数库    -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-math3</artifactId>
+            <version>3.6.1</version>
+        </dependency>
 
         <dependency>
             <groupId>com.deepoove</groupId>

+ 43 - 0
jjt-biz/src/main/java/com/jjt/biz/controller/RipaMetricsController.java

@@ -3,12 +3,15 @@ package com.jjt.biz.controller;
 import com.jjt.biz.domain.RipaMetrics;
 import com.jjt.biz.service.IRipaMetricsService;
 import com.jjt.biz.vo.ObjMetrics;
+import com.jjt.biz.vo.ObjMetricsData;
 import com.jjt.common.annotation.Log;
 import com.jjt.common.core.controller.BaseController;
 import com.jjt.common.core.domain.AjaxResult;
 import com.jjt.common.core.page.TableDataInfo;
 import com.jjt.common.enums.BusinessType;
 import com.jjt.common.utils.poi.ExcelUtil;
+import com.jjt.utils.NormalityUtil;
+import com.jjt.utils.VarianceUtil;
 import io.swagger.annotations.Api;
 import io.swagger.annotations.ApiOperation;
 import org.springframework.web.bind.annotation.*;
@@ -77,6 +80,46 @@ public class RipaMetricsController extends BaseController {
         return success(ripaMetricsService.selectRipaMetricsByAutoId(autoId));
     }
 
+    @ApiOperation("数据校验")
+    @GetMapping(value = "/validate")
+
+    public AjaxResult validate(Long autoId) {
+        RipaMetrics ripaMetrics = ripaMetricsService.selectRipaMetricsByAutoId(autoId);
+        List<ObjMetricsData> dataList = ripaMetricsService.selectDataList(ripaMetrics.getObjMetricsId(), 1000L);
+        if (dataList.size() < 100) {
+            return error("数据量过少,无法校验;数据记录数:" + dataList.size());
+        }
+        double[] normalData = new double[dataList.size()];
+        for (int i = 0; i < dataList.size(); i++) {
+            normalData[i] = dataList.get(i).getData();
+        }
+        ripaMetrics.setNtr(NormalityUtil.comprehensiveNormalityUtil(normalData, 0.05));
+
+        // 将dataList平均分成两个double数组
+        int totalSize = dataList.size();
+        int halfSize = totalSize / 2;
+
+        // 如果是奇数,去除最后一条记录
+        if (totalSize % 2 != 0) {
+            totalSize -= 1;
+            halfSize = totalSize / 2;
+        }
+        double[] firstHalf = new double[halfSize];
+        double[] secondHalf = new double[halfSize];
+
+        for (int i = 0; i < halfSize; i++) {
+            firstHalf[i] = dataList.get(i).getData();
+            secondHalf[i] = dataList.get(i + halfSize).getData();
+        }
+        double fPValue = VarianceUtil.fTestForVariance(firstHalf, secondHalf);
+        if (fPValue > 0.05) {
+            ripaMetrics.setHov("方差齐性");
+        } else {
+            ripaMetrics.setHov("方差不齐");
+        }
+        return success(ripaMetrics);
+    }
+
     /**
      * 新增模型指标配置
      */

+ 13 - 0
jjt-biz/src/main/java/com/jjt/biz/mapper/RipaMetricsMapper.java

@@ -3,8 +3,10 @@ package com.jjt.biz.mapper;
 import com.baomidou.mybatisplus.core.mapper.BaseMapper;
 import com.jjt.biz.domain.RipaMetrics;
 import com.jjt.biz.vo.ObjMetrics;
+import com.jjt.biz.vo.ObjMetricsData;
 import com.jjt.common.annotation.DataSource;
 import com.jjt.common.enums.DataSourceType;
+import org.apache.ibatis.annotations.Param;
 
 import java.util.List;
 
@@ -65,9 +67,20 @@ public interface RipaMetricsMapper extends BaseMapper<RipaMetrics> {
 
     /**
      * 查询模型指标配置列表
+     *
      * @param objMetrics 指标模型
      * @return 模型指标配置集合
      */
     @DataSource(value = DataSourceType.SLAVE)
     List<ObjMetrics> selectRiskMetricsList(ObjMetrics objMetrics);
+
+    /**
+     * Queries the data rows of one metric.
+     *
+     * @param objMetricsId metric ID; exposed to the mapper statement under the parameter name {@code id}
+     * @param limit        number of data records; exposed to the mapper statement as {@code limit}
+     * @return list of data rows
+     */
+    @DataSource(value = DataSourceType.SLAVE)
+    List<ObjMetricsData> selectDataList(@Param("id") Long objMetricsId, @Param("limit") Long limit);
 }

+ 13 - 2
jjt-biz/src/main/java/com/jjt/biz/service/IRipaMetricsService.java

@@ -2,9 +2,9 @@ package com.jjt.biz.service;
 
 import com.jjt.biz.domain.RipaMetrics;
 import com.jjt.biz.vo.ObjMetrics;
+import com.jjt.biz.vo.ObjMetricsData;
 
 import java.util.List;
-import java.util.Map;
 
 /**
  * 模型指标配置Service接口
@@ -63,15 +63,26 @@ public interface IRipaMetricsService {
 
     /**
      * 查询模型指标配置列表
-     *@param objMetrics 指标模型
+     *
+     * @param objMetrics 指标模型
      * @return 模型指标配置集合
      */
     List<ObjMetrics> selectRiskMetricsList(ObjMetrics objMetrics);
 
     /**
      * 批量插入
+     *
      * @param metricsList 列表
      * @return 结果
      */
     void insertRipaMetricsList(List<RipaMetrics> metricsList);
+
+    /**
+     * Queries the data rows of one metric.
+     *
+     * @param objMetricsId metric ID
+     * @param limit        number of data records
+     * @return list of data rows
+     */
+    List<ObjMetricsData> selectDataList(Long objMetricsId, Long limit);
 }

+ 12 - 0
jjt-biz/src/main/java/com/jjt/biz/service/impl/RipaMetricsServiceImpl.java

@@ -4,6 +4,7 @@ import java.util.List;
 import java.util.Map;
 
 import com.jjt.biz.vo.ObjMetrics;
+import com.jjt.biz.vo.ObjMetricsData;
 import com.jjt.common.utils.DateUtils;
 import org.apache.ibatis.session.ExecutorType;
 import org.apache.ibatis.session.SqlSession;
@@ -121,4 +122,15 @@ public class RipaMetricsServiceImpl implements IRipaMetricsService {
             }
         }
     }
+
+    /**
+     * Queries the data rows of one metric by delegating to the mapper.
+     *
+     * @param objMetricsId metric ID
+     * @param limit        number of data records
+     * @return list of data rows
+     */
+    @Override
+    public List<ObjMetricsData> selectDataList(Long objMetricsId, Long limit) {
+        List<ObjMetricsData> rows = ripaMetricsMapper.selectDataList(objMetricsId, limit);
+        return rows;
+    }
 }

+ 15 - 0
jjt-biz/src/main/java/com/jjt/biz/vo/ObjMetricsData.java

@@ -0,0 +1,15 @@
+package com.jjt.biz.vo;
+
+import lombok.Data;
+
+/**
+ * Value object for one metric data row: the metric ID together with a single numeric value.
+ *
+ * @author wukai
+ * @date 2025/10/23 00:16
+ */
+@Data
+public class ObjMetricsData {
+    /** Metric ID this value belongs to. */
+    private Long objMetricsId;
+    /** The data value; nullable because it is a boxed {@code Double}. */
+    private Double data;
+}

+ 158 - 0
jjt-biz/src/main/java/com/jjt/utils/NormalityUtil.java

@@ -0,0 +1,158 @@
+package com.jjt.utils;
+
+import org.apache.commons.math3.distribution.NormalDistribution;
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest;
+
+import java.util.Arrays;
+
+/**
+ * 正态性检验工具类
+ *
+ * @author wukai
+ * @date 2025-10-23 15:18:50
+ */
+public class NormalityUtil {
+
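+    /**
+     * Note: a Shapiro-Wilk normality test is not implemented in this class (it would need a custom
+     * implementation or another library). The Apache Commons Math based tests below are used instead.
+     */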
+
+    /**
+     * Kolmogorov-Smirnov normality test.
+     * <p>
+     * The sample is compared against a normal distribution whose mean and standard deviation are
+     * taken from the sample itself.
+     *
+     * @param data sample to test
+     * @return p-value
+     */
+    public static double kolmogorovSmirnovTest(double[] data) {
+        // Reference distribution parameterised with the sample's own mean / standard deviation.
+        DescriptiveStatistics summary = new DescriptiveStatistics(data);
+        NormalDistribution reference =
+                new NormalDistribution(summary.getMean(), summary.getStandardDeviation());
+
+        return new KolmogorovSmirnovTest().kolmogorovSmirnovTest(reference, data);
+    }
+
+    /**
+     * Anderson-Darling normality test.
+     * <p>
+     * Standardises the sorted sample with its own mean and standard deviation and computes
+     * {@code A^2 = -n - (1/n) * sum_{i=1..n} (2i - 1) * [ln F(z_i) + ln(1 - F(z_{n+1-i}))]},
+     * where {@code F} is the standard normal CDF.
+     *
+     * @param data sample to test (not modified; a sorted copy is used)
+     * @return the test statistic {@code A^2} (not a p-value)
+     */
+    public static double andersonDarlingTest(double[] data) {
+        double[] sortedData = data.clone();
+        Arrays.sort(sortedData);
+
+        int n = sortedData.length;
+        DescriptiveStatistics stats = new DescriptiveStatistics(data);
+        double mean = stats.getMean();
+        double std = stats.getStandardDeviation();
+
+        double sum = 0.0;
+        for (int i = 0; i < n; i++) {
+            // Both order statistics must be standardised BEFORE the CDF is applied;
+            // the upper one previously divided the CDF value by std instead.
+            double zLower = (sortedData[i] - mean) / std;
+            double zUpper = (sortedData[n - 1 - i] - mean) / std;
+            double term = (2 * (i + 1) - 1)
+                    * (Math.log(cumulativeNormalDistribution(zLower))
+                    + Math.log(1 - cumulativeNormalDistribution(zUpper)));
+            sum += term;
+        }
+
+        return -n - sum / n;
+    }
+
+    /**
+     * Cumulative distribution function of the standard normal distribution.
+     *
+     * @param x point at which to evaluate the CDF
+     * @return P(X &lt;= x) for a standard normal X
+     */
+    private static double cumulativeNormalDistribution(double x) {
+        return new NormalDistribution().cumulativeProbability(x);
+    }
+
+    /**
+     * 综合正态性检验
+     *
+     * @param data  待检验数据
+     * @param alpha 显著性水平
+     */
+    public static String comprehensiveNormalityUtil(double[] data, double alpha) {
+        System.out.println("=== 正态性检验结果 ===");
+        System.out.printf("样本数量: %d\n", data.length);
+        System.out.printf("均值: %.4f\n", new DescriptiveStatistics(data).getMean());
+        System.out.printf("标准差: %.4f\n", new DescriptiveStatistics(data).getStandardDeviation());
+
+        // KS检验
+        double ksPValue = kolmogorovSmirnovTest(data);
+        System.out.printf("Kolmogorov-Smirnov检验 p-value: %.6f\n", ksPValue);
+        System.out.printf("KS检验结果: %s\n", ksPValue > alpha ? "符合正态分布" : "不符合正态分布");
+
+        // Anderson-Darling检验
+        double adStatistic = andersonDarlingTest(data);
+        System.out.printf("Anderson-Darling检验统计量: %.6f\n", adStatistic);
+
+        // 使用Apache Commons Math的检验工具
+        try {
+            // 为以下内容(假设采用KS检验代替):
+            double shapiroWilkPValue = kolmogorovSmirnovTest(data);
+            System.out.printf("模拟 Shapiro-Wilk 检验 p-value: %.6f\n", shapiroWilkPValue);
+            System.out.printf("检验结果: %s\n", shapiroWilkPValue > alpha ? "符合正态分布" : "不符合正态分布");
+            return shapiroWilkPValue > alpha ? "符合正态分布" : "不符合正态分布";
+        } catch (Exception e) {
+            e.printStackTrace();
+            return "校验失败";
+        }
+    }
+
+    /**
+     * Generates normally distributed test data.
+     *
+     * @param size number of values to generate
+     * @param mean mean of the distribution
+     * @param std  standard deviation of the distribution
+     * @return array of {@code size} random samples
+     */
+    public static double[] generateNormalData(int size, double mean, double std) {
+        double[] samples = new double[size];
+        NormalDistribution source = new NormalDistribution(mean, std);
+        Arrays.setAll(samples, idx -> source.sample());
+        return samples;
+    }
+
+    /**
+     * Generates uniformly distributed (i.e. non-normal) test data.
+     *
+     * @param size number of values to generate
+     * @param min  lower bound of the range
+     * @param max  upper bound of the range
+     * @return array of {@code size} random values computed as {@code min + Math.random() * (max - min)}
+     */
+    public static double[] generateUniformData(int size, double min, double max) {
+        double[] samples = new double[size];
+        Arrays.setAll(samples, idx -> min + Math.random() * (max - min));
+        return samples;
+    }
+
+
+    /**
+     * Demo entry point: runs the combined normality check on generated normal data,
+     * generated uniform data and a small fixed sample, printing the results.
+     */
+    public static void main(String[] args) {
+        // Significance level shared by all three runs.
+        final double alpha = 0.05;
+
+        // Generated test data.
+        double[] fromNormal = generateNormalData(100, 0, 1);
+        double[] fromUniform = generateUniformData(100, 0, 1);
+
+        // Fixed example sample.
+        double[] fixedSample = {
+                1.2, 2.3, 0.8, 1.5, 2.1, 1.8, 0.9, 1.3, 2.0, 1.7,
+                1.1, 2.2, 1.6, 0.7, 1.9, 1.4, 2.4, 1.0, 0.6, 2.5
+        };
+
+        System.out.println("正态分布数据检验:");
+        comprehensiveNormalityUtil(fromNormal, alpha);
+
+        System.out.println("\n均匀分布数据检验:");
+        comprehensiveNormalityUtil(fromUniform, alpha);
+
+        System.out.println("\n真实数据检验:");
+        comprehensiveNormalityUtil(fixedSample, alpha);
+    }
+}

+ 64 - 0
jjt-biz/src/main/java/com/jjt/utils/VarianceTestExample.java

@@ -0,0 +1,64 @@
+package com.jjt.utils;
+
+import java.util.Arrays;
+import java.util.List;
+
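+/**
+ * Demo program that exercises the variance-homogeneity tests of {@link VarianceUtil}
+ * (F-test, Levene test, Bartlett test) on generated samples.
+ *
+ * @author wukai
+ */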
+public class VarianceTestExample {
+    /**
+     * Demo entry point: generates three samples and prints the p-value and verdict of the
+     * F-test (first two samples), the Levene test and the Bartlett test (all three samples).
+     */
+    public static void main(String[] args) {
+        final double alpha = 0.05;
+
+        // Generated test data: (mean 10, std 2), (mean 12, std 2), (mean 11, std 3).
+        double[] sample1 = generateNormalData(30, 10, 2);
+        double[] sample2 = generateNormalData(30, 12, 2);
+        double[] sample3 = generateNormalData(30, 11, 3);
+        List<double[]> allSamples = Arrays.asList(sample1, sample2, sample3);
+
+        System.out.println("=== 方差齐性检验 ===");
+
+        // F-test (two samples).
+        System.out.println("\n1. F检验 - 样本1 vs 样本2:");
+        double pF = VarianceUtil.fTestForVariance(sample1, sample2);
+        System.out.printf("F检验 p-value: %.6f\n", pF);
+        System.out.printf("方差齐性: %s\n", pF > alpha ? "是" : "否");
+
+        // Levene test (several samples).
+        System.out.println("\n2. Levene检验 - 三个样本:");
+        double pLevene = VarianceUtil.leveneTest(allSamples);
+        System.out.printf("Levene检验 p-value: %.6f\n", pLevene);
+        System.out.printf("方差齐性: %s\n", pLevene > alpha ? "是" : "否");
+
+        // Bartlett test.
+        System.out.println("\n3. Bartlett检验 - 三个样本:");
+        double pBartlett = VarianceUtil.bartlettTest(allSamples);
+        System.out.printf("Bartlett检验 p-value: %.6f\n", pBartlett);
+        System.out.printf("方差齐性: %s\n", pBartlett > alpha ? "是" : "否");
+
+        // Analysis of variance (ANOVA) is not part of this demo.
+
+        // Guidance on which test to use.
+        System.out.println("\n=== 检验方法建议 ===");
+        System.out.println("• F检验: 适用于两个样本的方差比较");
+        System.out.println("• Levene检验: 对偏离正态性不敏感,推荐使用");
+        System.out.println("• Bartlett检验: 要求数据正态分布,但检验功效较高");
+    }
+
+    /**
+     * Generates normally distributed data with a basic Box-Muller transform.
+     *
+     * @param size number of values to generate
+     * @param mean mean of the distribution
+     * @param std  standard deviation of the distribution
+     * @return array of {@code size} finite random values
+     */
+    private static double[] generateNormalData(int size, double mean, double std) {
+        double[] data = new double[size];
+        for (int i = 0; i < size; i++) {
+            // Math.random() is in [0, 1); flipping it to (0, 1] keeps log(u1) finite
+            // (log(0) would produce an infinite or NaN sample).
+            double u1 = 1.0 - Math.random();
+            double u2 = Math.random();
+            double z0 = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
+            data[i] = mean + std * z0;
+        }
+        return data;
+    }
+}

+ 165 - 0
jjt-biz/src/main/java/com/jjt/utils/VarianceUtil.java

@@ -0,0 +1,165 @@
+package com.jjt.utils;
+
+import org.apache.commons.math3.distribution.FDistribution;
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * 方差检验工具类
+ * @author wukai
+ * @date 2025-10-23 15:15:03
+ */
+public class VarianceUtil {
+
+    /**
+     * F-test for equality of the variances of two samples.
+     * <p>
+     * The larger sample variance is placed in the numerator and the two-sided p-value is computed
+     * as twice the upper-tail probability, capped at 1.
+     * <p>
+     * Degenerate cases: when both variances are zero the variances are trivially equal and 1.0 is
+     * returned; when exactly one variance is zero the ratio is infinite and 0.0 is returned.
+     *
+     * @param sample1 data of sample 1 (at least 2 observations)
+     * @param sample2 data of sample 2 (at least 2 observations)
+     * @return two-sided p-value in [0, 1]
+     * @throws IllegalArgumentException if a sample is null or has fewer than 2 observations
+     */
+    public static double fTestForVariance(double[] sample1, double[] sample2) {
+        if (sample1 == null || sample2 == null || sample1.length < 2 || sample2.length < 2) {
+            throw new IllegalArgumentException("每个样本至少需要2个观测值");
+        }
+
+        double var1 = new DescriptiveStatistics(sample1).getVariance();
+        double var2 = new DescriptiveStatistics(sample2).getVariance();
+
+        // Make sure the larger variance ends up in the numerator.
+        boolean firstIsLarger = var1 >= var2;
+        double largerVar = firstIsLarger ? var1 : var2;
+        double smallerVar = firstIsLarger ? var2 : var1;
+        int df1 = (firstIsLarger ? sample1.length : sample2.length) - 1;
+        int df2 = (firstIsLarger ? sample2.length : sample1.length) - 1;
+
+        // Avoid 0/0 (NaN) and x/0 (Infinity) reaching the F distribution.
+        if (smallerVar == 0) {
+            return largerVar == 0 ? 1.0 : 0.0;
+        }
+
+        double fStatistic = largerVar / smallerVar;
+
+        // Two-sided p-value; with unequal degrees of freedom the doubled tail can exceed 1.
+        FDistribution fDist = new FDistribution(df1, df2);
+        double pValue = 2 * (1 - fDist.cumulativeProbability(fStatistic));
+
+        return Math.min(1.0, pValue);
+    }
+
+    /**
+     * Levene test for homogeneity of variance across several samples.
+     * <p>
+     * Works on the absolute deviations of each observation from its group mean and runs a
+     * one-way ANOVA style F-test on those deviations.
+     *
+     * @param samples data of the sample groups
+     * @return p-value
+     * @throws IllegalArgumentException if fewer than 2 groups are given
+     */
+    public static double leveneTest(List<double[]> samples) {
+        final int groupCount = samples.size();
+        if (groupCount < 2) {
+            throw new IllegalArgumentException("至少需要2个样本组");
+        }
+
+        // Pass 1: absolute deviations from each group's mean, plus their running total.
+        List<double[]> absDeviations = new ArrayList<>(groupCount);
+        double deviationTotal = 0;
+        int observationCount = 0;
+        for (double[] group : samples) {
+            double groupMean = new DescriptiveStatistics(group).getMean();
+            double[] dev = new double[group.length];
+            for (int j = 0; j < group.length; j++) {
+                dev[j] = Math.abs(group[j] - groupMean);
+                deviationTotal += dev[j];
+            }
+            observationCount += group.length;
+            absDeviations.add(dev);
+        }
+        double grandMean = deviationTotal / observationCount;
+
+        // Pass 2: between-group and within-group sums of squares of the deviations.
+        double ssBetween = 0;
+        double ssWithin = 0;
+        for (double[] dev : absDeviations) {
+            double groupDevMean = new DescriptiveStatistics(dev).getMean();
+            ssBetween += dev.length * Math.pow(groupDevMean - grandMean, 2);
+            for (double d : dev) {
+                ssWithin += Math.pow(d - groupDevMean, 2);
+            }
+        }
+
+        // F statistic = mean square between / mean square within.
+        int dfBetween = groupCount - 1;
+        int dfWithin = observationCount - groupCount;
+        double fStatistic = (ssBetween / dfBetween) / (ssWithin / dfWithin);
+
+        // Upper-tail probability of the F distribution.
+        return 1 - new FDistribution(dfBetween, dfWithin).cumulativeProbability(fStatistic);
+    }
+
+    /**
+     * Bartlett test for homogeneity of variance (assumes normally distributed data).
+     * <p>
+     * With group sizes {@code n_i}, group variances {@code s_i^2}, {@code N = sum n_i} and pooled
+     * variance {@code s_p^2 = sum (n_i - 1) s_i^2 / (N - k)}, the statistic is
+     * {@code [(N - k) ln s_p^2 - sum (n_i - 1) ln s_i^2] / [1 + (sum 1/(n_i - 1) - 1/(N - k)) / (3 (k - 1))]},
+     * compared against a chi-squared distribution with {@code k - 1} degrees of freedom.
+     *
+     * @param samples data of the sample groups
+     * @return p-value
+     * @throws IllegalArgumentException if fewer than 2 groups are given or a group has fewer than
+     *                                  2 observations
+     */
+    public static double bartlettTest(List<double[]> samples) {
+        int k = samples.size();
+        if (k < 2) {
+            throw new IllegalArgumentException("至少需要2个样本组");
+        }
+
+        // Accumulate all per-group terms in one pass so each group's variance is paired with
+        // its own size (looking variances up by group size picks the wrong group).
+        int totalN = 0;
+        double weightedVarianceSum = 0;
+        double weightedLogVarianceSum = 0;
+        double inverseDfSum = 0;
+        for (double[] sample : samples) {
+            int df = sample.length - 1;
+            if (df < 1) {
+                throw new IllegalArgumentException("每个样本组至少需要2个观测值");
+            }
+            double variance = new DescriptiveStatistics(sample).getVariance();
+            totalN += sample.length;
+            weightedVarianceSum += df * variance;
+            weightedLogVarianceSum += df * Math.log(variance);
+            inverseDfSum += 1.0 / df;
+        }
+
+        // Pooled variance.
+        int pooledDf = totalN - k;
+        double pooledVariance = weightedVarianceSum / pooledDf;
+
+        // Test statistic.
+        double numerator = pooledDf * Math.log(pooledVariance) - weightedLogVarianceSum;
+        double denominator = 1 + (1.0 / (3 * (k - 1))) * (inverseDfSum - 1.0 / pooledDf);
+        double chiSquared = numerator / denominator;
+
+        // p-value from the chi-squared distribution.
+        org.apache.commons.math3.distribution.ChiSquaredDistribution chiDist =
+                new org.apache.commons.math3.distribution.ChiSquaredDistribution(k - 1);
+
+        return 1 - chiDist.cumulativeProbability(chiSquared);
+    }
+}

+ 6 - 0
jjt-biz/src/main/resources/mapper/biz/RipaMetricsMapper.xml

@@ -110,6 +110,12 @@
             </if>
         </where>
     </select>
+    <!-- Data rows of one metric, newest first, at most #{limit} rows. -->
+    <select id="selectDataList" resultType="com.jjt.biz.vo.ObjMetricsData">
+        SELECT obj_metrics_id,
+               d_value AS data
+        FROM biz_obj_metrics_data
+        WHERE obj_metrics_id = #{id}
+        ORDER BY create_time DESC
+        LIMIT #{limit}
+    </select>
 
     <insert id="insertRipaMetrics" parameterType="RipaMetrics" useGeneratedKeys="true"
             keyProperty="autoId">