yl-backend/src/main/java/com/guwan/backend/util/BookContentUtil.java

129 lines
4.4 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package com.guwan.backend.util;
import com.guwan.backend.pojo.entity.BookContent;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Helpers for downloading a book's raw text and splitting it into
 * volume / section records.
 */
public class BookContentUtil {

    /** Volume heading such as "第三卷"; group 1 is the Chinese numeral. */
    private static final Pattern VOLUME_HEADING =
            Pattern.compile("第([一二三四五六七八九十]+)卷");

    /**
     * Section heading: "第", a Chinese numeral, "节", a colon, then the rest of
     * the line. Group 1 is the numeral, group 2 the section name.
     */
    private static final Pattern SECTION_HEADING =
            Pattern.compile("第([一二三四五六七八九十零百]+)节:(.*)");

    /**
     * Executes {@code request} synchronously on {@code client} and returns the
     * response body as a string.
     *
     * @param client  the OkHttp client used to issue the call
     * @param request the request to execute
     * @return the response body text
     * @throws IOException if the call fails or the response is not successful
     */
    public static String getTextUsingOkHttp(OkHttpClient client, Request request) throws IOException {
        // try-with-resources closes the response (and its body) on every path.
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("Unexpected code " + response);
            }
            return response.body().string();
        }
    }

    /**
     * Splits {@code content} into one {@link BookContent} per section heading.
     *
     * <p>Each section's body is the trimmed text between the end of its heading
     * line and the start of the next section heading (or the end of the text).
     * A section is attributed to the nearest volume heading that precedes it in
     * the text; sections that appear before the first volume heading are
     * attributed to the first volume. Section ids are assigned sequentially
     * starting at 1, in order of appearance.
     *
     * <p>NOTE(review): when the text contains no volume heading at all an empty
     * list is returned, as before. Confirm whether books without volumes should
     * instead be emitted with an empty volume.
     *
     * @param content  full text of the book; {@code null} or empty yields an empty list
     * @param bookName name stored on every produced record
     * @return the sections in order of appearance, each exactly once
     */
    public static ArrayList<BookContent> processContent(String content, String bookName) {
        ArrayList<BookContent> bookContents = new ArrayList<>();
        if (content == null || content.isEmpty()) {
            return bookContents;
        }

        // Volume numerals together with the offset of each heading in the text.
        List<String> volumes = new ArrayList<>();
        List<Integer> volumeStarts = new ArrayList<>();
        Matcher volumeMatcher = VOLUME_HEADING.matcher(content);
        while (volumeMatcher.find()) {
            volumes.add(volumeMatcher.group(1));
            volumeStarts.add(volumeMatcher.start());
        }
        if (volumes.isEmpty()) {
            return bookContents;
        }

        // Section titles plus where each heading starts and where its body begins.
        List<String> sections = new ArrayList<>();
        List<Integer> headingStarts = new ArrayList<>();
        List<Integer> bodyStarts = new ArrayList<>();
        Matcher sectionMatcher = SECTION_HEADING.matcher(content);
        while (sectionMatcher.find()) {
            // The section name is trimmed; the numeral is kept as matched.
            sections.add(sectionMatcher.group(1) + "节:" + sectionMatcher.group(2).trim());
            headingStarts.add(sectionMatcher.start());
            bodyStarts.add(sectionMatcher.end());
        }

        int volumeIndex = 0;
        for (int j = 0; j < sections.size(); j++) {
            int headingStart = headingStarts.get(j);

            // Advance to the last volume heading located before this section.
            // Bounded by volumes.size(), so the index can never run past the list.
            while (volumeIndex + 1 < volumes.size()
                    && volumeStarts.get(volumeIndex + 1) < headingStart) {
                volumeIndex++;
            }

            // The body runs up to the next section heading, or to the end of the text.
            int bodyEnd = (j + 1 < sections.size()) ? headingStarts.get(j + 1) : content.length();
            String sectionContent = content.substring(bodyStarts.get(j), bodyEnd).trim();

            BookContent bookContent = new BookContent();
            bookContent.setBookName(bookName);
            bookContent.setVolume(volumes.get(volumeIndex));
            bookContent.setSection(sections.get(j));
            bookContent.setSectionContent(sectionContent);
            bookContent.setSectionId(j + 1);
            bookContents.add(bookContent);
        }
        return bookContents;
    }
}