129 lines
4.4 KiB
Java
129 lines
4.4 KiB
Java
package com.guwan.backend.util;
|
||
|
||
import com.guwan.backend.pojo.entity.BookContent;
|
||
import okhttp3.OkHttpClient;
|
||
import okhttp3.Request;
|
||
import okhttp3.Response;
|
||
|
||
import java.io.IOException;
|
||
import java.util.ArrayList;
|
||
import java.util.List;
|
||
import java.util.regex.Matcher;
|
||
import java.util.regex.Pattern;
|
||
|
||
public class BookContentUtil {
|
||
|
||
|
||
|
||
// 通过 OkHttpClient 发起同步请求获取文件内容
|
||
public static String getTextUsingOkHttp(OkHttpClient client, Request request) throws IOException {
|
||
try (Response response = client.newCall(request).execute()) {
|
||
if (response.isSuccessful()) {
|
||
return response.body().string(); // 返回文件内容
|
||
} else {
|
||
throw new IOException("Unexpected code " + response);
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
|
||
public static ArrayList<BookContent> processContent(String content, String bookName) {
|
||
// 正则表达式,提取卷和节
|
||
String volumePattern = "第([一二三四五六七八九十]+)卷"; // 提取卷
|
||
String sectionPattern = "第([一二三四五六七八九十零百]+)节:(.*)"; // 提取节及其节名
|
||
|
||
// 提取卷
|
||
Pattern volumeRegex = Pattern.compile(volumePattern);
|
||
Matcher volumeMatcher = volumeRegex.matcher(content);
|
||
|
||
// 提取节
|
||
Pattern sectionRegex = Pattern.compile(sectionPattern);
|
||
Matcher sectionMatcher = sectionRegex.matcher(content);
|
||
|
||
// 列表来存储所有卷和节的内容
|
||
List<String> volumes = new ArrayList<>(); // 存储所有卷的标题
|
||
List<String> sections = new ArrayList<>(); // 存储所有节的标题
|
||
List<String> sectionContents = new ArrayList<>(); // 存储每节的正文内容
|
||
|
||
// 收集卷的信息
|
||
while (volumeMatcher.find()) {
|
||
String volume = "第" + volumeMatcher.group(1) + "卷";
|
||
volumes.add(volume);
|
||
}
|
||
|
||
// 收集节的信息
|
||
while (sectionMatcher.find()) {
|
||
String sectionTitle = "第" + sectionMatcher.group(1) + "节:" + sectionMatcher.group(2).trim(); // 这里去掉节名前后空格
|
||
sections.add(sectionTitle);
|
||
|
||
// 获取节的正文内容
|
||
int start = sectionMatcher.end(); // 获取节标题之后的位置
|
||
int end = content.length(); // 默认到文件末尾
|
||
|
||
// 查找下一个节的位置(即本节内容的结束位置)
|
||
Matcher nextSectionMatcher = sectionRegex.matcher(content);
|
||
if (nextSectionMatcher.find(start)) {
|
||
end = nextSectionMatcher.start();
|
||
}
|
||
|
||
// 获取当前节的正文内容
|
||
String sectionContent = content.substring(start, end).trim();
|
||
sectionContents.add(sectionContent);
|
||
}
|
||
|
||
// 标记是否是第一次匹配到“第一节”
|
||
boolean isFirstSection = true;
|
||
|
||
ArrayList<BookContent> bookContents = new ArrayList<>();
|
||
|
||
int sectionId = 1;
|
||
|
||
// 输出卷和节信息
|
||
for (int i = 0; i < volumes.size(); i++) {
|
||
// 输出卷的标题
|
||
|
||
// 输出该卷的每一节标题和正文内容
|
||
for (int j = 0; j < sections.size(); j++) {
|
||
|
||
// System.out.print(volumes.get(i));
|
||
String section = sections.get(j);
|
||
String sectionContent = sectionContents.get(j);
|
||
|
||
// 输出节标题
|
||
// System.out.println(" " + section);
|
||
|
||
// 输出节的正文内容
|
||
// System.out.println(" 正文: " + sectionContent);
|
||
|
||
// 如果是“第一节”,并且不是第一次出现,递增卷的索引
|
||
if (section.contains("第一节") && !isFirstSection) {
|
||
i++; // 不是第一次才递增
|
||
}
|
||
|
||
// 第一次匹配到“第一节”后,标记为false
|
||
isFirstSection = false;
|
||
|
||
|
||
BookContent bookContent = new BookContent();
|
||
bookContent.setBookName(bookName);
|
||
bookContent.setVolume(volumes.get(i));
|
||
bookContent.setSection(section);
|
||
bookContent.setSectionContent(sectionContent);
|
||
bookContent.setSectionId(sectionId++);
|
||
|
||
System.out.println("bookContent = " + bookContent);
|
||
|
||
bookContents.add(bookContent);
|
||
|
||
|
||
}
|
||
|
||
|
||
}
|
||
return bookContents;
|
||
}
|
||
|
||
|
||
}
|