(language-processing) Add maximum length limit for text input in SentenceExtractor
Added a new constant, MAX_TEXT_LENGTH, to the SentenceExtractor class. If the length of the text input exceeds this limit, the text is truncated to fit within the limit. This modification is designed to prevent excessive resource usage for unusually long text inputs.
This commit is contained in:
parent
32436d099c
commit
0806aa6dfe
@@ -96,7 +96,7 @@ public class SentenceExtractor {
|
||||
title = doc.getElementsByTag("h2").text();
|
||||
}
|
||||
|
||||
if (title.trim().length() < 3 && textSentences.length > 0) {
|
||||
if (title.trim().length() < 3) {
|
||||
for (DocumentSentence textSentence : textSentences) {
|
||||
if (textSentence.length() > 0) {
|
||||
title = textSentence.originalSentence.toLowerCase();
|
||||
@@ -138,10 +138,6 @@ public class SentenceExtractor {
|
||||
|
||||
String textNormalizedSpaces = SentenceExtractorStringUtils.normalizeSpaces(text);
|
||||
|
||||
if (text.length() > MAX_TEXT_LENGTH) {
|
||||
textNormalizedSpaces = textNormalizedSpaces.substring(0, MAX_TEXT_LENGTH);
|
||||
}
|
||||
|
||||
try {
|
||||
sentences = sentenceDetector.sentDetect(textNormalizedSpaces);
|
||||
}
|
||||
@@ -221,7 +217,12 @@ public class SentenceExtractor {
|
||||
public String asText(Document dc) {
|
||||
String text = dc.getElementsByTag("body").text();
|
||||
|
||||
return text.substring(0, (int) (text.length()*0.95));
|
||||
if (text.length() > MAX_TEXT_LENGTH) {
|
||||
return text.substring(0, MAX_TEXT_LENGTH);
|
||||
}
|
||||
else {
|
||||
return text.substring(0, (int) (text.length() * 0.95));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user