package org.allenai.pdffigures2;

import org.allenai.common.Logging;
import org.allenai.common.Logging$logger$;
import org.allenai.common.Logging$loggerConfig$;
import org.allenai.pdffigures2.SectionTitleExtractor;
import org.apache.pdfbox.text.TextPosition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.GenTraversable;
import scala.collection.IterableLike;
import scala.collection.LinearSeqOptimized;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.StringOps;
import scala.collection.immutable.StringOps$;
import scala.math.Numeric$IntIsIntegral$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.Tuple3Zipped$;
import scala.runtime.Tuple3Zipped$Ops$;
import scala.util.matching.Regex;

/* compiled from: SectionTitleExtractor.scala */
/* loaded from: input_file:org/allenai/pdffigures2/SectionTitleExtractor$.class */
public final class SectionTitleExtractor$ implements Logging {
    // Singleton instance of this Scala-object class; the private constructor stores `this` here.
    public static final SectionTitleExtractor$ MODULE$ = null;
    // Set to 2.0 by the constructor. Used as the maximum left/right edge difference in
    // isLineBeginningSection and as the divisor of the standard width in isAlignedOrCentered.
    private final double org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance;
    // Set to 2 by the constructor; exclusive upper bound for the word count tested in isCompleteTitle.
    private final int MaxNonCapitalizedLargeWords;
    // Set to 0.7 by the constructor; threshold compared against in cleanPrefixedSections.
    private final double PruneNonPrefixedSections;
    // Set to 0.1 by the constructor; threshold for the summed leftMargins lookups in isAlignedOrCentered.
    private final double MinSharedMargin;
    // The four patterns below are tested against the first word of a line by isPrefixed.
    private final Regex NumberRegex;
    private final Regex LetterNumberRegex;
    private final Regex RomanNumeralsRegex;
    private final Regex AppendixRegex;
    // Searched for in the second word of a line by isList.
    private final Regex ListRegex;
    // Patterns checked against the title text in isCompleteTitle; a hit rejects the title.
    private final Seq<Regex> BlackList;
    // Pattern for "references", "acknowledg(e)ment(s)" and "bibliography" (case-insensitive);
    // only its accessor is visible in this class.
    private final Regex org$allenai$pdffigures2$SectionTitleExtractor$$allowNonPrefiex;
    // SLF4J logger installed through the Logging trait setter during construction.
    private final Logger internalLogger;
    // Lazily created helpers from the Logging trait; null until first requested.
    private volatile Logging$logger$ logger$module;
    private volatile Logging$loggerConfig$ loggerConfig$module;

    // Instantiates the singleton once when the class is initialized; the constructor
    // publishes the new instance through MODULE$.
    static {
        new SectionTitleExtractor$();
    }

    /**
     * Exposes the SLF4J logger backing the {@code Logging} trait.
     *
     * @return the logger stored in the {@code internalLogger} field
     */
    @Override // org.allenai.common.Logging
    public Logger internalLogger() {
        return internalLogger;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    private Logging$logger$ logger$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (this.logger$module == null) {
                this.logger$module = new Logging$logger$(this);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.logger$module;
        }
    }

    /**
     * Returns the {@code logger} helper, creating it on first access.
     *
     * @return the cached helper, or the result of {@code logger$lzycompute()} when none is cached
     */
    @Override // org.allenai.common.Logging
    public Logging$logger$ logger() {
        if (this.logger$module != null) {
            return this.logger$module;
        }
        return logger$lzycompute();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v0 */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5 */
    private Logging$loggerConfig$ loggerConfig$lzycompute() {
        ?? r0 = this;
        synchronized (r0) {
            if (this.loggerConfig$module == null) {
                this.loggerConfig$module = new Logging$loggerConfig$(this);
            }
            BoxedUnit boxedUnit = BoxedUnit.UNIT;
            r0 = r0;
            return this.loggerConfig$module;
        }
    }

    /**
     * Returns the {@code loggerConfig} helper, creating it on first access.
     *
     * @return the cached helper, or the result of {@code loggerConfig$lzycompute()} when none is cached
     */
    @Override // org.allenai.common.Logging
    public Logging$loggerConfig$ loggerConfig() {
        if (this.loggerConfig$module != null) {
            return this.loggerConfig$module;
        }
        return loggerConfig$lzycompute();
    }

    /**
     * Trait setter used to install the logger; the constructor calls it once.
     *
     * @param logger the SLF4J logger to store in {@code internalLogger}
     */
    @Override // org.allenai.common.Logging
    public void org$allenai$common$Logging$_setter_$internalLogger_$eq(Logger logger) {
        internalLogger = logger;
    }

    /**
     * Accessor for the text alignment tolerance constant.
     *
     * @return the value of the {@code TextAlignmentTolerance} field
     */
    public double org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance() {
        return org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance;
    }

    /**
     * Accessor for the word-count bound used by the complete-title check.
     *
     * @return the value of the {@code MaxNonCapitalizedLargeWords} field
     */
    private int MaxNonCapitalizedLargeWords() {
        return MaxNonCapitalizedLargeWords;
    }

    /**
     * Accessor for the threshold used when deciding whether to prune section titles.
     *
     * @return the value of the {@code PruneNonPrefixedSections} field
     */
    private double PruneNonPrefixedSections() {
        return PruneNonPrefixedSections;
    }

    /**
     * Accessor for the margin threshold used by the alignment check.
     *
     * @return the value of the {@code MinSharedMargin} field
     */
    private double MinSharedMargin() {
        return MinSharedMargin;
    }

    /**
     * Accessor for the numeric prefix pattern.
     *
     * @return the value of the {@code NumberRegex} field
     */
    private Regex NumberRegex() {
        return NumberRegex;
    }

    /**
     * Accessor for the letter/number prefix pattern.
     *
     * @return the value of the {@code LetterNumberRegex} field
     */
    private Regex LetterNumberRegex() {
        return LetterNumberRegex;
    }

    /**
     * Accessor for the roman-numeral prefix pattern.
     *
     * @return the value of the {@code RomanNumeralsRegex} field
     */
    private Regex RomanNumeralsRegex() {
        return RomanNumeralsRegex;
    }

    /**
     * Accessor for the "appendix" prefix pattern.
     *
     * @return the value of the {@code AppendixRegex} field
     */
    private Regex AppendixRegex() {
        return AppendixRegex;
    }

    /**
     * Tests whether the first word of a line matches one of the section-prefix patterns.
     *
     * <p>A line consisting of exactly one word is never treated as prefixed. Otherwise the
     * first word's text must fully match, in this order, the number, roman numeral,
     * letter/number or appendix pattern.
     *
     * @param line the line whose first word is examined
     * @return {@code true} if the first word fully matches any of the prefix patterns
     */
    public boolean isPrefixed(Line line) {
        if (line.words().size() == 1) {
            return false;
        }
        String firstWord = line.words().mo455head().text();
        if (RegexWithTimeout.matcher(NumberRegex().pattern(), firstWord).matches()) {
            return true;
        }
        if (RegexWithTimeout.matcher(RomanNumeralsRegex().pattern(), firstWord).matches()) {
            return true;
        }
        if (RegexWithTimeout.matcher(LetterNumberRegex().pattern(), firstWord).matches()) {
            return true;
        }
        return RegexWithTimeout.matcher(AppendixRegex().pattern(), firstWord).matches();
    }

    /**
     * Tests whether a line's text could open a section title.
     *
     * @param line the candidate line
     * @return {@code true} if the text is longer than one character and either starts with an
     *         upper-case character or the line is prefixed according to {@link #isPrefixed}
     */
    public boolean isTitleStartText(Line line) {
        String lineText = line.text();
        if (lineText.length() <= 1) {
            return false;
        }
        char firstChar = StringOps$.MODULE$.apply$extension(Predef$.MODULE$.augmentString(lineText), 0);
        if (Character.isUpperCase(firstChar)) {
            return true;
        }
        return isPrefixed(line);
    }

    /**
     * Tests whether a box lines up with the document's left margins, either directly or when
     * treated as centered within the standard width.
     *
     * <p>Both checks look up the {@code leftMargins} entries at the ceiling and the floor of an
     * x coordinate, add the two values and compare the sum against {@code MinSharedMargin}.
     * The centered check is only made when {@code standardWidthBucketed} is defined, and the
     * left-aligned check only when {@code trustLeftMargin} is set.
     *
     * @param box the bounding box to test
     * @param documentLayout layout statistics supplying the margins and the standard width
     * @return {@code true} if either the left-aligned or the centered check succeeds
     */
    public boolean isAlignedOrCentered(Box box, DocumentLayout documentLayout) {
        boolean centered = false;
        if (documentLayout.standardWidthBucketed().isDefined()) {
            // The box centre minus the standard width divided by the tolerance constant (2.0 as set by the constructor).
            double centeredLeft = box.xCenter() - (BoxesRunTime.unboxToDouble(documentLayout.standardWidthBucketed().get()) / org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance());
            double atCeil = BoxesRunTime.unboxToDouble(documentLayout.leftMargins().getOrElse(BoxesRunTime.boxToInteger((int) Math.ceil(centeredLeft)), new SectionTitleExtractor$$anonfun$1()));
            double atFloor = BoxesRunTime.unboxToDouble(documentLayout.leftMargins().getOrElse(BoxesRunTime.boxToInteger((int) Math.floor(centeredLeft)), new SectionTitleExtractor$$anonfun$2()));
            centered = atCeil + atFloor > MinSharedMargin();
        }

        boolean leftAligned = false;
        if (documentLayout.trustLeftMargin()) {
            double left = box.x1();
            double atCeil = BoxesRunTime.unboxToDouble(documentLayout.leftMargins().getOrElse(BoxesRunTime.boxToInteger((int) Math.ceil(left)), new SectionTitleExtractor$$anonfun$3()));
            double atFloor = BoxesRunTime.unboxToDouble(documentLayout.leftMargins().getOrElse(BoxesRunTime.boxToInteger((int) Math.floor(left)), new SectionTitleExtractor$$anonfun$4()));
            leftAligned = atCeil + atFloor > MinSharedMargin();
        }

        return leftAligned || centered;
    }

    /**
     * Decides whether a line is styled like a section title, using per-word and per-character
     * predicates defined in anonymous function classes outside this file.
     *
     * @param line the candidate line
     * @param documentLayout layout statistics (standard font size and whatever the predicates read)
     * @return {@code true} if the filtered elements are non-empty and consistent with the first
     *         one, and either the counted share exceeds half without the small-font override, or
     *         no character of the line text satisfies the final predicate
     */
    public boolean isTitleStyle(Line line, DocumentLayout documentLayout) {
        // Sum of one int per word (presumably a character count per word -- TODO confirm against anonfun$6).
        int unboxToInt = BoxesRunTime.unboxToInt(((TraversableOnce) line.words().map(new SectionTitleExtractor$$anonfun$6(), List$.MODULE$.canBuildFrom())).mo2196sum(Numeric$IntIsIntegral$.MODULE$));
        // Number of flat-mapped elements accepted by the layout-dependent predicate anonfun$8.
        int count = ((TraversableOnce) line.words().flatMap(new SectionTitleExtractor$$anonfun$9(), List$.MODULE$.canBuildFrom())).count(new SectionTitleExtractor$$anonfun$10(documentLayout)) > unboxToInt / 2 ? ((TraversableOnce) line.words().flatMap(new SectionTitleExtractor$$anonfun$7(), List$.MODULE$.canBuildFrom())).count(new SectionTitleExtractor$$anonfun$8(documentLayout)) : ((TraversableOnce) line.words().flatMap(new SectionTitleExtractor$$anonfun$7(), List$.MODULE$.canBuildFrom())).count(new SectionTitleExtractor$$anonfun$8(documentLayout));
        // z: a standard font size is known, the three-way comparison against 20 is negative
        // (size below 20; as written a NaN also yields -1), and more than half (integer division)
        // of the summed total is accepted by anonfun$10.
        boolean z = (documentLayout.standardFontSize().nonEmpty() && (BoxesRunTime.unboxToDouble(documentLayout.standardFontSize().get()) > ((double) 20) ? 1 : (BoxesRunTime.unboxToDouble(documentLayout.standardFontSize().get()) == ((double) 20) ? 0 : -1)) < 0) && ((TraversableOnce) line.words().flatMap(new SectionTitleExtractor$$anonfun$9(), List$.MODULE$.canBuildFrom())).count(new SectionTitleExtractor$$anonfun$10(documentLayout)) > unboxToInt / 2;
        // Words passing two filters, flat-mapped to the elements that are compared with each other below.
        List list = (List) ((List) ((TraversableLike) line.words().filter(new SectionTitleExtractor$$anonfun$11())).filter(new SectionTitleExtractor$$anonfun$12())).flatMap(new SectionTitleExtractor$$anonfun$13(), List$.MODULE$.canBuildFrom());
        // Requires a non-empty list whose tail elements all satisfy anonfun$14 (which receives the
        // whole list), plus either (!z and count above half the total) or no character of the
        // line text satisfying anonfun$15.
        return (list.nonEmpty() && ((LinearSeqOptimized) list.tail()).forall(new SectionTitleExtractor$$anonfun$14(list))) && ((!z && count > unboxToInt / 2) || (!new StringOps(Predef$.MODULE$.augmentString(line.text())).exists(new SectionTitleExtractor$$anonfun$15())));
    }

    /**
     * Tests whether a text position holds a single "ordinary" character.
     *
     * <p>The previous body evaluated the non-ASCII / ligature condition inside an empty
     * {@code if} and then returned {@code true} regardless, so every single-character string
     * was accepted. The condition is now applied: a single character counts as normal only if
     * its code is below 128 or it is the "ﬁ" ligature.
     *
     * @param textPosition the text position whose unicode string is inspected
     * @return {@code true} if the unicode string has length 1 and is either below code 128 or
     *         equal to "ﬁ"; {@code false} otherwise
     */
    public boolean org$allenai$pdffigures2$SectionTitleExtractor$$isNormalText(TextPosition textPosition) {
        String unicode = textPosition.getUnicode();
        if (unicode.length() != 1) {
            return false;
        }
        // Characters below 128 are accepted outright; "ﬁ" is the one non-ASCII exception.
        return unicode.charAt(0) < 128 || "ﬁ".equals(unicode);
    }

    /**
     * Heuristic test for whether a line looks like an equation rather than a title.
     *
     * <p>Counts the flat-mapped elements of the line's words that satisfy an externally
     * defined predicate (presumably unusual characters -- confirm against anonfun$17). A
     * prefixed line is never an equation; otherwise the count must exceed 3 and also exceed
     * 40% of the per-word integer sum.
     *
     * @param line the candidate line
     * @return {@code true} if the line is not prefixed and the count passes both thresholds
     */
    public boolean org$allenai$pdffigures2$SectionTitleExtractor$$isEquation(Line line) {
        int flagged = ((TraversableOnce) line.words().flatMap(new SectionTitleExtractor$$anonfun$16(), List$.MODULE$.canBuildFrom())).count(new SectionTitleExtractor$$anonfun$17());
        if (isPrefixed(line)) {
            return false;
        }
        if (flagged <= 3) {
            return false;
        }
        int total = BoxesRunTime.unboxToInt(((TraversableOnce) line.words().map(new SectionTitleExtractor$$anonfun$18(), List$.MODULE$.canBuildFrom())).mo2196sum(Numeric$IntIsIntegral$.MODULE$));
        return ((double) flagged) > ((double) total) * 0.4d;
    }

    /**
     * Accessor for the list-item pattern.
     *
     * @return the value of the {@code ListRegex} field
     */
    private Regex ListRegex() {
        return ListRegex;
    }

    /**
     * Tests whether a line looks like a list entry: more than one word, a first word that
     * starts with an upper-case character, and a second word in which the list pattern is found.
     *
     * @param line the candidate line
     * @return {@code true} if all three conditions hold
     */
    public boolean isList(Line line) {
        if (line.words().size() <= 1) {
            return false;
        }
        String firstWord = line.words().mo455head().text();
        char initial = BoxesRunTime.unboxToChar(new StringOps(Predef$.MODULE$.augmentString(firstWord)).mo455head());
        if (!Character.isUpperCase(initial)) {
            return false;
        }
        Regex listPattern = ListRegex();
        String secondWord = ((Word) ((IterableLike) line.words().tail()).mo455head()).text();
        return listPattern.findFirstIn(secondWord).nonEmpty();
    }

    /**
     * Accessor for the patterns that disqualify a title.
     *
     * @return the value of the {@code BlackList} field
     */
    private Seq<Regex> BlackList() {
        return BlackList;
    }

    /**
     * Final acceptance check for an assembled section title.
     *
     * <p>Rejects titles whose text hits the black list (logging at trace level) and titles with
     * more than three lines. Prefixed titles are accepted. Otherwise the title is accepted when
     * at most three elements survive the two filters, or when fewer than
     * {@code MaxNonCapitalizedLargeWords} of them satisfy the final predicate.
     *
     * @param sectionTitle the candidate title
     * @return {@code true} if the title passes the checks above
     */
    public boolean org$allenai$pdffigures2$SectionTitleExtractor$$isCompleteTitle(SectionTitleExtractor.SectionTitle sectionTitle) {
        String titleText = sectionTitle.toParagraph().text();
        boolean blackListed = BlackList().exists(new SectionTitleExtractor$$anonfun$org$allenai$pdffigures2$SectionTitleExtractor$$isCompleteTitle$1(titleText));
        if (blackListed) {
            logger().trace(new SectionTitleExtractor$$anonfun$org$allenai$pdffigures2$SectionTitleExtractor$$isCompleteTitle$2(titleText));
            return false;
        }
        if (sectionTitle.lines().size() > 3) {
            return false;
        }
        if (sectionTitle.isPrefixed()) {
            return true;
        }
        List candidates = (List) ((TraversableLike) ((List) sectionTitle.lines().flatMap(new SectionTitleExtractor$$anonfun$19(), List$.MODULE$.canBuildFrom())).filter(new SectionTitleExtractor$$anonfun$20())).filter(new SectionTitleExtractor$$anonfun$21());
        // The prefixed check is repeated here exactly as in the compiled original.
        if (sectionTitle.isPrefixed()) {
            return true;
        }
        if (candidates.size() <= 3) {
            return true;
        }
        return candidates.count(new SectionTitleExtractor$$anonfun$22()) < MaxNonCapitalizedLargeWords();
    }

    /**
     * Compares the vertical position of two horizontally aligned boxes.
     *
     * @param box the first box
     * @param box2 the second box
     * @return {@code true} if {@code box.horizontallyAligned(box2, 50.0)} holds and
     *         {@code box.y2() - 5} is less than {@code box2.y2()}
     */
    public boolean org$allenai$pdffigures2$SectionTitleExtractor$$isBeneath(Box box, Box box2) {
        if (!box.horizontallyAligned(box2, 50.0d)) {
            return false;
        }
        double relaxedBottom = box.y2() - 5.0d;
        return relaxedBottom < box2.y2();
    }

    /**
     * Tests whether a line is separated from the line before it.
     *
     * @param line the current line
     * @param option the previous line, if there is one
     * @param documentLayout supplies the median line spacing
     * @return {@code true} if there is no previous line, if the previous line's boundary is
     *         taller than 40, or if the gap between the previous line's {@code y2} and this
     *         line's {@code y1} exceeds the median line spacing plus 0.1
     */
    public boolean org$allenai$pdffigures2$SectionTitleExtractor$$isFarFromPreviousLine(Line line, Option<Line> option, DocumentLayout documentLayout) {
        if (!option.isDefined()) {
            return true;
        }
        if (option.get().boundary().height() > 40.0d) {
            return true;
        }
        double gap = line.boundary().y1() - option.get().boundary().y2();
        return gap > documentLayout.medianLineSpacing() + 0.1d;
    }

    /**
     * Decides whether {@code line} starts something new after {@code sectionTitle}; the result
     * is {@code false} when the line looks like a continuation of the title.
     *
     * <p>The single expression returns {@code false} if any of these holds, else {@code true}:
     * <ol>
     *   <li>the line has at most three words and its first word does not start with an
     *       upper-case character;</li>
     *   <li>the last character of the title's last line is {@code '-'};</li>
     *   <li>the title's font size is not greater than the standard font size (or the fallback
     *       from anonfun$5), the vertical gap between the title's {@code y2} and the line's
     *       {@code y1} compares below the median line spacing, and both the left and right
     *       edges of the line compare within {@code TextAlignmentTolerance} of the title's.</li>
     * </ol>
     * The {@code (a > b ? 1 : (a == b ? 0 : -1))} forms are three-way double comparisons; as
     * written they yield -1 when the operands are unordered (NaN).
     *
     * @param line the line following the title
     * @param sectionTitle the title assembled so far
     * @param documentLayout supplies the standard font size and median line spacing
     * @return {@code false} if the line appears to continue the title, {@code true} otherwise
     */
    public boolean org$allenai$pdffigures2$SectionTitleExtractor$$isLineBeginningSection(Line line, SectionTitleExtractor.SectionTitle sectionTitle, DocumentLayout documentLayout) {
        return ((line.words().size() <= 3 && !Character.isUpperCase(BoxesRunTime.unboxToChar(new StringOps(Predef$.MODULE$.augmentString(line.words().mo455head().text())).mo455head()))) || BoxesRunTime.unboxToChar(new StringOps(Predef$.MODULE$.augmentString(sectionTitle.lines().mo454last().text())).mo454last()) == '-' || (!((sectionTitle.fontSize() > BoxesRunTime.unboxToDouble(documentLayout.standardFontSize().getOrElse(new SectionTitleExtractor$$anonfun$5(documentLayout))) ? 1 : (sectionTitle.fontSize() == BoxesRunTime.unboxToDouble(documentLayout.standardFontSize().getOrElse(new SectionTitleExtractor$$anonfun$5(documentLayout))) ? 0 : -1)) > 0) && (((line.boundary().y1() - sectionTitle.boundary().y2()) > documentLayout.medianLineSpacing() ? 1 : ((line.boundary().y1() - sectionTitle.boundary().y2()) == documentLayout.medianLineSpacing() ? 0 : -1)) < 0) && ((Math.abs(sectionTitle.boundary().x1() - line.boundary().x1()) > org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance() ? 1 : (Math.abs(sectionTitle.boundary().x1() - line.boundary().x1()) == org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance() ? 0 : -1)) < 0 && (Math.abs(sectionTitle.boundary().x2() - line.boundary().x2()) > org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance() ? 1 : (Math.abs(sectionTitle.boundary().x2() - line.boundary().x2()) == org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance() ? 0 : -1)) < 0))) ? false : true;
    }

    /**
     * Runs section-title stripping over classified pages and rebuilds one result per page.
     *
     * <p>Each page is mapped to its paragraph sequence, the sequences are processed by
     * {@code stripSectionTitlesFromSortedParagraphs}, the resulting pairs are unzipped, and the
     * original pages are zipped with both halves and mapped to the output type.
     *
     * @param seq the classified pages
     * @param documentLayout layout statistics passed on to the title detection
     * @return one {@code PageWithClassifiedText} per zipped triple
     * @throws MatchError if unzipping yields {@code null}
     */
    public Seq<PageWithClassifiedText> stripSectionTitlesFromTextPage(Seq<ClassifiedPage> seq, DocumentLayout documentLayout) {
        Seq perPageParagraphs = (Seq) seq.map(new SectionTitleExtractor$$anonfun$28(), Seq$.MODULE$.canBuildFrom());
        Tuple2<GenTraversable, GenTraversable> halves = stripSectionTitlesFromSortedParagraphs(perPageParagraphs, documentLayout).unzip(Predef$.MODULE$.$conforms());
        if (halves == null) {
            throw new MatchError(halves);
        }
        Seq firstHalf = (Seq) halves.mo2061_1();
        Seq secondHalf = (Seq) halves.mo2060_2();
        Tuple3 triple = new Tuple3(seq, firstHalf, secondHalf);
        return (Seq) Tuple3Zipped$.MODULE$.map$extension(Tuple3Zipped$Ops$.MODULE$.zipped$extension(Predef$.MODULE$.tuple3ToZippedOps(triple), Predef$.MODULE$.$conforms(), Predef$.MODULE$.$conforms(), Predef$.MODULE$.$conforms()), new SectionTitleExtractor$$anonfun$stripSectionTitlesFromTextPage$1(), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Public entry point: transforms each page's paragraphs with an externally defined
     * function (presumably a sort, given the callee's name -- confirm) and delegates to
     * {@code stripSectionTitlesFromSortedParagraphs}.
     *
     * @param seq the paragraphs of each page
     * @param documentLayout layout statistics passed on to the title detection
     * @return one pair of paragraph sequences per page, as produced by the delegate
     */
    public Seq<Tuple2<Seq<Paragraph>, Seq<Paragraph>>> stripSectionTitles(Seq<Seq<Paragraph>> seq, DocumentLayout documentLayout) {
        Seq prepared = (Seq) seq.map(new SectionTitleExtractor$$anonfun$stripSectionTitles$1(), Seq$.MODULE$.canBuildFrom());
        return stripSectionTitlesFromSortedParagraphs(prepared, documentLayout);
    }

    /**
     * Accessor for the pattern naming titles such as "references" or "bibliography".
     *
     * @return the value of the {@code allowNonPrefiex} field
     */
    public Regex org$allenai$pdffigures2$SectionTitleExtractor$$allowNonPrefiex() {
        return org$allenai$pdffigures2$SectionTitleExtractor$$allowNonPrefiex;
    }

    /**
     * Applies a per-page clean-up to the detected titles when a large enough share of them
     * satisfies an externally defined predicate (presumably "is prefixed" -- confirm against
     * anonfun$30).
     *
     * <p>The share was previously computed with integer division, which truncates to 0 for
     * every ratio below 1, so the fractional {@code PruneNonPrefixedSections} threshold could
     * only be exceeded when all titles matched. The ratio is now computed in floating point.
     *
     * @param seq the detected section titles of each page
     * @return {@code seq} unchanged when the summed total is 3 or less or the share does not
     *         exceed the threshold; otherwise each page mapped through the clean-up function
     */
    private Seq<Seq<SectionTitleExtractor.SectionTitle>> cleanPrefixedSections(Seq<Seq<SectionTitleExtractor.SectionTitle>> seq) {
        // Sum of one int per page (presumably the number of titles on it -- confirm against anonfun$29).
        int total = BoxesRunTime.unboxToInt(((TraversableOnce) seq.map(new SectionTitleExtractor$$anonfun$29(), Seq$.MODULE$.canBuildFrom())).mo2196sum(Numeric$IntIsIntegral$.MODULE$));
        if (total <= 3) {
            return seq;
        }
        int matching = ((TraversableOnce) seq.flatten2(Predef$.MODULE$.$conforms())).count(new SectionTitleExtractor$$anonfun$30());
        // Cast before dividing so the ratio keeps its fractional part; total is > 3 here.
        double share = ((double) matching) / total;
        if (share > PruneNonPrefixedSections()) {
            logger().debug(new SectionTitleExtractor$$anonfun$cleanPrefixedSections$1());
            return (Seq) seq.map(new SectionTitleExtractor$$anonfun$cleanPrefixedSections$2(), Seq$.MODULE$.canBuildFrom());
        }
        return seq;
    }

    /**
     * Core pipeline: derives section titles for every page, passes them through
     * {@code cleanPrefixedSections}, pairs each page's paragraphs with its cleaned titles and
     * maps every pair to the result tuple.
     *
     * @param seq the paragraphs of each page
     * @param documentLayout layout statistics handed to the per-page title detection
     * @return one pair of paragraph sequences per page
     */
    private Seq<Tuple2<Seq<Paragraph>, Seq<Paragraph>>> stripSectionTitlesFromSortedParagraphs(Seq<Seq<Paragraph>> seq, DocumentLayout documentLayout) {
        Seq detectedTitles = (Seq) seq.map(new SectionTitleExtractor$$anonfun$31(documentLayout), Seq$.MODULE$.canBuildFrom());
        Seq<Seq<SectionTitleExtractor.SectionTitle>> cleanedTitles = cleanPrefixedSections(detectedTitles);
        TraversableLike pagesWithTitles = (TraversableLike) seq.zip(cleanedTitles, Seq$.MODULE$.canBuildFrom());
        return (Seq) pagesWithTitles.map(new SectionTitleExtractor$$anonfun$stripSectionTitlesFromSortedParagraphs$1(), Seq$.MODULE$.canBuildFrom());
    }

    /**
     * Builds the singleton: publishes it through {@code MODULE$}, installs the logger and
     * initializes every tuning constant and regular expression used by the heuristics.
     */
    private SectionTitleExtractor$() {
        // Publish the instance before anything else is initialized.
        MODULE$ = this;
        // Logger is named after this class and stored through the Logging trait setter.
        org$allenai$common$Logging$_setter_$internalLogger_$eq(LoggerFactory.getLogger(getClass()));
        // Numeric thresholds read by the alignment, title-completeness and pruning checks.
        this.org$allenai$pdffigures2$SectionTitleExtractor$$TextAlignmentTolerance = 2.0d;
        this.MaxNonCapitalizedLargeWords = 2;
        this.PruneNonPrefixedSections = 0.7d;
        this.MinSharedMargin = 0.1d;
        // Prefix patterns tested by isPrefixed against the first word of a line.
        // NOTE(review): every '.' below is unescaped and therefore matches any character,
        // not only a literal period -- confirm this is intended.
        this.NumberRegex = new StringOps(Predef$.MODULE$.augmentString("^[1-9][0-9]*(.[1-9][0-9]*)*.?$")).r();
        this.LetterNumberRegex = new StringOps(Predef$.MODULE$.augmentString("^[A-Z](.|[1-9]*.?)$")).r();
        this.RomanNumeralsRegex = new StringOps(Predef$.MODULE$.augmentString("^[IVX]+.?$")).r();
        this.AppendixRegex = new StringOps(Predef$.MODULE$.augmentString("^(?i)appendix.?$")).r();
        // Pattern searched for in the second word of a line by isList.
        this.ListRegex = new StringOps(Predef$.MODULE$.augmentString("^([1-9][0-9]*|[IVX]+)(.|:)?")).r();
        // Title text containing the word "we"/"We" or starting with "Proceedings of" is rejected by isCompleteTitle.
        this.BlackList = (Seq) Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Regex[]{new StringOps(Predef$.MODULE$.augmentString("\\b[wW]e\\b")).r(), new StringOps(Predef$.MODULE$.augmentString("^Proceedings of")).r()}));
        // Case-insensitive whole-string match for references / acknowledg(e)ment(s) / bibliography.
        this.org$allenai$pdffigures2$SectionTitleExtractor$$allowNonPrefiex = new StringOps(Predef$.MODULE$.augmentString("(?i)^(references|acknowledge?ments?|bibliography)$")).r();
    }
}
