oracle
diff --git a/‎LICENSE-mandoc.txt
Lines changed: 56 additions & 0 deletions b/‎LICENSE-mandoc.txt
Lines changed: 56 additions & 0 deletions
diff --git a/‎OpenGrok
Lines changed: 3 additions & 0 deletions b/‎OpenGrok
Lines changed: 3 additions & 0 deletions
diff --git a/‎build.xml
Lines changed: 2 additions & 0 deletions b/‎build.xml
Lines changed: 2 additions & 0 deletions
diff --git a/‎opengrok-indexer/pom.xml
Lines changed: 7 additions & 0 deletions b/‎opengrok-indexer/pom.xml
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java
Lines changed: 7 additions & 2 deletions b/‎src/org/opensolaris/opengrok/analysis/AnalyzerGuru.java
Lines changed: 7 additions & 2 deletions
diff --git a/‎src/org/opensolaris/opengrok/analysis/FileAnalyzerFactory.java
Lines changed: 2 additions & 1 deletion b/‎src/org/opensolaris/opengrok/analysis/FileAnalyzerFactory.java
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/org/opensolaris/opengrok/analysis/document/DocumentMatcher.java
Lines changed: 213 additions & 0 deletions b/‎src/org/opensolaris/opengrok/analysis/document/DocumentMatcher.java
Lines changed: 213 additions & 0 deletions
@@ -0,0 +1,56 @@
+mandoc.css is included in OpenGrok under the following license:
+
+$Id: LICENSE,v 1.17 2017/06/23 15:58:14 schwarze Exp $
+
+With the exceptions noted below, all code and documentation
+contained in the mandoc toolkit is protected by the Copyright
+of the following developers:
+
+Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <[email protected]>
+Copyright (c) 2010-2017 Ingo Schwarze <[email protected]>
+Copyright (c) 2009, 2010, 2011, 2012 Joerg Sonnenberger <[email protected]>
+Copyright (c) 2013 Franco Fichtner <[email protected]>
+Copyright (c) 2014 Baptiste Daroussin <[email protected]>
+Copyright (c) 2016 Ed Maste <[email protected]>
+Copyright (c) 2017 Michael Stapelberg <[email protected]>
+Copyright (c) 1999, 2004 Marc Espie <[email protected]>
+Copyright (c) 1998, 2004, 2010 Todd C. Miller <[email protected]>
+Copyright (c) 2008, 2017 Otto Moerbeek <[email protected]>
+Copyright (c) 2004 Ted Unangst <[email protected]>
+Copyright (c) 1994 Christos Zoulas <[email protected]>
+Copyright (c) 2003, 2007, 2008, 2014 Jason McIntyre <[email protected]>
+
+See the individual source files for information about who contributed
+to which file during which years.
+
+
+The mandoc distribution as a whole is distributed by its developers
+under the following license:
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+The following files included from outside sources are protected by
+other people's Copyright and are distributed under various 2-clause
+and 3-clause BSD licenses; see these individual files for details.
+
+soelim.c, soelim.1:
+Copyright (c) 2014 Baptiste Daroussin <[email protected]>
+
+compat_err.c, compat_fts.c, compat_fts.h,
+compat_getsubopt.c, compat_strcasestr.c, compat_strsep.c,
+man.1:
+Copyright (c) 1989,1990,1993,1994 The Regents of the University of California
+
+compat_stringlist.c, compat_stringlist.h:
+Copyright (c) 1994 Christos Zoulas <[email protected]>
@@ -41,6 +41,7 @@
 #   - OPENGROK_CTAGS_OPTIONS_FILE Full path to file with extra command line
 #                                 options for CTags program (for its --options
 #                                 switch), default is DATA_ROOT/etc/ctags.config
+#   - OPENGROK_MANDOC             Full path to mandoc(1) binary
 #   - JAVA_HOME                   Full Path to Java Installation Root
 #   - JAVA                        Full Path to java binary (to enable 64bit JDK)
 #   - JAVA_OPTS                   Java options (e.g. for JVM memory increase
@@ -118,6 +119,7 @@
 
 #
 # Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
+# Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
 #
 
 #
@@ -853,6 +855,7 @@ CommonInvocation()
         ${SCAN_DEPTH}                                           	\
         ${PROGRESS}                                             	\
         ${OPENGROK_CTAGS:+-c} ${OPENGROK_CTAGS}               		\
+        ${OPENGROK_MANDOC:+--mandoc} ${OPENGROK_MANDOC}			\
         ${CTAGS_OPTIONS_FILE:+-o} ${CTAGS_OPTIONS_FILE}         	\
         ${OPENGROK_FLUSH_RAM_BUFFER_SIZE} ${SKIN} ${LEADING_WILDCARD}	\
         ${READ_XML_CONF}                                        	\
 
@@ -19,6 +19,7 @@ information: Portions Copyright [yyyy] [name of copyright owner]
 CDDL HEADER END
 
 Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
+Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
 
 -->
 <project name="OpenGrok" default="jar" basedir="." xmlns:jacoco="antlib:org.jacoco.ant"
@@ -322,6 +323,7 @@ Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
 
         <run-jflex dir="${gen.analysis.dir}/sql" name="SQLXref"/>
         <run-jflex dir="${gen.analysis.dir}/sql" name="PLSQLXref"/>
+        <run-jflex dir="${gen.analysis.dir}/document" name="MandocXref"/>
         <run-jflex dir="${gen.analysis.dir}/document" name="TroffXref"/>
         <run-jflex dir="${gen.analysis.dir}/document" name="TroffFullTokenizer"/>
         <run-jflex dir="${gen.analysis.dir}/sh" name="ShSymbolTokenizer"/>
 
@@ -49,6 +49,13 @@ Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
                     <include>*.adb</include>
                 </includes>
             </resource>
+            <resource>
+                <targetPath>org/opensolaris/opengrok/analysis/document/</targetPath>
+                <directory>../src/org/opensolaris/opengrok/analysis/document/</directory>
+                <includes>
+                    <include>*.1m</include>
+                </includes>
+            </resource>
             <resource>
                 <targetPath>org/opensolaris/opengrok/analysis/sql/</targetPath>
                 <directory>../src/org/opensolaris/opengrok/analysis/sql/</directory>
 
@@ -64,6 +64,7 @@
 import org.opensolaris.opengrok.analysis.csharp.CSharpAnalyzerFactory;
 import org.opensolaris.opengrok.analysis.data.IgnorantAnalyzerFactory;
 import org.opensolaris.opengrok.analysis.data.ImageAnalyzerFactory;
+import org.opensolaris.opengrok.analysis.document.MandocAnalyzerFactory;
 import org.opensolaris.opengrok.analysis.document.TroffAnalyzerFactory;
 import org.opensolaris.opengrok.analysis.erlang.ErlangAnalyzerFactory;
 import org.opensolaris.opengrok.analysis.executables.ELFAnalyzerFactory;
@@ -199,15 +200,19 @@ public class AnalyzerGuru {
     private static final Map<String, String> fileTypeDescriptions = new TreeMap<>();
 
     /*
-     * If you write your own analyzer please register it here
+     * If you write your own analyzer please register it here. The order is
+     * important for any factory that uses a FileAnalyzerFactory.Matcher
+     * implementation, as those are run in the same order as defined below --
+     * though precise Matchers are run before imprecise ones.
      */
     static {
         FileAnalyzerFactory[] analyzers = {
             DEFAULT_ANALYZER_FACTORY,
             new IgnorantAnalyzerFactory(),
             new BZip2AnalyzerFactory(),
             new XMLAnalyzerFactory(),
-            new TroffAnalyzerFactory(),
+            MandocAnalyzerFactory.DEFAULT_INSTANCE,
+            TroffAnalyzerFactory.DEFAULT_INSTANCE,
             new ELFAnalyzerFactory(),
             new JavaClassAnalyzerFactory(),
             new ImageAnalyzerFactory(),
 
@@ -216,10 +216,11 @@ protected FileAnalyzer newAnalyzer() {
     /**
      * Interface for matchers which map file contents to analyzer factories.
      */
-    protected interface Matcher {
+    public interface Matcher {
 
         /**
          * Get a value indicating if the magic is byte-precise.
+         * @return true if precise
          */
         default boolean getIsPreciseMagic() { return false; }
 
 
@@ -0,0 +1,213 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+ /*
+ * Copyright (c) 2017, Chris Fraire <[email protected]>.
+ */
+package org.opensolaris.opengrok.analysis.document;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
+import org.opensolaris.opengrok.analysis.FileAnalyzerFactory.Matcher;
+import org.opensolaris.opengrok.util.IOUtils;
+
+/**
+ * Represents an implementation of {@link Matcher} that detects a troff- or
+ * mandoc-like document by scanning its first lines for known line starters.
+ */
+public class DocumentMatcher implements Matcher {
+
+    /**
+     * Read limit given to {@code mark()}: 512K. {@code FIRST_LOOK_WIDTH} and
+     * {@code LINE_LIMIT} should apply beforehand. This value is "effectively
+     * unbounded" without being literally 2_147_483_647 -- as the other limits
+     * will apply first, and the {@link java.io.BufferedInputStream} will
+     * manage a reasonably-sized buffer.
+     */
+    private static final int MARK_READ_LIMIT = 1024 * 512;
+
+    private static final int LINE_LIMIT = 100; // max lines isMagic() reads
+
+    private static final int FIRST_LOOK_WIDTH = 300; // initial look-ahead size
+
+    private final FileAnalyzerFactory factory; // returned on a match
+
+    private final String[] lineStarters; // private copy of the ctor argument
+
+    /**
+     * Initializes an instance for the required parameters.
+     * @param factory required factory to return when matched
+     * @param lineStarters required, non-empty, null-free line starters to match
+     * @throws IllegalArgumentException thrown if any requirement above is unmet
+     */
+    public DocumentMatcher(FileAnalyzerFactory factory, String[] lineStarters) {
+        if (factory == null) {
+            throw  new IllegalArgumentException("`factory' is null");
+        }
+        if (lineStarters == null) {
+            throw  new IllegalArgumentException("`lineStarters' is null");
+        }
+        if (lineStarters.length < 1) {
+            throw  new IllegalArgumentException("`lineStarters' is empty");
+        }
+
+        String[] copyOf = Arrays.copyOf(lineStarters, lineStarters.length);
+        for (String elem : copyOf) {
+            if (elem == null) {
+                throw  new IllegalArgumentException(
+                    "`lineStarters' has null element");
+            }
+        }
+
+        this.factory = factory;
+        this.lineStarters = copyOf;
+    }
+
+    /**
+     * Try to match the file contents by looking for {@code lineStarters} in
+     * the first {@code LINE_LIMIT} lines while also affirming that the
+     * document starts with "." or "'" after a limited amount of whitespace.
+     * <p>
+     * Returns {@code null} at once if {@code in} does not support marking;
+     * otherwise the stream is reset to its marked position before returning.
+     * @param contents the first few bytes of a file
+     * @param in the input stream from which the full file can be read
+     * @return an analyzer factory if the contents match, or {@code null}
+     * otherwise
+     * @throws IOException in case of any read error
+     */
+    @Override
+    public FileAnalyzerFactory isMagic(byte[] contents, InputStream in)
+        throws IOException {
+
+        if (!in.markSupported()) return null; // cannot rewind; consume nothing
+        in.mark(MARK_READ_LIMIT);
+
+        // read encoding (UTF-8 if none is reported), and skip past any BOM
+        int bomLength = 0;
+        String encoding = IOUtils.findBOMEncoding(contents);
+        if (encoding == null) {
+            encoding = "UTF-8";
+        } else {
+            bomLength = IOUtils.skipForBOM(contents);
+            if (in.skip(bomLength) != bomLength) {
+                in.reset();
+                return null;
+            }
+        }
+
+        // affirm that a LF exists in a first block, then rewind to the mark
+        boolean foundLF = hasLineFeed(in, encoding);
+        in.reset();
+        if (!foundLF) return null;
+        if (bomLength > 0) in.skip(bomLength); // NOTE(review): result unchecked
+
+        // read line-by-line for a first few lines; the reader is deliberately
+        // left unclosed, presumably so that `in' stays open for the caller
+        BufferedReader rdr = new BufferedReader(new InputStreamReader(
+            in, encoding));
+        boolean foundContent = false;
+        int numFirstChars = 0;
+        int numLines = 0;
+        String line;
+        while ((line = rdr.readLine()) != null) {
+            for (int i = 0; i < lineStarters.length; ++i) {
+                if (line.startsWith(lineStarters[i])) {
+                    in.reset();
+                    return factory;
+                }
+            }
+            if (++numLines >= LINE_LIMIT) { // no starter within LINE_LIMIT lines
+                in.reset();
+                return null;
+            }
+
+            // If not yet `foundContent', then only a limited allowance is
+            // given until a sentinel '.' or '\'' must be seen after nothing
+            // else but whitespace.
+            if (!foundContent) {
+                for (int i = 0; i < line.length() && numFirstChars <
+                    FIRST_LOOK_WIDTH; ++i, ++numFirstChars) {
+                    char c = line.charAt(i);
+                    if (c == '.' || c == '\'') {
+                        foundContent = true;
+                        break;
+                    } else if (!Character.isWhitespace(c)) {
+                        in.reset();
+                        return null;
+                    }
+                }
+                if (!foundContent && numFirstChars >= FIRST_LOOK_WIDTH) {
+                    in.reset();
+                    return null;
+                }
+            }
+        }
+
+        in.reset();
+        return null;
+    }
+
+    /**
+     * Determines if the {@code in} stream has a line feed character within the
+     * first {@code FIRST_LOOK_WIDTH} characters.
+     * @param in the input stream, past any BOM (not {@code reset} after use)
+     * @param encoding the input stream charset
+     * @return true if a line feed '\n' was found
+     * @throws IOException thrown on any error in reading
+     */
+    private boolean hasLineFeed(InputStream in, String encoding)
+            throws IOException {
+        byte[] buf;
+        int nextra;
+        int noff;
+        switch (encoding) {
+            case "UTF-16LE": // two bytes per unit; LF byte first, then zero
+                buf = new byte[FIRST_LOOK_WIDTH * 2];
+                nextra = 1;
+                noff = 0;
+                break;
+            case "UTF-16BE": // two bytes per unit; zero byte first, then LF
+                buf = new byte[FIRST_LOOK_WIDTH * 2];
+                nextra = 1;
+                noff = 1;
+                break;
+            default: // every other encoding is scanned one byte at a time
+                buf = new byte[FIRST_LOOK_WIDTH];
+                nextra = 0;
+                noff = 0;
+                break;
+        }
+
+        int nread = in.read(buf); // one read() only; may be short, or -1 at EOF
+        for (int i = 0; i + nextra < nread; i += 1 + nextra) {
+            if (nextra > 0) {
+                if (buf[i + noff] == '\n' && buf[i + 1 - noff] == '\0') {
+                    return true;
+                }
+            } else {
+                if (buf[i] == '\n') return true;
+            }
+        }
+        return false;
+    }
+}