liwh 1 month ago
Commit
4eebe58c9a

+ 15 - 0
Dockerfile

@@ -0,0 +1,15 @@
+FROM maven:3.9.2-eclipse-temurin-17-alpine AS build
+
+RUN apk add --no-cache unzip
+
+WORKDIR /app
+
+COPY . ./
+
+RUN mvn package \
+    && unzip target/releases/elasticsearch-analysis-dynamic-synonym-*.zip -d target/extracted
+
+FROM docker.elastic.co/elasticsearch/elasticsearch:8.7.1
+
+COPY --from=build --chown=elasticsearch:elasticsearch /app/target/extracted /usr/share/elasticsearch/plugins/dynamic-synonym/
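+
+# A minimal usage sketch (the image name and single-node settings below are
+# illustrative assumptions, not defined by this repository):
+#
+#   docker build -t dynamic-synonym-elasticsearch .
+#   docker run -p 9200:9200 -e discovery.type=single-node \
+#       -e xpack.security.enabled=false dynamic-synonym-elasticsearch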

+ 18 - 0
Makefile

@@ -0,0 +1,18 @@
+image_host ?=
+
+image_tag ?= 1.0.0
+
+.PHONY: help build_image push_image build_image_and_push
+
+help: ## Display this help message.
+	@echo "Please use \`make <target>\` where <target> is one of"
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; \
+	{printf "\033[36m%-40s\033[0m %s\n", $$1, $$2}'
+
+build_image:  ## Build docker image. Use `image_host` to override the default image host and `image_tag` to do the same for image tag.
+	docker build -t $(image_host)dynamic-synonym-elasticsearch:$(image_tag) -t $(image_host)dynamic-synonym-elasticsearch:latest .
+
+push_image:  ## Push docker image. Use `image_host` to override the default image host.
+	docker push -a $(image_host)dynamic-synonym-elasticsearch
+
+build_image_and_push: build_image push_image  ## Build and push docker image. Use `image_host` to override the default image host and `image_tag` to do the same for image tag.
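+
+# Example invocation (registry host and tag are placeholders). Note that
+# `image_host` is prepended verbatim, so it must include its trailing slash:
+#   make build_image_and_push image_host=registry.example.com/ image_tag=1.0.1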

+ 79 - 0
README.md

@@ -0,0 +1,79 @@
+# Dynamic Synonym for ElasticSearch
+
+The dynamic synonym plugin adds a synonym token filter that reloads the synonym file (a local or remote file) at a configurable interval (default: 60s).
+
+## Version
+
+| dynamic synonym version | ES version    |
+|-------------------------|---------------|
+| master                  | 8.x -> master |
+| 7.4.2                   | 7.4.2         |
+| 6.1.4                   | 6.1.4         |
+| 5.2.0                   | 5.2.0         |
+| 5.1.1                   | 5.1.1         |
+| 2.3.0                   | 2.3.0         |
+| 2.2.0                   | 2.2.0         |
+| 2.1.0                   | 2.1.0         |
+| 2.0.0                   | 2.0.0         |
+| 1.6.0                   | 1.6.X         |
+
+## Installation
+
+1. `mvn package`
+
+2. Copy and unzip `target/releases/elasticsearch-analysis-dynamic-synonym-{version}.zip` into `your-es-root/plugins/dynamic-synonym`
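+
+For example, against a local install (`$ES_HOME` below is a placeholder for your Elasticsearch root directory):
+
+```sh
+mvn package
+unzip target/releases/elasticsearch-analysis-dynamic-synonym-*.zip \
+    -d "$ES_HOME/plugins/dynamic-synonym"
+```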
+
+## Example
+
+```json
+{
+    "index" : {
+        "analysis" : {
+            "analyzer" : {
+                "synonym" : {
+                    "tokenizer" : "whitespace",
+                    "filter" : ["remote_synonym"]
+                }
+            },
+            "filter" : {
+                "remote_synonym" : {
+                    "type" : "dynamic_synonym",
+                    "synonyms_path" : "http://host:port/synonym.txt",
+                    "interval": 30
+                },
+                "local_synonym" : {
+                    "type" : "dynamic_synonym",
+                    "synonyms_path" : "synonym.txt"
+                },
+                "synonym_graph" : {
+                    "type" : "dynamic_synonym_graph",
+                    "synonyms_path" : "http://host:port/synonym.txt"
+                }
+            }
+        }
+    }
+}
+```
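+
+As a quick sketch, settings like the above can be applied when creating an index and verified with the `_analyze` API (host, index name, and sample text are illustrative):
+
+```sh
+# Create an index that uses the local_synonym filter from the example above:
+curl -X PUT 'http://localhost:9200/my-index' \
+    -H 'Content-Type: application/json' \
+    -d '{"settings": {"analysis": {"analyzer": {"synonym": {"tokenizer": "whitespace", "filter": ["local_synonym"]}}, "filter": {"local_synonym": {"type": "dynamic_synonym", "synonyms_path": "synonym.txt"}}}}}'
+
+# Verify that synonym tokens are emitted:
+curl -X GET 'http://localhost:9200/my-index/_analyze' \
+    -H 'Content-Type: application/json' \
+    -d '{"analyzer": "synonym", "text": "ipod"}'
+```
+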
+### Configuration
+
+`type`: `dynamic_synonym` or `dynamic_synonym_graph`, *mandatory*
+
+`synonyms_path`: A file path relative to the Elasticsearch config directory, or a URL, *mandatory*
+
+`interval`: Refresh interval in seconds for the synonym file, default: `60`, *optional*
+
+`ignore_case`: Ignore case when matching against the synonyms file, default: `false`, *optional*
+
+`expand`: Expand equivalent synonyms (map every term in a group to all terms rather than only the first), default: `true`, *optional*
+
+`lenient`: Ignore exceptions thrown while parsing synonym rules, default: `false`, *optional*
+
+`format`: Synonym file format, default: `''`, *optional*. For WordNet structure this can be set to `'wordnet'`
+
+
+## Update mechanism
+
+* Local files: Determined by the modification time of the file; if it has changed, the synonyms will be reloaded.
+* Remote files: Reads the `Last-Modified` and `ETag` HTTP headers. If one of these changes, the synonyms will be reloaded.
+
+**Note:** The synonym file must be a UTF-8 encoded text file.
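+
+For illustration, here is a small `synonym.txt` using the default rule syntax (the same comma/arrow rules as Elasticsearch's built-in synonym filter), plus a quick way to inspect the headers the plugin compares for remote files (the URL is a placeholder):
+
+```sh
+# One explicit mapping rule and one group of equivalent synonyms:
+cat > synonym.txt <<'EOF'
+i-pod, i pod => ipod
+universe, cosmos
+EOF
+
+# Remote files are re-fetched when Last-Modified or ETag changes:
+curl -I http://host:port/synonym.txt
+```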

+ 175 - 0
pom.xml

@@ -0,0 +1,175 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.bellszhu.elasticsearch</groupId>
+    <artifactId>elasticsearch-analysis-dynamic-synonym</artifactId>
+    <version>8.7.1</version>
+    <packaging>jar</packaging>
+    <name>elasticsearch-dynamic-synonym</name>
+    <description>Analysis-plugin for synonym</description>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <elasticsearch.version>${project.version}</elasticsearch.version>
+        <maven.compiler.target>17</maven.compiler.target>
+        <elasticsearch.plugin.name>analysis-dynamic-synonym</elasticsearch.plugin.name>
+        <elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml
+        </elasticsearch.assembly.descriptor>
+        <elasticsearch.plugin.classname>com.bellszhu.elasticsearch.plugin.DynamicSynonymPlugin
+        </elasticsearch.plugin.classname>
+        <elasticsearch.plugin.jvm>true</elasticsearch.plugin.jvm>
+    </properties>
+
+    <licenses>
+        <license>
+            <name>The Apache Software License, Version 2.0</name>
+            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+        </license>
+    </licenses>
+
+    <parent>
+        <groupId>org.sonatype.oss</groupId>
+        <artifactId>oss-parent</artifactId>
+        <version>9</version>
+    </parent>
+
+    <scm>
+        <connection>scm:git:git@github.com:bells/elasticsearch-analysis-dynamic-synonym.git</connection>
+        <developerConnection>scm:git:git@github.com:bells/elasticsearch-analysis-dynamic-synonym.git
+        </developerConnection>
+        <url>https://github.com/bells/elasticsearch-analysis-dynamic-synonym</url>
+    </scm>
+
+    <repositories>
+        <repository>
+            <id>central</id>
+            <url>https://repo1.maven.org/maven2</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
+        <repository>
+            <id>codelibs.org</id>
+            <name>CodeLibs Repository</name>
+            <url>https://maven.codelibs.org/</url>
+        </repository>
+    </repositories>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.elasticsearch</groupId>
+            <artifactId>elasticsearch</artifactId>
+            <version>${elasticsearch.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.codelibs.elasticsearch.module</groupId>
+            <artifactId>analysis-common</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.1</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents.client5</groupId>
+            <artifactId>httpclient5</artifactId>
+            <version>5.2.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-core</artifactId>
+            <version>2.17.1</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-api</artifactId>
+            <version>2.20.0</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.codelibs</groupId>
+            <artifactId>elasticsearch-cluster-runner</artifactId>
+            <version>${project.version}.0</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.11.0</version>
+                <configuration>
+                    <source>${maven.compiler.target}</source>
+                    <target>${maven.compiler.target}</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>3.1.0</version>
+                <configuration>
+                    <includes>
+                        <include>**/*Tests.java</include>
+                    </includes>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-enforcer-plugin</artifactId>
+                <version>3.3.0</version>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-resources-plugin</artifactId>
+                <version>3.3.1</version>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-source-plugin</artifactId>
+                <version>3.3.0</version>
+                <executions>
+                    <execution>
+                        <id>attach-sources</id>
+                        <goals>
+                            <goal>jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <appendAssemblyId>false</appendAssemblyId>
+                    <outputDirectory>${project.build.directory}/releases/</outputDirectory>
+                    <descriptors>
+                        <descriptor>${basedir}/src/main/assemblies/plugin.xml</descriptor>
+                    </descriptors>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>

+ 34 - 0
src/main/assemblies/plugin.xml

@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<assembly>
+    <id>-</id>
+    <formats>
+        <format>zip</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <files>
+        <file>
+            <source>${project.basedir}/src/main/resources/plugin-descriptor.properties</source>
+            <filtered>true</filtered>
+        </file>
+        <file>
+            <source>${project.basedir}/src/main/resources/plugin-security.policy</source>
+            <filtered>true</filtered>
+        </file>
+    </files>
+    <dependencySets>
+        <dependencySet>
+            <useProjectArtifact>true</useProjectArtifact>
+            <useTransitiveFiltering>true</useTransitiveFiltering>
+            <excludes>
+                <exclude>org.elasticsearch:elasticsearch</exclude>
+            </excludes>
+        </dependencySet>
+        <dependencySet>
+            <useProjectArtifact>true</useProjectArtifact>
+            <useTransitiveFiltering>true</useTransitiveFiltering>
+            <includes>
+                <include>org.apache.httpcomponents.client5:httpclient5</include>
+            </includes>
+        </dependencySet>
+    </dependencySets>
+</assembly>

+ 29 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/DynamicSynonymPlugin.java

@@ -0,0 +1,29 @@
+package com.bellszhu.elasticsearch.plugin;
+
+import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.elasticsearch.index.analysis.TokenFilterFactory;
+import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
+import org.elasticsearch.plugins.AnalysisPlugin;
+import org.elasticsearch.plugins.Plugin;
+
+import com.bellszhu.elasticsearch.plugin.synonym.analysis.DynamicSynonymGraphTokenFilterFactory;
+import com.bellszhu.elasticsearch.plugin.synonym.analysis.DynamicSynonymTokenFilterFactory;
+
+
+/**
+ * @author bellszhu
+ */
+public class DynamicSynonymPlugin extends Plugin implements AnalysisPlugin {
+
+    @Override
+    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
+        Map<String, AnalysisProvider<TokenFilterFactory>> extra = new HashMap<>();
+        extra.put("dynamic_synonym", requiresAnalysisSettings((indexSettings, env, name, settings) -> new DynamicSynonymTokenFilterFactory(indexSettings,env, name, settings)));
+        extra.put("dynamic_synonym_graph", requiresAnalysisSettings((indexSettings, env, name, settings) -> new DynamicSynonymGraphTokenFilterFactory(indexSettings,env, name, settings)));
+        return extra;
+    }
+}

+ 21 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/AbsSynonymFilter.java

@@ -0,0 +1,21 @@
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+
+/**
+ * @author bellszhu
+ */
+public abstract class AbsSynonymFilter extends TokenFilter {
+    /**
+     * Construct a token stream filtering the given input.
+     *
+     * @param input the input token stream to filter
+     */
+    protected AbsSynonymFilter(TokenStream input) {
+        super(input);
+    }
+
+    abstract void update(SynonymMap synonymMap);
+}

+ 632 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/DynamicSynonymFilter.java

@@ -0,0 +1,632 @@
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.fst.FST;
+
+/**
+ * Matches single or multi word synonyms in a token stream. This token stream
+ * cannot properly handle position increments != 1, ie, you should place this
+ * filter before filtering out stop words.
+ *
+ * <p>
+ * Note that with the current implementation, parsing is greedy, so whenever
+ * multiple parses would apply, the rule starting the earliest and parsing the
+ * most tokens wins. For example if you have these rules:
+ *
+ * <pre>
+ *   a -> x
+ *   a b -> y
+ *   b c d -> z
+ * </pre>
+ * <p>
+ * Then input <code>a b c d e</code> parses to <code>y b c
+ * d</code>, ie the 2nd rule "wins" because it started earliest and matched the
+ * most input tokens of other rules starting at that point.
+ * </p>
+ *
+ * <p>
+ * A future improvement to this filter could allow non-greedy parsing, such that
+ * the 3rd rule would win, and also separately allow multiple parses, such that
+ * all 3 rules would match, perhaps even on a rule by rule basis.
+ * </p>
+ *
+ * <p>
+ * <b>NOTE</b>: when a match occurs, the output tokens associated with the
+ * matching rule are "stacked" on top of the input stream (if the rule had
+ * <code>keepOrig=true</code>) and also on top of another matched rule's output
+ * tokens. This is not a correct solution, as really the output should be an
+ * arbitrary graph/lattice. For example, with the above match, you would expect
+ * an exact <code>PhraseQuery</code> <code>"y b
+ * c"</code> to match the parsed tokens, but it will fail to do so. This
+ * limitation is necessary because Lucene's TokenStream (and index) cannot yet
+ * represent an arbitrary graph.
+ * </p>
+ *
+ * <p>
+ * <b>NOTE</b>: If multiple incoming tokens arrive on the same position, only
+ * the first token at that position is used for parsing. Subsequent tokens
+ * simply pass through and are not parsed. A future improvement would be to
+ * allow these tokens to also be matched.
+ * </p>
+ */
+
+// TODO: maybe we should resolve token -> wordID then run
+// FST on wordIDs, for better perf?
+
+// TODO: a more efficient approach would be Aho/Corasick's
+// algorithm
+// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm
+// It improves over the current approach here
+// because it does not fully re-start matching at every
+// token. For example if one pattern is "a b c x"
+// and another is "b c d" and the input is "a b c d", on
+// trying to parse "a b c x" but failing when you got to x,
+// rather than starting over again you really should
+// immediately recognize that "b c d" matches at the next
+// input. I suspect this won't matter that much in
+// practice, but it's possible on some set of synonyms it
+// will. We'd have to modify Aho/Corasick to enforce our
+// conflict resolving (eg greedy matching) because that algo
+// finds all matches. This really amounts to adding a .*
+// closure to the FST and then determinizing it.
+//
+// Another possible solution is described at
+// http://www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps
+
+public final class DynamicSynonymFilter extends AbsSynonymFilter {
+
+    private static final String TYPE_SYNONYM = "SYNONYM";
+    private final boolean ignoreCase;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+    private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
+
+    // TODO: we should set PositionLengthAttr too...
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
+    private final BytesRef scratchBytes = new BytesRef();
+    private final CharsRefBuilder scratchChars = new CharsRefBuilder();
+    private SynonymMap synonyms;
+    private int rollBufferSize;
+
+    private int captureCount;
+    // How many future input tokens have already been matched
+    // to a synonym; because the matching is "greedy" we don't
+    // try to do any more matching for such tokens:
+    private int inputSkipCount;
+
+    // Rolling buffer, holding pending input tokens we had to
+    // clone because we needed to look ahead, indexed by
+    // position:
+    private PendingInput[] futureInputs;
+    // Rolling buffer, holding stack of pending synonym
+    // outputs, indexed by position:
+    private PendingOutputs[] futureOutputs;
+
+    // Where (in rolling buffers) to write next input saved state:
+    private int nextWrite;
+
+    // Where (in rolling buffers) to read next input saved state:
+    private int nextRead;
+
+    // True once we've read last token
+    private boolean finished;
+
+    private FST.Arc<BytesRef> scratchArc;
+
+    private FST<BytesRef> fst;
+
+    private FST.BytesReader fstReader;
+    /*
+     * This is the core of this TokenFilter: it locates the synonym matches and
+     * buffers up the results into futureInputs/Outputs.
+     *
+     * NOTE: this calls input.incrementToken and does not capture the state if
+     * no further tokens were checked. So caller must then forward state to our
+     * caller, or capture:
+     */
+    private int lastStartOffset;
+    private int lastEndOffset;
+
+    /**
+     * @param input      input tokenstream
+     * @param synonyms   synonym map
+     * @param ignoreCase case-folds input for matching with
+     *                   {@link Character#toLowerCase(int)}. Note, if you set this to
+     *                   true, it's your responsibility to lowercase the input entries
+     *                   when you create the {@link SynonymMap}
+     */
+    DynamicSynonymFilter(TokenStream input, SynonymMap synonyms,
+                         boolean ignoreCase) {
+        super(input);
+        this.ignoreCase = ignoreCase;
+        update(synonyms);
+    }
+
+    private void capture() {
+        captureCount++;
+        final PendingInput input = futureInputs[nextWrite];
+
+        input.state = captureState();
+        input.consumed = false;
+        input.term.copyChars(termAtt.buffer(), 0, termAtt.length());
+
+        nextWrite = rollIncr(nextWrite);
+
+        // Buffer head should never catch up to tail:
+        assert nextWrite != nextRead;
+    }
+
+    private void parse() throws IOException {
+
+        assert inputSkipCount == 0;
+
+        int curNextRead = nextRead;
+
+        // Holds the longest match we've seen so far:
+        BytesRef matchOutput = null;
+        int matchInputLength = 0;
+        int matchEndOffset = -1;
+
+        BytesRef pendingOutput = fst.outputs.getNoOutput();
+        fst.getFirstArc(scratchArc);
+
+        assert scratchArc.output() == fst.outputs.getNoOutput();
+
+        int tokenCount = 0;
+
+        byToken:
+        while (true) {
+
+            // Pull next token's chars:
+            final char[] buffer;
+            final int bufferLen;
+
+            int inputEndOffset = 0;
+
+            if (curNextRead == nextWrite) {
+
+                // We used up our lookahead buffer of input tokens
+                // -- pull next real input token:
+                if (finished) {
+                    break;
+                } else {
+                    assert futureInputs[nextWrite].consumed;
+                    // Not correct: a syn match whose output is longer
+                    // than its input can set future inputs keepOrig
+                    // to true:
+                    if (input.incrementToken()) {
+                        buffer = termAtt.buffer();
+                        bufferLen = termAtt.length();
+                        final PendingInput input = futureInputs[nextWrite];
+                        lastStartOffset = input.startOffset = offsetAtt
+                                .startOffset();
+                        lastEndOffset = input.endOffset = offsetAtt.endOffset();
+                        inputEndOffset = input.endOffset;
+                        if (nextRead != nextWrite) {
+                            capture();
+                        } else {
+                            input.consumed = false;
+                        }
+
+                    } else {
+                        // No more input tokens
+                        finished = true;
+                        break;
+                    }
+                }
+            } else {
+                // Still in our lookahead
+                buffer = futureInputs[curNextRead].term.chars();
+                bufferLen = futureInputs[curNextRead].term.length();
+                inputEndOffset = futureInputs[curNextRead].endOffset;
+            }
+
+            tokenCount++;
+
+            // Run each char in this token through the FST:
+            int bufUpto = 0;
+            while (bufUpto < bufferLen) {
+                final int codePoint = Character.codePointAt(buffer, bufUpto,
+                        bufferLen);
+                if (fst.findTargetArc(
+                        ignoreCase ? Character.toLowerCase(codePoint)
+                                : codePoint, scratchArc, scratchArc, fstReader) == null) {
+                    break byToken;
+                }
+
+                // Accum the output
+                pendingOutput = fst.outputs.add(pendingOutput,
+                        scratchArc.output());
+                bufUpto += Character.charCount(codePoint);
+            }
+
+            // OK, entire token matched; now see if this is a final
+            // state:
+            if (scratchArc.isFinal()) {
+                matchOutput = fst.outputs.add(pendingOutput,
+                        scratchArc.nextFinalOutput());
+                matchInputLength = tokenCount;
+                matchEndOffset = inputEndOffset;
+            }
+
+            // See if the FST wants to continue matching (ie, needs to
+            // see the next input token):
+            if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc,
+                    scratchArc, fstReader) == null) {
+                // No further rules can match here; we're done
+                // searching for matching rules starting at the
+                // current input position.
+                break;
+            } else {
+                // More matching is possible -- accum the output (if
+                // any) of the WORD_SEP arc:
+                pendingOutput = fst.outputs.add(pendingOutput,
+                        scratchArc.output());
+                if (nextRead == nextWrite) {
+                    capture();
+                }
+            }
+
+            curNextRead = rollIncr(curNextRead);
+        }
+
+        if (nextRead == nextWrite && !finished) {
+            nextWrite = rollIncr(nextWrite);
+        }
+
+        if (matchOutput != null) {
+            inputSkipCount = matchInputLength;
+            addOutput(matchOutput, matchInputLength, matchEndOffset);
+        } else if (nextRead != nextWrite) {
+            // Even though we had no match here, we set to 1
+            // because we need to skip current input token before
+            // trying to match again:
+            inputSkipCount = 1;
+        } else {
+            assert finished;
+        }
+
+    }
+
+    // Interleaves all output tokens onto the futureOutputs:
+    private void addOutput(BytesRef bytes, int matchInputLength,
+                           int matchEndOffset) {
+        bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);
+
+        final int code = bytesReader.readVInt();
+        final boolean keepOrig = (code & 0x1) == 0;
+        final int count = code >>> 1;
+        for (int outputIDX = 0; outputIDX < count; outputIDX++) {
+            synonyms.words.get(bytesReader.readVInt(), scratchBytes);
+            scratchChars.copyUTF8Bytes(scratchBytes);
+            int lastStart = 0;
+            final int chEnd = lastStart + scratchChars.length();
+            int outputUpto = nextRead;
+            for (int chIDX = lastStart; chIDX <= chEnd; chIDX++) {
+                if (chIDX == chEnd
+                        || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) {
+                    final int outputLen = chIDX - lastStart;
+                    // Caller is not allowed to have empty string in
+                    // the output:
+                    assert outputLen > 0 : "output contains empty string: "
+                            + scratchChars;
+                    final int endOffset;
+                    final int posLen;
+                    if (chIDX == chEnd && lastStart == 0) {
+                        // This rule had a single output token, so, we set
+                        // this output's endOffset to the current
+                        // endOffset (ie, endOffset of the last input
+                        // token it matched):
+                        endOffset = matchEndOffset;
+                        posLen = keepOrig ? matchInputLength : 1;
+                    } else {
+                        // This rule has more than one output token; we
+                        // can't pick any particular endOffset for this
+                        // case, so, we inherit the endOffset for the
+                        // input token which this output overlaps:
+                        endOffset = -1;
+                        posLen = 1;
+                    }
+                    futureOutputs[outputUpto].add(scratchChars.chars(),
+                            lastStart, outputLen, endOffset, posLen);
+                    lastStart = 1 + chIDX;
+                    outputUpto = rollIncr(outputUpto);
+                    assert futureOutputs[outputUpto].posIncr == 1 : "outputUpto="
+                            + outputUpto + " vs nextWrite=" + nextWrite;
+                }
+            }
+        }
+
+        int upto = nextRead;
+        for (int idx = 0; idx < matchInputLength; idx++) {
+            futureInputs[upto].keepOrig |= keepOrig;
+            futureInputs[upto].matched = true;
+            upto = rollIncr(upto);
+        }
+    }
+
+    // ++ mod rollBufferSize
+    private int rollIncr(int count) {
+        count++;
+        if (count == rollBufferSize) {
+            return 0;
+        } else {
+            return count;
+        }
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+
+        while (true) {
+
+            // First play back any buffered future inputs/outputs
+            // w/o running parsing again:
+            while (inputSkipCount != 0) {
+
+                // At each position, we first output the original
+                // token
+
+                // TODO: maybe just a PendingState class, holding
+                // both input & outputs?
+                final PendingInput input = futureInputs[nextRead];
+                final PendingOutputs outputs = futureOutputs[nextRead];
+
+                if (!input.consumed && (input.keepOrig || !input.matched)) {
+                    if (input.state != null) {
+                        // Return a previously saved token (because we
+                        // had to lookahead):
+                        restoreState(input.state);
+                    } else {
+                        // Pass-through case: return token we just pulled
+                        // but didn't capture:
+                        assert inputSkipCount == 1 : "inputSkipCount="
+                                + inputSkipCount + " nextRead=" + nextRead;
+                    }
+                    input.reset();
+                    if (outputs.count > 0) {
+                        outputs.posIncr = 0;
+                    } else {
+                        nextRead = rollIncr(nextRead);
+                        inputSkipCount--;
+                    }
+                    return true;
+                } else if (outputs.upto < outputs.count) {
+                    // Still have pending outputs to replay at this
+                    // position
+                    input.reset();
+                    final int posIncr = outputs.posIncr;
+                    final CharsRef output = outputs.pullNext();
+                    clearAttributes();
+                    termAtt.copyBuffer(output.chars, output.offset,
+                            output.length);
+                    typeAtt.setType(TYPE_SYNONYM);
+                    int endOffset = outputs.getLastEndOffset();
+                    if (endOffset == -1) {
+                        endOffset = input.endOffset;
+                    }
+                    offsetAtt.setOffset(input.startOffset, endOffset);
+                    posIncrAtt.setPositionIncrement(posIncr);
+                    posLenAtt.setPositionLength(outputs.getLastPosLength());
+                    if (outputs.count == 0) {
+                        // Done with the buffered input and all outputs at
+                        // this position
+                        nextRead = rollIncr(nextRead);
+                        inputSkipCount--;
+                    }
+                    return true;
+                } else {
+                    // Done with the buffered input and all outputs at
+                    // this position
+                    input.reset();
+                    nextRead = rollIncr(nextRead);
+                    inputSkipCount--;
+                }
+            }
+
+            if (finished && nextRead == nextWrite) {
+                // End case: if any output syns went beyond end of
+                // input stream, enumerate them now:
+                final PendingOutputs outputs = futureOutputs[nextRead];
+                if (outputs.upto < outputs.count) {
+                    final int posIncr = outputs.posIncr;
+                    final CharsRef output = outputs.pullNext();
+                    futureInputs[nextRead].reset();
+                    if (outputs.count == 0) {
+                        nextWrite = nextRead = rollIncr(nextRead);
+                    }
+                    clearAttributes();
+                    // Keep offset from last input token:
+                    offsetAtt.setOffset(lastStartOffset, lastEndOffset);
+                    termAtt.copyBuffer(output.chars, output.offset,
+                            output.length);
+                    typeAtt.setType(TYPE_SYNONYM);
+                    posIncrAtt.setPositionIncrement(posIncr);
+                    return true;
+                } else {
+                    return false;
+                }
+            }
+
+            // Find new synonym matches:
+            parse();
+        }
+    }
+
+    @Override
+    public void reset() throws IOException {
+
+        super.reset();
+        captureCount = 0;
+        finished = false;
+        inputSkipCount = 0;
+        nextRead = nextWrite = 0;
+
+        // In normal usage these resets would not be needed,
+        // since they reset-as-they-are-consumed, but the app
+        // may not consume all input tokens (or we might hit an
+        // exception), in which case we have leftover state
+        // here:
+        for (PendingInput input : futureInputs) {
+            input.reset();
+        }
+        for (PendingOutputs output : futureOutputs) {
+            output.reset();
+        }
+    }
+
+    void update(SynonymMap synonymMap) {
+        this.synonyms = synonymMap;
+        this.fst = synonyms.fst;
+        if (fst == null) {
+            throw new IllegalArgumentException("fst must be non-null");
+        }
+        this.fstReader = fst.getBytesReader();
+
+        // Must be 1+ so that when roll buffer is at full
+        // lookahead we can distinguish this full buffer from
+        // the empty buffer:
+        rollBufferSize = 1 + synonyms.maxHorizontalContext;
+
+        futureInputs = new PendingInput[rollBufferSize];
+        futureOutputs = new PendingOutputs[rollBufferSize];
+        for (int pos = 0; pos < rollBufferSize; pos++) {
+            futureInputs[pos] = new PendingInput();
+            futureOutputs[pos] = new PendingOutputs();
+        }
+
+        scratchArc = new FST.Arc<>();
+    }
+
+    // Hold all buffered (read ahead) stacked input tokens for
+    // a future position. When multiple tokens are at the
+    // same position, we only store (and match against) the
+    // term for the first token at the position, but capture
+    // state for (and enumerate) all other tokens at this
+    // position:
+    private static class PendingInput {
+        final CharsRefBuilder term = new CharsRefBuilder();
+        AttributeSource.State state;
+        boolean keepOrig;
+        boolean matched;
+        boolean consumed = true;
+        int startOffset;
+        int endOffset;
+
+        void reset() {
+            state = null;
+            consumed = true;
+            keepOrig = false;
+            matched = false;
+        }
+    }
+
+    // Holds pending output synonyms for one future position:
+    private static class PendingOutputs {
+        CharsRefBuilder[] outputs;
+        int[] endOffsets;
+        int[] posLengths;
+        int upto;
+        int count;
+        int posIncr = 1;
+        int lastEndOffset;
+        int lastPosLength;
+
+        PendingOutputs() {
+            outputs = new CharsRefBuilder[1];
+            endOffsets = new int[1];
+            posLengths = new int[1];
+        }
+
+        void reset() {
+            upto = count = 0;
+            posIncr = 1;
+        }
+
+        CharsRef pullNext() {
+            assert upto < count;
+            lastEndOffset = endOffsets[upto];
+            lastPosLength = posLengths[upto];
+            final CharsRefBuilder result = outputs[upto++];
+            posIncr = 0;
+            if (upto == count) {
+                reset();
+            }
+            return result.get();
+        }
+
+        int getLastEndOffset() {
+            return lastEndOffset;
+        }
+
+        int getLastPosLength() {
+            return lastPosLength;
+        }
+
+        void add(char[] output, int offset, int len, int endOffset,
+                 int posLength) {
+            if (count == outputs.length) {
+                outputs = Arrays.copyOf(outputs, ArrayUtil.oversize(1 + count,
+                        RamUsageEstimator.NUM_BYTES_OBJECT_REF));
+            }
+            if (count == endOffsets.length) {
+                final int[] next = new int[ArrayUtil.oversize(1 + count,
+                                                              Integer.BYTES)];
+                System.arraycopy(endOffsets, 0, next, 0, count);
+                endOffsets = next;
+            }
+            if (count == posLengths.length) {
+                final int[] next = new int[ArrayUtil.oversize(1 + count,
+                                                              Integer.BYTES)];
+                System.arraycopy(posLengths, 0, next, 0, count);
+                posLengths = next;
+            }
+            if (outputs[count] == null) {
+                outputs[count] = new CharsRefBuilder();
+            }
+            outputs[count].copyChars(output, offset, len);
+            // endOffset can be -1, in which case we should simply
+            // use the endOffset of the input token, or X >= 0, in
+            // which case we use X as the endOffset for this output
+            endOffsets[count] = endOffset;
+            posLengths[count] = posLength;
+            count++;
+        }
+    }
+
+}

+ 600 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/DynamicSynonymGraphFilter.java

@@ -0,0 +1,600 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.FlattenGraphFilter;
+import org.apache.lucene.analysis.synonym.SynonymFilter;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.RollingBuffer;
+import org.apache.lucene.util.fst.FST;
+
+// TODO: maybe we should resolve token -> wordID then run
+// FST on wordIDs, for better perf?
+ 
+// TODO: a more efficient approach would be Aho/Corasick's
+// algorithm
+// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm
+// It improves over the current approach here
+// because it does not fully re-start matching at every
+// token.  For example if one pattern is "a b c x"
+// and another is "b c d" and the input is "a b c d", on
+// trying to parse "a b c x" but failing when you got to x,
+// rather than starting over again you really should
+// immediately recognize that "b c d" matches at the next
+// input.  I suspect this won't matter that much in
+// practice, but it's possible on some set of synonyms it
+// will.  We'd have to modify Aho/Corasick to enforce our
+// conflict resolving (eg greedy matching) because that algo
+// finds all matches.  This really amounts to adding a .*
+// closure to the FST and then determinizing it.
+//
+// Another possible solution is described at http://www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps
+
+/** Applies single- or multi-token synonyms from a {@link SynonymMap}
+ *  to an incoming {@link TokenStream}, producing a fully correct graph
+ *  output.  This is a replacement for {@link SynonymFilter}, which produces
+ *  incorrect graphs for multi-token synonyms.
+ *
+ *  <p>However, if you use this during indexing, you must follow it with
+ *  {@link FlattenGraphFilter} to squash tokens on top of one another
+ *  like {@link SynonymFilter}, because the indexer can't directly
+ *  consume a graph.  To get fully correct positional queries when your
+ *  synonym replacements are multiple tokens, you should instead apply
+ *  synonyms using this {@code TokenFilter} at query time and translate
+ *  the resulting graph to a {@code TermAutomatonQuery} e.g. using
+ *  {@code TokenStreamToTermAutomatonQuery}.
+ *
+ *  <p><b>NOTE</b>: this cannot consume an incoming graph; results will
+ *  be undefined.
+ *
+ *  @lucene.experimental */
+
+public final class DynamicSynonymGraphFilter extends AbsSynonymFilter {
+
+  public static final String TYPE_SYNONYM = "SYNONYM";
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  private SynonymMap synonyms;
+  private final boolean ignoreCase;
+
+  private FST<BytesRef> fst;
+
+  private FST.BytesReader fstReader;
+  private FST.Arc<BytesRef> scratchArc;
+  private final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
+  private final BytesRef scratchBytes = new BytesRef();
+  private final CharsRefBuilder scratchChars = new CharsRefBuilder();
+  private final LinkedList<BufferedOutputToken> outputBuffer = new LinkedList<>();
+
+  private int nextNodeOut;
+  private int lastNodeOut;
+  private int maxLookaheadUsed;
+
+  // For testing:
+  private int captureCount;
+
+  private boolean liveToken;
+
+  // Start/end offset of the current match:
+  private int matchStartOffset;
+  private int matchEndOffset;
+
+  // True once the input TokenStream is exhausted:
+  private boolean finished;
+
+  private int lookaheadNextRead;
+  private int lookaheadNextWrite;
+
+  private RollingBuffer<BufferedInputToken> lookahead = new RollingBuffer<BufferedInputToken>() {
+    @Override
+    protected BufferedInputToken newInstance() {
+      return new BufferedInputToken();
+    }
+  };
+
+  static class BufferedInputToken implements RollingBuffer.Resettable {
+    final CharsRefBuilder term = new CharsRefBuilder();
+    State state;
+    int startOffset = -1;
+    int endOffset = -1;
+
+    @Override
+    public void reset() {
+      state = null;
+      term.clear();
+
+      // Intentionally invalid to ferret out bugs:
+      startOffset = -1;
+      endOffset = -1;
+    }
+  }
+
+  static class BufferedOutputToken {
+    final String term;
+
+    // Non-null if this was an incoming token:
+    final State state;
+
+    final int startNode;
+    final int endNode;
+
+    public BufferedOutputToken(State state, String term, int startNode, int endNode) {
+      this.state = state;
+      this.term = term;
+      this.startNode = startNode;
+      this.endNode = endNode;
+    }
+  }
+
+  /**
+   * Apply previously built synonyms to incoming tokens.
+   * @param input input tokenstream
+   * @param synonyms synonym map
+   * @param ignoreCase case-folds input for matching with {@link Character#toLowerCase(int)}.
+   *                   Note, if you set this to true, it's your responsibility to lowercase
+   *                   the input entries when you create the {@link SynonymMap}
+   */
+  public DynamicSynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
+    super(input);
+    update(synonyms);
+    this.ignoreCase = ignoreCase;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    //System.out.println("\nS: incrToken lastNodeOut=" + lastNodeOut + " nextNodeOut=" + nextNodeOut);
+
+    assert lastNodeOut <= nextNodeOut;
+      
+    if (outputBuffer.isEmpty() == false) {
+      // We still have pending outputs from a prior synonym match:
+      releaseBufferedToken();
+      //System.out.println("  syn: ret buffered=" + this);
+      assert liveToken == false;
+      return true;
+    }
+
+    // Try to parse a new synonym match at the current token:
+
+    if (parse()) {
+      // A new match was found:
+      releaseBufferedToken();
+      //System.out.println("  syn: after parse, ret buffered=" + this);
+      assert liveToken == false;
+      return true;
+    }
+
+    if (lookaheadNextRead == lookaheadNextWrite) {
+
+      // Fast path: parse pulled one token, but it didn't match
+      // the start for any synonym, so we now return it "live" w/o having
+      // cloned all of its atts:
+      if (finished) {
+        //System.out.println("  syn: ret END");
+        return false;
+      }
+
+      assert liveToken;
+      liveToken = false;
+
+      // NOTE: no need to change posInc since it's relative, i.e. whatever
+      // node our output is upto will just increase by the incoming posInc.
+      // We also don't need to change posLen, but only because we cannot
+      // consume a graph, so the incoming token can never span a future
+      // synonym match.
+
+    } else {
+      // We still have buffered lookahead tokens from a previous
+      // parse attempt that required lookahead; just replay them now:
+      //System.out.println("  restore buffer");
+      assert lookaheadNextRead < lookaheadNextWrite: "read=" + lookaheadNextRead + " write=" + lookaheadNextWrite;
+      BufferedInputToken token = lookahead.get(lookaheadNextRead);
+      lookaheadNextRead++;
+
+      restoreState(token.state);
+
+      lookahead.freeBefore(lookaheadNextRead);
+
+      //System.out.println("  after restore offset=" + offsetAtt.startOffset() + "-" + offsetAtt.endOffset());
+      assert liveToken == false;
+    }
+
+    lastNodeOut += posIncrAtt.getPositionIncrement();
+    nextNodeOut = lastNodeOut + posLenAtt.getPositionLength();
+
+    //System.out.println("  syn: ret lookahead=" + this);
+
+    return true;
+  }
+
+  private void releaseBufferedToken() throws IOException {
+    //System.out.println("  releaseBufferedToken");
+
+    BufferedOutputToken token = outputBuffer.pollFirst();
+
+    if (token.state != null) {
+      // This is an original input token (keepOrig=true case):
+      //System.out.println("    hasState");
+      restoreState(token.state);
+      //System.out.println("    startOffset=" + offsetAtt.startOffset() + " endOffset=" + offsetAtt.endOffset());
+    } else {
+      clearAttributes();
+      //System.out.println("    no state");
+      termAtt.append(token.term);
+
+      // We better have a match already:
+      assert matchStartOffset != -1;
+
+      offsetAtt.setOffset(matchStartOffset, matchEndOffset);
+      //System.out.println("    startOffset=" + matchStartOffset + " endOffset=" + matchEndOffset);
+      typeAtt.setType(TYPE_SYNONYM);
+    }
+
+    //System.out.println("    lastNodeOut=" + lastNodeOut);
+    //System.out.println("    term=" + termAtt);
+
+    posIncrAtt.setPositionIncrement(token.startNode - lastNodeOut);
+    lastNodeOut = token.startNode;
+    posLenAtt.setPositionLength(token.endNode - token.startNode);
+  }
+
+  /** Scans the next input token(s) to see if a synonym matches.  Returns true
+   *  if a match was found. */
+  private boolean parse() throws IOException {
+    // System.out.println(Thread.currentThread().getName() + ": S: parse: " + System.identityHashCode(this));
+
+    // Holds the longest match we've seen so far:
+    BytesRef matchOutput = null;
+    int matchInputLength = 0;
+
+    BytesRef pendingOutput = fst.outputs.getNoOutput();
+    fst.getFirstArc(scratchArc);
+
+    assert scratchArc.output() == fst.outputs.getNoOutput();
+
+    // How many tokens in the current match
+    int matchLength = 0;
+    boolean doFinalCapture = false;
+
+    int lookaheadUpto = lookaheadNextRead;
+    matchStartOffset = -1;
+
+    byToken:
+    while (true) {
+      //System.out.println("  cycle lookaheadUpto=" + lookaheadUpto + " maxPos=" + lookahead.getMaxPos());
+      
+      // Pull next token's chars:
+      final char[] buffer;
+      final int bufferLen;
+      final int inputEndOffset;
+
+      if (lookaheadUpto <= lookahead.getMaxPos()) {
+        // Still in our lookahead buffer
+        BufferedInputToken token = lookahead.get(lookaheadUpto);
+        lookaheadUpto++;
+        buffer = token.term.chars();
+        bufferLen = token.term.length();
+        inputEndOffset = token.endOffset;
+        //System.out.println("    use buffer now max=" + lookahead.getMaxPos());
+        if (matchStartOffset == -1) {
+          matchStartOffset = token.startOffset;
+        }
+      } else {
+
+        // We used up our lookahead buffer of input tokens
+        // -- pull next real input token:
+
+        assert finished || liveToken == false;
+
+        if (finished) {
+          //System.out.println("    break: finished");
+          break;
+        } else if (input.incrementToken()) {
+          //System.out.println("    input.incrToken");
+          liveToken = true;
+          buffer = termAtt.buffer();
+          bufferLen = termAtt.length();
+          if (matchStartOffset == -1) {
+            matchStartOffset = offsetAtt.startOffset();
+          }
+          inputEndOffset = offsetAtt.endOffset();
+
+          lookaheadUpto++;
+        } else {
+          // No more input tokens
+          finished = true;
+          //System.out.println("    break: now set finished");
+          break;
+        }
+      }
+
+      matchLength++;
+      //System.out.println("    cycle term=" + new String(buffer, 0, bufferLen));
+
+      // Run each char in this token through the FST:
+      int bufUpto = 0;
+      while (bufUpto < bufferLen) {
+        final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen);
+        if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null) {
+          break byToken;
+        }
+
+        // Accum the output
+        pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output());
+        bufUpto += Character.charCount(codePoint);
+      }
+
+      assert bufUpto == bufferLen;
+
+      // OK, entire token matched; now see if this is a final
+      // state in the FST (a match):
+      if (scratchArc.isFinal()) {
+        matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput());
+        matchInputLength = matchLength;
+        matchEndOffset = inputEndOffset;
+        //System.out.println("    ** match");
+      }
+
+      // See if the FST can continue matching (ie, needs to
+      // see the next input token):
+      if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null) {
+        // No further rules can match here; we're done
+        // searching for matching rules starting at the
+        // current input position.
+        break;
+      } else {
+        // More matching is possible -- accum the output (if
+        // any) of the WORD_SEP arc:
+        pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output());
+        doFinalCapture = true;
+        if (liveToken) {
+          capture();
+        }
+      }
+    }
+
+    if (doFinalCapture && liveToken && finished == false) {
+      // Must capture the final token if we captured any prior tokens:
+      capture();
+    }
+
+    if (matchOutput != null) {
+
+      if (liveToken) {
+        // Single input token synonym; we must buffer it now:
+        capture();
+      }
+
+      // There is a match!
+      bufferOutputTokens(matchOutput, matchInputLength);
+      lookaheadNextRead += matchInputLength;
+      //System.out.println("  precmatch; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos());
+      lookahead.freeBefore(lookaheadNextRead);
+      //System.out.println("  match; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos());
+      return true;
+    } else {
+      //System.out.println("  no match; lookaheadNextRead=" + lookaheadNextRead);
+      return false;
+    }
+
+    //System.out.println("  parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
+  }
+
+  /** Expands the output graph into the necessary tokens, adding
+   *  synonyms as side paths parallel to the input tokens, and
+   *  buffers them in the output token buffer. */
+  private void bufferOutputTokens(BytesRef bytes, int matchInputLength) {
+    bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);
+
+    final int code = bytesReader.readVInt();
+    final boolean keepOrig = (code & 0x1) == 0;
+    //System.out.println("  buffer: keepOrig=" + keepOrig + " matchInputLength=" + matchInputLength);
+
+    // How many nodes along all paths; we need this to assign the
+    // node ID for the final end node where all paths merge back:
+    int totalPathNodes;
+    if (keepOrig) {
+      assert matchInputLength > 0;
+      totalPathNodes = matchInputLength - 1;
+    } else {
+      totalPathNodes = 0;
+    }
+
+    // How many synonyms we will insert over this match:
+    final int count = code >>> 1;
+
+    // TODO: we could encode this instead into the FST:
+
+    // 1st pass: count how many new nodes we need
+    List<List<String>> paths = new ArrayList<>();
+    for(int outputIDX=0;outputIDX<count;outputIDX++) {
+      int wordID = bytesReader.readVInt();
+      synonyms.words.get(wordID, scratchBytes);
+      scratchChars.copyUTF8Bytes(scratchBytes);
+      int lastStart = 0;
+
+      List<String> path = new ArrayList<>();
+      paths.add(path);
+      int chEnd = scratchChars.length();
+      for(int chUpto=0; chUpto<=chEnd; chUpto++) {
+        if (chUpto == chEnd || scratchChars.charAt(chUpto) == SynonymMap.WORD_SEPARATOR) {
+          path.add(new String(scratchChars.chars(), lastStart, chUpto - lastStart));
+          lastStart = 1 + chUpto;
+        }
+      }
+
+      assert path.size() > 0;
+      totalPathNodes += path.size() - 1;
+    }
+    //System.out.println("  totalPathNodes=" + totalPathNodes);
+
+    // 2nd pass: buffer tokens for the graph fragment
+
+    // NOTE: totalPathNodes will be 0 in the case where the matched
+    // input is a single token and all outputs are also a single token
+
+    // We "spawn" a side-path for each of the outputs for this matched
+    // synonym, all ending back at this end node:
+
+    int startNode = nextNodeOut;
+
+    int endNode = startNode + totalPathNodes + 1;
+    //System.out.println("  " + paths.size() + " new side-paths");
+
+    // First, fanout all tokens departing start node for these new side paths:
+    int newNodeCount = 0;
+    for(List<String> path : paths) {
+      int pathEndNode;
+      //System.out.println("    path size=" + path.size());
+      if (path.size() == 1) {
+        // Single token output, so there are no intermediate nodes:
+        pathEndNode = endNode;
+      } else {
+        pathEndNode = nextNodeOut + newNodeCount + 1;
+        newNodeCount += path.size() - 1;
+      }
+      outputBuffer.add(new BufferedOutputToken(null, path.get(0), startNode, pathEndNode));
+    }
+
+    // We must do the original tokens last, else the offsets "go backwards":
+    if (keepOrig) {
+      BufferedInputToken token = lookahead.get(lookaheadNextRead);
+      int inputEndNode;
+      if (matchInputLength == 1) {
+        // Single token matched input, so there are no intermediate nodes:
+        inputEndNode = endNode;
+      } else {
+        inputEndNode = nextNodeOut + newNodeCount + 1;
+      }
+
+      //System.out.println("    keepOrig first token: " + token.term);
+
+      outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), startNode, inputEndNode));
+    }
+
+    nextNodeOut = endNode;
+
+    // Do full side-path for each syn output:
+    for(int pathID=0;pathID<paths.size();pathID++) {
+      List<String> path = paths.get(pathID);
+      if (path.size() > 1) {
+        int lastNode = outputBuffer.get(pathID).endNode;
+        for(int i=1;i<path.size()-1;i++) {
+          outputBuffer.add(new BufferedOutputToken(null, path.get(i), lastNode, lastNode+1));
+          lastNode++;
+        }
+        outputBuffer.add(new BufferedOutputToken(null, path.get(path.size()-1), lastNode, endNode));
+      }
+    }
+
+    if (keepOrig && matchInputLength > 1) {
+      // Do full "side path" with the original tokens:
+      int lastNode = outputBuffer.get(paths.size()).endNode;
+      for(int i=1;i<matchInputLength-1;i++) {
+        BufferedInputToken token = lookahead.get(lookaheadNextRead + i);
+        outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), lastNode, lastNode+1));
+        lastNode++;
+      }
+      BufferedInputToken token = lookahead.get(lookaheadNextRead + matchInputLength - 1);
+      outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), lastNode, endNode));
+    }
+
+    /*
+    System.out.println("  after buffer: " + outputBuffer.size() + " tokens:");
+    for(BufferedOutputToken token : outputBuffer) {
+      System.out.println("    tok: " + token.term + " startNode=" + token.startNode + " endNode=" + token.endNode);
+    }
+    */
+  }
+
+  /** Buffers the current input token into the lookahead buffer. */
+  private void capture() {
+    assert liveToken;
+    liveToken = false;
+    BufferedInputToken token = lookahead.get(lookaheadNextWrite);
+    lookaheadNextWrite++;
+
+    token.state = captureState();
+    token.startOffset = offsetAtt.startOffset();
+    token.endOffset = offsetAtt.endOffset();
+    assert token.term.length() == 0;
+    token.term.append(termAtt);
+
+    captureCount++;
+    maxLookaheadUsed = Math.max(maxLookaheadUsed, lookahead.getBufferSize());
+    //System.out.println("  maxLookaheadUsed=" + maxLookaheadUsed);
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    lookahead.reset();
+    lookaheadNextWrite = 0;
+    lookaheadNextRead = 0;
+    captureCount = 0;
+    lastNodeOut = -1;
+    nextNodeOut = 0;
+    matchStartOffset = -1;
+    matchEndOffset = -1;
+    finished = false;
+    liveToken = false;
+    outputBuffer.clear();
+    maxLookaheadUsed = 0;
+    //System.out.println("S: reset");
+  }
+
+  void update(SynonymMap synonymMap) {
+    this.synonyms = synonymMap;
+    this.fst = synonyms.fst;
+    if (fst == null) {
+      throw new IllegalArgumentException("fst must be non-null");
+    }
+    this.fstReader = fst.getBytesReader();
+    scratchArc = new FST.Arc<>();
+  }
+
+  // for testing
+  int getCaptureCount() {
+    return captureCount;
+  }
+
+  // for testing
+  int getMaxLookaheadUsed() {
+    return maxLookaheadUsed;
+  }
+}
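For orientation, here is a minimal sketch, not part of this commit, of driving the graph filter above directly. The rule, tokenizer choice, and `main` scaffolding are illustrative assumptions; `SynonymMap.Builder` and the token attributes are standard Lucene APIs:

```java
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class GraphFilterSketch {
    public static void main(String[] args) throws Exception {
        // One multi-token rule: "dns" additionally yields "domain name system"
        // (includeOrig=true, so the original token is kept):
        SynonymMap.Builder builder = new SynonymMap.Builder(true);
        CharsRefBuilder scratch = new CharsRefBuilder();
        builder.add(new CharsRef("dns"),
                SynonymMap.Builder.join(new String[]{"domain", "name", "system"}, scratch), true);

        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("dns lookup"));
        // The boolean mirrors the factory below (ignoreCase=false):
        TokenStream ts = new DynamicSynonymGraphFilter(tokenizer, builder.build(), false);

        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        PositionLengthAttribute posLen = ts.addAttribute(PositionLengthAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            // Expect the side-path tokens "domain", "name", "system" in parallel
            // with the original "dns", whose position length spans the side path:
            System.out.println(term + " (posLength=" + posLen.getPositionLength() + ")");
        }
        ts.end();
        ts.close();
    }
}
```

This is exactly the graph that `bufferOutputTokens` constructs: every side path leaves the shared start node and merges back at the shared end node.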

+ 66 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/DynamicSynonymGraphTokenFilterFactory.java

@@ -0,0 +1,66 @@
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.function.Function;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AnalysisMode;
+import org.elasticsearch.index.analysis.CharFilterFactory;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
+import org.elasticsearch.index.analysis.TokenizerFactory;
+
+public class DynamicSynonymGraphTokenFilterFactory extends DynamicSynonymTokenFilterFactory {
+
+    public DynamicSynonymGraphTokenFilterFactory(
+            IndexSettings indexSettings, Environment env, String name, Settings settings
+    ) throws IOException {
+        super(indexSettings, env, name, settings);
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        throw new IllegalStateException(
+                "Call createPerAnalyzerSynonymGraphFactory to specialize this factory for an analysis chain first"
+        );
+    }
+
+    @Override
+    public TokenFilterFactory getChainAwareTokenFilterFactory(
+            TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
+            List<TokenFilterFactory> previousTokenFilters,
+            Function<String, TokenFilterFactory> allFilters
+    ) {
+        final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
+        synonymMap = buildSynonyms(analyzer);
+        final String name = name();
+        return new TokenFilterFactory() {
+            @Override
+            public String name() {
+                return name;
+            }
+
+            @Override
+            public TokenStream create(TokenStream tokenStream) {
+                // a null fst means no synonym rules were loaded
+                if (synonymMap.fst == null) {
+                    return tokenStream;
+                }
+                DynamicSynonymGraphFilter dynamicSynonymGraphFilter = new DynamicSynonymGraphFilter(
+                        tokenStream, synonymMap, false);
+                dynamicSynonymFilters.put(dynamicSynonymGraphFilter, 1);
+
+                return dynamicSynonymGraphFilter;
+            }
+
+            @Override
+            public AnalysisMode getAnalysisMode() {
+                return analysisMode;
+            }
+        };
+    }
+}

+ 207 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/DynamicSynonymTokenFilterFactory.java

@@ -0,0 +1,207 @@
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.WeakHashMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.AnalysisMode;
+import org.elasticsearch.index.analysis.CharFilterFactory;
+import org.elasticsearch.index.analysis.CustomAnalyzer;
+import org.elasticsearch.index.analysis.TokenFilterFactory;
+import org.elasticsearch.index.analysis.TokenizerFactory;
+
+/**
+ * @author bellszhu
+ */
+public class DynamicSynonymTokenFilterFactory extends
+        AbstractTokenFilterFactory {
+
+    private static final Logger logger = LogManager.getLogger("dynamic-synonym");
+
+    /**
+     * Static id generator
+     */
+    private static final AtomicInteger id = new AtomicInteger(1);
+    private static final ScheduledExecutorService pool = Executors.newScheduledThreadPool(1, r -> {
+        Thread thread = new Thread(r);
+        thread.setName("monitor-synonym-Thread-" + id.getAndAdd(1));
+        return thread;
+    });
+    private volatile ScheduledFuture<?> scheduledFuture;
+
+    private final String location;
+    private final boolean expand;
+    private final boolean lenient;
+    private final String format;
+    private final int interval;
+    protected SynonymMap synonymMap;
+    protected Map<AbsSynonymFilter, Integer> dynamicSynonymFilters = new WeakHashMap<>();
+    protected final Environment environment;
+    protected final AnalysisMode analysisMode;
+
+    public DynamicSynonymTokenFilterFactory(
+            IndexSettings indexSettings,
+            Environment env,
+            String name,
+            Settings settings
+    ) throws IOException {
+        super(indexSettings, name, settings);
+
+        this.location = settings.get("synonyms_path");
+        if (this.location == null) {
+            throw new IllegalArgumentException(
+                    "dynamic synonym requires `synonyms_path` to be configured");
+        }
+        if (settings.get("ignore_case") != null) {
+        }
+
+        this.interval = settings.getAsInt("interval", 60);
+        this.expand = settings.getAsBoolean("expand", true);
+        this.lenient = settings.getAsBoolean("lenient", false);
+        this.format = settings.get("format", "");
+        boolean updateable = settings.getAsBoolean("updateable", false);
+        this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
+        this.environment = env;
+    }
+
+    @Override
+    public AnalysisMode getAnalysisMode() {
+        return this.analysisMode;
+    }
+
+    @Override
+    public TokenStream create(TokenStream tokenStream) {
+        throw new IllegalStateException(
+                "Call getChainAwareTokenFilterFactory to specialize this factory for an analysis chain first");
+    }
+
+    public TokenFilterFactory getChainAwareTokenFilterFactory(
+            TokenizerFactory tokenizer,
+            List<CharFilterFactory> charFilters,
+            List<TokenFilterFactory> previousTokenFilters,
+            Function<String, TokenFilterFactory> allFilters
+    ) {
+        final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
+        synonymMap = buildSynonyms(analyzer);
+        final String name = name();
+        return new TokenFilterFactory() {
+            @Override
+            public String name() {
+                return name;
+            }
+
+            @Override
+            public TokenStream create(TokenStream tokenStream) {
+                // a null fst means no synonym rules were loaded
+                if (synonymMap.fst == null) {
+                    return tokenStream;
+                }
+                DynamicSynonymFilter dynamicSynonymFilter = new DynamicSynonymFilter(tokenStream, synonymMap, false);
+                dynamicSynonymFilters.put(dynamicSynonymFilter, 1);
+
+                return dynamicSynonymFilter;
+            }
+
+            @Override
+            public TokenFilterFactory getSynonymFilter() {
+                // In order to allow chained synonym filters, we return IDENTITY here to
+                // ensure that synonyms don't get applied to the synonym map itself,
+                // which doesn't support stacked input tokens
+                return IDENTITY_FILTER;
+            }
+
+            @Override
+            public AnalysisMode getAnalysisMode() {
+                return analysisMode;
+            }
+        };
+    }
+
+    Analyzer buildSynonymAnalyzer(
+            TokenizerFactory tokenizer,
+            List<CharFilterFactory> charFilters,
+            List<TokenFilterFactory> tokenFilters
+    ) {
+        return new CustomAnalyzer(
+                tokenizer,
+                charFilters.toArray(new CharFilterFactory[0]),
+                tokenFilters.stream().map(TokenFilterFactory::getSynonymFilter).toArray(TokenFilterFactory[]::new)
+        );
+    }
+
+    SynonymMap buildSynonyms(Analyzer analyzer) {
+        try {
+            return getSynonymFile(analyzer).reloadSynonymMap();
+        } catch (Exception e) {
+            logger.error("failed to build synonyms", e);
+            throw new IllegalArgumentException("failed to build synonyms", e);
+        }
+    }
+
+    SynonymFile getSynonymFile(Analyzer analyzer) {
+        try {
+            SynonymFile synonymFile;
+            if (location.startsWith("http://") || location.startsWith("https://")) {
+                synonymFile = new RemoteSynonymFile(
+                        environment, analyzer, expand, lenient, format, location);
+            } else {
+                synonymFile = new LocalSynonymFile(
+                        environment, analyzer, expand, lenient, format, location);
+            }
+            if (scheduledFuture == null) {
+                scheduledFuture = pool.scheduleAtFixedRate(new Monitor(synonymFile),
+                                interval, interval, TimeUnit.SECONDS);
+            }
+            return synonymFile;
+        } catch (Exception e) {
+            logger.error("failed to get synonyms: " + location, e);
+            throw new IllegalArgumentException("failed to get synonyms : " + location, e);
+        }
+    }
+
+    public class Monitor implements Runnable {
+
+        private SynonymFile synonymFile;
+
+        Monitor(SynonymFile synonymFile) {
+            this.synonymFile = synonymFile;
+        }
+
+        @Override
+        public void run() {
+            try {
+                logger.info("===== Monitor =======");
+                if (synonymFile.isNeedReloadSynonymMap()) {
+                    synonymMap = synonymFile.reloadSynonymMap();
+                    for (AbsSynonymFilter dynamicSynonymFilter : dynamicSynonymFilters.keySet()) {
+                        dynamicSynonymFilter.update(synonymMap);
+                        logger.debug("success reload synonym");
+                    }
+                }
+            } catch (Exception e) {
+                logger.error("failed to reload synonyms", e);
+        }
+    }
+
+}
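Stepping back, this factory couples two mechanisms: one shared `ScheduledExecutorService` that periodically runs a `Monitor`, and a `WeakHashMap` whose keys are the live filter instances. Below is a stripped-down sketch of that hot-swap pattern; every name in it is invented for illustration, and it is not part of this commit:

```java
import java.util.Map;
import java.util.WeakHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

class HotSwapSketch<S> {

    interface Reloadable<T> {
        void update(T fresh);
    }

    // Weak keys: a filter that is garbage-collected together with its
    // analyzer silently drops out of the reload loop.
    private final Map<Reloadable<S>, Integer> live = new WeakHashMap<>();
    private final ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);

    void register(Reloadable<S> filter) {
        live.put(filter, 1);
    }

    void schedule(Supplier<S> loader, long intervalSeconds) {
        pool.scheduleAtFixedRate(() -> {
            S fresh = loader.get();            // e.g. rebuild the SynonymMap
            for (Reloadable<S> filter : live.keySet()) {
                filter.update(fresh);          // push it into every live filter
            }
        }, intervalSeconds, intervalSeconds, TimeUnit.SECONDS);
    }
}
```

The `Integer` value is a placeholder: `WeakHashMap` is used purely for its weakly-referenced key set, so filters never need explicit deregistration.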

+ 164 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/LocalSynonymFile.java

@@ -0,0 +1,164 @@
+/**
+ *
+ */
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.elasticsearch.env.Environment;
+
+
+/**
+ * @author bellszhu
+ */
+public class LocalSynonymFile implements SynonymFile {
+
+    private static final Logger logger = LogManager.getLogger("dynamic-synonym");
+
+    private String format;
+
+    private boolean expand;
+
+    private boolean lenient;
+
+    private Analyzer analyzer;
+
+    private Environment env;
+
+    /**
+     * Local file path relative to the config directory
+     */
+    private String location;
+
+    private Path synonymFilePath;
+
+    private long lastModified;
+
+    LocalSynonymFile(Environment env, Analyzer analyzer, boolean expand, boolean lenient,
+                     String format, String location) {
+        this.analyzer = analyzer;
+        this.expand = expand;
+        this.lenient = lenient;
+        this.format = format;
+        this.env = env;
+        this.location = location;
+
+        this.synonymFilePath = deepSearch();
+        isNeedReloadSynonymMap();
+    }
+
+    @Override
+    public SynonymMap reloadSynonymMap() {
+        try {
+            logger.debug("start reload local synonym from {}.", synonymFilePath);
+            Reader rulesReader = getReader();
+            SynonymMap.Builder parser = RemoteSynonymFile.getSynonymParser(
+                    rulesReader, format, expand, lenient, analyzer);
+            return parser.build();
+        } catch (Exception e) {
+            logger.error("reload local synonym {} error!", synonymFilePath, e);
+            throw new IllegalArgumentException(
+                    "could not reload local synonyms file to build synonyms", e);
+        }
+    }
+
+    /*
+     * The file may be deleted while we are reading it (a low-probability
+     * race); in that case an empty rule set is returned.
+     */
+    public Reader getReader() {
+        if (!Files.exists(synonymFilePath)) {
+            return new StringReader("");
+        }
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(
+                synonymFilePath.toUri().toURL().openStream(), StandardCharsets.UTF_8))) {
+            StringBuilder sb = new StringBuilder();
+            String line;
+            while ((line = br.readLine()) != null) {
+                // logger.info("reload local synonym: {}", line);
+                sb.append(line).append(System.getProperty("line.separator"));
+            }
+            return new StringReader(sb.toString());
+        } catch (IOException e) {
+            logger.error("failed to read local synonyms file {}", location, e);
+            // Fix #54: return an empty rule set if the synonyms file has been deleted.
+            return new StringReader("");
+        }
+    }
+
+    @Override
+    public boolean isNeedReloadSynonymMap() {
+        try {
+            /*
+            If the file does not exist, it will be scanned every time
+              until the file is restored.
+             */
+            if (!Files.exists(synonymFilePath) && !Files.exists(synonymFilePath = deepSearch())) {
+                return false;
+            }
+            File synonymFile = synonymFilePath.toFile();
+            if (synonymFile.exists()
+                    && lastModified < synonymFile.lastModified()) {
+                lastModified = synonymFile.lastModified();
+                return true;
+            }
+        } catch (Exception e) {
+            logger.error("check need reload local synonym {} error!", location, e);
+        }
+
+        return false;
+    }
+
+    /**
+     * Deep-search for the synonym file.
+     * Step 1. Try the 'synonyms_path' parameter as an absolute path
+     * Step 2. Resolve it against the ES config path
+     * Step 3. Try it as a path relative to the classpath
+     * <p>
+     * Only step 2 is implemented for now (see the TODO below).
+     * Override this method to extend the search path.
+     *
+     * @return the synonym file path.
+     */
+    protected Path deepSearch() {
+        // TODO: steps 1 and 3 trip java.io.FilePermission checks under the
+        // plugin security policy (plugin-security.policy only grants socket
+        // permissions), so only the config directory is searched for now.
+        return env.configFile().resolve(location);
+    }
+}

+ 240 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/RemoteSynonymFile.java

@@ -0,0 +1,240 @@
+/**
+ *
+ */
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.classic.methods.HttpHead;
+import org.apache.hc.client5.http.classic.methods.HttpUriRequest;
+import org.apache.hc.client5.http.config.RequestConfig;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.core5.http.message.StatusLine;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.elasticsearch.analysis.common.ESSolrSynonymParser;
+import org.elasticsearch.analysis.common.ESWordnetSynonymParser;
+import org.elasticsearch.env.Environment;
+
+/**
+ * @author bellszhu
+ */
+public class RemoteSynonymFile implements SynonymFile {
+
+    private static final String LAST_MODIFIED_HEADER = "Last-Modified";
+    private static final String ETAG_HEADER = "ETag";
+
+    private static final Logger logger = LogManager.getLogger("dynamic-synonym");
+
+    private CloseableHttpClient httpclient;
+
+    private String format;
+
+    private boolean expand;
+
+    private boolean lenient;
+
+    private Analyzer analyzer;
+
+    private Environment env;
+
+    /**
+     * Remote URL address
+     */
+    private String location;
+
+    private String lastModified;
+
+    private String eTags;
+
+    RemoteSynonymFile(Environment env, Analyzer analyzer,
+                      boolean expand, boolean lenient, String format, String location) {
+        this.analyzer = analyzer;
+        this.expand = expand;
+        this.lenient = lenient;
+        this.format = format;
+        this.env = env;
+        this.location = location;
+
+        this.httpclient = HttpClients.createDefault();
+
+        isNeedReloadSynonymMap();
+    }
+
+    static SynonymMap.Builder getSynonymParser(
+            Reader rulesReader, String format, boolean expand, boolean lenient, Analyzer analyzer
+    ) throws IOException, ParseException {
+        SynonymMap.Builder parser;
+        if ("wordnet".equalsIgnoreCase(format)) {
+            parser = new ESWordnetSynonymParser(true, expand, lenient, analyzer);
+            ((ESWordnetSynonymParser) parser).parse(rulesReader);
+        } else {
+            parser = new ESSolrSynonymParser(true, expand, lenient, analyzer);
+            ((ESSolrSynonymParser) parser).parse(rulesReader);
+        }
+        return parser;
+    }
+
+    @Override
+    public SynonymMap reloadSynonymMap() {
+        Reader rulesReader = null;
+        try {
+            logger.debug("start reload remote synonym from {}.", location);
+            rulesReader = getReader();
+            SynonymMap.Builder parser = getSynonymParser(rulesReader, format, expand, lenient, analyzer);
+            return parser.build();
+        } catch (Exception e) {
+            logger.error("reload remote synonym {} error!", location, e);
+            throw new IllegalArgumentException(
+                    "could not reload remote synonyms file to build synonyms",
+                    e);
+        } finally {
+            if (rulesReader != null) {
+                try {
+                    rulesReader.close();
+                } catch (Exception e) {
+                    logger.error("failed to close rulesReader", e);
+                }
+            }
+        }
+    }
+
+    private CloseableHttpResponse executeHttpRequest(HttpUriRequest httpUriRequest) {
+        try {
+            return httpclient.execute(httpUriRequest);
+        } catch (IOException e) {
+            logger.error("Unable to execute HTTP request.", e);
+        }
+        return null;
+    }
+
+    /**
+     * Download custom terms from a remote server
+     */
+    public Reader getReader() {
+        Reader reader;
+        RequestConfig rc = RequestConfig.custom()
+                .setConnectionRequestTimeout(10 * 1000, TimeUnit.MILLISECONDS)
+                .setResponseTimeout(60 * 1000, TimeUnit.MILLISECONDS)
+                .build();
+        CloseableHttpResponse response = null;
+        BufferedReader br = null;
+        HttpGet get = new HttpGet(location);
+        get.setConfig(rc);
+        try {
+            response = executeHttpRequest(get);
+            assert response != null;
+            StatusLine statusLine = new StatusLine(response);
+            if (statusLine.getStatusCode() == 200) {
+                String charset = "UTF-8"; // 获取编码,默认为utf-8
+                if (response.getEntity().getContentType().contains("charset=")) {
+                    String contentType = response.getEntity().getContentType();
+                    charset = contentType.substring(contentType
+                            .lastIndexOf('=') + 1);
+                }
+
+                br = new BufferedReader(new InputStreamReader(response
+                        .getEntity().getContent(), charset));
+                StringBuilder sb = new StringBuilder();
+                String line;
+                while ((line = br.readLine()) != null) {
+                    logger.debug("reload remote synonym: {}", line);
+                    sb.append(line)
+                            .append(System.getProperty("line.separator"));
+                }
+                reader = new StringReader(sb.toString());
+            } else reader = new StringReader("");
+        } catch (Exception e) {
+            logger.error("failed to read remote synonyms from {}", location, e);
+            // Fix #54: fall back to a harmless dummy rule so that an unreachable
+            // remote file does not break the analyzer.
+            reader = new StringReader("1=>1");
+        } finally {
+            try {
+                if (br != null) {
+                    br.close();
+                }
+            } catch (IOException e) {
+                logger.error("failed to close bufferedReader", e);
+            }
+            try {
+                if (response != null) {
+                    response.close();
+                }
+            } catch (IOException e) {
+                logger.error("failed to close http response", e);
+            }
+        }
+        return reader;
+    }
+
+    @Override
+    public boolean isNeedReloadSynonymMap() {
+        logger.info("==== isNeedReloadSynonymMap ====");
+        RequestConfig rc = RequestConfig.custom()
+                .setConnectionRequestTimeout(10 * 1000, TimeUnit.MILLISECONDS)
+                .setResponseTimeout(15 * 1000, TimeUnit.MILLISECONDS)
+                .build();
+        HttpHead head = new HttpHead(location);
+        head.setConfig(rc);
+
+        // Set conditional request headers so an unchanged file answers 304:
+        if (lastModified != null) {
+            head.setHeader("If-Modified-Since", lastModified);
+        }
+        if (eTags != null) {
+            head.setHeader("If-None-Match", eTags);
+        }
+
+        CloseableHttpResponse response = null;
+        try {
+            response = executeHttpRequest(head);
+            assert response != null;
+            StatusLine statusLine = new StatusLine(response);
+            if (statusLine.getStatusCode() == 200) { // only act on HTTP 200
+                // Read both validators null-safely first; a missing header must not NPE:
+                String newLastModified = response.getLastHeader(LAST_MODIFIED_HEADER) == null
+                        ? null : response.getLastHeader(LAST_MODIFIED_HEADER).getValue();
+                String newETags = response.getLastHeader(ETAG_HEADER) == null
+                        ? null : response.getLastHeader(ETAG_HEADER).getValue();
+                if (!Objects.equals(newLastModified, lastModified)
+                        || !Objects.equals(newETags, eTags)) {
+                    lastModified = newLastModified;
+                    eTags = newETags;
+                    return true;
+                }
+            } else if (statusLine.getStatusCode() == 304) {
+                return false;
+            } else {
+                logger.info("remote synonym {} return bad code {}", location,
+                        statusLine.getStatusCode());
+            }
+        } catch (Exception e) {
+            logger.error("failed to check remote synonyms {} for changes", location, e);
+            return false;
+        } finally {
+            try {
+                if (response != null) {
+                    response.close();
+                }
+            } catch (IOException e) {
+                logger.error("failed to close http response", e);
+            }
+        }
+        return false;
+    }
+}
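`getSynonymParser` dispatches on `format`: `wordnet` selects `ESWordnetSynonymParser`, anything else falls through to the Solr parser. The sketch below, not part of this commit, feeds Solr-format rules through it; it assumes a class in the same package (the helper is package-private), and the rules and the whitespace analyzer are made up for illustration:

```java
import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;

public class ParserSketch {
    public static void main(String[] args) throws Exception {
        // Solr-format rules; full-line comments start with '#':
        String rules = String.join("\n",
                "# explicit mapping: the left-hand side is rewritten to the right-hand side",
                "i-pod, i pod => ipod",
                "# equivalence set: with expand=true every term maps to all the others",
                "universe, cosmos");
        // An empty format string selects the Solr parser (see getSynonymParser):
        SynonymMap map = RemoteSynonymFile.getSynonymParser(
                new StringReader(rules), "", true, false, new WhitespaceAnalyzer()).build();
        System.out.println("synonyms loaded, fst built: " + (map.fst != null));
    }
}
```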

+ 21 - 0
src/main/java/com/bellszhu/elasticsearch/plugin/synonym/analysis/SynonymFile.java

@@ -0,0 +1,21 @@
+/**
+ *
+ */
+package com.bellszhu.elasticsearch.plugin.synonym.analysis;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.synonym.SynonymMap;
+
+/**
+ * @author bellszhu
+ */
+public interface SynonymFile {
+
+    SynonymMap reloadSynonymMap();
+
+    boolean isNeedReloadSynonymMap();
+
+    Reader getReader();
+
+}
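The interface is deliberately small, which keeps further backends easy to add. A hypothetical third implementation, not part of this commit, that holds its rules in memory and follows the same contract (cheap change check in `isNeedReloadSynonymMap()`, full rebuild in `reloadSynonymMap()`); it assumes the same package so it can reuse the package-private parser helper:

```java
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;

class InMemorySynonymFile implements SynonymFile {

    private final Analyzer analyzer;
    private volatile String rules = "";
    private volatile boolean dirty = false;

    InMemorySynonymFile(Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    // Callers push new rules here; the Monitor picks them up on its next tick.
    void setRules(String newRules) {
        this.rules = newRules;
        this.dirty = true;
    }

    @Override
    public Reader getReader() {
        return new StringReader(rules);
    }

    @Override
    public boolean isNeedReloadSynonymMap() {
        // Cheap change check, mirroring lastModified/ETag in the file-backed
        // implementations (not strictly atomic; good enough for a sketch):
        boolean wasDirty = dirty;
        dirty = false;
        return wasDirty;
    }

    @Override
    public SynonymMap reloadSynonymMap() {
        try {
            // "" selects the Solr format; expand=true, lenient=false:
            return RemoteSynonymFile.getSynonymParser(getReader(), "", true, false, analyzer).build();
        } catch (Exception e) {
            throw new IllegalArgumentException("failed to build in-memory synonyms", e);
        }
    }
}
```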

+ 80 - 0
src/main/resources/plugin-descriptor.properties

@@ -0,0 +1,80 @@
+# Elasticsearch plugin descriptor file
+# This file must exist as 'plugin-descriptor.properties' at
+# the root directory of all plugins.
+#
+# A plugin can be 'site', 'jvm', or both.
+#
+### example site plugin for "foo":
+#
+# foo.zip <-- zip file for the plugin, with this structure:
+#   _site/ <-- the contents that will be served
+#   plugin-descriptor.properties <-- example contents below:
+#
+# site=true
+# description=My cool plugin
+# version=1.0
+#
+### example jvm plugin for "foo"
+#
+# foo.zip <-- zip file for the plugin, with this structure:
+#   <arbitrary name1>.jar <-- classes, resources, dependencies
+#   <arbitrary nameN>.jar <-- any number of jars
+#   plugin-descriptor.properties <-- example contents below:
+#
+# jvm=true
+# classname=foo.bar.BazPlugin
+# description=My cool plugin
+# version=2.0.0-rc1
+# elasticsearch.version=2.0
+# java.version=1.7
+#
+### mandatory elements for all plugins:
+#
+# 'description': simple summary of the plugin
+description=${project.description}
+#
+# 'version': plugin's version
+version=${project.version}
+#
+# 'name': the plugin name
+name=${elasticsearch.plugin.name}
+
+### mandatory elements for site plugins:
+#
+# 'site': set to true to indicate contents of the _site/
+#  directory in the root of the plugin should be served.
+# site=${elasticsearch.plugin.site}
+#
+### mandatory elements for jvm plugins :
+#
+# 'jvm': true if the 'classname' class should be loaded
+#  from jar files in the root directory of the plugin.
+#  Note that only jar files in the root directory are
+#  added to the classpath for the plugin! If you need
+#  other resources, package them into a resources jar.
+# jvm=${elasticsearch.plugin.jvm}
+#
+# 'classname': the name of the class to load, fully-qualified.
+classname=${elasticsearch.plugin.classname}
+#
+# 'java.version' version of java the code is built against
+# use the system property java.specification.version
+# version string must be a sequence of nonnegative decimal integers
+# separated by "."'s and may have leading zeros
+java.version=${maven.compiler.target}
+#
+# 'elasticsearch.version' version of elasticsearch compiled against
+# You will have to release a new version of the plugin for each new
+# elasticsearch release. This version is checked when the plugin
+# is loaded so Elasticsearch will refuse to start in the presence of
+# plugins with the incorrect elasticsearch.version.
+elasticsearch.version=${elasticsearch.version}
+#
+### deprecated elements for jvm plugins :
+#
+# 'isolated': true if the plugin should have its own classloader.
+# passing false is deprecated, and only intended to support plugins 
+# that have hard dependencies against each other. If this is
+# not specified, then the plugin is isolated by default.
+# isolated=${elasticsearch.plugin.isolated}
+#

+ 3 - 0
src/main/resources/plugin-security.policy

@@ -0,0 +1,3 @@
+grant {
+  permission java.net.SocketPermission "*", "connect,resolve";
+};

The diff for this file is not shown because it is too large
+ 59702 - 0
ti_synonym.txt