JAVA基于图片相似性算法实现以图搜图样例
JAVA基于图片相似性算法实现以图搜图样例
一、简述
本文主要介绍如何利用图片相似性算法，基于 LIRE 实现以图搜图（图片搜索）。
二、依赖
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>6.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>6.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>6.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-math3 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.sangupta/jopensurf -->
<dependency>
<groupId>com.sangupta</groupId>
<artifactId>jopensurf</artifactId>
<version>1.0.0</version>
</dependency>
</dependencies>
二、样例1
Data 目录下存放所有待建立索引的图片样本。
package com.dearcloud.imagesearch;
import net.semanticmetadata.lire.aggregators.AbstractAggregator;
import net.semanticmetadata.lire.aggregators.BOVW;
import net.semanticmetadata.lire.builders.DocumentBuilder;
import net.semanticmetadata.lire.imageanalysis.features.global.CEDD;
import net.semanticmetadata.lire.imageanalysis.features.local.opencvfeatures.CvSurfExtractor;
import net.semanticmetadata.lire.imageanalysis.features.local.simple.SimpleExtractor;
import net.semanticmetadata.lire.indexers.parallel.ParallelIndexer;
import net.semanticmetadata.lire.searchers.GenericFastImageSearcher;
import net.semanticmetadata.lire.searchers.ImageSearchHits;
import net.semanticmetadata.lire.searchers.ImageSearcher;
import net.semanticmetadata.lire.utils.FileUtils;
import net.semanticmetadata.lire.utils.ImageUtils;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
import javax.imageio.ImageIO;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * Sample that first indexes a directory of images with LIRE (SURF local features plus
 * CEDD combined with the SIMPLE keypoint detector) and then searches that index with a
 * single query image, writing an HTML report of the hits.
 */
public class IndexingAndSearchWithLocalFeatures {
    /**
     * Cluster count handed to the indexer and to the searcher. Both sides must use the
     * same value, so it is defined once here.
     */
    private static final int NUM_OF_CLUSTERS = 128;

    /**
     * Builds the index from the hard-coded sample folder and then runs one search.
     *
     * @param args unused
     * @throws IOException if the index or the query image cannot be read, or the report
     *                     cannot be copied
     */
    public static void main(String[] args) throws IOException {
        String indexPath = "D:\\以图搜图\\衬衣\\index";
        String imageData = "D:\\以图搜图\\衬衣\\Data";
        indexer(indexPath, imageData);
        String searchImage = "D:\\以图搜图\\衬衣\\search\\timg.jpg";
        String searchOutputFolder = "D:\\以图搜图\\衬衣\\output";
        search(indexPath, searchImage, searchOutputFolder);
    }

    /**
     * Indexing data using OpenCV and SURF as well as CEDD and SIMPLE.
     *
     * @param indexFolder    directory the index is written to
     * @param imageDirectory directory containing the images to index
     */
    private static void indexer(String indexFolder, String imageDirectory) {
        int numOfDocsForVocabulary = 500;
        Class<? extends AbstractAggregator> aggregator = BOVW.class;
        int[] numOfClusters = new int[]{NUM_OF_CLUSTERS};
        // Use ParallelIndexer to index all images from imageDirectory into indexFolder.
        ParallelIndexer indexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexFolder, imageDirectory, numOfClusters, numOfDocsForVocabulary, aggregator);
        indexer.setImagePreprocessor(image -> ImageUtils.createWorkingCopy(image));
        // Local feature.
        indexer.addExtractor(CvSurfExtractor.class);
        // SIMPLE: global CEDD feature combined with the CVSURF keypoint detector.
        indexer.addExtractor(CEDD.class, SimpleExtractor.KeypointDetector.CVSURF);
        indexer.run();
        System.out.println("Finished indexing.");
    }

    /**
     * Linear search on the indexed data. Prints every hit, generates an HTML report and
     * copies that report into {@code searchOutputFolder}.
     *
     * @param indexPath          directory of the index created by {@link #indexer}
     * @param searchFile         path of the query image
     * @param searchOutputFolder directory the HTML report is copied to
     * @throws IOException if the index cannot be opened, the query image cannot be read or
     *                     decoded, or the report cannot be copied
     */
    public static void search(String indexPath, String searchFile, String searchOutputFolder) throws IOException {
        // ImageIO.read returns null (instead of throwing) when no reader can decode the file.
        java.awt.image.BufferedImage queryImage = ImageIO.read(new File(searchFile));
        if (queryImage == null) {
            throw new IOException("Cannot decode search image: " + searchFile);
        }
        // try-with-resources so the directory and reader are released even when the search fails.
        try (FSDirectory directory = FSDirectory.open(Paths.get(indexPath));
             IndexReader reader = DirectoryReader.open(directory)) {
            // Make sure that this matches what was used for indexing (see indexer above).
            ImageSearcher imgSearcher = new GenericFastImageSearcher(1000, CEDD.class, SimpleExtractor.KeypointDetector.CVSURF, new BOVW(), NUM_OF_CLUSTERS, true, reader, indexPath + ".config");
            ImageSearchHits hits = imgSearcher.search(queryImage, reader);
            for (int i = 0; i < hits.length(); i++) {
                double score = hits.score(i);
                int documentId = hits.documentID(i);
                String imagePath = reader.document(documentId).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                System.out.printf("%.2f: (%d) %s\n", score, documentId, imagePath);
            }
            String outputHtmlReport = FileUtils.saveImageResultsToHtml("search-", hits, searchFile, reader);
            System.out.println("Report:" + outputHtmlReport);
            // FileUtils above is LIRE's; commons-io's class of the same name must be fully qualified.
            org.apache.commons.io.FileUtils.copyFile(org.apache.commons.io.FileUtils.getFile(outputHtmlReport), org.apache.commons.io.FileUtils.getFile(searchOutputFolder, outputHtmlReport));
        }
    }
}
三、样例2
1、Indexer
package com.dearcloud.imagesearch;
import net.semanticmetadata.lire.builders.GlobalDocumentBuilder;
import net.semanticmetadata.lire.imageanalysis.features.global.AutoColorCorrelogram;
import net.semanticmetadata.lire.imageanalysis.features.global.CEDD;
import net.semanticmetadata.lire.imageanalysis.features.global.FCTH;
import net.semanticmetadata.lire.utils.FileUtils;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Sample indexer: extracts the CEDD, FCTH and AutoColorCorrelogram global features from
 * every image below a directory and stores them in a Lucene index.
 */
public class LireIndexer {
    /**
     * Indexes the hard-coded sample image folder.
     *
     * @param args unused
     * @throws IOException if the image directory cannot be listed or the index cannot be written
     */
    public static void main(String[] args) throws IOException {
        String indexPath = "D:\\以图搜图\\全部\\index";
        String imageData = "D:\\以图搜图\\全部\\Data";
        index(indexPath, imageData);
    }

    /**
     * Indexes all images found in {@code imageDirectory} and its sub directories.
     * Images that cannot be read or processed are reported on stderr and skipped.
     *
     * @param indexFolder    directory the Lucene index is written to
     * @param imageDirectory root directory of the images to index
     * @throws IOException if the image directory cannot be listed or the index cannot be
     *                     opened or closed
     */
    private static void index(String indexFolder, String imageDirectory) throws IOException {
        // Getting all images from a directory and its sub directories.
        ArrayList<String> images = FileUtils.getAllImages(new File(imageDirectory), true);
        // NOTE(review): LIRE's getAllImages appears to return null when it finds no images;
        // guard against it rather than failing with a NullPointerException. Confirm for your version.
        if (images == null) {
            System.out.println("No images found in " + imageDirectory);
            return;
        }
        /*
            If you want to use DocValues, which makes linear search much faster, construct the
            builder with (false, true) instead. However, you then have to use a specific searcher!
         */
        GlobalDocumentBuilder globalDocumentBuilder = new GlobalDocumentBuilder(false, false);
        // Add the features we want to extract in a single run.
        globalDocumentBuilder.addExtractor(CEDD.class);
        globalDocumentBuilder.addExtractor(FCTH.class);
        globalDocumentBuilder.addExtractor(AutoColorCorrelogram.class);
        IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
        // try-with-resources closes the writer and directory even if indexing aborts.
        try (FSDirectory directory = FSDirectory.open(Paths.get(indexFolder));
             IndexWriter iw = new IndexWriter(directory, conf)) {
            // Iterating through images building the low level features.
            for (String imageFilePath : images) {
                System.out.println("Indexing " + imageFilePath);
                // Close the stream per image; otherwise one file handle leaks for every image.
                try (FileInputStream in = new FileInputStream(imageFilePath)) {
                    BufferedImage img = ImageIO.read(in);
                    if (img == null) {
                        // Not a decodable image; skip it.
                        continue;
                    }
                    Document document = globalDocumentBuilder.createDocument(img, imageFilePath);
                    iw.addDocument(document);
                } catch (Exception e) {
                    // Deliberately best-effort: one bad image must not stop the whole run.
                    System.err.println("Error reading image or indexing it.");
                    e.printStackTrace();
                }
            }
        }
        System.out.println("Finished indexing.");
    }
}
2、Searcher
package com.dearcloud.imagesearch;
import net.semanticmetadata.lire.builders.DocumentBuilder;
import net.semanticmetadata.lire.imageanalysis.features.global.CEDD;
import net.semanticmetadata.lire.searchers.GenericFastImageSearcher;
import net.semanticmetadata.lire.searchers.ImageSearchHits;
import net.semanticmetadata.lire.searchers.ImageSearcher;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * Sample searcher: looks up the images most similar to a query image in an index built
 * with the CEDD global feature and writes an HTML report of the hits.
 */
public class LireSearcher {
    /**
     * Runs one search against the hard-coded sample index.
     *
     * @param args unused
     * @throws IOException if the index or query image cannot be read, or the report cannot be copied
     */
    public static void main(String[] args) throws IOException {
        String indexPath = "D:\\以图搜图\\全部\\index";
        String searchImage = "D:\\以图搜图\\全部\\search\\timg.jpg";
        String searchOutputFolder = "D:\\以图搜图\\全部\\output";
        search(indexPath, searchImage, searchOutputFolder);
    }

    /**
     * Searches the index for images similar to {@code searchFile}, prints each hit with its
     * score, generates an HTML report and copies it into {@code searchOutputFolder}.
     *
     * @param indexFolder        directory of the Lucene index to search
     * @param searchFile         path of the query image
     * @param searchOutputFolder directory the HTML report is copied to
     * @throws IOException if the index cannot be opened, the query image cannot be read or
     *                     decoded, or the report cannot be copied
     */
    private static void search(String indexFolder, String searchFile, String searchOutputFolder) throws IOException {
        // ImageIO.read returns null (instead of throwing) when no reader can decode the file.
        BufferedImage img = ImageIO.read(FileUtils.getFile(searchFile));
        if (img == null) {
            throw new IOException("Cannot decode search image: " + searchFile);
        }
        // try-with-resources so the directory and reader are released even when the search fails.
        try (FSDirectory directory = FSDirectory.open(Paths.get(indexFolder));
             IndexReader reader = DirectoryReader.open(directory)) {
            /*
                Alternatives:
                - another image descriptor: new GenericFastImageSearcher(30, AutoColorCorrelogram.class)
                - if DocValues were used while indexing: new GenericDocValuesImageSearcher(30, CEDD.class, reader)
             */
            ImageSearcher searcher = new GenericFastImageSearcher(30, CEDD.class);
            // Searching with an image file; searching with a Lucene document instance is also
            // possible: searcher.search(reader.document(0), reader).
            ImageSearchHits hits = searcher.search(img, reader);
            for (int i = 0; i < hits.length(); i++) {
                String fileName = reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                System.out.println(hits.score(i) + ": \t" + fileName);
            }
            // LIRE's FileUtils must be fully qualified here; the imported FileUtils is commons-io's.
            String outputHtmlReport = net.semanticmetadata.lire.utils.FileUtils.saveImageResultsToHtml("search-", hits, searchFile, reader);
            System.out.println("Report:" + outputHtmlReport);
            FileUtils.copyFile(FileUtils.getFile(outputHtmlReport), FileUtils.getFile(searchOutputFolder, outputHtmlReport));
        }
    }
}
四、素材
五、LIRE支持的算法
JAVA基于图片相似性算法实现以图搜图样例
https://www.dearcloud.cn/2019/03/13/20200310-cnblogs-old-posts/20190313-JAVA基于图片相似性算法实现以图搜图样例/