public class DBInterface
extends java.lang.Object
Constructor and Description |
---|
DBInterface(java.lang.String databaseName)
Constructor that connects to a database
|
Modifier and Type | Method and Description |
---|---|
void |
addDictionaryToTrial(int trialID,
int dictionaryID)
Adds this dictionary to this trial in this database
|
int |
addFile(java.io.File file)
Provides the ID for the file if it already exists in the file table of this database or creates a new ID and adds it
|
void |
addFileMatch(java.io.File source,
java.io.File target,
double minIDF,
double maxIDF)
Checks if files match and if so adds files and their normalized min and max IDF to the file_match table
|
void |
addManglerToTrial(int trialID,
int manglerID)
Adds the mangler ID to the trial in this database
|
void |
addNullManglerToTrial(int trialID)
Adds a "null" mangler to position 1 of the trial_mangler table
|
void |
addToFingerprintMatch(int trialID,
int sourceFingerprintID,
int targetFingerprintID,
boolean doesMatch,
boolean shouldMatch,
int score)
Adds fingerprint match information if the match is not already recorded in the fingerprint_match table of this
database
|
int |
addTokenizer(java.lang.String tokenizer)
Adds a tokenizer to the tokenizer table in this database
|
void |
addTrialResults(int trialID,
int manglerID,
int truePositive,
int trueNegative,
int falsePositive,
int falseNegative,
double precision,
double recall,
double fscore)
Adds the results from a trial to this database
|
void |
addTrialToExperiment(int experimentID,
int trialID)
Adds the trial ID to the experiment in this database
|
void |
bulkFileAdd(java.io.File file)
Adds a file to the file table of this database
|
void |
bulkFileMatchAdd(java.io.File file,
java.io.File match,
double minIDF,
double maxIDF)
Adds file matches and their normalized min and max IDF to the file_match table
|
void |
changeRandomSeed(int trialID,
long randomSeed)
Updates this database with the random seed to use for this trial
|
void |
clearExperimentResults(int experimentID)
Removes the experiment results from experiment_results table
|
void |
close()
Closes this database
|
int |
countResults(int trialID,
int sourceMangler,
int targetMangler,
boolean shouldMatch,
boolean doesMatch)
Counts the number of entries with the given manglers and matching results in the fingerprint_match table for a
trial
|
void |
executeFileMatch()
Adds file matches to database
|
void |
extractConfigFile(int experimentID,
java.lang.String config_file)
Extracts a configuration file from an experiment in this database
|
void |
extractDictionary(int trialID,
java.lang.String dict_file)
Extracts a dictionary from a trial in this database
|
boolean |
fileExists(java.io.File file)
Checks whether the file exists in the database
|
protected void |
finalize()
Closes this database
|
void |
finishBulkFileAdd()
Rebuilds indexes for the file_full_path table
|
void |
finishBulkFileMatchAdd()
Rebuilds indexes for the file_match table
|
void |
finishBulkFingerprintInsertion()
Rebuilds the indexes on the fingerprint_file_id, fingerprint_trial_id, and fingerprint_mangler_id tables
|
void |
finishBulkFingerprintMatchInsertion()
Rebuilds indexes on the fingerprint_match tables
|
void |
finishRun(int runNumber)
Finishes an experiment run
|
java.util.Map<java.lang.Integer,byte[]> |
getAllFingerprintsFromTrial(int trialID)
Provides all the fingerprints and fingerprint IDs for a trial
|
java.util.Set<java.lang.Integer> |
getAllMatchingFileIDs(int fileID)
Provides all the file IDs of files that have matches
|
java.lang.String |
getExperimentDatasetPath(int experimentID)
Provides the path of the dataset used in an experiment
|
java.lang.String |
getExperimentDescription(int experimentID)
Provides the description for an experiment
|
double |
getExperimentDictionarySetting(int experimentID)
Provides the count or percent size of files used for the dictionary in an experiment
|
java.lang.String |
getExperimentFingerprinterName(int experimentID)
Provides the fingerprinter name used in an experiment
|
java.lang.String |
getExperimentLanguage(int experimentID)
Provides the language used in an experiment
|
java.util.List<java.lang.String> |
getExperimentManglerSettings(int experimentID)
Provides the mangler names and settings used in an experiment
|
java.lang.String |
getExperimentMatcherName(int experimentID)
Provides the matcher name used in an experiment
|
int |
getExperimentMatcherParameter(int experimentID)
Provides the minimum score used for the matcher in an experiment
|
double |
getExperimentMaximumIDF(int experimentID)
Provides the maximum normalized IDF for an experiment's dictionary
|
double |
getExperimentMinimumIDF(int experimentID)
Provides the minimum normalized IDF for an experiment's dictionary
|
double |
getExperimentSampleSetting(int experimentID)
Provides the count or percent size of the sample of files used for an experiment
|
java.util.List<java.lang.String> |
getExperimentTokenizers(int experimentID)
Provides the tokenizer names used in an experiment
|
int |
getExperimentTrialCount(int experimentID)
Provides the number of trials in an experiment
|
int |
getFileID(java.io.File file)
Provides the ID of the file or 0 if it is not in this database
|
int |
getManglerID(java.lang.String manglerSettings)
Provides the ID for the existing mangler setting that matches or creates a new one
|
java.util.List<java.lang.Integer> |
getManglersForTrial(int trialID)
Provides all the mangler IDs for a trial
|
int |
getMatchingFileCount()
Provides the number of file matches
|
java.util.Map<java.lang.Integer,byte[]> |
getOriginalFingerprintsFromTrial(int trialID)
Provides the fingerprints and fingerprint IDs with null manglers for a trial
|
int |
getTokenizerID(java.lang.String tokenizer)
Provides the ID of the tokenizer or 0 if it is not in the tokenizer table of this database
|
java.util.Map<java.lang.Integer,java.lang.Boolean> |
getTrialFileIDs(int trialID)
Provides all the file IDs and whether they have matches in a trial
|
void |
insertExperimentResults(int experimentID,
int numTrials)
Inserts experiment results into database; computes average, standard deviation and error for precision, recall,
fscore and dictionary size
|
int |
insertFingerprint(java.io.File file,
byte[] fingerprint,
int trialID,
int manglerID)
Inserts fingerprint information into the database and returns the corresponding fingerprint ID
|
java.util.List<java.io.File> |
matches(java.io.File file)
Matches the given file against the files in this database
|
int |
newDictionary(Dictionary dictionary)
Inserts the compressed dictionary into blob space and stores the size and OID for it in the database
|
int |
newExperiment(int runNumber,
java.lang.String description,
java.lang.String dataset,
java.lang.String datasetPath,
java.lang.String configFileName,
java.lang.String fingerprinterName,
java.lang.String matcherName,
int matcherScore,
double minIDF,
double maxIDF,
double sampleParameter,
double dictionaryParameter,
java.lang.String language)
Creates a new experiment
|
int |
newTrial(long randomSeed)
Creates a new trial
|
void |
saveTokenizers(int experimentID,
java.util.List<java.lang.String> tokenizerVec)
Adds a list of tokenizers to an experiment
|
void |
setTrueMatches(int fingerprintID)
Sets all the matches for each fingerprint ID in this database
|
void |
startBulkFileAdd()
Drops all the indexes on the file table of this database
|
void |
startBulkFileMatchAdd()
Loads all the existing matches into a cache of file matches
|
void |
startBulkFingerprintInsertion()
Drops the indexes on the fingerprint_file_id, fingerprint_trial_id, and fingerprint_mangler_id tables
|
void |
startBulkFingerprintMatchInsertion(int trialID)
Drops the indexes on the fingerprint_match tables
|
int |
startNewRun()
Starts a new experiment run
|
public DBInterface(java.lang.String databaseName)
databaseName
- the string name of the database to connect to
public int startNewRun()
int
new run numberpublic void finishRun(int runNumber)
runNumber
- the int
run number to finishpublic void clearExperimentResults(int experimentID)
experimentID
- the int
experiment ID to removepublic void insertExperimentResults(int experimentID, int numTrials)
experimentID
- the int
experiment ID for results to insertnumTrials
- the int
number of trials runpublic void close()
protected void finalize()
finalize
in class java.lang.Object
public boolean fileExists(java.io.File file)
file
- the file to check
boolean
whether the file exists in the database
public int getFileID(java.io.File file)
file
- the file to checkint
file ID; returns 0 if file does not exist in databasepublic void startBulkFileMatchAdd()
public void bulkFileMatchAdd(java.io.File file, java.io.File match, double minIDF, double maxIDF)
file
- the matching file
match
- the matching file
minIDF
- the double
minimum normalized IDFmaxIDF
- the double
maximum normalized IDFpublic void finishBulkFileMatchAdd()
public void addFileMatch(java.io.File source, java.io.File target, double minIDF, double maxIDF)
source
- the file to check for matchtarget
- the file to check for matchminIDF
- the double
minimum normalized IDFmaxIDF
- the double
maximum normalized IDFpublic void executeFileMatch()
public void startBulkFileAdd()
public void bulkFileAdd(java.io.File file)
file
- the file to add to this databasepublic void finishBulkFileAdd()
public int addFile(java.io.File file)
file
- the file to add to the databaseint
file IDpublic java.util.List<java.io.File> matches(java.io.File file)
file
- the file to match against all other files in this databasepublic void changeRandomSeed(int trialID, long randomSeed)
trialID
- the int
trial IDrandomSeed
- the long
random seedpublic void addDictionaryToTrial(int trialID, int dictionaryID)
trialID
- the int
trial IDdictionaryID
- the int
dictionary IDpublic int newDictionary(Dictionary dictionary)
dictionary
- the trial dictionary to be inserted in this databaseint
dictionary IDpublic int newExperiment(int runNumber, java.lang.String description, java.lang.String dataset, java.lang.String datasetPath, java.lang.String configFileName, java.lang.String fingerprinterName, java.lang.String matcherName, int matcherScore, double minIDF, double maxIDF, double sampleParameter, double dictionaryParameter, java.lang.String language)
runNumber
- the int
run number for this experiment; 0 if not inserting in database
description
- the string human-readable description of this experiment
dataset
- the string name of the database to insert this experiment
datasetPath
- the path to the files to use for the dictionary and sample in this experiment
configFileName
- the string filename of the configuration file for this experiment
fingerprinterName
- the string name of the fingerprinter used in this experiment
matcherName
- the string name of the matcher used for fingerprint comparisons in this experiment
matcherScore
- the int
minimum score that the matcher considered a match
minIDF
- the double
minimum normalized IDF kept in the dictionary
maxIDF
- the double
maximum normalized IDF kept in the dictionary
sampleParameter
- the double
containing the counts or percents used for the file sample size in this
experiment; numbers less than 1 are processed as percents, numbers greater than or equal to 1 are
processed as counts
dictionaryParameter
- the double
containing the counts or percents used for dictionary creation in this experiment;
numbers less than 1 are processed as percents, numbers greater than or equal to 1 are processed as counts
language
- the string language of the files used in this experiment
int experiment ID; returns 0 if experiment does not exist in database
public int newTrial(long randomSeed)
randomSeed
- the long
value representing the random seed to use for the new trial
int trial ID; returns 0 if trial does not exist in database
public int getMatchingFileCount()
int
count of file matchespublic int getTokenizerID(java.lang.String tokenizer)
tokenizer
- the string name of the tokenizerint
tokenizer ID; returns 0 if tokenizer does not exist in databasepublic int addTokenizer(java.lang.String tokenizer)
tokenizer
- the string name of the tokenizerint
tokenizer ID; returns 0 if tokenizer does not exist in databasepublic void saveTokenizers(int experimentID, java.util.List<java.lang.String> tokenizerVec)
experimentID
- the int
experiment ID to look up in the databasetokenizerVec
- the list of tokenizer stringspublic void startBulkFingerprintInsertion()
public void finishBulkFingerprintInsertion()
public int insertFingerprint(java.io.File file, byte[] fingerprint, int trialID, int manglerID)
file
- the file that was fingerprintedfingerprint
- the byte
array containing the fingerprint to inserttrialID
- the int
trial ID to insertmanglerID
- the int
mangler ID to insertint
fingerprint ID; returns 0 if fingerprint does not exist in databasepublic int getManglerID(java.lang.String manglerSettings)
manglerSettings
- the string mangler settingsint
mangler ID; returns 0 if mangler does not exist in databasepublic void addManglerToTrial(int trialID, int manglerID)
trialID
- the int
trial ID to insertmanglerID
- the int
mangler ID to insertpublic void addTrialToExperiment(int experimentID, int trialID)
experimentID
- the int
experiment ID to look up in the databasetrialID
- the int
trial ID to insertpublic void addNullManglerToTrial(int trialID)
trialID
- the int
trial ID to insertpublic java.util.List<java.lang.Integer> getManglersForTrial(int trialID)
trialID
- the int
trial ID to loadpublic java.util.Map<java.lang.Integer,java.lang.Boolean> getTrialFileIDs(int trialID)
trialID
- the int
trial ID to loadpublic java.util.Set<java.lang.Integer> getAllMatchingFileIDs(int fileID)
fileID
- the int
file ID to loadpublic java.util.Map<java.lang.Integer,byte[]> getOriginalFingerprintsFromTrial(int trialID)
trialID
- the int
trial ID to loadpublic java.util.Map<java.lang.Integer,byte[]> getAllFingerprintsFromTrial(int trialID)
trialID
- the int
trial ID to loadpublic void startBulkFingerprintMatchInsertion(int trialID)
trialID
- the int
trial ID to droppublic void addToFingerprintMatch(int trialID, int sourceFingerprintID, int targetFingerprintID, boolean doesMatch, boolean shouldMatch, int score)
trialID
- the int
trial ID to insert into the fingerprint_match tablesourceFingerprintID
- the int
fingerprint id to insert into the fingerprint_match tabletargetFingerprintID
- the int
fingerprint id to insert into the fingerprint_match tabledoesMatch
- the boolean
whether the experiment matcher found the two fingerprints to have matching filesshouldMatch
- the boolean
whether the two fingerprints have matching filespublic void finishBulkFingerprintMatchInsertion()
public void setTrueMatches(int fingerprintID)
fingerprintID
- the int
fingerprint ID to look up in the databasepublic int countResults(int trialID, int sourceMangler, int targetMangler, boolean shouldMatch, boolean doesMatch)
trialID
- the int
trial ID to load from the fingerprint_match tablesourceMangler
- the int
mangler id to load from the fingerprint_match tabletargetMangler
- the int
mangler id to load from the fingerprint_match tableshouldMatch
- the boolean
whether the two fingerprints have matching files (to load from the
fingerprint_match table)doesMatch
- the boolean
whether the experiment matcher found the two fingerprints to have matching files
(to load from the fingerprint_match table)int
number of results found for the given manglers and matching resultspublic void addTrialResults(int trialID, int manglerID, int truePositive, int trueNegative, int falsePositive, int falseNegative, double precision, double recall, double fscore)
trialID
- the int
trial ID to insert
manglerID
- the int
mangler ID to insert
truePositive
- the int
count of true positives for the mangler in this trial
trueNegative
- the int
count of true negatives for the mangler in this trial
falsePositive
- the int
count of false positives for the mangler in this trial
falseNegative
- the int
count of false negatives for the mangler in this trial
precision
- the double
precision for the mangler in this trial
recall
- the double
recall for the mangler in this trial
fscore
- the double
fscore for the mangler in this trial
public void extractDictionary(int trialID, java.lang.String dict_file)
trialID
- the int
trial ID to loaddict_file
- the string filename to write the dictionary topublic void extractConfigFile(int experimentID, java.lang.String config_file)
experimentID
- the int
experiment ID to loadconfig_file
- the string filename to write the configuration file topublic java.lang.String getExperimentDescription(int experimentID)
experimentID
- the int
experiment ID to look up in the databasepublic java.lang.String getExperimentDatasetPath(int experimentID)
experimentID
- the int
experiment ID to look up in the databasepublic java.lang.String getExperimentFingerprinterName(int experimentID)
experimentID
- the int
experiment ID to look up in the databasepublic java.lang.String getExperimentMatcherName(int experimentID)
experimentID
- the int
experiment ID to look up in the databasepublic int getExperimentMatcherParameter(int experimentID)
experimentID
- the int
experiment ID to look up in the databaseint
matcher minimum score parameterpublic double getExperimentMinimumIDF(int experimentID)
experimentID
- the int
experiment ID to look up in the databasedouble
minimum normalized IDF used in the dictionarypublic double getExperimentMaximumIDF(int experimentID)
experimentID
- the int
experiment ID to look up in the databasedouble
maximum normalized IDF used in the dictionarypublic double getExperimentSampleSetting(int experimentID)
experimentID
- the int
experiment ID to look up in the databasedouble
count or percent of files used; returns 0.0 if sample setting does not exist in
database
public double getExperimentDictionarySetting(int experimentID)
experimentID
- the int
experiment ID to look up in the databasedouble
count or percent of files used; returns 0.0 if dictionary setting does not exist in
databasepublic java.lang.String getExperimentLanguage(int experimentID)
experimentID
- the int
experiment ID to look up in the databasepublic java.util.List<java.lang.String> getExperimentManglerSettings(int experimentID)
experimentID
- the int
experiment ID to look up in the databasepublic java.util.List<java.lang.String> getExperimentTokenizers(int experimentID)
experimentID
- the int
experiment ID to look up in the databasepublic int getExperimentTrialCount(int experimentID)
experimentID
- the int
experiment ID to look up in the databaseint
number of trials