Changeset 388
- Timestamp: 05/14/08 17:51:58 (4 years ago)
- Location: datacleaner/DataCleaner-core/trunk/src
- Files: 5 edited
  - main/java/dk/eobjects/datacleaner/profiler/pattern/PatternFinderProfile.java (modified) (3 diffs)
  - main/java/dk/eobjects/datacleaner/profiler/pattern/PatternRecognizer.java (modified) (5 diffs)
  - test/java/dk/eobjects/datacleaner/execution/ProfileRunnerTest.java (modified) (3 diffs)
  - test/java/dk/eobjects/datacleaner/profiler/pattern/PatternFinderProfileTest.java (modified) (1 diff)
  - test/java/dk/eobjects/datacleaner/profiler/pattern/PatternRecognizerTest.java (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
datacleaner/DataCleaner-core/trunk/src/main/java/dk/eobjects/datacleaner/profiler/pattern/PatternFinderProfile.java
r386 r388 26 26 import dk.eobjects.datacleaner.profiler.IMatrix; 27 27 import dk.eobjects.datacleaner.profiler.MatrixBuilder; 28 import dk.eobjects.datacleaner.profiler.MatrixValue; 29 import dk.eobjects.metamodel.data.IRowFilter; 28 30 import dk.eobjects.metamodel.data.Row; 29 31 import dk.eobjects.metamodel.schema.Column; … … 41 43 _patternRecognizers.put(column, patternRecognizer); 42 44 } 43 for (int i = 0; i < valueCount; i++) { 44 if (value != null) { 45 patternRecognizer.addInstance(value.toString()); 46 } 45 if (value != null) { 46 patternRecognizer.addInstance(value.toString(), valueCount); 47 47 } 48 48 } … … 52 52 List<IMatrix> result = new ArrayList<IMatrix>(); 53 53 54 for ( Column column : _columns) {54 for (final Column column : _columns) { 55 55 MatrixBuilder mb = new MatrixBuilder(); 56 56 mb.addColumn(column.getName()); 57 57 58 PatternRecognizer patternRecognizer = _patternRecognizers58 final PatternRecognizer patternRecognizer = _patternRecognizers 59 59 .get(column); 60 Map<String, Integer> patterns = patternRecognizer 61 .identifyPatterns(); 60 Map<String, Long> patterns = patternRecognizer.identifyPatterns(); 62 61 Set<String> keys = patterns.keySet(); 63 for (String patternName : keys) { 64 Integer patternCount = patterns.get(patternName); 65 mb.addRow(patternName, patternCount); 62 for (final String patternName : keys) { 63 Long patternCount = patterns.get(patternName); 64 MatrixValue[] matrixValues = mb.addRow(patternName, 65 patternCount); 66 MatrixValue mv = matrixValues[0]; 67 mv.setDetailSource(getBaseQuery(column)); 68 mv.addDetailRowFilter(new IRowFilter() { 69 70 public boolean accept(Row row) { 71 Object value = row.getValue(column); 72 if (value != null) { 73 return patternRecognizer.patternEquals(patternName, 74 value.toString()); 75 } 76 return false; 77 } 78 79 }); 66 80 } 67 81 if (!mb.isEmpty()) { -
datacleaner/DataCleaner-core/trunk/src/main/java/dk/eobjects/datacleaner/profiler/pattern/PatternRecognizer.java
r281 r388 28 28 private Map<String, PatternDefinition> _patternMap = new TreeMap<String, PatternDefinition>(); 29 29 30 public void addInstance(String string ) {30 public void addInstance(String string, long count) { 31 31 Token[] tokens = _tokenizer.tokenize(string); 32 String patternMapKey = toPattern(tokens); 33 34 PatternDefinition patternDefinition = _patternMap.get(patternMapKey); 35 if (patternDefinition == null) { 36 patternDefinition = new PatternDefinition(); 37 _patternMap.put(patternMapKey, patternDefinition); 38 } 39 patternDefinition.addInstanceData(tokens, count); 40 } 41 42 /** 43 * Creates a unique string representing this pattern's token-composition 44 */ 45 private static String toPattern(Token[] tokens) { 32 46 StringBuilder sb = new StringBuilder(); 33 47 for (int i = 0; i < tokens.length; i++) { … … 40 54 } 41 55 } 42 43 String patternMapKey = sb.toString(); 44 sb = null; 45 46 PatternDefinition patternDefinition = _patternMap.get(patternMapKey); 47 if (patternDefinition == null) { 48 patternDefinition = new PatternDefinition(); 49 _patternMap.put(patternMapKey, patternDefinition); 50 } 51 patternMapKey = null; 52 patternDefinition.addInstanceData(tokens); 56 return sb.toString(); 53 57 } 54 58 55 public Map<String, Integer> identifyPatterns() {56 Map<String, Integer> result = new LinkedHashMap<String, Integer>();59 public Map<String, Long> identifyPatterns() { 60 Map<String, Long> result = new LinkedHashMap<String, Long>(); 57 61 58 62 Collection<PatternDefinition> values = _patternMap.values(); … … 64 68 } 65 69 70 /** 71 * @param patternName 72 * @param value 73 * @return true if the value is among the instances that produced the 74 * patternName 75 */ 76 public boolean patternEquals(String patternName, String value) { 77 Token[] tokens = _tokenizer.tokenize(value); 78 String patternMapKey = toPattern(tokens); 79 PatternDefinition patternDefinition = _patternMap.get(patternMapKey); 80 if (patternDefinition != null) { 81 return 
patternDefinition.toString().equals(patternName); 82 } 83 return false; 84 } 85 86 /** 87 * Convenience class that holds string representations of each token in a 88 * pattern like "aaa" instead of "foo" or "999" instead of "432". Each 89 * string-representation will grow in size if new token instances appear, so 90 * the tokens "foo" and "foobar" will yield a "aaaaaa" string 91 * representation. 92 * 93 * Furthermore the pattern definition holds a simple counter to increment on 94 * each added observation 95 */ 66 96 private class PatternDefinition { 67 97 68 private int _count = 0;98 private long _count = 0l; 69 99 private String[] _symbols; 70 100 71 public void addInstanceData(Token[] tokens ) {101 public void addInstanceData(Token[] tokens, long count) { 72 102 if (_symbols == null) { 73 103 _symbols = new String[tokens.length]; … … 75 105 } 76 106 77 _count ++;107 _count += count; 78 108 for (int i = 0; i < tokens.length; i++) { 79 109 if (tokens[i].getLength() > _symbols[i].length()) { … … 113 143 } 114 144 115 public intgetCount() {145 public long getCount() { 116 146 return _count; 117 147 } -
datacleaner/DataCleaner-core/trunk/src/test/java/dk/eobjects/datacleaner/execution/ProfileRunnerTest.java
r384 r388 105 105 String[] expectations = { 106 106 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Standard measures,profileClass=class dk.eobjects.datacleaner.profiler.trivial.StandardMeasuresProfile],matrices={Matrix[columnNames={POSTALCODE,OFFICECODE},Row count={7,7},Null values={0,0},Empty values={0,0},Highest value={NSW 2010,7},Lowest value={02107,1}]}]", 107 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={ADDRESSLINE2},aaaaa 999={ 11},??? aaaaa={1},aaaaa aa. 9={1}]}]" };107 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={ADDRESSLINE2},aaaaa 999={MatrixValue[value=11,detailQuery=SELECT CUSTOMERS.ADDRESSLINE2, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.ADDRESSLINE2]},??? aaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.ADDRESSLINE2, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.ADDRESSLINE2]},aaaaa aa. 
9={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.ADDRESSLINE2, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.ADDRESSLINE2]}]}]" }; 108 108 109 109 assertEquals(expectations.length, results.size()); … … 179 179 String[] expectations = { 180 180 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Standard measures,profileClass=class dk.eobjects.datacleaner.profiler.trivial.StandardMeasuresProfile],matrices={Matrix[columnNames={CUSTOMERNAME},Row count={122},Null values={0},Empty values={0},Highest value={giftsbymail.co.uk},Lowest value={ANG Resellers}]}]", 181 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={COUNTRY},aaaaaaaaaaa={ 116},aaaaa aaaaaaa={6}],Matrix[columnNames={CUSTOMERNAME},aaaaaaaaaa aaaaaaaaaaaa={22},aaaaaaaaaaaa aaaaaaaaaaa aaaaaaaaaaaa={15},aaaaaaaaa aaaaaaaaaaaa aaaa.={13},aaaaaaaaaa aaaaaaaaaaaaa, aaa={9},aaaaaaaaaaaa aaaaaaa aaaaaaaaaaaa aaa.={9},aaaaaaaaaa aaaaaaaaaaaa, aaa.={8},aaaaaaaa aaaaaaaaaaaa aaaaaaaaaaaa, aaaa.={8},aaaaaaaaaa aaaaaaaa aaaaaaaaaaa, aaa={8},aaaaaaaa aaaaa aaaaaaaa aaaaaaaaa={3},aaaaa aaaaaaaaa & aaa.={3},?????????????.aaa={2},aaaaaaaaaaaa.aaa={2},aaaaaaaaaa aaa.={2},aaaaaa & aaaa aa.={2},aaaaaaaaaaa.aa.aa={1},aaaa-aaaa aaaaaaaa aaa.={1},aaaa+ aaaaaaaa aaaaaaa={1},aaaa'a aaaaaaaaaaa, aaa={1},aaaaa'a aaaaaaaa aa.={1},aaaaa'a aaaa aaaa={1},a'aaaaaa aaaaaaaaaa={1},aa&a aaaaaaaaaaaa={1},aaaa aaaaa+ aaaaa={1},aaaaaa aaaaa& aa={1},aa aaaaa a'aaaaaaaaa, aa.={1},aaaaaa aaaaa aa aaaa, aa.={1},aaaaaa aaaaaa aaaa aaaaaa, aaa={1},aaa 'a' aa aaaaaaaaa, aaa.={1},aaaaaaa & aaaaaaa, aa.={1},aaaaa & aaaaaaa aa={1}]}]" };181 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={COUNTRY},aaaaaaaaaaa={MatrixValue[value=116,detailQuery=SELECT 
CUSTOMERS.COUNTRY, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.COUNTRY]},aaaaa aaaaaaa={MatrixValue[value=6,detailQuery=SELECT CUSTOMERS.COUNTRY, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.COUNTRY]}],Matrix[columnNames={CUSTOMERNAME},aaaaaaaaaa aaaaaaaaaaaa={MatrixValue[value=22,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaaa aaaaaaaaaaa aaaaaaaaaaaa={MatrixValue[value=15,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaa aaaaaaaaaaaa aaaa.={MatrixValue[value=13,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaaaaaaaaaaaa, aaa={MatrixValue[value=9,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaaa aaaaaaa aaaaaaaaaaaa aaa.={MatrixValue[value=9,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaaaaaaaaaaa, aaa.={MatrixValue[value=8,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaa aaaaaaaaaaaa aaaaaaaaaaaa, aaaa.={MatrixValue[value=8,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaaaaaaa aaaaaaaaaaa, aaa={MatrixValue[value=8,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaa aaaaa aaaaaaaa aaaaaaaaa={MatrixValue[value=3,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa aaaaaaaaa & aaa.={MatrixValue[value=3,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},?????????????.aaa={MatrixValue[value=2,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaaa.aaa={MatrixValue[value=2,detailQuery=SELECT 
CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaa aaa.={MatrixValue[value=2,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa & aaaa aa.={MatrixValue[value=2,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaaaaaa.aa.aa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa-aaaa aaaaaaaa aaa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa+ aaaaaaaa aaaaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa'a aaaaaaaaaaa, aaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa'a aaaaaaaa aa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa'a aaaa aaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},a'aaaaaa aaaaaaaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aa&a aaaaaaaaaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaa aaaaa+ aaaaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa aaaaa& aa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aa aaaaa a'aaaaaaaaa, aa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa aaaaa aa aaaa, aa.={MatrixValue[value=1,detailQuery=SELECT 
CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaa aaaaaa aaaa aaaaaa, aaa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaa 'a' aa aaaaaaaaa, aaa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaaaa & aaaaaaa, aa.={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]},aaaaa & aaaaaaa aa={MatrixValue[value=1,detailQuery=SELECT CUSTOMERS.CUSTOMERNAME, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNAME]}]}]" }; 182 182 183 183 assertEquals(2, results.size()); … … 215 215 216 216 String[] expectations = { 217 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={EMPLOYEENUMBER},9999={ 23}]}]",218 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={CUSTOMERNUMBER},999={ 122}]}]",217 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={EMPLOYEENUMBER},9999={MatrixValue[value=23,detailQuery=SELECT EMPLOYEES.EMPLOYEENUMBER, COUNT(*) FROM EMPLOYEES GROUP BY EMPLOYEES.EMPLOYEENUMBER]}]}]", 218 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={CUSTOMERNUMBER},999={MatrixValue[value=122,detailQuery=SELECT CUSTOMERS.CUSTOMERNUMBER, COUNT(*) FROM CUSTOMERS GROUP BY CUSTOMERS.CUSTOMERNUMBER]}]}]", 219 219 
"ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Standard measures,profileClass=class dk.eobjects.datacleaner.profiler.trivial.StandardMeasuresProfile],matrices={Matrix[columnNames={ADDRESSLINE1,ADDRESSLINE2},Row count={122,122},Null values={0,MatrixValue[value=109,detailQuery=SELECT CUSTOMERS.ADDRESSLINE1, CUSTOMERS.ADDRESSLINE2 FROM CUSTOMERS WHERE CUSTOMERS.ADDRESSLINE2 IS NULL]},Empty values={0,0},Highest value={Ã 220 220 kergatan 24,Suite 750},Lowest value={1 rue Alsace-Lorraine,2nd Floor}]}]" }; -
datacleaner/DataCleaner-core/trunk/src/test/java/dk/eobjects/datacleaner/profiler/pattern/PatternFinderProfileTest.java
r372 r388 57 57 58 58 assertEquals( 59 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={PRODUCTLINE},aaaaaaa aaaa={ 62},aaaaaaaaaaa={37},aaaaaa aaa aaaaa={11}]}]",59 "ProfileResult[profileDescriptor=BasicProfileDescriptor[displayName=Pattern finder,profileClass=class dk.eobjects.datacleaner.profiler.pattern.PatternFinderProfile],matrices={Matrix[columnNames={PRODUCTLINE},aaaaaaa aaaa={MatrixValue[value=62,detailQuery=SELECT PRODUCTS.PRODUCTLINE, COUNT(*) FROM PRODUCTS GROUP BY PRODUCTS.PRODUCTLINE]},aaaaaaaaaaa={MatrixValue[value=37,detailQuery=SELECT PRODUCTS.PRODUCTLINE, COUNT(*) FROM PRODUCTS GROUP BY PRODUCTS.PRODUCTLINE]},aaaaaa aaa aaaaa={MatrixValue[value=11,detailQuery=SELECT PRODUCTS.PRODUCTLINE, COUNT(*) FROM PRODUCTS GROUP BY PRODUCTS.PRODUCTLINE]}]}]", 60 60 result.toString()); 61 61 } -
datacleaner/DataCleaner-core/trunk/src/test/java/dk/eobjects/datacleaner/profiler/pattern/PatternRecognizerTest.java
r281 r388 27 27 public void testIdentifyAddressPatterns() throws Exception { 28 28 PatternRecognizer patternRecognizer = new PatternRecognizer(); 29 patternRecognizer.addInstance("Osterbrogade 3, DK2100 Koebenhavn O"); 30 patternRecognizer.addInstance("Noerrebrogade 214, DK2200 Koebenhavn N"); 31 patternRecognizer.addInstance("Osterbrogade 4, 2100 Koebenhavn O"); 32 patternRecognizer.addInstance("Noerrebrogade 215, 2200 Koebenhavn N"); 33 patternRecognizer.addInstance("Byvej 2, 2. th, 2200 Koebenhavn N"); 29 patternRecognizer 30 .addInstance("Osterbrogade 3, DK2100 Koebenhavn O", 1l); 31 patternRecognizer.addInstance("Noerrebrogade 214, DK2200 Koebenhavn N", 32 1l); 33 patternRecognizer.addInstance("Osterbrogade 4, 2100 Koebenhavn O", 1l); 34 patternRecognizer.addInstance("Noerrebrogade 215, 2200 Koebenhavn N", 35 1l); 36 patternRecognizer.addInstance("Byvej 2, 2. th, 2200 Koebenhavn N", 1l); 34 37 35 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns();38 Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 36 39 37 40 assertEquals(3, patternMap.size()); 38 41 39 IntegerpatternCount = patternMap42 Long patternCount = patternMap 40 43 .get("aaaaaaaaaaaaa 999, 9999 aaaaaaaaaa a"); 41 44 assertEquals(2, patternCount.intValue()); … … 50 53 public void testIdentifyNamePatterns() throws Exception { 51 54 PatternRecognizer patternRecognizer = new PatternRecognizer(); 52 patternRecognizer.addInstance("Kasper Soerensen" );53 patternRecognizer.addInstance("Mr. Kasper Soerensen" );54 patternRecognizer.addInstance("Soerensen, Kasper" );55 patternRecognizer.addInstance("Mr Kasper Soerensen" );56 patternRecognizer.addInstance("Jesper Lind" );55 patternRecognizer.addInstance("Kasper Soerensen", 1l); 56 patternRecognizer.addInstance("Mr. 
Kasper Soerensen", 1l); 57 patternRecognizer.addInstance("Soerensen, Kasper", 1l); 58 patternRecognizer.addInstance("Mr Kasper Soerensen", 1l); 59 patternRecognizer.addInstance("Jesper Lind", 1l); 57 60 58 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns();61 Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 59 62 assertEquals(4, patternMap.size()); 60 63 61 IntegerpatternCount = patternMap.get("aa. aaaaaa aaaaaaaaa");64 Long patternCount = patternMap.get("aa. aaaaaa aaaaaaaaa"); 62 65 assertEquals(1, patternCount.intValue()); 63 66 … … 74 77 public void testSingleCharacter() throws Exception { 75 78 PatternRecognizer patternRecognizer = new PatternRecognizer(); 76 patternRecognizer.addInstance("a" );77 patternRecognizer.addInstance("b" );78 Map<String, Integer> patterns = patternRecognizer.identifyPatterns();79 patternRecognizer.addInstance("a", 1l); 80 patternRecognizer.addInstance("b", 1l); 81 Map<String, Long> patterns = patternRecognizer.identifyPatterns(); 79 82 assertEquals(1, patterns.size()); 80 83 assertEquals(2, patterns.get("a").intValue()); … … 83 86 public void testNumberInput() throws Exception { 84 87 PatternRecognizer patternRecognizer = new PatternRecognizer(); 85 patternRecognizer.addInstance("124" );86 patternRecognizer.addInstance("4" );87 patternRecognizer.addInstance("4324" );88 patternRecognizer.addInstance("543" );89 patternRecognizer.addInstance("2" );90 patternRecognizer.addInstance("31" );91 patternRecognizer.addInstance("943242872" );88 patternRecognizer.addInstance("124", 1l); 89 patternRecognizer.addInstance("4", 1l); 90 patternRecognizer.addInstance("4324", 1l); 91 patternRecognizer.addInstance("543", 1l); 92 patternRecognizer.addInstance("2", 1l); 93 patternRecognizer.addInstance("31", 1l); 94 patternRecognizer.addInstance("943242872", 1l); 92 95 93 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns();96 Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 94 
97 assertEquals(1, patternMap.size()); 95 98 assertEquals(7, patternMap.get("999999999").intValue()); … … 98 101 public void testToStringCompliance() throws Exception { 99 102 PatternRecognizer patternRecognizer = new PatternRecognizer(); 100 patternRecognizer.addInstance("Kasper Soerensen" );101 patternRecognizer.addInstance("Mr. Kasper Soerensen" );102 patternRecognizer.addInstance("Soerensen, Kasper" );103 patternRecognizer.addInstance("Mr Kasper Soerensen" );104 patternRecognizer.addInstance("Jesper Lind" );103 patternRecognizer.addInstance("Kasper Soerensen", 1l); 104 patternRecognizer.addInstance("Mr. Kasper Soerensen", 1l); 105 patternRecognizer.addInstance("Soerensen, Kasper", 1l); 106 patternRecognizer.addInstance("Mr Kasper Soerensen", 1l); 107 patternRecognizer.addInstance("Jesper Lind", 1l); 105 108 106 Map<String, Integer> patternMap = patternRecognizer.identifyPatterns();109 Map<String, Long> patternMap = patternRecognizer.identifyPatterns(); 107 110 assertEquals( 108 111 "{aaaaaa aaaaaaaaa=2, aa aaaaaa aaaaaaaaa=1, aaaaaaaaa, aaaaaa=1, aa. aaaaaa aaaaaaaaa=1}", 109 112 patternMap.toString()); 110 113 } 114 115 public void testPatternEquals() throws Exception { 116 PatternRecognizer patternRecognizer = new PatternRecognizer(); 117 patternRecognizer.addInstance("Kasper Soerensen", 1l); 118 patternRecognizer.addInstance("Asbjoern Leeth", 1l); 119 Map<String, Long> patterns = patternRecognizer.identifyPatterns(); 120 assertEquals("{aaaaaaaa aaaaaaaaa=2}", patterns.toString()); 121 122 assertTrue(patternRecognizer.patternEquals("aaaaaaaa aaaaaaaaa", 123 "Kasp Soeren")); 124 assertFalse(patternRecognizer.patternEquals("aaaaaaaa aaaaaaaaa", 125 "Kasp Something-with-mixed")); 126 assertFalse(patternRecognizer.patternEquals("aaaaaaaa aaaaaaaaa", 127 "Kasp er Soerensen")); 128 } 111 129 }
Note: See TracChangeset for help on using the changeset viewer.
